
Commit af0b02e

[Marvell BYOC]: Marvell AI Accelerator Integration - Phase 1
1 parent 274c368 commit af0b02e

File tree

23 files changed: +3437 -0 lines changed


CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -128,6 +128,7 @@ tvm_option(USE_CLML_GRAPH_EXECUTOR "Build with CLML graph runtime" OFF)
 tvm_option(USE_UMA "Build with UMA support" OFF)
 tvm_option(USE_VERILATOR "Build with Verilator support" OFF)
 tvm_option(USE_MSC "Enable Multi-System Compiler" OFF)
+tvm_option(USE_MRVL "Build with MRVL TVM support" OFF)

 # include directories
 include_directories(${CMAKE_INCLUDE_PATH})
@@ -581,6 +582,7 @@ include(cmake/modules/contrib/vllm.cmake)
 include(cmake/modules/Git.cmake)
 include(cmake/modules/LibInfo.cmake)
 include(cmake/modules/RustExt.cmake)
+include(cmake/modules/contrib/Mrvl.cmake)

 set(LIBINFO_FILE ${CMAKE_CURRENT_LIST_DIR}/src/support/libinfo.cc)
 add_lib_info(${LIBINFO_FILE})

cmake/config.cmake

Lines changed: 3 additions & 0 deletions
@@ -358,6 +358,9 @@ set(USE_HEXAGON_RPC OFF)
 # Valid values are v65, v66, v68, v69, v73.
 set(USE_HEXAGON_ARCH "v68")

+# Whether to use MRVL codegen
+set(USE_MRVL OFF)
+
 # Whether to use QHL library
 set(USE_HEXAGON_QHL OFF)

cmake/modules/LibInfo.cmake

Lines changed: 1 addition & 0 deletions
@@ -99,6 +99,7 @@ function(add_lib_info src_file)
 TVM_INFO_USE_MICRO="${USE_MICRO}"
 TVM_INFO_USE_MIOPEN="${USE_MIOPEN}"
 TVM_INFO_USE_MKL="${USE_MKL}"
+TVM_INFO_USE_MRVL="${USE_MRVL}"
 TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
 TVM_INFO_USE_NNPACK="${USE_NNPACK}"
 TVM_INFO_USE_OPENCL="${USE_OPENCL}"

cmake/modules/contrib/Mrvl.cmake

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

include(ExternalProject)

if(USE_MRVL)
  # Mrvl Module
  message(STATUS "Build with Mrvl support")
  file(GLOB RUNTIME_MRVL_SRCS
    src/runtime/contrib/mrvl/mrvl_runtime.cc
  )
  list(APPEND RUNTIME_SRCS ${RUNTIME_MRVL_SRCS})
  file(GLOB COMPILER_MRVL_SRCS
    src/relay/backend/contrib/mrvl/codegen.cc
    src/relay/backend/contrib/mrvl/compiler_attr.cc
  )
  list(APPEND COMPILER_SRCS ${COMPILER_MRVL_SRCS})
endif(USE_MRVL)

docker/Dockerfile.demo_mrvl

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Prebuilt ci-cpu image
FROM tlcpack/ci-cpu:20230604-060130-0af9ff90e

docs/how_to/deploy/index.rst

Lines changed: 1 addition & 0 deletions
@@ -176,6 +176,7 @@ target device without relying on RPC. See the following resources on how to do s
 tensorrt
 vitis_ai
 bnns
+mrvl

 Additional Deployment How-Tos
 -----------------------------

docs/how_to/deploy/mrvl.rst

Lines changed: 235 additions & 0 deletions
@@ -0,0 +1,235 @@

.. Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements. See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership. The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License. You may obtain a copy of the License at

..   http://www.apache.org/licenses/LICENSE-2.0

.. Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied. See the License for the
   specific language governing permissions and limitations
   under the License.


Marvell Machine Learning Integration
====================================

1. Introduction
---------------

Marvell(R) supports a family of high-performance Data Processing
Units (DPUs) with integrated compute, high-speed I/O, and workload
accelerators. These workload accelerators include Marvell's
Machine Learning Inference Processor (MLIP), a highly optimized,
integrated inference engine.

TVM supports Marvell's MLIP through the "mrvl" library, which
partitions a model and compiles the supported operations for
accelerated execution on the MLIP, while the remaining operations
are compiled with LLVM for general-purpose compute.

At runtime, the library supports native execution on MLIP hardware
as well as Marvell's ML simulator (mlModel).

The library supports Marvell's Octeon family of processors with ML
accelerators.

This guide demonstrates building TVM with the mrvl codegen and
runtime enabled. It also provides example code to compile and run
models using the mrvl runtime.
2. Building TVM with mrvl support
---------------------------------

2.1 Clone the TVM repo
----------------------

Refer to the following TVM documentation for cloning TVM:
https://tvm.apache.org/docs/install/from_source.html

2.2 Build and start the TVM - mrvl docker container
---------------------------------------------------

.. code:: bash

   ./docker/build.sh demo_mrvl bash                              # Build the docker container
   ./docker/bash.sh tvm.demo_mrvl --env PYTHONPATH=$PWD/python   # Load the docker image
3. Build TVM inside the docker container with mrvl (inside the tvm directory)
------------------------------------------------------------------------------

.. code:: bash

   ./tests/scripts/task_config_build_mrvl.sh build
   cd build
   cmake ..
   make -j$(nproc)   # nproc = number of parallel jobs, e.g. 4 or 8
4. Compiling a model using the TVMC command line
------------------------------------------------

Models can be compiled and run for the mrvl target using TVMC,
which is optimized for performance.

Refer to the following TVMC documentation for the generic tvmc options:
https://tvm.apache.org/docs/tutorial/tvmc_command_line_driver.html

Additional mrvl-specific options may be added as attributes if
necessary. The advanced usage is described later in this document.

4.1 TVMC compilation flow for a model
-------------------------------------

Refer to the following TVM documentation for the compilation flow:
https://tvm.apache.org/docs/arch/index.html#example-compilation-flow
4.2. TVMC - Command line option(s): Syntax for the mrvl target
--------------------------------------------------------------

Compiling an ONNX model with tvmc for the mrvl target.

**Syntax:**

.. code:: bash

   python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
       --target-llvm-<options> \
       --target-mrvl-<options> \
       --<tvm-generic-options> \
       model_file.onnx

The following is an example TVMC compile command for an ARMv9 core and
an integrated MLIP cn10ka processor, using only 4 tiles in the block.

**Example:**

.. code:: bash

   python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
       --target-llvm-mtriple=aarch64-linux-gnu --target-llvm-mcpu=neoverse-n2 \
       --target-mrvl-num_tiles=4 \
       --cross-compiler aarch64-linux-gnu-gcc \
       --output model.tar \
       mnist-12.onnx
4.3. TVMC compiler: mrvl-specific command line options
------------------------------------------------------

.. code:: bash

   --target-mrvl-mcpu
   --target-mrvl-num_tiles
   --target-mrvl-mattr

**Description of mrvl options**

* mcpu:
  The CPU class of the Marvell(R) ML Inference Processor;
  possible values = {cn10ka, cnf10kb}; defaults to cn10ka.

* num_tiles:
  Maximum number of tiles that may be used; possible values = {1, 2, 4, 8}; defaults to 8.

* mattr:
  Attributes for mrvl; possible values = {quantize, wb_pin_ocm}.

mattr specifies the data type, code generation options, and optimizations.

*The list of supported attributes is:*

**1. quantize**

Specifies the data type. Possible values = {fp16, int8}.
The default is fp16; int8 is WIP, and full support will be added in a future PR.

**2. wb_pin_ocm**

Optimizes the runtime by preloading a model's weights and bias into
the on-chip memory. Possible values = {0, 1}. The default is 0 (no preload).
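For reference, roughly the same compilation can be driven from Python
through the TVMC API (a sketch, not part of this PR; it assumes the
per-target CLI flags can be folded into the composite target string,
and reuses the mnist-12.onnx model downloaded in section 6):

.. code:: python

   from tvm.driver import tvmc

   # Per-target CLI flags become attributes inside the composite target string
   model = tvmc.load("mnist-12.onnx")
   tvmc.compile(
       model,
       target="mrvl -num_tiles=4, llvm -mtriple=aarch64-linux-gnu -mcpu=neoverse-n2",
       cross="aarch64-linux-gnu-gcc",  # same role as --cross-compiler
       package_path="model.tar",
   )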
5. Compilation - Generating model partitions
--------------------------------------------

In the TVMC mrvl flow, the model is partitioned into Marvell and LLVM regions.
Building each partitioned Marvell subgraph generates a serialized nodes.json
and const.json. The partitioned nodes.json is the representation of the model
graph suitable for the Marvell mmlc compiler, which is distributed separately
via CDK.

**Model Partition**

.. code:: bash

   python3 -m tvm.driver.tvmc compile --target="mrvl, llvm \
       -mtriple=aarch64-linux-gnu -mcpu=neoverse-n2" \
       --cross-compiler aarch64-linux-gnu-gcc \
       --target-mrvl-num_tiles=4 --output model.tar model.onnx
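The contents of the generated package can be listed with the Python
standard library (a minimal sketch; the exact member names inside the
tar depend on the model and its partitioning):

.. code:: python

   import tarfile

   # List the artifacts produced by the tvmc compile step above
   with tarfile.open("model.tar") as tar:
       for name in tar.getnames():
           print(name)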
6. Compiling a model using Python APIs
--------------------------------------

In addition to using TVMC, models can also be compiled and run using
the TVM Python API. Below is an example that compiles the MNIST model.
Support for running the model will be part of the next PR by mrvl.

**Download the MNIST model from the web**

.. code:: bash

   cd $HOME
   wget https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx
**Import TVM and other dependent modules**

.. code:: python

   import onnx
   import numpy as np
   import tvm
   import tvm.relay as relay
   from tvm.relay.op.contrib.mrvl import partition_for_mrvl
**Load the ONNX model file**

.. code:: python

   onnx_model = onnx.load("mnist-12.onnx")

**Create a Relay graph from the MNIST model**

.. code:: python

   shape_dict = {"Input3": (1, 1, 28, 28)}
   mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
**Define the option dictionary and partition the model**

Annotate and partition the graph for mrvl. All operations supported by
mrvl are marked and offloaded to the mrvl hardware accelerator. The rest
of the operations go through the regular LLVM compilation and code
generation for ARM.

.. code:: python

   tvm_target = "llvm"

   option_dict = {"num_tiles": 4}

   mod = partition_for_mrvl(mod, params, **option_dict)
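As an optional check (a sketch), the offloaded regions appear after
partitioning as separate Relay functions annotated with
``Compiler="mrvl"``, which printing the module makes visible:

.. code:: python

   # Offloaded mrvl subgraphs show up as functions with a Compiler attribute
   print(mod)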
**Build the Relay graph**

Build the Relay graph, using the new module returned by ``partition_for_mrvl``.
The target must always be an LLVM (ARM) target. ``partition_for_mrvl`` will
pass the options from the dictionary into the config parameters needed by the
compiler backend, so there is no need to modify it - just pass it along
to the PassContext so the values can be read during compilation.

.. code:: python

   with tvm.transform.PassContext(opt_level=3, config={"relay.ext.mrvl.options": option_dict}):
       model_lib = relay.build(mod, tvm_target, params=params)
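The compiled artifact can then be saved for deployment (a minimal
sketch; the output file name is arbitrary, and the cross compiler
matches the one used in the TVMC flow):

.. code:: python

   from tvm.contrib import cc

   # Export the compiled module as a shared library for the ARM target
   model_lib.export_library("model_lib.so", cc.cross_compiler("aarch64-linux-gnu-gcc"))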
