Skip to content

Commit f55609b

Browse files
tmoreau89jroesch
authored andcommitted
[VTA] Refactor to increase platform coverage (Ultra96 etc.) (#3496)
* hardware refactor for increased FPGA coverage, small optimizations * fix header * cleaning up parameters that won't be needed for now * streamlining makefile, and simplifying tcl scripts * moving parameter derivation into pkg_config.py, keeping tcl scripts lightweight * refactoring tcl script to avoid global variables * deriving AXI signals in pkg_config.py * unifying address map definition for hardware and software drivers * single channel design for ultra96 to simplify build * enable alu by default, no mul opcode for now * hardware fix * new bitstream; vta version * avoid error when env variable is not set * ultra96 cleanup * further cleaning up tcl script for bitstream generation * preliminary rpc server support on ultra96 * rpc server tracker scripts * ultra96 ldflag * ultra96 support * ultra96 support * cleanup line * cmake support for ultra96 * simplify memory instantiation * cleaning up IP parameter initialization * fix queue instantiation * 2019.1 transition * fix macro def * removing bus width from config * cleanup * fix * turning off testing for now * cleanup ultra96 ps insantiation * minor refactor * adding comments * upgrading to tophub v0.6 * model used in TVM target now refers to a specific version of VTA for better autoTVM scheduling * revert change due to bug * rename driver files to be for zynq-type devices * streamlining address mapping * unifying register map offset values between driver and hardware generator * rely on cma library for cache flush/invalidation * coherence management * not make buffer packing depend on data types that can be wider than 64bits * refactor config derivation to minimize free parameters * fix environment/pkg config interaction * adding cfg dump property to pkgconfig: * fix rpc reconfig * fix spacing * cleanup * fix spacing * long line fix * fix spacing and lint * fix line length * cmake fix * environment fix * renaming after pynq since the driver stack relies on the pynq library - see pynq.io * update doc * adding 
parameterization to name * space * removing reg width * vta RPC * update doc on how to edit vta_config.json * fix path * fix path
1 parent bca8ac1 commit f55609b

File tree

33 files changed

+1610
-2329
lines changed

33 files changed

+1610
-2329
lines changed
File renamed without changes.

apps/pynq_rpc/start_rpc_server_to_tracker.sh renamed to apps/vta_rpc/start_rpc_server_to_tracker.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
# under the License.
1818
PROJROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../" && pwd )"
1919

20+
# Derive target specified by vta_config.json
21+
VTA_CONFIG=${PROJROOT}/vta/config/vta_config.py
22+
TARGET=$(python ${VTA_CONFIG} --target)
2023

2124
export PYTHONPATH=${PYTHONPATH}:${PROJROOT}/python:${PROJROOT}/vta/python
2225
export PYTHONPATH=${PYTHONPATH}:/home/xilinx/pynq
23-
python3 -m vta.exec.rpc_server --tracker fleet:9190 --key pynq
26+
python3 -m vta.exec.rpc_server --tracker fleet:9190 --key $TARGET

cmake/modules/VTA.cmake

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,25 @@ elseif(PYTHON)
3838
string(REGEX MATCHALL "(^| )-D[A-Za-z0-9_=.]*" VTA_DEFINITIONS "${__vta_defs}")
3939

4040
file(GLOB VTA_RUNTIME_SRCS vta/src/*.cc)
41-
file(GLOB __vta_target_srcs vta/src/${VTA_TARGET}/*.cc)
41+
# Add sim driver sources
42+
if(${VTA_TARGET} STREQUAL "sim")
43+
file(GLOB __vta_target_srcs vta/src/sim/*.cc)
44+
endif()
45+
# Add pynq driver sources
46+
if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
47+
file(GLOB __vta_target_srcs vta/src/pynq/*.cc)
48+
endif()
4249
list(APPEND VTA_RUNTIME_SRCS ${__vta_target_srcs})
43-
44-
add_library(vta SHARED ${VTA_RUNTIME_SRCS})
45-
50+
# Add tsim driver sources
4651
if(${VTA_TARGET} STREQUAL "tsim")
4752
target_compile_definitions(vta PUBLIC USE_TSIM)
4853
include_directories("vta/include")
4954
file(GLOB RUNTIME_DPI_SRCS vta/src/dpi/module.cc)
5055
list(APPEND RUNTIME_SRCS ${RUNTIME_DPI_SRCS})
5156
endif()
5257

58+
add_library(vta SHARED ${VTA_RUNTIME_SRCS})
59+
5360
target_include_directories(vta PUBLIC vta/include)
5461

5562
foreach(__def ${VTA_DEFINITIONS})
@@ -62,7 +69,7 @@ elseif(PYTHON)
6269
endif(APPLE)
6370

6471
# PYNQ rules for Pynq v2.4
65-
if(${VTA_TARGET} STREQUAL "pynq")
72+
if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
6673
find_library(__cma_lib NAMES cma PATH /usr/lib)
6774
target_link_libraries(vta ${__cma_lib})
6875
endif()

docs/vta/dev/config.rst

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,6 @@ below.
3636
+=======================+============+========================================================+
3737
| ``TARGET`` | String | The TVM device target. |
3838
+-----------------------+------------+--------------------------------------------------------+
39-
| ``HW_TARGET`` | Int | FPGA frequency in MHz. |
40-
+-----------------------+------------+--------------------------------------------------------+
41-
| ``HW_CLK_TARGET`` | Int | FPGA clock period in ns target for HLS tool. |
42-
+-----------------------+------------+--------------------------------------------------------+
4339
| ``HW_VER`` | String | VTA hardware version number. |
4440
+-----------------------+------------+--------------------------------------------------------+
4541
| ``LOG_INP_WIDTH`` | Int (log2) | Input data type signed integer width. |
@@ -48,13 +44,9 @@ below.
4844
+-----------------------+------------+--------------------------------------------------------+
4945
| ``LOG_ACC_WIDTH`` | Int (log2) | Accumulator data type signed integer width. |
5046
+-----------------------+------------+--------------------------------------------------------+
51-
| ``LOG_OUT_WIDTH`` | Int (log2) | Output data type signed integer width. |
52-
+-----------------------+------------+--------------------------------------------------------+
53-
| ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic output dimension 0. |
54-
+-----------------------+------------+--------------------------------------------------------+
55-
| ``LOG_BLOCK_IN`` | Int (log2) | VTA matrix multiply reduction dimension. |
47+
| ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic input/output dimension 0.|
5648
+-----------------------+------------+--------------------------------------------------------+
57-
| ``LOG_BLOCK_OUT`` | Int (log2) | VTA matrix multiply intrinsic output dimension 1. |
49+
| ``LOG_BLOCK`` | Int (log2) | VTA matrix multiply inner dimensions. |
5850
+-----------------------+------------+--------------------------------------------------------+
5951
| ``LOG_UOP_BUFF_SIZE`` | Int (log2) | Micro-op on-chip buffer in Bytes. |
6052
+-----------------------+------------+--------------------------------------------------------+
@@ -75,13 +67,8 @@ below.
7567

7668
We provide additional detail below regarding each parameter:
7769

78-
- ``TARGET``: Can be set to ``"pynq"`` or ``"sim"``.
79-
- ``HW_TARGET``: In pynq mode, can be set to ``100``, ``142``, ``167``, or ``200`` MHz.
80-
- ``HW_CLK_TARGET``: The lower the target, the more pipeline stages HLS will insert to achieve timing closure during place and route (this can also slightly decrease performance).
70+
- ``TARGET``: Can be set to ``"pynq"``, ``"ultra96"``, ``"sim"`` (fast simulator), or ``"tsim"`` (cycle accurate sim with verilator).
8171
- ``HW_VER``: Hardware version which increments every time the VTA hardware design changes. This parameter is used to uniquely identify hardware bitstreams.
82-
- ``LOG_OUT_WIDTH``: We recommend matching ``LOG_OUT_WIDTH`` to ``LOG_INP_WIDTH``.
83-
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension.
84-
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension.
85-
- ``LOG_BLOCK_IN``: Equivalent to B in multiplication of shape (A, B) x (B, C), or typically, the input channel dimension.
86-
- ``LOG_BLOCK_OUT``: Equivalent to C in multiplication of shape (A, B) x (B, C), or typically, the output channel dimension.
72+
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension of inner tensor computation.
73+
- ``LOG_BLOCK``: Equivalent to B and C in multiplication of shape (A, B) x (B, C), or typically, the input/output channel dimensions of the inner tensor computation.
8774

docs/vta/install.md

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ To do so,
6161

6262
```bash
6363
cd <tvm root>
64-
cp vta/config/vta_config.json vta_config.json
64+
vim vta/config/vta_config.json
6565
# edit vta_config.json
6666
make vta
6767
```
@@ -118,7 +118,7 @@ cd /home/xilinx/tvm
118118
mkdir build
119119
cp cmake/config.cmake build/.
120120
# Copy pynq specific configuration
121-
cp vta/config/pynq_sample.json build/vta_config.json
121+
cp vta/config/pynq_sample.json vta/config/vta_config.json
122122
cd build
123123
cmake ..
124124
make runtime vta -j2
@@ -147,13 +147,12 @@ export VTA_PYNQ_RPC_PORT=9091
147147
```
148148

149149
In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`.
150-
Alternatively, you can copy the default `vta/config/pynq_sample.json` into the TVM root as `vta_config.json`.
151150
> Note: in contrast to our simulation setup, there are no libraries to compile on the host side since the host offloads all of the computation to the Pynq board.
152151
153152
```bash
154153
# On the Host-side
155154
cd <tvm root>
156-
cp vta/config/pynq_sample.json vta_config.json
155+
cp vta/config/pynq_sample.json vta/config/vta_config.json
157156
```
158157

159158
This time again, we will run the 2D convolution testbench.
@@ -187,28 +186,28 @@ This third and last guide allows users to generate custom VTA bitstreams using f
187186

188187
### Xilinx Toolchain Installation
189188

190-
We recommend using `Vivado 2018.2` since our scripts have been tested to work on this version of the Xilinx toolchains.
189+
We recommend using `Vivado 2019.1` since our scripts have been tested to work on this version of the Xilinx toolchains.
191190
Our guide is written for Linux (Ubuntu) installation.
192191

193-
You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2018.2](https://www.xilinx.com/products/design-tools/vivado.html), which a license-free version of the Vivado HLx toolchain.
192+
You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2019.1](https://www.xilinx.com/products/design-tools/vivado.html), which is a license-free version of the Vivado HLx toolchain.
194193

195194
#### Obtaining and Launching the Vivado GUI Installer
196195

197-
1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2018-2.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2018.2: WebPACK and Editions.
196+
1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2019-1.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2019.1: WebPACK and Editions.
198197
2. You’ll have to sign in with a Xilinx account. This requires a Xilinx account creation that will take 2 minutes.
199-
3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin`.
198+
3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin`.
200199
4. Now that the file is downloaded, go to your `Downloads` directory, and change the file permissions so it can be executed:
201200
```bash
202-
chmod u+x Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin
201+
chmod u+x Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
203202
```
204203
5. Now you can execute the binary:
205204
```bash
206-
./Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin
205+
./Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
207206
```
208207

209208
#### Xilinx Vivado GUI Installer Steps
210209

211-
At this point you've launched the Vivado 2018.2 Installer GUI program.
210+
At this point you've launched the Vivado 2019.1 Installer GUI program.
212211

213212
1. Click “Next” on the *Welcome* screen.
214213
2. On the *Select Install Type* screen, enter your Xilinx user credentials under the “User Authentication” box and select the “Download and Install Now” option before clicking “Next”.
@@ -230,8 +229,8 @@ At this point you've launched the Vivado 2018.2 Installer GUI program.
230229

231230
The last step is to update your `~/.bashrc` with the following lines. This will include all of the Xilinx binary paths so you can launch compilation scripts from the command line.
232231
```bash
233-
# Xilinx Vivado 2018.2 environment
234-
export XILINX_VIVADO=${XILINX_PATH}/Vivado/2018.2
232+
# Xilinx Vivado 2019.1 environment
233+
export XILINX_VIVADO=${XILINX_PATH}/Vivado/2019.1
235234
export PATH=${XILINX_VIVADO}/bin:${PATH}
236235
```
237236

python/tvm/autotvm/tophub.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
'opencl': "v0.02",
4545
'mali': "v0.05",
4646

47-
'vta': "v0.05",
47+
'vta': "v0.06",
4848
}
4949

5050
logger = logging.getLogger('autotvm')

vta/config/pynq_sample.json

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
{
22
"TARGET" : "pynq",
3-
"HW_FREQ" : 100,
4-
"HW_CLK_TARGET" : 8,
5-
"HW_VER" : "0.0.0",
3+
"HW_VER" : "0.0.1",
64
"LOG_INP_WIDTH" : 3,
75
"LOG_WGT_WIDTH" : 3,
86
"LOG_ACC_WIDTH" : 5,
9-
"LOG_OUT_WIDTH" : 3,
107
"LOG_BATCH" : 0,
11-
"LOG_BLOCK_IN" : 4,
12-
"LOG_BLOCK_OUT" : 4,
8+
"LOG_BLOCK" : 4,
139
"LOG_UOP_BUFF_SIZE" : 15,
14-
"LOG_INP_BUFF_SIZE" : 15,
10+
"LOG_INP_BUFF_SIZE" :15,
1511
"LOG_WGT_BUFF_SIZE" : 18,
1612
"LOG_ACC_BUFF_SIZE" : 17
1713
}

vta/config/ultra96_sample.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"TARGET" : "ultra96",
3+
"HW_VER" : "0.0.1",
4+
"LOG_INP_WIDTH" : 3,
5+
"LOG_WGT_WIDTH" : 3,
6+
"LOG_ACC_WIDTH" : 5,
7+
"LOG_BATCH" : 0,
8+
"LOG_BLOCK" : 4,
9+
"LOG_UOP_BUFF_SIZE" : 15,
10+
"LOG_INP_BUFF_SIZE" :15,
11+
"LOG_WGT_BUFF_SIZE" : 18,
12+
"LOG_ACC_BUFF_SIZE" : 17
13+
}

vta/config/vta_config.json

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
11
{
22
"TARGET" : "sim",
3-
"HW_FREQ" : 100,
4-
"HW_CLK_TARGET" : 7,
5-
"HW_VER" : "0.0.0",
3+
"HW_VER" : "0.0.1",
64
"LOG_INP_WIDTH" : 3,
75
"LOG_WGT_WIDTH" : 3,
86
"LOG_ACC_WIDTH" : 5,
9-
"LOG_OUT_WIDTH" : 3,
107
"LOG_BATCH" : 0,
11-
"LOG_BLOCK_IN" : 4,
12-
"LOG_BLOCK_OUT" : 4,
8+
"LOG_BLOCK" : 4,
139
"LOG_UOP_BUFF_SIZE" : 15,
1410
"LOG_INP_BUFF_SIZE" : 15,
1511
"LOG_WGT_BUFF_SIZE" : 18,

0 commit comments

Comments
 (0)