Commit df374cf

Refactor, refactor code structure, fix pynq rpc (apache#29)
1 parent cfaed3d commit df374cf

File tree

8 files changed: +185 −169 lines

examples/resnet18/pynq/imagenet_predict.py

Lines changed: 8 additions & 8 deletions
@@ -37,10 +37,10 @@
     vta.program_fpga(remote, BITSTREAM_FILE)
 
 if verbose:
-    logging.basicConfig(level=logging.INFO)
+    logging.basicConfig(level=logging.DEBUG)
 
-# Change to -device=vta-cpu to run cpu only inference.
-target = "llvm -device=vta"
+# Change to -device=vtacpu to run cpu only inference.
+target = tvm.target.create("llvm -device=vta")
 target_host = "llvm -mtriple=armv7-none-linux-gnueabihf -mcpu=cortex-a9 -mattr=+neon"
 
 synset = eval(open(os.path.join(CATEG_FILE)).read())
@@ -109,7 +109,7 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
 sym = vta.graph.remove_stochastic(sym)
 sym = vta.graph.clean_cast(sym)
 sym = vta.graph.clean_conv_fuse(sym)
-if "vta" in target:
+if target.device_name == "vta":
     sym = vta.graph.pack(sym, shape_dict, factor)
 
 graph_attr.set_shape_inputs(sym, shape_dict)
@@ -118,10 +118,10 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
 sym = sym.apply("InferType")
 
 with nnvm.compiler.build_config(opt_level=3):
-    if "vta" not in target:
+    if target.device_name != "vta":
         graph, lib, params = nnvm.compiler.build(
-            sym, target, shape_dict, dtype_dict,
-            params=params, target_host=target_host)
+            sym, target_host, shape_dict, dtype_dict,
+            params=params)
     else:
         with vta.build_config():
             graph, lib, params = nnvm.compiler.build(
@@ -133,7 +133,7 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
 lib.save(temp.relpath("graphlib.o"))
 remote.upload(temp.relpath("graphlib.o"))
 lib = remote.load_module("graphlib.o")
-ctx = remote.ext_dev(0) if "vta" in target else remote.cpu(0)
+ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)
 
 print("Build complete...")

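The switch from a plain target string to tvm.target.create is what makes the device_name checks above possible. A minimal sketch of the idea, not part of the diff, using only the tvm.target API shown above:

    import tvm

    # Parsed target object instead of a raw string.
    target = tvm.target.create("llvm -device=vta")

    # The old substring test ("vta" in target_string) would also match a
    # "vtacpu" device; device_name compares the parsed -device value exactly.
    if target.device_name == "vta":
        print("offload conv2d to the VTA accelerator (remote.ext_dev)")
    else:
        print("CPU-only inference (remote.cpu)")
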
python/vta/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -3,11 +3,12 @@
 
 
 from .environment import get_env, Environment
-from . import arm_conv2d, vta_conv2d
-from .build_module import build_config, lower, build
 from .rpc_client import reconfig_runtime, program_fpga
 
+
 try:
+    from . import top
+    from .build_module import build_config, lower, build
     from . import graph
 except (ImportError, RuntimeError):
     pass

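A hedged usage sketch of what the reordered imports buy, under the assumption that the compile-side modules (top, build_module, graph) need the full TVM/NNVM stack that a runtime-only install such as the Pynq RPC server does not ship:

    import vta

    # rpc_client is imported unconditionally, so program_fpga/reconfig_runtime
    # are always available; compiler entry points sit inside the try/except above.
    if hasattr(vta, "build"):
        print("host install: vta.build / vta.lower / vta.top available")
    else:
        print("runtime-only install: compile on the host, deploy over RPC")
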
python/vta/exec/rpc_server.py

Lines changed: 5 additions & 5 deletions
@@ -75,20 +75,20 @@ def reconfig_runtime(cfg_json):
     pkg = PkgConfig(cfg, proj_root)
     # check if the configuration is already the same
     if os.path.isfile(cfg_path):
-        old_cfg = json.load(open(cfg_path))
+        old_cfg = json.loads(open(cfg_path, "r").read())
         if pkg.same_config(old_cfg):
-            logging.info("Skip reconfiguration because runtime config is the same")
+            logging.info("Skip reconfig_runtime due to same config.")
             return
-    cflags += ["-O2", "-std=c++11"]
+    cflags = ["-O2", "-std=c++11"]
     cflags += pkg.cflags
     ldflags = pkg.ldflags
     lib_name = dll_path
-    source = env.pkg_config.lib_source
+    source = pkg.lib_source
     logging.info("Rebuild runtime: output=%s, cflags=%s, source=%s, ldflags=%s",
                  dll_path, str(cflags), str(source), str(ldflags))
     cc.create_shared(lib_name, source, cflags + ldflags)
     with open(cfg_path, "w") as outputfile:
-        json.dump(pkg.cfg_json, outputfile)
+        outputfile.write(pkg.cfg_json)
 
 
 def main():

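The reconfig_runtime change treats pkg.cfg_json as an already-serialized JSON string: it is written to disk verbatim and parsed with json.loads on the next read. A small stand-alone sketch of that round trip; cfg_path and the config contents here are made up for illustration:

    import json

    cfg_json = '{"TARGET": "pynq", "LOG_INP_WIDTH": 3}'   # hypothetical pkg.cfg_json
    cfg_path = "/tmp/vta_config.json"                      # hypothetical path

    with open(cfg_path, "w") as outputfile:
        outputfile.write(cfg_json)       # write the string as-is, no json.dump

    old_cfg = json.loads(open(cfg_path, "r").read())       # parse on reload
    print(old_cfg["TARGET"])
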
python/vta/top/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
"""TVM TOPI connector, eventually most of these should go to TVM repo"""

from .vta_conv2d import packed_conv2d, schedule_packed_conv2d
from . import vta_conv2d
from . import arm_conv2d

python/vta/arm_conv2d.py renamed to python/vta/top/arm_conv2d.py

Lines changed: 5 additions & 5 deletions
@@ -44,7 +44,7 @@
     Im2ColPack(7, 4, 1, 16, False),
 ]
 
-@_get_schedule.register(["tcpu", "vta"])
+@_get_schedule.register(["vtacpu", "vta"])
 def _schedule_conv2d(wkl):
     if wkl not in _WORKLOADS:
         raise ValueError("no schedule for such workload: {}".format(wkl))
@@ -53,10 +53,10 @@ def _schedule_conv2d(wkl):
     return sch
 
 
-@conv2d.register(["tcpu", "vta"])
+@conv2d.register(["vtacpu", "vta"])
 def _declaration_conv2d(data, kernel, stride, padding, layout, out_dtype):
-    assert layout == 'NCHW', "only support NCHW convolution on tcpu"
-    assert data.shape[0].value == 1, "only support batch size=1 convolution on tcpu"
+    assert layout == 'NCHW', "only support NCHW convolution on vtacpu"
+    assert data.shape[0].value == 1, "only support batch size=1 convolution on vtacpu"
     wkl = _get_workload(data, kernel, stride, padding, out_dtype)
     sch = _get_schedule(wkl)
     return _SCH_TO_DECL_FUNC[type(sch)](data, kernel, stride, padding, out_dtype)
@@ -284,7 +284,7 @@ def _schedule_im2col_conv2d(s, data, data_pad, data_col, data_vec,
 
     return s
 
-@generic.schedule_conv2d_nchw.register(["tcpu", "vta"])
+@generic.schedule_conv2d_nchw.register(["vtacpu", "vta"])
 def schedule_conv2d(outs):
     """Create schedule for tensors"""
     s = tvm.create_schedule([x.op for x in outs])

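The rename from "tcpu" to "vtacpu" has to line up with the -device flag used on the target string (see the comment change in imagenet_predict.py above). A small hedged sketch of the target side only; the actual schedule dispatch against these registration keys happens inside TOPI/NNVM and is not shown here:

    import tvm

    # CPU-only target for the board's ARM cores; its device name is "vtacpu",
    # the key the conv2d/schedule decorators above now register under.
    cpu_target = tvm.target.create("llvm -device=vtacpu")
    print(cpu_target.device_name)   # "vtacpu"
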
python/vta/vta_conv2d.py renamed to python/vta/top/vta_conv2d.py

Lines changed: 4 additions & 3 deletions
@@ -1,4 +1,5 @@
 """Namespace for supporting packed_conv2d + ewise variant of nnvm."""
+from __future__ import absolute_import as _abs
 
 from collections import namedtuple
 
@@ -7,7 +8,7 @@
 import topi
 
 from nnvm.top import registry as reg, OpPattern
-from . import environment as vta
+from ..environment import get_env
 
 
 Workload = namedtuple("Conv2DWorkload",
@@ -219,7 +220,7 @@ def _traverse(op):
     wrkld = _get_workload(data, pad_data, kernel, output)
 
     plan = _WL2PLAN[wrkld]
-    env = vta.get_env()
+    env = get_env()
 
     load_inp = load_wgt = load_out = store_out = env.dma_copy
     alu = env.alu
@@ -251,7 +252,7 @@ def _traverse(op):
 
     # tile
     oc_factor = (plan.oc_factor if plan.oc_factor
-                 else wrkld.out_filter // vta.BLOCK_OUT)
+                 else plan.out_filter // env.BLOCK_OUT)
     h_factor = (plan.h_factor if plan.h_factor else oshape[2])
     w_factor = (plan.w_factor if plan.w_factor else oshape[3])

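The vta_conv2d changes replace module-level constants (vta.BLOCK_OUT) with lookups on the active environment. A minimal sketch of that pattern, assuming a configured vta install and using only attributes that appear elsewhere in this commit (BLOCK_IN/BLOCK_OUT, the dtypes, dma_copy, alu):

    from vta import get_env

    env = get_env()                       # the currently configured Environment
    print(env.BLOCK_IN, env.BLOCK_OUT)    # tensorization blocking factors
    print(env.inp_dtype, env.wgt_dtype, env.acc_dtype)
    load = store = env.dma_copy           # pragma hints used by the schedules
    alu = env.alu
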
Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
"""Testing if we can generate code in topi style"""

import tvm
from tvm.contrib import util
from tvm.contrib.pickle_memoize import memoize
import topi
import topi.testing
import vta
import vta.testing
import numpy as np

Workload = vta.top.vta_conv2d.Workload

@tvm.tag_scope(tag=topi.tag.ELEMWISE)
def my_clip(x, a_min, a_max):
    """Unlike topi's current clip, put min and max into two stages."""
    const_min = tvm.const(a_min, x.dtype)
    const_max = tvm.const(a_max, x.dtype)
    x = tvm.compute(x.shape, lambda *i: tvm.min(x(*i), const_max), name="clipA")
    x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
    return x


def test_vta_conv2d():
    def run_vta_conv2d(env, remote, key, batch_size, wl, profile=True):
        data_shape = (batch_size, wl.in_filter // env.BLOCK_IN,
                      wl.height, wl.width, env.BLOCK_IN)
        kernel_shape = (wl.out_filter // env.BLOCK_OUT,
                        wl.in_filter // env.BLOCK_IN,
                        wl.hkernel, wl.wkernel,
                        env.BLOCK_OUT, env.BLOCK_IN)
        bias_shape = (wl.out_filter // env.BLOCK_OUT, 1, 1, env.BLOCK_OUT)


        fout_height = (wl.height + 2 * wl.hpad - wl.hkernel) // wl.hstride + 1
        fout_width = (wl.width + 2 * wl.wpad - wl.wkernel) // wl.wstride + 1
        data = tvm.placeholder(data_shape, name="data", dtype=env.inp_dtype)
        kernel = tvm.placeholder(kernel_shape, name="kernel", dtype=env.wgt_dtype)
        bias = tvm.placeholder(bias_shape, name="kernel", dtype=env.acc_dtype)

        res_conv = vta.top.packed_conv2d(
            data, kernel, padding=(wl.hpad, wl.wpad), strides=(wl.hstride, wl.wstride))
        res = topi.right_shift(res_conv, 8)
        res = topi.broadcast_add(res, bias)
        res = my_clip(res, 0, 127)
        res = topi.cast(res, "int8")

        num_ops = fout_height * fout_width * wl.hkernel * wl.wkernel * wl.out_filter * wl.in_filter

        a_shape = (batch_size, wl.in_filter, wl.height, wl.width)
        w_shape = (wl.out_filter, wl.in_filter, wl.hkernel, wl.wkernel)
        stride = (wl.hstride, wl.wstride)
        data_dtype = data.dtype
        acc_dtype = env.acc_dtype
        assert wl.hpad == wl.wpad
        padding = wl.hpad

        @memoize("vta.tests.test_benchmark_topi.conv2d,verify_nhwc")
        def get_ref_data():
            a_np = (np.random.uniform(size=a_shape) * 4).astype(data_dtype)
            w_np = (np.random.uniform(size=w_shape) * 4).astype(data_dtype)
            a_np = np.abs(a_np)
            w_np = np.abs(w_np)
            b_np = topi.testing.conv2d_nchw_python(
                a_np.astype(acc_dtype), w_np.astype(acc_dtype), stride, padding).astype(acc_dtype)
            return a_np, w_np, b_np


        def verify(s, check_correctness):
            mod = vta.build(s, [data, kernel, bias, res], "ext_dev",
                            env.target_host, name="conv2d")
            temp = util.tempdir()

            mod.save(temp.relpath("conv2d.o"))
            remote.upload(temp.relpath("conv2d.o"))
            f = remote.load_module("conv2d.o")
            # verify
            ctx = remote.ext_dev(0)
            # Data in original format
            data_orig, kernel_orig, res_ref = get_ref_data()
            bias_orig = (np.random.uniform(size=(wl.out_filter,)) * 4).astype("int32")
            bias_orig = np.abs(bias_orig)

            data_packed = data_orig.reshape(
                batch_size, wl.in_filter // env.BLOCK_IN, env.BLOCK_IN,
                wl.height, wl.width).transpose((0, 1, 3, 4, 2))
            kernel_packed = kernel_orig.reshape(
                wl.out_filter // env.BLOCK_OUT, env.BLOCK_OUT,
                wl.in_filter // env.BLOCK_IN, env.BLOCK_IN,
                wl.hkernel, wl.wkernel).transpose((0, 2, 4, 5, 1, 3))
            bias_packed = bias_orig.reshape(
                wl.out_filter // env.BLOCK_OUT, 1, 1, env.BLOCK_OUT)
            res_shape = topi.util.get_const_tuple(res.shape)

            res_np = np.zeros(res_shape).astype(res.dtype)
            data_arr = tvm.nd.array(data_packed, ctx)
            kernel_arr = tvm.nd.array(kernel_packed, ctx)
            bias_arr = tvm.nd.array(bias_packed, ctx)
            res_arr = tvm.nd.array(res_np, ctx)
            time_f = f.time_evaluator("conv2d", ctx, number=5)
            cost = time_f(data_arr, kernel_arr, bias_arr, res_arr)
            res_unpack = res_arr.asnumpy().transpose(
                (0, 1, 4, 2, 3)).reshape(batch_size, wl.out_filter, fout_height, fout_width)
            if check_correctness:
                assert wl.hpad == wl.wpad
                stride = (wl.hstride, wl.wstride)
                padding = wl.hpad
                res_ref = res_ref >> 8
                res_ref += bias_orig.reshape(wl.out_filter, 1, 1)
                res_ref = np.clip(res_ref, 0, 127).astype("int8")
                np.testing.assert_allclose(res_unpack, res_ref)
            return cost

        def conv_normal(print_ir):
            print("----- CONV2D End-to-End Test-------")
            with vta.build_config():
                s = vta.top.schedule_packed_conv2d([res])
                if print_ir:
                    print(vta.lower(s, [data, kernel, bias, res], simple_mode=True))
                cost = verify(s, True)
            gops = (num_ops / cost.mean) / float(10 ** 9)
            print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))

        conv_normal(False)

    def _run(env, remote):
        # ResNet18 workloads
        resnet = {
            # Workloads of resnet18 on imagenet
            0: Workload(224, 224, 16, 64, 7, 7, 3, 3, 2, 2),
            1: Workload(56, 56, 64, 64, 3, 3, 1, 1, 1, 1),
            2: Workload(56, 56, 64, 64, 1, 1, 0, 0, 1, 1),
            3: Workload(56, 56, 64, 128, 3, 3, 1, 1, 2, 2),
            4: Workload(56, 56, 64, 128, 1, 1, 0, 0, 2, 2),
            5: Workload(28, 28, 128, 128, 3, 3, 1, 1, 1, 1),
            6: Workload(28, 28, 128, 256, 3, 3, 1, 1, 2, 2),
            7: Workload(28, 28, 128, 256, 1, 1, 0, 0, 2, 2),
            8: Workload(14, 14, 256, 256, 3, 3, 1, 1, 1, 1),
            9: Workload(14, 14, 256, 512, 3, 3, 1, 1, 2, 2),
            10: Workload(14, 14, 256, 512, 1, 1, 0, 0, 2, 2),
            11: Workload(7, 7, 512, 512, 3, 3, 1, 1, 1, 1),
        }

        batch_size = 1
        for i in range(0, len(resnet)):
            wl = resnet[i]
            key = "resnet-cfg[%d]" % i
            print("key=%s" % key)
            print(wl)
            run_vta_conv2d(env, remote, key, batch_size, wl)

    vta.testing.run(_run)


if __name__ == "__main__":
    test_vta_conv2d()

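To make the packed layouts in the new test concrete, here is a small worked example for ResNet-18 workload 1 (56x56 feature map, 64 -> 64 channels, 3x3 kernel), assuming BLOCK_IN = BLOCK_OUT = 16; the real blocking factors come from the VTA environment, not from this commit:

    batch, height, width = 1, 56, 56
    in_filter, out_filter = 64, 64
    hkernel, wkernel = 3, 3
    block_in = block_out = 16            # assumed environment blocking factors

    # NCHW activations -> (N, C // BLOCK_IN, H, W, BLOCK_IN)
    data_packed = (batch, in_filter // block_in, height, width, block_in)
    # OIHW weights -> (O // BLOCK_OUT, I // BLOCK_IN, kH, kW, BLOCK_OUT, BLOCK_IN)
    kernel_packed = (out_filter // block_out, in_filter // block_in,
                     hkernel, wkernel, block_out, block_in)

    print(data_packed)     # (1, 4, 56, 56, 16)
    print(kernel_packed)   # (4, 4, 3, 3, 16, 16)
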