1818"""
1919import numpy as np
2020import tvm
21+ from tvm import autotvm
2122from tvm import relay
2223from tvm .relay import transform
2324from tvm .relay .testing import ctx_list
25+ from tvm .contrib import util
2426import topi .testing
2527
2628def run_infer_type (expr ):
@@ -133,6 +135,46 @@ def run_test_conv2d(dtype, out_dtype, scale, dshape, kshape,
133135 op_res1 = intrp1 .evaluate (func )(data , kernel )
134136 tvm .testing .assert_allclose (op_res1 .asnumpy (), ref_res , rtol = 1e-5 , atol = 1e-5 )
135137
def compile_test_conv2d_arm_cpu(dtype, out_dtype, scale, dshape, kshape,
                                padding=(1, 1),
                                groups=1,
                                dilation=(1, 1),
                                **attrs):
    """Compile-only smoke test of a conv2d for the "llvm -device=arm_cpu" target.

    Builds a relay conv2d graph from the given data shape and attributes,
    writes a single hand-crafted autotvm tuning-log record to a temp file,
    applies it with ``autotvm.apply_history_best`` so the build picks the
    ``contrib_spatial_pack`` depthwise schedule, and runs ``relay.build``
    at opt_level 3.  No output is executed or checked -- success simply
    means compilation finished without raising.

    NOTE(review): ``out_dtype``, ``scale`` and ``kshape`` are never read in
    this body; presumably they exist only to mirror ``run_test_conv2d``'s
    signature -- confirm before removing.
    """
    x = relay.var("x", shape=dshape, dtype=dtype)
    # Kernel shape is deliberately omitted; relay type inference derives it
    # from the conv2d attrs (channels / kernel_size / groups) below.
    w = relay.var("w", dtype=dtype)
    y = relay.nn.conv2d(x, w,
                        padding=padding,
                        dilation=dilation,
                        groups=groups,
                        **attrs)
    func = relay.Function([x, w], y)
    mod = tvm.relay.Module()
    mod["main"] = func

    # One pre-baked autotvm log line (JSON) for the depthwise_conv2d_nchw
    # workload (1, 512, 32, 32) x (512, 1, 3, 3) with the
    # "contrib_spatial_pack" template; the "e" list holds the chosen knob
    # values and "r" the recorded measurement tuple.
    test_schedule = '{"i": ["llvm -device=arm_cpu", "topi_nn_depthwise_conv2d_nchw", \
        [["TENSOR", [1, 512, 32, 32], "float32"], \
        ["TENSOR", [512, 1, 3, 3], "float32"], \
        [1, 1], [1, 1], [1, 1], "float32"], {}, \
        ["depthwise_conv2d_nchw", [1, 512, 32, 32, "float32"], \
        [512, 1, 3, 3, "float32"], [1, 1], [1, 1], [1, 1], "float32"], \
        {"i": 743640, "t": "contrib_spatial_pack", "c": null, \
        "e": [["tile_co", "sp", [512, 1]], ["tile_oh", "sp", [8, 1]], \
        ["tile_ow", "sp", [1, 8]], \
        ["reorder_0", "re", [0, 1, 2, 3, 4, 5, 8, 6, 7]], \
        ["reorder_1", "re", [0, 1, 2, 3, 6, 4, 5]], \
        ["ann_reduce", "an", ["unroll", "none"]], \
        ["ann_spatial", "an", ["unroll", "unroll", "vec"]], \
        ["data_pad_inline", "ot", 4], ["data_vec_inline", "ot", 1], \
        ["conv_inline", "ot", 0]]}], "r": [[0.0002933163], \
        0, 3.1976189613342285, 1570811630.6058347], "v": 0.1}'
    # Persist the record so apply_history_best can read it back as a log file.
    temp = util.tempdir()
    with open(temp.relpath("temp.log"), "w") as log_file:
        log_file.write(test_schedule)
    # With the recorded best schedule in scope, compiling must not raise.
    with autotvm.apply_history_best(temp.relpath("temp.log")):
        with relay.build_config(opt_level=3):
            print('Compiling...')
            graph_json, mod, params = tvm.relay.build(mod, target="llvm -device=arm_cpu")
177+
136178 # depthwise conv2d
137179 dshape = (1 , 32 , 18 , 18 )
138180 kshape = (32 , 1 , 3 , 3 )
@@ -141,6 +183,13 @@ def run_test_conv2d(dtype, out_dtype, scale, dshape, kshape,
141183 fref = lambda x , w : topi .testing .depthwise_conv2d_python_nchw (
142184 x , w , (1 , 1 ), "SAME" ))
143185
186+ # depthwise conv2d for arm_cpu
187+ dshape = (1 , 512 , 32 , 32 )
188+ kshape = (512 , 1 , 3 , 3 )
189+ compile_test_conv2d_arm_cpu ("float32" , "float32" , 1 , dshape , kshape ,
190+ padding = (1 , 1 ), channels = 512 ,
191+ groups = 512 , kernel_size = (3 ,3 ))
192+
144193 # CUDA is disabled for 'direct' schedule:
145194 # https://github.com/dmlc/tvm/pull/3070#issuecomment-486597553
146195 # group conv2d
0 commit comments