
Commit e32b5b7

[target] Use native architecture for llvm target
Set the default `-device=` key for llvm targets based on the native architecture rather than hard-coding it to `cpu`, which is x86-specific. This means that when no llvm target triple is specified we test `arm_cpu` schedules on Arm®-based architectures and `cpu` schedules on x86-based architectures. Also fix the schedule test failures that result from this change.
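
For a quick sanity check (a minimal sketch, assuming a TVM build that includes this commit), the keys of a bare `llvm` target now follow the host architecture, and an explicit `-device=` flag still overrides the default:

    import tvm

    # With this change the default key for "llvm" is derived from the host:
    # expect "arm_cpu" in the list on Arm hosts and "cpu" on x86 hosts.
    print(tvm.target.Target("llvm").keys)

    # An explicit -device flag keeps taking precedence over the default key.
    print(tvm.target.Target("llvm -device=arm_cpu").keys)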
1 parent e280e01 commit e32b5b7

File tree

19 files changed, +145 −69 lines


include/tvm/target/target_kind.h

Lines changed: 13 additions & 0 deletions
@@ -440,6 +440,19 @@ constexpr const char* kIsExternalCodegen = "is_external_codegen";
  */
 constexpr const char* kRelayToTIR = "RelayToTIR";
 
+/*!
+ * \brief String representation of the host's target architecture.
+ *
+ * Currently this is set to "arm_cpu" on Arm®-based host architectures and "cpu"
+ * (which is synonymous with x86) everywhere else.
+ *
+ * TODO(@FranklandJack) dynamically detect host architecture and generalize for all targets.
+ */
+#if defined(__arm__) || defined(__aarch64__)
+constexpr const char* kHostCPU = "arm_cpu";
+#else
+constexpr const char* kHostCPU = "cpu";
+#endif
 }  // namespace attr
 
 /*!

python/tvm/relay/op/strategy/arm_cpu.py

Lines changed: 21 additions & 4 deletions
@@ -152,6 +152,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 is_winograd_applicable = (
                     "float" in data.dtype
                     and "float" in kernel.dtype
+                    and not data.dtype.count("custom")
                     and kh == 3
                     and kw == 3
                     and stride_h == 1
@@ -284,8 +285,21 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 name="depthwise_conv2d_nchw.x86",
             )
         elif layout == "NHWC":
-            assert kernel_layout == "HWOI"
-            if target.features.has_asimd:
+            # TODO(@FranklandJack)
+            # Handle HWOI in arm_cpu schedules/compute definition.
+            if kernel_layout != "HWOI":
+                logger.warning(
+                    """depthwise_conv2d with layout NHWC and HWOI
+                    kernel layout is not optimized for arm_cpu target.
+                    """
+                )
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc, need_kernel_layout=True),
+                    wrap_topi_schedule(conv2d_generic.schedule_depthwise_conv2d_nhwc),
+                    name="depthwise_conv2d_nhwc.generic",
+                )
+
+            elif target.features.has_asimd:
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
                     wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
@@ -304,8 +318,11 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 and kernel.shape[3] == 1  # channel_multiplier == 1
                 and out_type.dtype == "int32"
                 and (
-                    (data.shape[3] % 4 == 0 and data.dtype == "int8" and target.features.has_dsp)
-                    or (data.shape[3] % 2 == 0 and data.dtype == "int16")
+                    (
+                        (data.shape[3] % 4 == 0 and data.dtype == "int8")
+                        or (data.shape[3] % 2 == 0 and data.dtype == "int16")
+                    )
+                    and target.features.has_dsp
                 )
                 and (padding != "SAME" or data.shape[1] % stride_h == data.shape[2] % stride_w == 0)
                 # Ideally we should check that kernel is a Relay constant, but strategy functions
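
As an illustration of the new depthwise fallback (a sketch only, not part of the commit; the HWIO kernel layout below is a hypothetical choice made to trigger the non-HWOI branch), such a graph should now log the warning and compile through `depthwise_conv2d_nhwc.generic` instead of tripping the old assertion:

    import tvm
    from tvm import relay

    # Depthwise conv2d: NHWC data, HWIO kernel layout (not the HWOI layout the
    # arm_cpu schedules expect); groups == channels == 8.
    data = relay.var("data", shape=(1, 16, 16, 8), dtype="float32")
    weight = relay.var("weight", shape=(3, 3, 1, 8), dtype="float32")
    out = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(3, 3),
        padding=(1, 1),
        groups=8,
        channels=8,
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    mod = tvm.IRModule.from_expr(relay.Function([data, weight], out))
    # Expected to warn and select the generic NHWC schedule rather than assert.
    lib = relay.build(mod, target="llvm -device=arm_cpu")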

python/tvm/relay/qnn/op/legalizations.py

Lines changed: 10 additions & 6 deletions
@@ -111,8 +111,7 @@ def qnn_conv2d_transpose_legalize(attrs, inputs, types):
     # Otherwise it needs to be broadcast.
     else:
         shift_data = relay.nn.bias_add(
-            relay.cast(data, dtype="int16"),
-            -relay.cast(input_zero_point, dtype="int16"),
+            relay.cast(data, dtype="int16"), -relay.cast(input_zero_point, dtype="int16")
         )
 
     # If kernel zero point is a scalar, we can directly subtract it.
@@ -123,8 +122,7 @@ def qnn_conv2d_transpose_legalize(attrs, inputs, types):
     # Otherwise it needs to be broadcast.
     else:
         shift_kernel = relay.nn.bias_add(
-            relay.cast(kernel, dtype="int16"),
-            -relay.cast(kernel_zero_point, dtype="int16"),
+            relay.cast(kernel, dtype="int16"), -relay.cast(kernel_zero_point, dtype="int16")
         )
 
     return relay.nn.conv2d_transpose(shift_data, shift_kernel, **attrs)
@@ -486,7 +484,10 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
     if target.features.has_asimd and not other_options:
         return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d)
     # ARM prefers the dtypes to be same.
-    return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
+    if types[0].dtype in ["int8", "uint8"]:
+        return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
+
+    return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d)
 
 
 @qnn_dense_legalize.register("arm_cpu")
@@ -495,7 +496,10 @@ def _qnn_dense_legalize_arm_cpu(attrs, inputs, types):
     if target.features.has_asimd and not target.features.has_dotprod:
         return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense)
     # ARM prefers the dtypes to be same.
-    return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
+    if types[0].dtype in ["int8", "uint8"]:
+        return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
+
+    return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense)
 
 
 ##########################
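
For context (a small usage sketch, not taken from the commit), the legalization behaviour can be inspected by running the QNN Legalize pass inside an arm_cpu target scope; the uint8/int8 case below still goes through `helper_change_dtypes_to_be_same`, while non-int8 input dtypes now fall back to `helper_no_fast_int8_hw_legalization`:

    import tvm
    from tvm import relay

    # QNN conv2d with uint8 activations and int8 weights (the common TFLite mix).
    data = relay.var("data", shape=(1, 8, 16, 16), dtype="uint8")
    kernel = relay.var("kernel", shape=(8, 8, 3, 3), dtype="int8")
    conv = relay.qnn.op.conv2d(
        data,
        kernel,
        input_zero_point=relay.const(128, "int32"),
        kernel_zero_point=relay.const(0, "int32"),
        input_scale=relay.const(0.5, "float32"),
        kernel_scale=relay.const(0.5, "float32"),
        kernel_size=(3, 3),
        channels=8,
        padding=(1, 1),
    )
    mod = tvm.IRModule.from_expr(conv)

    # QNN legalization dispatches on the target currently in scope.
    with tvm.target.Target("llvm -device=arm_cpu"):
        mod = relay.qnn.transform.Legalize()(mod)
    print(mod)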

python/tvm/topi/arm_cpu/conv2d.py

Lines changed: 7 additions & 1 deletion
@@ -23,10 +23,11 @@
 from tvm import autotvm
 import tvm.contrib.nnpack
 
-from ..utils import traverse_inline, get_const_tuple
+from ..utils import traverse_inline, get_const_tuple, conv2d_infer_layout_helper
 from .. import nn
 from ..nn.utils import get_const_int, get_pad_tuple
 from ..nn.winograd_util import winograd_transform_matrices
+from ..nn.conv2d import conv2d_infer_layout
 from .conv2d_spatial_pack import (
     conv2d_spatial_pack_nchw,
     conv2d_spatial_pack_nhwc,
@@ -509,3 +510,8 @@ def conv2d_nhwc_dsp(cfg, data, kernel, strides, padding, dilation, out_dtype):
 def schedule_conv2d_nhwc_dsp(cfg, outs):
     """Create schedule for conv2d_nhwc_dsp"""
     return conv2d_nhwc_dsp_schedule(cfg, outs)
+
+
+@conv2d_infer_layout.register("arm_cpu")
+def _conv2d_infer_layout(workload, cfg):
+    return conv2d_infer_layout_helper(workload, cfg)

python/tvm/topi/arm_cpu/injective.py

Lines changed: 4 additions & 2 deletions
@@ -69,8 +69,10 @@ def schedule_injective(outs):
     if list(s[x].op.axis):
         # do not vectorize for broadcast
         dtype = "uint16" if x.dtype == "bfloat16" else x.dtype
-        (io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // np.dtype(dtype).itemsize)
-        s[x].vectorize(ii)
+        # do not vectorize for custom data types
+        if 0 == dtype.count("custom"):
+            (io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // np.dtype(dtype).itemsize)
+            s[x].vectorize(ii)
     tvm.te.schedule.AutoInlineInjective(s)
 
     if not is_empty_shape(x.shape):

python/tvm/topi/intel_graphics/conv2d_alter_op.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from tvm import relay
2323
from tvm import autotvm
2424

25-
from ..utils import get_const_tuple
25+
from ..utils import get_const_tuple, conv2d_infer_layout_helper
2626
from ..nn import conv2d_alter_layout, conv2d_infer_layout
2727
from .conv2d import _get_default_config
2828

@@ -102,14 +102,4 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
102102

103103
@conv2d_infer_layout.register("intel_graphics")
104104
def _conv2d_infer_layout(workload, cfg):
105-
_, data, kernel, strides, padding, dilation, layout, dtype = workload
106-
batch_size, in_channel, in_height, in_width = data[1]
107-
out_channel, _, k_height, k_width = kernel[1]
108-
out_height = (in_height + 2 * padding[0] - k_height) // strides[0] + 1
109-
out_width = (in_width + 2 * padding[1] - k_width) // strides[1] + 1
110-
tile_ic, tile_oc = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
111-
in_shape = (batch_size, in_channel // tile_ic, in_height, in_width, tile_ic)
112-
in_layout = f"NCHW{tile_ic}c"
113-
out_shape = (batch_size, out_channel // tile_oc, out_height, out_width, tile_oc)
114-
out_layout = f"NCHW{tile_oc}c"
115-
return ((in_shape, in_layout),), ((out_shape, out_layout),)
105+
return conv2d_infer_layout_helper(workload, cfg)

python/tvm/topi/testing/common.py

Lines changed: 2 additions & 0 deletions
@@ -35,6 +35,8 @@
 _reduce_schedule = {
     "generic": topi.generic.schedule_reduce,
     "cpu": topi.x86.schedule_reduce,
+    # TODO(@FranklandJack) Write arm_cpu specific reduction schedule.
+    "arm_cpu": topi.x86.schedule_reduce,
     "gpu": topi.cuda.schedule_reduce,
     "hls": topi.cuda.schedule_reduce,
 }

python/tvm/topi/utils.py

Lines changed: 24 additions & 0 deletions
@@ -24,6 +24,7 @@
 import tvm
 from tvm import te
 from tvm.tir import Any, SizeVar, bijective_layout, layout
+import tvm.topi
 
 from . import cpp, tag
 
@@ -526,3 +527,26 @@ def is_target(names):
 def is_dynamic_shape(shape):
     """Checks if any part of a shape is dynamic"""
     return any([isinstance(x, (Any, SizeVar)) for x in shape])
+
+
+def conv2d_infer_layout_helper(workload, cfg):
+    """Infers input and output layouts for a conv2d operator
+    scheduled using "tile_ic" and "tile_oc" scheduling configuration knobs which
+    is the case for cpu, arm_cpu and intel_graphics targets."""
+    _, data, kernel, strides, padding, dilation, _, _, _ = workload
+    batch_size, in_channel, in_height, in_width = data[1]
+    out_channel, _, k_height, k_width = kernel[1]
+    idxdiv = tvm.tir.indexdiv
+
+    pt, pl, pb, pr = tvm.topi.nn.get_pad_tuple(padding, (k_height, k_width))
+    hdilation, wdilation = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation)
+    dilated_kernel_h = (k_height - 1) * hdilation + 1
+    dilated_kernel_w = (k_width - 1) * wdilation + 1
+    out_height = idxdiv(in_height + pt + pb - dilated_kernel_h, strides[0]) + 1
+    out_width = idxdiv(in_width + pl + pr - dilated_kernel_w, strides[1]) + 1
+    tile_ic, tile_oc = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
+    in_shape = (batch_size, idxdiv(in_channel, tile_ic), in_height, in_width, tile_ic)
+    in_layout = f"NCHW{tile_ic}c"
+    out_shape = (batch_size, idxdiv(out_channel, tile_oc), out_height, out_width, tile_oc)
+    out_layout = f"NCHW{tile_oc}c"
+    return ((in_shape, in_layout),), ((out_shape, out_layout),)
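
For intuition, the arithmetic the helper performs can be worked by hand (illustrative values only; in practice `tile_ic` and `tile_oc` come from the AutoTVM config):

    # Hypothetical workload: NCHW 1x32x56x56 input, 64x32x3x3 kernel,
    # stride 1, padding 1, dilation 1, and tile_ic = tile_oc = 8.
    in_height = in_width = 56
    k_height = k_width = 3
    pt = pl = pb = pr = 1
    stride_h = stride_w = 1
    dilation_h = dilation_w = 1

    dilated_kernel_h = (k_height - 1) * dilation_h + 1                      # 3
    dilated_kernel_w = (k_width - 1) * dilation_w + 1                       # 3
    out_height = (in_height + pt + pb - dilated_kernel_h) // stride_h + 1   # 56
    out_width = (in_width + pl + pr - dilated_kernel_w) // stride_w + 1     # 56

    tile_ic = tile_oc = 8
    in_shape = (1, 32 // tile_ic, in_height, in_width, tile_ic)     # (1, 4, 56, 56, 8), "NCHW8c"
    out_shape = (1, 64 // tile_oc, out_height, out_width, tile_oc)  # (1, 8, 56, 56, 8), "NCHW8c"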

python/tvm/topi/x86/conv2d.py

Lines changed: 2 additions & 18 deletions
@@ -30,7 +30,7 @@
 from ..nn.conv2d import unpack_NCHWc_to_nchw
 from ..nn.depthwise_conv2d import _get_workload as _get_depthwise_conv2d_workload
 from ..nn.utils import get_pad_tuple
-from ..utils import get_const_tuple, traverse_inline
+from ..utils import get_const_tuple, traverse_inline, conv2d_infer_layout_helper
 from . import conv2d_avx_1x1, conv2d_avx_common
 
 logger = logging.getLogger("topi")
@@ -65,23 +65,7 @@ def _get_default_config(
 
 @conv2d_infer_layout.register("cpu")
 def _conv2d_infer_layout(workload, cfg):
-    _, data, kernel, strides, padding, dilation, layout, _, dtype = workload
-    batch_size, in_channel, in_height, in_width = data[1]
-    out_channel, _, k_height, k_width = kernel[1]
-    idxdiv = tvm.tir.indexdiv
-
-    pt, pl, pb, pr = get_pad_tuple(padding, (k_height, k_width))
-    hdilation, wdilation = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation)
-    dilated_kernel_h = (k_height - 1) * hdilation + 1
-    dilated_kernel_w = (k_width - 1) * wdilation + 1
-    out_height = idxdiv(in_height + pt + pb - dilated_kernel_h, strides[0]) + 1
-    out_width = idxdiv(in_width + pl + pr - dilated_kernel_w, strides[1]) + 1
-    tile_ic, tile_oc = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1]
-    in_shape = (batch_size, idxdiv(in_channel, tile_ic), in_height, in_width, tile_ic)
-    in_layout = f"NCHW{tile_ic}c"
-    out_shape = (batch_size, idxdiv(out_channel, tile_oc), out_height, out_width, tile_oc)
-    out_layout = f"NCHW{tile_oc}c"
-    return ((in_shape, in_layout),), ((out_shape, out_layout),)
+    return conv2d_infer_layout_helper(workload, cfg)
 
 
 def schedule_conv2d_nhwc(outs):

src/target/target_kind.cc

Lines changed: 1 addition & 1 deletion
@@ -293,7 +293,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<Integer>("opt-level")
     // LLVM command line flags, see below
     .add_attr_option<Array<String>>("cl-opt")
-    .set_default_keys({"cpu"})
+    .set_default_keys({attr::kHostCPU})
     // Force the external codegen kind attribute to be registered, even if no external
     // codegen targets are enabled by the TVM build.
    .set_attr<Bool>(tvm::attr::kIsExternalCodegen, Bool(false))
