26 | 26 | from tvm.autotvm.task import get_config |
27 | 27 | from .. import generic, tag |
28 | 28 | from .. import nn |
29 | | -from ..util import get_const_tuple, get_shape |
30 | | -from ..nn.conv2d import conv2d, conv2d_NCHWc, conv2d_NCHWc_int8, \ |
31 | | - conv2d_alter_layout, conv2d_infer_layout, _get_workload as _get_conv2d_workload |
| 29 | +from ..nn.conv2d import conv2d, conv2d_NCHWc, \ |
| 30 | + conv2d_infer_layout, _get_workload as _get_conv2d_workload |
32 | 31 | from ..nn.depthwise_conv2d import _get_workload as _get_depthwise_conv2d_workload |
33 | | -from ..nn.depthwise_conv2d import depthwise_conv2d_NCHWc, depthwise_conv2d_nchw |
34 | 32 | from ..nn.pad import pad |
| 33 | +from ..util import get_const_tuple |
35 | 34 |
36 | 35 | from . import conv2d_avx_1x1, conv2d_avx_common |
37 | 36 |
38 | 37 | logger = logging.getLogger('topi') |
39 | 38 |
40 | | -def _is_int8_hw_support(data_dtype, kernel_dtype, target): |
41 | | - """ |
42 | | - Checks to ensure that we can use Intel DLBoost instructions |
43 | | - 1) The datatypes are correct. |
44 | | - 2) LLVM version has support for the instructions. |
45 | | - 3) Target is skylake and above. |
46 | | - """ |
47 | | - # 1) Check datatypes |
48 | | - is_dtype_support = data_dtype == 'uint8' and kernel_dtype == 'int8' |
49 | | - |
50 | | - # 2) Check LLVM support |
51 | | - llvm_intrin_fast_int8 = "llvm.x86.avx512.pmaddubs.w.512" |
52 | | - llvm_id = tvm.codegen.llvm_lookup_intrinsic_id(llvm_intrin_fast_int8) |
53 | | - is_llvm_support = llvm_id != 0 |
54 | | - |
55 | | - # 3) Check target |
56 | | - is_target_support = False |
57 | | - for opt in target.options: |
58 | | - if opt == '-mcpu=skylake-avx512': |
59 | | - is_target_support = True |
60 | | - |
61 | | - return is_dtype_support and is_llvm_support and is_target_support |
62 | | - |
63 | 39 | def _get_default_config(cfg, data, kernel, strides, padding, out_dtype, is_depthwise=False, |
64 | 40 | layout='NCHW'): |
65 | 41 | """ |
@@ -353,138 +329,6 @@ def _topi_nn_conv2d_NCHWc(*args, **kwargs): |
353 | 329 | return s, [new_data, new_kernel, C] |
354 | 330 |
355 | 331 |
356 | | -@conv2d_alter_layout.register("cpu") |
357 | | -def _alter_conv2d_layout(attrs, inputs, tinfo, F): |
358 | | - |
359 | | - copy_inputs = [s for s in inputs] |
360 | | - new_attrs = {k : attrs[k] for k in attrs.keys()} |
361 | | - |
362 | | - if F.__name__ == 'tvm.relay.op': |
363 | | - # Derive channels for frontends (e.g ONNX) that miss "channel" field. |
364 | | - new_attrs["channels"] = inputs[1].checked_type.shape[attrs['kernel_layout'].index('O')] |
365 | | - |
366 | | - data, kernel = tinfo[0], tinfo[1] |
367 | | - batch_size, in_channel, height, width = get_const_tuple(data.shape) |
368 | | - |
369 | | - groups = attrs.get_int("groups") |
370 | | - out_channel = attrs.get_int("channels") \ |
371 | | - if F.__name__ == 'nnvm.symbol' else new_attrs["channels"] |
372 | | - padding = attrs.get_int_tuple("padding") |
373 | | - strides = attrs.get_int_tuple("strides") |
374 | | - dilation = attrs.get_int_tuple("dilation") |
375 | | - out_dtype = attrs["out_dtype"] |
376 | | - |
377 | | - layout_name = 'layout' if F.__name__ == 'nnvm.symbol' else 'data_layout' |
378 | | - |
379 | | - layout = attrs[layout_name] |
380 | | - kh, kw = attrs.get_int_tuple("kernel_size") |
381 | | - |
382 | | - dtype = data.dtype |
383 | | - out_dtype = dtype if out_dtype in ("same", "") else out_dtype |
384 | | - |
385 | | - kshape = get_shape(kernel.shape, attrs["kernel_layout"], "OIHW") |
386 | | - is_depthwise = groups == kshape[0] and kshape[1] == 1 |
387 | | - |
388 | | - # only optimize for NCHW |
389 | | - if layout != 'NCHW' or attrs["kernel_layout"] != "OIHW": |
390 | | - return None |
391 | | - |
392 | | - if groups != 1 and not is_depthwise: |
393 | | - return None |
394 | | - |
395 | | - dispatch_ctx = autotvm.task.DispatchContext.current |
396 | | - target = tvm.target.current_target() |
397 | | - # query schedule and fallback if necessary |
398 | | - workload = autotvm.task.args_to_workload( |
399 | | - [data, kernel, strides, padding, dilation, out_dtype], depthwise_conv2d_nchw) \ |
400 | | - if is_depthwise else \ |
401 | | - autotvm.task.args_to_workload( |
402 | | - [data, kernel, strides, padding, dilation, layout, out_dtype], conv2d) |
403 | | - cfg = dispatch_ctx.query(target, workload) |
404 | | - if cfg.is_fallback: |
405 | | - _get_default_config(cfg, data, kernel, strides, padding, out_dtype, is_depthwise) |
406 | | - |
407 | | - ic_bn, oc_bn = cfg["tile_ic"].size[-1], cfg["tile_oc"].size[-1] |
408 | | - |
409 | | - new_attrs[layout_name] = 'NCHW%dc' % ic_bn |
410 | | - new_attrs['out_layout'] = 'NCHW%dc' % oc_bn |
411 | | - |
412 | | - # Remove attached compilation target because conv2d_NCHWc needs to create |
413 | | - # a conv2d_nchwc op and target is not one of conv2d's parameters. |
414 | | - if "target" in new_attrs: |
415 | | - del new_attrs["target"] |
416 | | - |
417 | | - new_data = tvm.placeholder((batch_size, in_channel//ic_bn, height, width, ic_bn), |
418 | | - dtype=data.dtype) |
419 | | - |
420 | | - if is_depthwise: |
421 | | - new_attrs['kernel_layout'] = 'OIHW1i%do' % oc_bn |
422 | | - # Store altered operator's config |
423 | | - new_kernel = tvm.placeholder((out_channel//oc_bn, 1, kh, kw, 1, oc_bn), dtype=kernel.dtype) |
424 | | - new_workload = autotvm.task.args_to_workload( |
425 | | - [new_data, new_kernel, strides, padding, dilation, new_attrs[layout_name], |
426 | | - new_attrs['out_layout'], out_dtype], depthwise_conv2d_NCHWc) |
427 | | - dispatch_ctx.update(target, new_workload, cfg) |
428 | | - if F.__name__ == 'nnvm.symbol': |
429 | | - logging.warning("Use native layout for depthwise convolution on NNVM.") |
430 | | - return None |
431 | | - return F.nn.contrib_depthwise_conv2d_nchwc(*copy_inputs, **new_attrs) |
432 | | - |
433 | | - if _is_int8_hw_support(data.dtype, kernel.dtype, target): |
434 | | - # Convert kernel data layout from 4D to 7D |
435 | | - n_elems = 4 |
436 | | - out_channel, _, kh, kw = get_const_tuple(kernel.shape) |
437 | | - data_expr, kernel_expr = inputs |
438 | | - kernel_IHWO = F.transpose(kernel_expr, axes=(1, 2, 3, 0)) |
439 | | - kernel_IHWOo = F.reshape(kernel_IHWO, (in_channel, kh, kw, out_channel//oc_bn, oc_bn)) |
440 | | - kernel_OHWoI = F.transpose(kernel_IHWOo, axes=(3, 1, 2, 4, 0)) |
441 | | - kernel_OHWoIi = F.reshape(kernel_OHWoI, (out_channel//oc_bn, kh, kw, oc_bn, |
442 | | - in_channel//ic_bn, ic_bn)) |
443 | | - kernel_OHWoIie = F.reshape(kernel_OHWoIi, (out_channel//oc_bn, kh, kw, oc_bn, |
444 | | - in_channel//ic_bn, ic_bn//n_elems, n_elems)) |
445 | | - kernel_OIHWioe = F.transpose(kernel_OHWoIie, axes=(0, 4, 1, 2, 5, 3, 6)) |
446 | | - copy_inputs = [data_expr, kernel_OIHWioe] |
447 | | - |
448 | | - # Store altered operator's config. New kernel layout OIHWio4 |
449 | | - new_kernel = tvm.placeholder((out_channel // oc_bn, |
450 | | - in_channel // ic_bn, |
451 | | - kh, |
452 | | - kw, |
453 | | - ic_bn // n_elems, |
454 | | - oc_bn, |
455 | | - n_elems), dtype=kernel.dtype) |
456 | | - |
457 | | - new_workload = autotvm.task.args_to_workload([new_data, |
458 | | - new_kernel, |
459 | | - strides, |
460 | | - padding, |
461 | | - dilation, |
462 | | - new_attrs[layout_name], |
463 | | - new_attrs['out_layout'], |
464 | | - out_dtype], |
465 | | - conv2d_NCHWc_int8) |
466 | | - dispatch_ctx.update(target, new_workload, cfg) |
467 | | - if F.__name__ == 'nnvm.symbol': |
468 | | - logging.warning("Use native layout for int8 convolution on NNVM.") |
469 | | - return None |
470 | | - return F.nn.contrib_conv2d_nchwc_int8(*copy_inputs, **new_attrs) |
471 | | - |
472 | | - out_channel, _, kh, kw = get_const_tuple(kernel.shape) |
473 | | - # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc) |
474 | | - new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn) |
475 | | - # Store altered operator's config |
476 | | - new_kernel = tvm.placeholder((out_channel//oc_bn, in_channel//ic_bn, |
477 | | - kh, kw, ic_bn, oc_bn), dtype=kernel.dtype) |
478 | | - new_workload = autotvm.task.args_to_workload( |
479 | | - [new_data, new_kernel, strides, padding, dilation, new_attrs[layout_name], |
480 | | - new_attrs['out_layout'], out_dtype], conv2d_NCHWc) |
481 | | - dispatch_ctx.update(target, new_workload, cfg) |
482 | | - |
483 | | - if F.__name__ == 'nnvm.symbol': |
484 | | - return F.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs) |
485 | | - return F.nn.contrib_conv2d_nchwc(*copy_inputs, **new_attrs) |
486 | | - |
487 | | - |
488 | 332 | @conv2d_infer_layout.register("cpu") |
489 | 333 | def _conv2d_infer_layout(workload, cfg): |
490 | 334 | _, data, kernel, strides, padding, dilation, layout, dtype = workload |