11 changes: 0 additions & 11 deletions tests/python/contrib/test_hexagon/conftest.py
@@ -19,19 +19,8 @@
values from testing parameters """

import tvm
from .infrastructure import get_packed_filter_layout


@tvm.testing.fixture
def shape_nhwc(batch, in_channel, in_size):
    return (batch, in_size, in_size, in_channel)


@tvm.testing.fixture
def shape_oihw(out_channel, in_channel, kernel):
    return (out_channel, in_channel, kernel, kernel)


@tvm.testing.fixture
def shape_oihw8i32o4i(out_channel, in_channel, kernel):
    return get_packed_filter_layout(out_channel, in_channel, kernel, kernel)
153 changes: 108 additions & 45 deletions tests/python/contrib/test_hexagon/infrastructure.py
@@ -18,47 +18,65 @@
""" Hexagon testing infrastructure """

import tvm
from tvm import te
import numpy


def ceildiv(o, d):
    assert o >= 0
    assert d > 0
    return tvm.tir.floordiv(o + d - 1, d)
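
# ceildiv rounds up, e.g. ceildiv(62, 8) evaluates to 8 and ceildiv(65, 8) to 9.
# Note the result is a tvm.tir expression rather than a Python int; callers
# below wrap it in int(...) where a plain value is needed.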


def get_packed_activation_layout(shape_nhwc, block_shape, packed_C=True):
    assert len(shape_nhwc) == 4
    shape = [shape_nhwc[0]]
    off_h, off_w, off_c = block_shape
    shape.append(ceildiv(shape_nhwc[1], off_h))
    shape.append(ceildiv(shape_nhwc[2], off_w))
    if packed_C:
        shape.append(ceildiv(shape_nhwc[3], off_c))
        shape.extend(block_shape)
    else:
        shape.extend([off_h, off_w, shape_nhwc[3]])
    return shape


# defines inner block shape: 8h8w32c
def get_block_shape():
    return 8, 8, 32


# defines inner filter block shape: 8i32o4i
def get_filter_block_shape():
    return 8, 32, 4


def get_packed_filter_layout(out_channel, in_channel, kernel_h, kernel_w):
    filter_Cio, filter_Ki, filter_Cii = get_filter_block_shape()
# input: logical shape in nhwc layout
# output: physical packed shape in nhw8h8w32c layout
def get_packed_shape(logical_shape_nhwc):
    assert len(logical_shape_nhwc) == 4
    physical_shape_nhwc8h8w32c = [logical_shape_nhwc[0]]
    block_shape = get_block_shape()
    off_h, off_w, off_c = block_shape
    physical_shape_nhwc8h8w32c.append(ceildiv(logical_shape_nhwc[1], off_h))
    physical_shape_nhwc8h8w32c.append(ceildiv(logical_shape_nhwc[2], off_w))
    physical_shape_nhwc8h8w32c.append(ceildiv(logical_shape_nhwc[3], off_c))
    physical_shape_nhwc8h8w32c.extend(block_shape)
    return physical_shape_nhwc8h8w32c
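
# Example (folded values; each ceildiv above yields a tir expression):
#   get_packed_shape([1, 64, 64, 64]) -> [1, 8, 8, 2, 8, 8, 32]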


# input: physical packed shape in nhw8h8w32c layout
# output: logical shape in nhwc layout
def get_logical_shape(physical_shape_nhwc8h8w32c):
    assert len(physical_shape_nhwc8h8w32c) == 7
    logical_shape_nhwc = [physical_shape_nhwc8h8w32c[0]]
    logical_shape_nhwc.append(physical_shape_nhwc8h8w32c[1] * physical_shape_nhwc8h8w32c[4])
    logical_shape_nhwc.append(physical_shape_nhwc8h8w32c[2] * physical_shape_nhwc8h8w32c[5])
    logical_shape_nhwc.append(physical_shape_nhwc8h8w32c[3] * physical_shape_nhwc8h8w32c[6])
    return logical_shape_nhwc
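
# Example: get_logical_shape([1, 8, 8, 2, 8, 8, 32]) -> [1, 64, 64, 64]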


# input: logical shape in oihw layout
# output: physical packed shape in oihw8i32o4i layout
def get_packed_filter_shape(logical_shape_oihw):
    assert len(logical_shape_oihw) == 4
    filter_block_shape = get_filter_block_shape()
    filter_Cio, filter_Ki, filter_Cii = filter_block_shape
    filter_Ci = filter_Cio * filter_Cii
    return (
        int(ceildiv(out_channel, filter_Ki)),
        int(ceildiv(in_channel, filter_Ci)),
        kernel_h,
        kernel_w,
        filter_Cio,
        filter_Ki,
        filter_Cii,
    )
    physical_shape_oihw8i32o4i = []
    physical_shape_oihw8i32o4i.append(int(ceildiv(logical_shape_oihw[0], filter_Ki)))
    physical_shape_oihw8i32o4i.append(int(ceildiv(logical_shape_oihw[1], filter_Ci)))
    physical_shape_oihw8i32o4i.append(logical_shape_oihw[2])
    physical_shape_oihw8i32o4i.append(logical_shape_oihw[3])
    physical_shape_oihw8i32o4i.extend(filter_block_shape)
    return physical_shape_oihw8i32o4i
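
# Example: get_packed_filter_shape([128, 64, 1, 1]) -> [4, 2, 1, 1, 8, 32, 4]
# (128 output channels block by 32o; 64 input channels block by 8i * 4i = 32)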


def build_and_run(inputs, func, target, target_host, *args, **kwargs):
@@ -93,26 +111,10 @@ def get_conv2d_nhwc_shape(shape_nhwc, kernel_size, strides, padding, dilation, o
)


def verify_conv2d(output, ref_output, dtype):
    # nhwc8h8w32c
    if len(output.shape) == 7:
        # nhwc8h8w32c -> nhwc
        output = output.transpose(0, 1, 4, 2, 5, 3, 6).reshape(
            output.shape[0],
            output.shape[1] * output.shape[4],
            output.shape[2] * output.shape[5],
            output.shape[3] * output.shape[6],
        )

    # nhwhwc
    else:
        # nhwhwc -> nhwc
        output = output.transpose(0, 1, 3, 2, 4, 5).reshape(
            output.shape[0],
            output.shape[1] * output.shape[3],
            output.shape[2] * output.shape[4],
            output.shape[5],
        )
def conv2d_verify(output, ref_output, dtype):
    # nhwc8h8w32c -> nhwc
    logical_output_shape = get_logical_shape(output.shape)
    output = output.transpose(0, 1, 4, 2, 5, 3, 6).reshape(logical_output_shape)

    # slice output to match ref_output shape
    # e.g. 8x8 spatial 3x3 filter = 6x6 ref output
@@ -129,3 +131,64 @@ def verify_conv2d(output, ref_output, dtype):
elif dtype == "float32":
tol = {"rtol": 1e-4, "atol": 2e-4}
tvm.testing.assert_allclose(output, ref_output, **tol)


def conv2d_compute(X, filt, pad, stride, dilation):
    block_shape = get_block_shape()
    block_H, block_W, block_C = block_shape
    filter_Cio, filter_Ki, filter_Cii = get_filter_block_shape()
    filter_Ci = filter_Cio * filter_Cii

    shape_filter = filt.shape
    kernel_size = tuple(shape_filter[2:4])
    out_channels = shape_filter[0] * shape_filter[5]

    logical_input_shape = get_logical_shape(X.shape)
    logical_output_shape = get_conv2d_nhwc_shape(
        logical_input_shape,
        kernel_size,
        stride,
        pad,
        dilation,
        out_channels,
    )

    output_shape = get_packed_shape(logical_output_shape)
    n, ho, wo, ko, hi, wi, ki = output_shape
    rh = te.reduce_axis((0, kernel_size[0]), name="rh")
    rw = te.reduce_axis((0, kernel_size[1]), name="rw")
    rc = te.reduce_axis((0, logical_input_shape[3]), name="rc")

    def compute(n, ho, wo, ko, hi, wi, ki):
        # map the packed output coordinate plus reduction offsets back to
        # packed input coordinates
        h = ho * block_H + hi
        h_contig = h * stride[0] + rh
        h_block_id = h_contig // block_H
        h_block_offset = h_contig % block_H

        w = wo * block_W + wi
        w_contig = w * stride[1] + rw
        w_block_id = w_contig // block_W
        w_block_offset = w_contig % block_W

        c_block_id = rc // block_C
        c_block_offset = rc % block_C

        # decompose the input-channel reduction axis into the 8i32o4i
        # filter block coordinates: rco (outer), rcio (8i), rcii (4i)
        rco = rc // filter_Ci
        rcio = (rc % filter_Ci) // filter_Cii
        rcii = rc % filter_Cii

        return te.sum(
            X[
                n,
                h_block_id,
                w_block_id,
                c_block_id,
                h_block_offset,
                w_block_offset,
                c_block_offset,
            ]
            * filt[ko, rco, rh, rw, rcio, ki, rcii],
            axis=[rh, rw, rc],
        )

    return output_shape, compute
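
# A minimal usage sketch (an assumption, not part of this module: pad, stride
# and dilation are taken to be (h, w) tuples; shapes match the 1x1 / 64-in /
# 128-out case from test_conv2d_blocked.md):
#
#   X = te.placeholder((1, 8, 8, 2, 8, 8, 32), dtype="float32", name="X")
#   filt = te.placeholder((4, 2, 1, 1, 8, 32, 4), dtype="float32", name="filt")
#   shape, compute = conv2d_compute(X, filt, pad=(0, 0), stride=(1, 1), dilation=(1, 1))
#   Y = te.compute(shape, compute, name="Y")
#   s = te.create_schedule(Y.op)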
74 changes: 36 additions & 38 deletions tests/python/contrib/test_hexagon/test_conv2d_blocked.md
@@ -23,24 +23,23 @@ This is a baseline 1x1 conv2d schedule for Hexagon.

## Command

    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-1-1-0-float32-1-1-1-64-64-128-llvm]"
    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-1-128-1-1-float32-llvm]"

## Parameters

| Parameter | Value |
| --------- | ----------- |
| Batch | 1 |
| Filter | 1x1 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Output Ch | 128 |
| Stride | 1 |
| Padding | 0 |
| Layout | NHWC8h8w32c |
| Parameter | Value |
| --------- | ----- |
| Batch | 1 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Padding | 0 |
| Stride | 1 |
| Filter | 1x1 |
| Output Ch | 128 |
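
Under this schedule's `8h8w32c` activation and `8i32o4i` filter blocking, these
parameters imply the packed shapes below (a sketch using the helpers in
`infrastructure.py`; values shown are the folded integer results):

    get_packed_shape([1, 64, 64, 64])         # -> [1, 8, 8, 2, 8, 8, 32]
    get_packed_filter_shape([128, 64, 1, 1])  # -> [4, 2, 1, 1, 8, 32, 4]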

## Assumptions

* Pattern matching for microkernels is not sensitive to cache reads and writes between the outer height (ho) and outer width (wo) loops.
* n/a

## To Do

@@ -174,26 +173,25 @@ The key changes in TIR versus the above are...

## Command

    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-1-1-0-float32-2-2-1-64-64-128-llvm]"
    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-1-128-2-2-float32-llvm]"

## Parameters

| Parameter | Value |
| --------- | ----------- |
| Batch | 1 |
| Filter | 1x1 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Output Ch | 128 |
| Stride | 1 |
| Padding | 0 |
| Layout | NHWC8h8w32c |
| k_split | 2 |
| h_split | 2 |
| Parameter | Value |
| --------- | ----- |
| Batch | 1 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Padding | 0 |
| Stride | 1 |
| Filter | 1x1 |
| Output Ch | 128 |
| k_split | 2 |
| h_split | 2 |
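
A sketch of how the `k_split` / `h_split` factors could be applied with the
`te` schedule API (an illustration under assumed axis naming; the actual
schedule lives in test_conv2d_blocked.py):

    s = te.create_schedule(Y.op)
    n, ho, wo, ko, hi, wi, ki = s[Y].op.axis
    koo, koi = s[Y].split(ko, nparts=2)  # k_split = 2
    hoo, hoi = s[Y].split(ho, nparts=2)  # h_split = 2
    s[Y].reorder(n, koo, hoo, koi, hoi, wo, hi, wi, ki)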

## Assumptions

* n/a - With the loop splits on `ko` and `ho`, the compute schedule is now over `ko.inner`, `ho.inner`, `wo`, etc., which should fit the pattern matching for microkernels.
* n/a

## To Do

@@ -350,21 +348,21 @@ The `if` statement above indicates NOT to prefetch the vertically adjacent slice

## Command

    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[conv2d_nhwc8h8w32c-3-1-0-float32-2-2-1-64-64-128-llvm]"
    pytest -sv "tests/python/contrib/test_hexagon/test_conv2d_blocked.py::TestConv2dPackedFilter::test_conv2d[1-64-64-0-1-3-128-2-2-float32-llvm]"

## Parameters

| Parameter | Value |
| --------- | ----------- |
| Batch | 1 |
| Filter | 3x3 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Output Ch | 128 |
| Stride | 1 |
| Padding | 0 |
| Layout | NHWC8h8w32c |
| h_split | 2 |
| Parameter | Value |
| --------- | ----- |
| Batch | 1 |
| Spatial | 64x64 |
| Input Ch | 64 |
| Padding | 0 |
| Stride | 1 |
| Filter | 3x3 |
| Output Ch | 128 |
| k_split | 2 |
| h_split | 2 |
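
With a 3x3 filter and no padding, the logical output is 1x62x62x128
(64 - 3 + 1 = 62), which still packs to full 8x8x32 blocks (a sketch using the
helper in `infrastructure.py`):

    get_packed_shape([1, 62, 62, 128])  # -> [1, 8, 8, 4, 8, 8, 32]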

## Assumptions
