Commit 76f0f39

joshherr-quic authored and csullivan committed

[Hexagon] Float and quantized dense operators with schedules (apache#12873)
This PR implements dense operators for float and quantized types. The quantized implementation uses floating point for its intermediate compute type; fixed point will be investigated in the future. float16 accuracy is questionable and needs further investigation in an actual model (not just a unit test).
1 parent 8a0d6d1 commit 76f0f39
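
For reference, the quantized path added in this commit dequantizes both operands to float32, accumulates the matmul in float32, and then requantizes to the output type. Below is a minimal NumPy sketch of that arithmetic; the quantization parameters and shapes are illustrative, not taken from the commit.

import numpy as np

# Illustrative quantization parameters (not from this commit).
scale_a, zero_a = 0.05, 3       # activation scale / zero point
scale_b, zero_b = 0.02, 10      # weight scale / zero point
scale_out, zero_out = 0.10, 4   # output scale / zero point

a_q = np.random.randint(0, 255, size=(2, 8)).astype("uint8")   # [batch, in_dim]
b_q = np.random.randint(0, 255, size=(8, 4)).astype("uint8")   # [in_dim, out_dim]

# Dequantize to float32 and accumulate the matmul in float32,
# mirroring the compute expression in qdense_compute.
acc = (scale_a * (a_q.astype("float32") - zero_a)) @ (scale_b * (b_q.astype("float32") - zero_b))

# Requantize the float32 accumulator back to the quantized output dtype
# (no explicit rounding or saturation, matching the TE expression).
out_q = (acc / scale_out + zero_out).astype("uint8")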

File tree

7 files changed: +654, -15 lines


python/tvm/topi/hexagon/qnn/__init__.py

Lines changed: 7 additions & 10 deletions
@@ -17,16 +17,13 @@
 
 """ Computes and schedules for Hexagon quantized ops """
 
+from .adaptive_avg_pool1d import *
 from .avg_pool2d import qnn_avg_pool2d_compute, qnn_avg_pool2d_schedule
-from .qadd_qsub_qmul import *
-from .dequantize import (
-    dequantize_compute,
-    dequantize_schedule,
-)
-
-from .quantize import quantize_compute, tir_quantize_schedule
+from .conv2d_alter_op import *
+from .dequantize import dequantize_compute, dequantize_schedule
+from .global_avg_pool2d import *
 from .nn import *
+from .qadd_qsub_qmul import *
+from .qdense import *
 from .qdepthwise_conv2d_slice import qdepthwise_conv2d_compute, qdepthwise_conv2d_schedule
-from .adaptive_avg_pool1d import *
-from .global_avg_pool2d import *
-from .conv2d_alter_op import *
+from .quantize import quantize_compute, tir_quantize_schedule
python/tvm/topi/hexagon/qnn/qdense.py (new file)

Lines changed: 193 additions & 0 deletions
@@ -0,0 +1,193 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Schedule for dense operator"""

from tvm import te, tir
from tvm.topi import tag
from ..utils import get_layout_transform_fn


def qdense_compute(
    tensor_a,
    tensor_b,
    zero_a,
    scale_a,
    zero_b,
    scale_b,
    zero_out=None,
    scale_out=None,
    bias=None,
    q_dtype=None,
):
    """Hexagon's implementation of a sliced dense operator in Topi.
    Uses matmul.

    Parameters
    ----------
    tensor_a : tvm.te.Tensor
        data 2-D with shape [batch, in_dim]

    tensor_b : tvm.te.Tensor
        weight 2-D with shape [in_dim, out_dim]

    zero_a : integer
        quantization zero point for tensor a.

    scale_a : float
        quantization scale for tensor a.

    zero_b : integer
        quantization zero point for tensor b.

    scale_b : float
        quantization scale for tensor b.

    zero_out : Optional[integer]
        quantization zero point for output.

    scale_out : Optional[float]
        quantization scale for output.

    bias : Optional[tvm.te.Tensor]
        1-D with shape [out_dim]

    q_dtype : Optional[str]
        The output type.

    Returns
    -------
    mat : tvm.te.Tensor
        2-D with shape [batch, out_dim]

    """
    if bias is not None:
        assert len(bias.shape) == 1
    if q_dtype is None:
        q_dtype = tensor_a.dtype

    batch, in_dim = tensor_a.shape
    out_dim, red_dim = tensor_b.shape

    # cmp should be done by values
    assert int(in_dim) == int(red_dim)

    k = te.reduce_axis((0, in_dim), name="k")
    compute_lambda = lambda n, m: te.sum(
        scale_a
        * (tensor_a[n, k].astype("float32") - zero_a)
        * scale_b
        * (tensor_b[k, m].astype("float32") - zero_b),
        axis=k,
    )
    compute_name = "qmatmul_sliced"

    out = te.compute(
        (batch, out_dim),
        compute_lambda,
        name=compute_name,
        attrs={"layout_free_placeholders": [tensor_b]},
    )

    if bias is not None:
        out = te.compute(
            (batch, out_dim),
            lambda i, j: out[i, j] + bias[j],
            tag=tag.BROADCAST,
            name="bias",
        )

    # Requantization of dense
    if scale_out is not None:
        out = te.compute(
            (batch, out_dim),
            lambda *i: (out[i] / scale_out + zero_out).astype(q_dtype),
            name="requantize",
        )

    return out


def qdense_schedule(outs, ins, output_layout: str, input_layout: str):
    """Schedule for dense op.

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of dense in the format
        of an array of tensors.

    ins: Array of Tensor
        Input tensors into graph.

    output_layout: str
        Descriptor string for physical layout

    input_layout: str
        Descriptor string for physical layout

    Returns
    -------
    sch: Schedule
        The computation schedule for the op.
    """
    if not isinstance(ins, list):
        ins = [ins]
    if not isinstance(outs, list):
        outs = [outs]

    func = te.create_prim_func([*ins, *outs])
    s = tir.Schedule(func)

    matmul = s.get_block("qmatmul_sliced")
    try:
        requantize = s.get_block("requantize")
    except tir.schedule.schedule.ScheduleError:
        requantize = None
    try:
        bias = s.get_block("bias")
    except tir.schedule.schedule.ScheduleError:
        bias = None

    input_transform_fn = get_layout_transform_fn(input_layout)
    output_transform_fn = get_layout_transform_fn(output_layout)

    # Transform input and output buffer
    s.transform_layout(matmul, ("read", 0), input_transform_fn)
    if requantize is not None:
        s.transform_layout(requantize, ("write", 0), output_transform_fn)
    elif bias is not None:
        s.transform_layout(bias, ("write", 0), output_transform_fn)
    else:
        s.transform_layout(matmul, ("write", 0), output_transform_fn)

    # Vectorize
    _, matmul_c, _ = s.get_loops(matmul)
    _, matmul_c_inner = s.split(matmul_c, [None, 128])
    s.vectorize(matmul_c_inner)

    # Compute everything inline
    if bias is not None and requantize is not None:
        _, bias_c = s.get_loops(bias)
        s.compute_at(matmul, bias_c)
        _, out_c = s.get_loops(requantize)
        s.compute_at(bias, out_c)
    elif bias is not None and requantize is None:
        _, out_c = s.get_loops(bias)
        s.compute_at(matmul, out_c)

    return s
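
As a rough usage sketch (not part of the commit), qdense_compute can be driven directly from TE placeholders. The shapes, dtypes, and quantization parameters below are made up; scheduling with qdense_schedule additionally needs input/output layout strings recognized by get_layout_transform_fn, so it is only indicated in a comment.

from tvm import te
from tvm.topi.hexagon.qnn import qdense_compute

# Illustrative shapes and quantization parameters (not from this commit).
act = te.placeholder((128, 128), dtype="uint8", name="act")  # [batch, in_dim]
wgt = te.placeholder((128, 128), dtype="uint8", name="wgt")  # [in_dim, out_dim]

# Dequantize-to-float32 matmul followed by requantization to uint8.
out = qdense_compute(
    act,
    wgt,
    zero_a=3,
    scale_a=0.05,
    zero_b=10,
    scale_b=0.02,
    zero_out=4,
    scale_out=0.1,
    q_dtype="uint8",
)

# A schedule would then be built roughly as:
#   sched = qdense_schedule([out], [act, wgt], output_layout, input_layout)
# where the layout strings must be ones that get_layout_transform_fn accepts.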

python/tvm/topi/hexagon/slice_ops/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -37,3 +37,4 @@
 from .dwconv2d import *
 from .depth_to_space import d2s_compute, d2s_schedule
 from .global_avg_pool2d import *
+from .dense import *
python/tvm/topi/hexagon/slice_ops/dense.py (new file)

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""Schedule for dense operator"""

from tvm import te, tir
from tvm.topi import tag
from ..utils import get_layout_transform_fn


def dense_compute(tensor_a, tensor_b, bias=None, out_dtype=None):
    """Hexagon's implementation of a sliced dense operator in Topi.
    Uses matmul.

    Parameters
    ----------
    tensor_a : tvm.te.Tensor
        data 2-D with shape [batch, in_dim]

    tensor_b : tvm.te.Tensor
        weight 2-D with shape [in_dim, out_dim]

    bias : Optional[tvm.te.Tensor]
        1-D with shape [out_dim]

    out_dtype : Optional[str]
        The output type. This is used for mixed precision.

    Returns
    -------
    output : tvm.te.Tensor
        2-D with shape [batch, out_dim]

    """
    if bias is not None:
        assert len(bias.shape) == 1
    if out_dtype is None:
        out_dtype = tensor_a.dtype

    batch, in_dim = tensor_a.shape
    out_dim, red_dim = tensor_b.shape

    # cmp should be done by values
    assert int(in_dim) == int(red_dim)

    k = te.reduce_axis((0, in_dim), name="k")
    compute_lambda = lambda n, m: te.sum(
        tensor_a[n, k].astype(out_dtype) * tensor_b[k, m].astype(out_dtype), axis=k
    )
    compute_name = "matmul_sliced"
    compute_tag = "matmul"

    mat = te.compute(
        (batch, out_dim),
        compute_lambda,
        name=compute_name,
        tag=compute_tag,
        attrs={"layout_free_placeholders": [tensor_b]},
    )

    if bias is not None:
        mat = te.compute(
            (batch, out_dim),
            lambda i, j: mat[i, j] + bias[j],
            tag=tag.BROADCAST,
            name="bias",
        )

    return mat


def dense_schedule(outs, ins, output_layout: str, input_layout: str):
    """Schedule for dense op.

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of dense in the format
        of an array of tensors.

    ins: Array of Tensor
        Input tensors into graph.

    output_layout: str
        Descriptor string for physical layout

    input_layout: str
        Descriptor string for physical layout

    Returns
    -------
    sch: Schedule
        The computation schedule for the op.
    """
    if not isinstance(ins, list):
        ins = [ins]
    if not isinstance(outs, list):
        outs = [outs]

    func = te.create_prim_func([*ins, *outs])
    s = tir.Schedule(func)

    matmul = s.get_block("matmul_sliced")
    try:
        bias = s.get_block("bias")
    except tir.schedule.schedule.ScheduleError:
        bias = None

    input_transform_fn = get_layout_transform_fn(input_layout)
    output_transform_fn = get_layout_transform_fn(output_layout)

    # No bias
    if bias is None:
        s.transform_layout(matmul, ("read", 0), input_transform_fn)
        # s.transform_layout(matmul, ("read", 1), input_transform_fn)
        s.transform_layout(matmul, ("write", 0), output_transform_fn)
    else:
        s.transform_layout(matmul, ("read", 0), input_transform_fn)
        s.transform_layout(bias, ("write", 0), output_transform_fn)

    _, matmul_c, _ = s.get_loops(matmul)
    _, matmul_c_inner = s.split(matmul_c, [None, 64])
    s.vectorize(matmul_c_inner)

    if bias is not None:
        _, bias_c = s.get_loops(bias)
        _, bias_c_inner = s.split(bias_c, [None, 64])
        s.vectorize(bias_c_inner)

    return s
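
A similar sketch (again, not part of the commit) for the float path; float16 is used here because the commit message calls out its accuracy as an open question, and the dense_schedule call is only indicated because it needs layout strings recognized by get_layout_transform_fn.

from tvm import te
from tvm.topi.hexagon.slice_ops import dense_compute

# Illustrative float16 dense with a bias term (shapes are made up).
data = te.placeholder((64, 128), dtype="float16", name="data")        # [batch, in_dim]
weight = te.placeholder((128, 128), dtype="float16", name="weight")   # [in_dim, out_dim]
bias = te.placeholder((128,), dtype="float16", name="bias")           # [out_dim]

# Plain float matmul plus broadcast bias add.
out = dense_compute(data, weight, bias=bias, out_dtype="float16")

# Scheduling would then follow the same pattern as the quantized version:
#   sched = dense_schedule([out], [data, weight, bias], output_layout, input_layout)
# which transforms the input/output layouts and vectorizes the inner output
# loop by 64 lanes.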
