
Commit 4f6e6bf

[QNN] Convolution 2D Implementation.
1 parent d0fdd1c commit 4f6e6bf

File tree

10 files changed: +1262 −126 lines changed

include/tvm/relay/attrs/reduce.h

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/attrs/reduce.h
+ * \brief Reduce operators.
+ */
+#ifndef TVM_RELAY_ATTRS_REDUCE_H_
+#define TVM_RELAY_ATTRS_REDUCE_H_
+
+#include <tvm/attrs.h>
+#include <tvm/relay/base.h>
+
+namespace tvm {
+namespace relay {
+
+
+/*! \brief Attributes for Reduce operators */
+struct ReduceAttrs : public tvm::AttrsNode<ReduceAttrs> {
+  Array<Integer> axis;
+  bool keepdims;
+  bool exclude;
+
+  TVM_DECLARE_ATTRS(ReduceAttrs, "relay.attrs.ReduceAttrs") {
+    TVM_ATTR_FIELD(axis).set_default(NullValue<Array<Integer>>())
+        .describe(R"code(The axis or axes along which to perform the reduction.
+
+      The default, `axis=()`, will compute over all elements into a
+      scalar array with shape `(1,)`.
+
+      If `axis` is int, a reduction is performed on a particular axis.
+
+      If `axis` is a tuple of ints, a reduction is performed on all the axes
+      specified in the tuple.
+
+      If `exclude` is true, reduction will be performed on the axes that are
+      NOT in axis instead.)code");
+
+    TVM_ATTR_FIELD(keepdims).set_default(false)
+        .describe("If this is set to `True`, the reduced axes are left "
+                  "in the result as dimensions with size one.");
+    TVM_ATTR_FIELD(exclude).set_default(false)
+        .describe("Whether to perform the reduction on axes that are NOT in axis instead.");
+  }
+};
+
+
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_RELAY_ATTRS_REDUCE_H_
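Of these attributes, `exclude` is the one that is easy to misread. A minimal numpy sketch of the semantics documented above (illustrative only, not TVM code): with exclude=True the reduction runs over every axis NOT listed in `axis`.

    import numpy as np

    x = np.arange(24).reshape(2, 3, 4)
    axis = (1,)
    excluded = tuple(i for i in range(x.ndim) if i not in axis)
    # exclude=False: reduce away axis 1.
    assert np.sum(x, axis=axis).shape == (2, 4)
    # exclude=True: reduce away every axis except axis 1.
    assert np.sum(x, axis=excluded).shape == (3,)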

include/tvm/relay/qnn/attrs.h

Lines changed: 62 additions & 0 deletions
@@ -65,6 +65,68 @@ struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
   }
 };
 
+/*! \brief Attribute for quantized conv2d operator */
+struct QConv2DAttrs : public tvm::AttrsNode<QConv2DAttrs> {
+  // Traditional conv2d attributes.
+  Array<IndexExpr> strides;
+  Array<IndexExpr> padding;
+  Array<IndexExpr> dilation;
+  int groups;
+  IndexExpr channels;
+  Array<IndexExpr> kernel_size;
+  std::string data_layout;
+  std::string kernel_layout;
+  std::string out_layout;
+  DataType out_dtype;
+
+  // Quantization related attributes.
+  int32_t input_zero_point;
+  int32_t kernel_zero_point;
+
+  TVM_DECLARE_ATTRS(QConv2DAttrs, "relay.attrs.QConv2DAttrs") {
+    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the strides of the convolution.");
+    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
+        .describe("If padding is non-zero, then the input is implicitly zero-padded "
+                  "on both sides for padding number of points.");
+    TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
+        .describe("Specifies the dilation rate to use for dilated convolution.");
+    TVM_ATTR_FIELD(groups).set_default(1)
+        .describe("Controls the connections between inputs and outputs. "
+                  "At groups=1, all inputs are convolved to all outputs. "
+                  "At groups=2, the operation becomes equivalent to having two convolution "
+                  "layers side by side, each seeing half the input channels, and producing "
+                  "half the output channels, and both subsequently concatenated.");
+    TVM_ATTR_FIELD(channels)
+        .describe("The number of output channels in the convolution."
+                  " If it is not set, inferred by shape of the weight.")
+        .set_default(NullValue<IndexExpr>());
+    TVM_ATTR_FIELD(kernel_size)
+        .describe("Specifies the dimensions of the convolution window.")
+        .set_default(NullValue<Array<IndexExpr> >());
+    TVM_ATTR_FIELD(data_layout).set_default("NCHW")
+        .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Convolution is applied on the 'H' and "
+                  "'W' dimensions.");
+    TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
+        .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc. "
+                  "'O', 'I', 'H', 'W' stand for num_filter, input_channel, height, and width "
+                  "dimensions respectively.");
+    TVM_ATTR_FIELD(out_layout).set_default("")
+        .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc. "
+                  "'N', 'C', 'H', 'W' stand for batch, channel, height, and width "
+                  "dimensions respectively. Defaults to the same as the input layout.");
+    TVM_ATTR_FIELD(out_dtype)
+        .set_default(NullValue<DataType>())
+        .describe("Output data type, set to explicit type under mixed precision setting.");
+    TVM_ATTR_FIELD(input_zero_point)
+        .describe("The zero point of the input tensor.");
+    TVM_ATTR_FIELD(kernel_zero_point)
+        .describe("The zero point of the kernel tensor.");
+  }
+};
+
 }  // namespace qnn
 }  // namespace relay
 }  // namespace tvm
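The two zero-point fields are what separate QConv2DAttrs from the ordinary Conv2DAttrs. As a hedged reference for the arithmetic they imply, here is a numpy sketch of textbook zero-point convolution (NCHW data, OIHW kernel, stride 1, no padding; `qconv2d_ref` is a made-up name, and the commit's actual lowering lives in the C++ op):

    import numpy as np

    def qconv2d_ref(qdata, qweight, input_zero_point, kernel_zero_point):
        # Shift both operands by their zero points and accumulate in int32,
        # matching out_dtype="int32" and an output zero point of 0.
        n, c, h, w = qdata.shape
        o, i, kh, kw = qweight.shape
        assert c == i
        data = qdata.astype(np.int32) - input_zero_point
        weight = qweight.astype(np.int32) - kernel_zero_point
        out = np.zeros((n, o, h - kh + 1, w - kw + 1), dtype=np.int32)
        for y in range(out.shape[2]):
            for x in range(out.shape[3]):
                patch = data[:, :, y:y + kh, x:x + kw]  # (n, c, kh, kw)
                out[:, :, y, x] = np.tensordot(patch, weight,
                                               axes=([1, 2, 3], [1, 2, 3]))
        return out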

python/tvm/relay/qnn/op/qnn.py

Lines changed: 80 additions & 0 deletions
@@ -71,3 +71,83 @@ def requantize(data,
                            output_zero_point,
                            rounding,
                            out_dtype)
+
+
+def conv2d(quantized_data,
+           quantized_weight,
+           input_zero_point,
+           kernel_zero_point,
+           strides=(1, 1),
+           padding=(0, 0),
+           dilation=(1, 1),
+           groups=1,
+           channels=None,
+           kernel_size=None,
+           data_layout="NCHW",
+           kernel_layout="OIHW",
+           out_layout="",
+           out_dtype="int32"):
+    r"""Quantized 2D convolution.
+
+    This operator takes the quantized_weight as the convolution kernel
+    and convolves it with quantized_data to produce an output quantized tensor.
+    The scale of the output quantized tensor is the product of the weight_scale
+    and input_scale of the input quantized tensors. The zero point of the output
+    quantized tensor is 0. By default, the dtype of the output is int32. Please also
+    see the Requantize operator to understand how the output is scaled back to (u)int8.
+
+    Parameters
+    ----------
+    quantized_data : tvm.relay.Expr
+        The input quantized_data to the operator.
+
+    quantized_weight : tvm.relay.Expr
+        The quantized_weight expression.
+
+    input_zero_point : int
+        The zero point of the quantized_data distribution.
+
+    kernel_zero_point : int
+        The zero point of the quantized_kernel distribution.
+
+    strides : tuple of int, optional
+        The strides of convolution.
+
+    padding : tuple of int, optional
+        The padding of convolution on both sides of inputs before convolution.
+
+    dilation : tuple of int, optional
+        Specifies the dilation rate to be used for dilated convolution.
+
+    groups : int, optional
+        Number of groups for grouped convolution.
+
+    channels : int, optional
+        Number of output channels of this convolution.
+
+    kernel_size : tuple of int, optional
+        The spatial dimensions of the convolution kernel.
+
+    data_layout : str, optional
+        Layout of the input.
+
+    kernel_layout : str, optional
+        Layout of the quantized_weight.
+
+    out_layout : str, optional
+        Layout of the output; by default, out_layout is the same as data_layout.
+
+    out_dtype : str, optional
+        Specifies the output data type for mixed precision conv2d.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The computed result.
+    """
+    return _make.conv2d(quantized_data, quantized_weight,
+                        input_zero_point, kernel_zero_point,
+                        strides, padding, dilation,
+                        groups, channels, kernel_size,
+                        data_layout, kernel_layout, out_layout, out_dtype)
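A short usage sketch for the new entry point, assuming it is exposed as tvm.relay.qnn.op.conv2d alongside requantize (shapes and zero points here are illustrative):

    from tvm import relay

    data = relay.var("data", shape=(1, 3, 224, 224), dtype="uint8")
    weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="uint8")
    out = relay.qnn.op.conv2d(data, weight,
                              input_zero_point=128,
                              kernel_zero_point=127,
                              kernel_size=(3, 3),
                              channels=16,
                              out_dtype="int32")
    # `out` holds int32 accumulators; requantize(...) scales back to (u)int8.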

src/relay/op/nn/convolution.cc

Lines changed: 3 additions & 95 deletions
(Note: Conv2DRel is not deleted outright here. Judging by the new `#include "nn.h"` and the `Conv2DRel<Conv2DAttrs>` instantiations below, the type relation has been templated over the attribute type and moved into nn.h so the quantized conv2d can reuse it.)

@@ -27,6 +27,7 @@
 #include <tvm/relay/attrs/nn.h>
 #include <vector>
 
+#include "nn.h"
 #include "../../pass/alter_op_layout.h"
 
 namespace tvm {
@@ -35,99 +36,6 @@ namespace relay {
 
 // relay.nn.conv2d
 TVM_REGISTER_NODE_TYPE(Conv2DAttrs);
 
-bool Conv2DRel(const Array<Type>& types,
-               int num_inputs,
-               const Attrs& attrs,
-               const TypeReporter& reporter) {
-  CHECK_EQ(types.size(), 3);
-  const auto* data = types[0].as<TensorTypeNode>();
-  const auto* weight = types[1].as<TensorTypeNode>();
-  if (data == nullptr) return false;
-  static const Layout kNCHW("NCHW");
-  static const Layout kOIHW("OIHW");
-
-  const Conv2DAttrs* param = attrs.as<Conv2DAttrs>();
-  CHECK(param != nullptr);
-  const Layout in_layout(param->data_layout);
-  const Layout kernel_layout(param->kernel_layout);
-
-  const auto trans_in_layout = BijectiveLayoutNode::make(in_layout, kNCHW);
-  CHECK(trans_in_layout.defined())
-      << "Conv only support input layouts that are convertible from NCHW."
-      << " But got " << in_layout;
-
-  const auto trans_kernel_layout = BijectiveLayoutNode::make(kernel_layout, kOIHW);
-  CHECK(trans_kernel_layout.defined())
-      << "Conv only support kernel layouts that are convertible from OIHW."
-      << " But got " << kernel_layout;
-
-  Layout out_layout(param->out_layout == "" ? param->data_layout : param->out_layout);
-  const auto trans_out_layout = BijectiveLayoutNode::make(out_layout, kNCHW);
-  CHECK(trans_out_layout.defined())
-      << "Conv only support output layouts that are convertible from NCHW."
-      << " But got " << out_layout;
-
-  Array<IndexExpr> dshape_nchw = trans_in_layout.ForwardShape(data->shape);
-
-  IndexExpr channels, dilated_ksize_y, dilated_ksize_x;
-  // infer weight if the kernel_size and channels are defined
-  if (param->kernel_size.defined() && param->channels.defined()) {
-    CHECK_EQ(param->kernel_size.size(), 2);
-    CHECK_EQ(param->dilation.size(), 2);
-    Array<IndexExpr> wshape(
-        {param->channels,
-         dshape_nchw[1] / param->groups,
-         param->kernel_size[0],
-         param->kernel_size[1]});
-    wshape = trans_kernel_layout.BackwardShape(wshape);
-    channels = param->channels;
-    dilated_ksize_y = 1 + (param->kernel_size[0] - 1) * param->dilation[0];
-    dilated_ksize_x = 1 + (param->kernel_size[1] - 1) * param->dilation[1];
-    DataType weight_dtype = data->dtype;
-    if (weight != nullptr) {
-      weight_dtype = weight->dtype;
-    }
-    // assign result to reporter
-    reporter->Assign(types[1], TensorTypeNode::make(wshape, weight_dtype));
-  } else {
-    // use weight to infer the conv shape.
-    if (weight == nullptr) return false;
-    auto wshape = trans_kernel_layout.ForwardShape(weight->shape);
-    if (param->kernel_size.defined()) {
-      CHECK_EQ(param->kernel_size.size(), 2);
-      // check the size
-      CHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) &&
-            reporter->AssertEQ(param->kernel_size[1], wshape[3]))
-          << "Conv2D: shape of weight is inconsistent with kernel_size, "
-          << " kernel_size=" << param->kernel_size
-          << " wshape=" << wshape;
-    }
-    if (param->channels.defined()) {
-      CHECK(reporter->AssertEQ(param->channels, wshape[0]))
-          << "Conv2D: shape of weight is inconsistent with channels, "
-          << " channels=" << param->channels
-          << " wshape=" << wshape;
-    }
-    CHECK(reporter->AssertEQ(dshape_nchw[1] / param->groups, wshape[1]));
-    channels = wshape[0];
-    dilated_ksize_y = 1 + (wshape[2] - 1) * param->dilation[0];
-    dilated_ksize_x = 1 + (wshape[3] - 1) * param->dilation[1];
-  }
-  // dilation
-  Array<IndexExpr> oshape({dshape_nchw[0], channels, 0, 0});
-
-  oshape.Set(2, (dshape_nchw[2] + param->padding[0] * 2 - dilated_ksize_y) / param->strides[0] + 1);
-  oshape.Set(3, (dshape_nchw[3] + param->padding[1] * 2 - dilated_ksize_x) / param->strides[1] + 1);
-  DataType out_dtype = param->out_dtype;
-  if (out_dtype.bits() == 0) {
-    out_dtype = data->dtype;
-  }
-  oshape = trans_out_layout.BackwardShape(oshape);
-  // assign output type
-  reporter->Assign(types[2], TensorTypeNode::make(oshape, out_dtype));
-  return true;
-}
-
 template<typename T>
 Array<Array<Layout> > Conv2DInferCorrectLayout(
     const Attrs& attrs,
@@ -195,7 +103,7 @@ with the layer input to produce a tensor of outputs.
 .add_argument("data", "Tensor", "The input tensor.")
 .add_argument("weight", "Tensor", "The weight tensor.")
 .set_support_level(2)
-.add_type_rel("Conv2D", Conv2DRel)
+.add_type_rel("Conv2D", Conv2DRel<Conv2DAttrs>)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", Conv2DInferCorrectLayout<Conv2DAttrs>);
 
 
@@ -755,7 +663,7 @@ RELAY_REGISTER_OP("nn.contrib_depthwise_conv2d_NCHWc")
 .add_argument("data", "Tensor", "The input tensor.")
 .add_argument("weight", "Tensor", "The weight tensor.")
 .set_support_level(10)
-.add_type_rel("Conv2D", Conv2DRel)
+.add_type_rel("Conv2D", Conv2DRel<Conv2DAttrs>)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                Conv2DInferCorrectLayout<Conv2DAttrs>);