1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -106,6 +106,7 @@ if(MSVC)
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
if(USE_MSVC_MT)
foreach(flag_var
28 changes: 24 additions & 4 deletions include/tvm/relay/qnn/attrs.h
@@ -32,6 +32,25 @@ namespace tvm {
namespace relay {
namespace qnn {

/*! \brief Attribute for qnn add operator */
struct QnnAddAttrs : public tvm::AttrsNode<QnnAddAttrs> {
std::string rounding;

TVM_DECLARE_ATTRS(QnnAddAttrs, "relay.attrs.QnnAddAttrs") {
TVM_ATTR_FIELD(rounding).set_default("UPWARD").describe(
"Defines the rounding direction when the value is midway between "
"two representable values. There are three modes - UPWARD, TONEAREST "
"or TFLITE. The UPWARD and TONEAREST modes behave exactly the same except at the "
"midpoints between the two representable values. At the midpoint, "
"UPWARD rounds towards positive infinity (for example, -1.5 is "
"rounded to -1). TONEAREST is the standard rounding, where the "
"value is rounded away from zero at midpoints (for example, -1.5 "
"rounds to -2). More context can be found in the glibc manual: "
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html. "
"TFLITE mode is more involved; see the TFLite implementation for details.");
}
};

/*! \brief Attribute for requantize operator */
struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
int axis;
@@ -46,14 +65,15 @@ struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
.set_default(-1);
TVM_ATTR_FIELD(rounding).set_default("UPWARD").describe(
"Defines the rounding direction when the value is midway between"
"two representable values. There are two supported modes - UPWARD"
"or TONEAREST. Both modes behave exactly same except at the"
"two representable values. There are two 3 modes - UPWARD, TONEAREST"
"or TFLITE. UP/TONEAREST modes behave exactly same except at the"
"midpoints between the two representable values. At the midpoint,"
"UPWARD rounds towards positive infinity (for example -1.5 will be"
"rounded to -1). TONEAREST is the standard rounding where the"
"value is rounded away from zero at midpoints (for example, -1.5"
"rounds to -2). More context can be found at following gblic manual"
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html.");
"rounds to -2). More context can be found at following glibc manual"
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html."
"TFLITE mode is more complicated, referring to tflite implementation.");
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
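A note on the two simple rounding modes documented above: UPWARD and TONEAREST differ only at exact midpoints between two representable values. The plain-Python sketch below illustrates the documented semantics only; it is not TVM's fixed-point requantize implementation.

import math

def round_upward(x):
    # Midpoints go toward positive infinity: -1.5 -> -1, 1.5 -> 2.
    return math.floor(x + 0.5)

def round_tonearest(x):
    # Midpoints go away from zero: -1.5 -> -2, 1.5 -> 2.
    return math.floor(x + 0.5) if x >= 0 else math.ceil(x - 0.5)

assert round_upward(-1.5) == -1 and round_tonearest(-1.5) == -2
assert round_upward(1.5) == 2 and round_tonearest(1.5) == 2
assert round_upward(-1.2) == round_tonearest(-1.2) == -1  # not a midpoint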
50 changes: 50 additions & 0 deletions include/tvm/tir/analysis.h
@@ -27,6 +27,7 @@
#include <tvm/ir/module.h>
#include <tvm/ir/transform.h>
#include <tvm/tir/expr.h>
#include <tvm/tir/expr_functor.h>
#include <tvm/tir/function.h>
#include <tvm/tir/op_attr_types.h>
#include <tvm/tir/stmt.h>
@@ -56,6 +57,55 @@ struct ExprDeepEqual {
TVM_DLL bool operator()(const PrimExpr& lhs, const PrimExpr& rhs) const;
};

#define PLUS_ONE(OP) \
void VisitExpr_(const OP* op) final { num_symbols_++; }

#define PLUS_ONE_BINARY(OP) \
void VisitExpr_(const OP* op) final { \
num_symbols_++; \
VisitExpr(op->a); \
VisitExpr(op->b); \
}

/*!
* \brief Calculate the expression complexity based on the number of symbols it contains.
*/
class ExprComplexity : public ExprVisitor {
public:
TVM_DLL size_t Eval(const PrimExpr& expr) {
VisitExpr(expr);
return num_symbols_;
}

PLUS_ONE_BINARY(AddNode)
PLUS_ONE_BINARY(SubNode)
PLUS_ONE_BINARY(MulNode)
PLUS_ONE_BINARY(DivNode)
PLUS_ONE_BINARY(ModNode)
PLUS_ONE_BINARY(FloorDivNode)
PLUS_ONE_BINARY(FloorModNode)
PLUS_ONE_BINARY(MinNode)
PLUS_ONE_BINARY(MaxNode)
PLUS_ONE_BINARY(EQNode)
PLUS_ONE_BINARY(NENode)
PLUS_ONE_BINARY(LTNode)
PLUS_ONE_BINARY(LENode)
PLUS_ONE_BINARY(GTNode)
PLUS_ONE_BINARY(GENode)
PLUS_ONE_BINARY(AndNode)
PLUS_ONE_BINARY(OrNode)
PLUS_ONE(VarNode)
PLUS_ONE(FloatImmNode)
PLUS_ONE(IntImmNode)
void VisitExpr_(const NotNode* op) final {
num_symbols_++;
VisitExpr(op->a);
}

private:
size_t num_symbols_{0};
};
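For intuition, the visitor above counts one symbol per leaf and one per operator node, recursing into operands. A self-contained plain-Python analogue of the same counting scheme (the node classes here are invented for illustration and are not TVM APIs):

from dataclasses import dataclass

@dataclass
class Var:
    name: str

@dataclass
class IntImm:
    value: int

@dataclass
class Add:
    a: object
    b: object

def complexity(expr):
    # One symbol per leaf; binary operators add one and recurse into operands.
    if isinstance(expr, (Var, IntImm)):
        return 1
    if isinstance(expr, Add):
        return 1 + complexity(expr.a) + complexity(expr.b)
    raise TypeError("unhandled node")

# (x + 1) + y counts five symbols: two adds, x, 1, and y.
assert complexity(Add(Add(Var("x"), IntImm(1)), Var("y"))) == 5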

/*!
* \brief Find undefined vars in the statement.
* \param stmt The function to be checked.
67 changes: 46 additions & 21 deletions include/tvm/topi/nn/pooling.h
@@ -149,27 +149,52 @@ inline Tensor pool_impl(const Tensor& x, const Array<PrimExpr>& kernel_size,
"tensor", "pool_sum");

// TVM compute for dividing the reduced window sum by kernel size.
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
return div(pool_sum(indices), (kernel_height * kernel_width));
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;

PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::DataType::Int(32), 1));
return div(pool_sum(indices), divide_factor);
}
},
"tensor", kElementWise);
if (x->dtype.code() == DataType::kInt || x->dtype.code() == DataType::kUInt) {
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
PrimExpr kernel_size = kernel_height * kernel_width;
PrimExpr up_rounder = floordiv(kernel_size, 2);
return floordiv(pool_sum(indices) + up_rounder, kernel_size);
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;
PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::Int(32), 1));
PrimExpr up_rounder = floordiv(divide_factor, 2);
return floordiv(pool_sum(indices) + up_rounder, divide_factor);
}
},
"tensor", kElementWise);
} else {
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
return div(pool_sum(indices), (kernel_height * kernel_width));
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;
PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::Int(32), 1));
return div(pool_sum(indices), divide_factor);
}
},
"tensor", kElementWise);
}
} else {
LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
return x;
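The new integer branch above avoids the downward bias of truncating division by adding half the divisor before the floor division, which rounds the average to the nearest integer, with midpoints going toward positive infinity. A minimal sketch of the arithmetic, using Python's floor division as a stand-in for tir floordiv:

def div_round_nearest(s, k):
    # floordiv(s + floordiv(k, 2), k): round-to-nearest division for k > 0.
    return (s + k // 2) // k  # Python's // is floor division, like tir floordiv

assert div_round_nearest(7, 4) == 2    # 1.75 -> 2
assert div_round_nearest(5, 4) == 1    # 1.25 -> 1
assert div_round_nearest(6, 4) == 2    # 1.5  -> 2  (midpoint, toward +inf)
assert div_round_nearest(-6, 4) == -1  # -1.5 -> -1 (midpoint, toward +inf)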
61 changes: 45 additions & 16 deletions python/tvm/relay/frontend/tflite.py
@@ -50,7 +50,7 @@ def __init__(self, tensor_idx, tensor, buffer, qnn_params=None):
class OperatorConverter(object):
"""Operator Converted for converting TFLite ops to Relay ops"""

def __init__(self, model, subgraph, exp_tab):
def __init__(self, model, subgraph, exp_tab, rounding):

try:
from tflite.BuiltinOperator import BuiltinOperator
@@ -66,6 +66,7 @@ def __init__(self, model, subgraph, exp_tab):
self.activation_fn_type = build_str_map(ActivationFunctionType())
self.builtin_options = build_str_map(BuiltinOptions())
self.prefetched_nodes = {}
self.rounding = rounding

# Add more operators
self.convert_map = {
@@ -570,6 +571,7 @@ def convert_reshape(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -840,6 +842,7 @@ def convert_relu(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -915,6 +918,7 @@ def convert_relu6(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -986,6 +990,7 @@ def convert_relu_n1_to_1(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -1228,16 +1233,30 @@ def _convert_elemwise(self, relay_op, op, ignore_qnn_params=False):
if not ignore_qnn_params and lhs_tensor.qnn_params:
assert rhs_tensor.qnn_params, "Both tensors should be quantized."
assert output_tensor.qnn_params, "Output tensor should be quantized."
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
)
has_tflite_rounding_mode = [_qnn.op.add]
if relay_op in has_tflite_rounding_mode:
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
)
else:
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
)
else:
out = relay_op(lhs_expr, rhs_expr)

@@ -1732,6 +1751,7 @@ def _convert_reduce(self, relay_op, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -1904,6 +1924,7 @@ def convert_fully_connected(self, op):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -2157,6 +2178,7 @@ def convert_conv(self, op, conv_type):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
axis=3,
)
@@ -2933,6 +2955,7 @@ def convert_transpose_conv(self, op):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
axis=3,
)
@@ -2965,6 +2988,7 @@ def convert_quantize(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)
return out
@@ -3457,9 +3481,11 @@ def get_scalar_from_constant(expr):
isinstance(expr, _expr.Constant) and not expr.data.shape
), "Expr is not a constant scalar."
value = expr.data.asnumpy()
assert value.dtype == np.dtype(np.int32) or value.dtype == np.dtype(
np.float32
), "value must be float32/int32"
assert (
value.dtype == np.dtype(np.int32)
or value.dtype == np.dtype(np.float32)
or value.dtype == np.dtype(np.float64)
), "value must be float32/float64/int32"
return np.asscalar(value)


@@ -3577,7 +3603,7 @@ def _input_type(model):
return shape_dict, dtype_dict


def from_tflite(model, shape_dict=None, dtype_dict=None):
def from_tflite(model, shape_dict=None, dtype_dict=None, rounding="TFLITE"):
"""Convert from tflite model into compatible relay Function.

Parameters
@@ -3591,6 +3617,9 @@ def from_tflite(model, shape_dict=None, dtype_dict=None):
dtype_dict : dict of str to str
Input types of the model.

rounding : str, optional
Rounding mode passed to the quantized (QNN) operators, applied when a value
falls midway between two representable values; one of "UPWARD", "TONEAREST"
or "TFLITE". Defaults to "TFLITE".

Returns
-------
mod : tvm.IRModule
@@ -3637,7 +3666,7 @@ def from_tflite(model, shape_dict=None, dtype_dict=None):
exp_tab.set_expr(model_input_name, _expr.var(model_input_name, shape=shape, dtype=dtype))

# op code in model
op_converter = OperatorConverter(model, subgraph, exp_tab)
op_converter = OperatorConverter(model, subgraph, exp_tab, rounding)
op_converter.check_unsupported_ops()
op_converter.convert_op_to_relay()

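A minimal usage sketch of the extended entry point, assuming this change is applied; the model path and input name are hypothetical, and the tflite flatbuffers package must be installed:

import tflite
from tvm import relay

with open("model.tflite", "rb") as f:
    tflite_model = tflite.Model.GetRootAsModel(f.read(), 0)

mod, params = relay.frontend.from_tflite(
    tflite_model,
    shape_dict={"input": (1, 224, 224, 3)},  # hypothetical input name/shape
    dtype_dict={"input": "uint8"},
    rounding="TFLITE",  # new parameter; "TFLITE" is also the default here
)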
14 changes: 13 additions & 1 deletion python/tvm/relay/qnn/op/qnn.py
@@ -409,7 +409,15 @@ def conv2d_transpose(


def add(
lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale, output_zero_point
lhs,
rhs,
lhs_scale,
lhs_zero_point,
rhs_scale,
rhs_zero_point,
output_scale,
output_zero_point,
rounding="UPWARD",
):
"""Quantized addition with numpy-style broadcasting.

@@ -439,6 +447,9 @@ def add(
output_zero_point: relay.Expr
The zero point of output quantized expr.

rounding: str, optional
Rounding mode applied when a value falls midway between two representable
values; one of "UPWARD", "TONEAREST" or "TFLITE". Defaults to "UPWARD".

Returns
-------
result : relay.Expr
@@ -454,6 +465,7 @@ def add(
rhs_zero_point,
output_scale,
output_zero_point,
rounding,
)


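An illustrative construction of a quantized add with the new flag; the scales and zero points below are made-up values, not taken from a real model:

from tvm import relay

lhs = relay.var("lhs", shape=(1, 8), dtype="uint8")
rhs = relay.var("rhs", shape=(1, 8), dtype="uint8")
out = relay.qnn.op.add(
    lhs,
    rhs,
    lhs_scale=relay.const(0.02, "float32"),
    lhs_zero_point=relay.const(128, "int32"),
    rhs_scale=relay.const(0.03, "float32"),
    rhs_zero_point=relay.const(100, "int32"),
    output_scale=relay.const(0.05, "float32"),
    output_zero_point=relay.const(120, "int32"),
    rounding="TFLITE",  # added by this change; the default remains "UPWARD"
)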