1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -106,6 +106,7 @@ if(MSVC)
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
if(USE_MSVC_MT)
foreach(flag_var
28 changes: 24 additions & 4 deletions include/tvm/relay/qnn/attrs.h
@@ -32,6 +32,25 @@ namespace tvm {
namespace relay {
namespace qnn {

/*! \brief Attribute for qnn add operator */
struct QnnAddAttrs : public tvm::AttrsNode<QnnAddAttrs> {
std::string rounding;

TVM_DECLARE_ATTRS(QnnAddAttrs, "relay.attrs.QnnAddAttrs") {
TVM_ATTR_FIELD(rounding).set_default("UPWARD").describe(
"Defines the rounding direction when the value is midway between "
"two representable values. There are three modes - UPWARD, TONEAREST "
"or TFLITE. The UPWARD and TONEAREST modes behave exactly the same except at the "
"midpoints between the two representable values. At the midpoint, "
"UPWARD rounds towards positive infinity (for example, -1.5 is "
"rounded to -1). TONEAREST is the standard rounding, where the "
"value is rounded away from zero at midpoints (for example, -1.5 "
"rounds to -2). More context can be found in the glibc manual: "
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html. "
"TFLITE mode is more involved; see the TFLite implementation for details.");
}
};

/*! \brief Attribute for requantize operator */
struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
int axis;
@@ -46,14 +65,15 @@ struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
.set_default(-1);
TVM_ATTR_FIELD(rounding).set_default("UPWARD").describe(
"Defines the rounding direction when the value is midway between"
"two representable values. There are two supported modes - UPWARD"
"or TONEAREST. Both modes behave exactly same except at the"
"two representable values. There are two 3 modes - UPWARD, TONEAREST"
"or TFLITE. UP/TONEAREST modes behave exactly same except at the"
"midpoints between the two representable values. At the midpoint,"
"UPWARD rounds towards positive infinity (for example -1.5 will be"
"rounded to -1). TONEAREST is the standard rounding where the"
"value is rounded away from zero at midpoints (for example, -1.5"
"rounds to -2). More context can be found at following gblic manual"
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html.");
"rounds to -2). More context can be found at following glibc manual"
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html."
"TFLITE mode is more complicated, referring to tflite implementation.");
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
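A note on the two simple rounding modes documented above: UPWARD and TONEAREST differ only at exact midpoints between two representable values. The plain-Python sketch below illustrates the documented semantics only; it is not TVM's fixed-point requantize implementation.

import math

def round_upward(x):
    # Midpoints go toward positive infinity: -1.5 -> -1, 1.5 -> 2.
    return math.floor(x + 0.5)

def round_tonearest(x):
    # Midpoints go away from zero: -1.5 -> -2, 1.5 -> 2.
    return math.floor(x + 0.5) if x >= 0 else math.ceil(x - 0.5)

assert round_upward(-1.5) == -1 and round_tonearest(-1.5) == -2
assert round_upward(1.5) == 2 and round_tonearest(1.5) == 2
assert round_upward(-1.2) == round_tonearest(-1.2) == -1  # not a midpoint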
50 changes: 50 additions & 0 deletions include/tvm/tir/analysis.h
@@ -27,6 +27,7 @@
#include <tvm/ir/module.h>
#include <tvm/ir/transform.h>
#include <tvm/tir/expr.h>
#include <tvm/tir/expr_functor.h>
#include <tvm/tir/function.h>
#include <tvm/tir/op_attr_types.h>
#include <tvm/tir/stmt.h>
@@ -56,6 +57,55 @@ struct ExprDeepEqual {
TVM_DLL bool operator()(const PrimExpr& lhs, const PrimExpr& rhs) const;
};

#define PLUS_ONE(OP) \
void VisitExpr_(const OP* op) final { num_symbols_++; }

#define PLUS_ONE_BINARY(OP) \
void VisitExpr_(const OP* op) final { \
num_symbols_++; \
VisitExpr(op->a); \
VisitExpr(op->b); \
}

/*!
* \brief Calculate the expression complexity based on the number of symbols it contains.
*/
class ExprComplexity : public ExprVisitor {
public:
TVM_DLL size_t Eval(const PrimExpr& expr) {
VisitExpr(expr);
return num_symbols_;
}

PLUS_ONE_BINARY(AddNode)
PLUS_ONE_BINARY(SubNode)
PLUS_ONE_BINARY(MulNode)
PLUS_ONE_BINARY(DivNode)
PLUS_ONE_BINARY(ModNode)
PLUS_ONE_BINARY(FloorDivNode)
PLUS_ONE_BINARY(FloorModNode)
PLUS_ONE_BINARY(MinNode)
PLUS_ONE_BINARY(MaxNode)
PLUS_ONE_BINARY(EQNode)
PLUS_ONE_BINARY(NENode)
PLUS_ONE_BINARY(LTNode)
PLUS_ONE_BINARY(LENode)
PLUS_ONE_BINARY(GTNode)
PLUS_ONE_BINARY(GENode)
PLUS_ONE_BINARY(AndNode)
PLUS_ONE_BINARY(OrNode)
PLUS_ONE(VarNode)
PLUS_ONE(FloatImmNode)
PLUS_ONE(IntImmNode)
void VisitExpr_(const NotNode* op) final {
num_symbols_++;
VisitExpr(op->a);
}

private:
size_t num_symbols_{0};
};
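For intuition, the visitor above counts one symbol per leaf and one per operator node, recursing into operands. A self-contained plain-Python analogue of the same counting scheme (the node classes here are invented for illustration and are not TVM APIs):

from dataclasses import dataclass

@dataclass
class Var:
    name: str

@dataclass
class IntImm:
    value: int

@dataclass
class Add:
    a: object
    b: object

def complexity(expr):
    # One symbol per leaf; binary operators add one and recurse into operands.
    if isinstance(expr, (Var, IntImm)):
        return 1
    if isinstance(expr, Add):
        return 1 + complexity(expr.a) + complexity(expr.b)
    raise TypeError("unhandled node")

# (x + 1) + y counts five symbols: two adds, x, 1, and y.
assert complexity(Add(Add(Var("x"), IntImm(1)), Var("y"))) == 5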

/*!
* \brief Find undefined vars in the statement.
* \param stmt The function to be checked.
67 changes: 46 additions & 21 deletions include/tvm/topi/nn/pooling.h
@@ -149,27 +149,52 @@ inline Tensor pool_impl(const Tensor& x, const Array<PrimExpr>& kernel_size,
"tensor", "pool_sum");

// TVM compute for dividing the reduced window sum by kernel size.
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
return div(pool_sum(indices), (kernel_height * kernel_width));
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;

PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::DataType::Int(32), 1));
return div(pool_sum(indices), divide_factor);
}
},
"tensor", kElementWise);
if (x->dtype.code() == DataType::kInt || x->dtype.code() == DataType::kUInt) {
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
PrimExpr kernel_size = kernel_height * kernel_width;
PrimExpr up_rounder = floordiv(kernel_size, 2);
return floordiv(pool_sum(indices) + up_rounder, kernel_size);
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;
PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::Int(32), 1));
PrimExpr up_rounder = floordiv(divide_factor, 2);
return floordiv(pool_sum(indices) + up_rounder, divide_factor);
}
},
"tensor", kElementWise);
} else {
return tvm::te::compute(
out_shape,
[&](const Array<Var>& output) {
Array<PrimExpr> indices;
for (const Var& var : output) indices.push_back(var);
if (count_include_pad) {
return div(pool_sum(indices), (kernel_height * kernel_width));
} else {
PrimExpr h_start = output[height_axis] * stride_height - pad_top;
PrimExpr w_start = output[width_axis] * stride_width - pad_left;
PrimExpr h_end = min(h_start + kernel_height, height);
PrimExpr w_end = min(w_start + kernel_width, width);
h_start = max(h_start, make_const(DataType::Int(32), 0));
w_start = max(w_start, make_const(DataType::Int(32), 0));
PrimExpr divide_factor = max((h_end - h_start) * (w_end - w_start),
make_const(DataType::Int(32), 1));
return div(pool_sum(indices), divide_factor);
}
},
"tensor", kElementWise);
}
} else {
LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
return x;
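The new integer branch above avoids the downward bias of truncating division by adding half the divisor before the floor division, which rounds the average to the nearest integer, with midpoints going toward positive infinity. A minimal sketch of the arithmetic, using Python's floor division as a stand-in for tir floordiv:

def div_round_nearest(s, k):
    # floordiv(s + floordiv(k, 2), k): round-to-nearest division for k > 0.
    return (s + k // 2) // k  # Python's // is floor division, like tir floordiv

assert div_round_nearest(7, 4) == 2    # 1.75 -> 2
assert div_round_nearest(5, 4) == 1    # 1.25 -> 1
assert div_round_nearest(6, 4) == 2    # 1.5  -> 2  (midpoint, toward +inf)
assert div_round_nearest(-6, 4) == -1  # -1.5 -> -1 (midpoint, toward +inf)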
61 changes: 45 additions & 16 deletions python/tvm/relay/frontend/tflite.py
@@ -50,7 +50,7 @@ def __init__(self, tensor_idx, tensor, buffer, qnn_params=None):
class OperatorConverter(object):
"""Operator Converted for converting TFLite ops to Relay ops"""

def __init__(self, model, subgraph, exp_tab):
def __init__(self, model, subgraph, exp_tab, rounding):

try:
from tflite.BuiltinOperator import BuiltinOperator
@@ -66,6 +66,7 @@ def __init__(self, model, subgraph, exp_tab):
self.activation_fn_type = build_str_map(ActivationFunctionType())
self.builtin_options = build_str_map(BuiltinOptions())
self.prefetched_nodes = {}
self.rounding = rounding

# Add more operators
self.convert_map = {
@@ -570,6 +571,7 @@ def convert_reshape(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -840,6 +842,7 @@ def convert_relu(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -915,6 +918,7 @@ def convert_relu6(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -986,6 +990,7 @@ def convert_relu_n1_to_1(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -1228,16 +1233,30 @@ def _convert_elemwise(self, relay_op, op, ignore_qnn_params=False):
if not ignore_qnn_params and lhs_tensor.qnn_params:
assert rhs_tensor.qnn_params, "Both tensors should be quantized."
assert output_tensor.qnn_params, "Output tensor should be quantized."
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
)
has_tflite_rounding_mode = [_qnn.op.add]
if relay_op in has_tflite_rounding_mode:
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
)
else:
out = relay_op(
lhs=lhs_expr,
rhs=rhs_expr,
lhs_scale=lhs_tensor.qnn_params["scale"],
lhs_zero_point=lhs_tensor.qnn_params["zero_point"],
rhs_scale=rhs_tensor.qnn_params["scale"],
rhs_zero_point=rhs_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
)
else:
out = relay_op(lhs_expr, rhs_expr)

@@ -1732,6 +1751,7 @@ def _convert_reduce(self, relay_op, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -1904,6 +1924,7 @@ def convert_fully_connected(self, op):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)

@@ -2157,6 +2178,7 @@ def convert_conv(self, op, conv_type):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
axis=3,
)
@@ -2933,6 +2955,7 @@ def convert_transpose_conv(self, op):
input_zero_point=new_input_zero_point,
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
axis=3,
)
@@ -2965,6 +2988,7 @@ def convert_quantize(self, op):
input_zero_point=input_tensor.qnn_params["zero_point"],
output_scale=output_tensor.qnn_params["scale"],
output_zero_point=output_tensor.qnn_params["zero_point"],
rounding=self.rounding,
out_dtype=output_tensor_type_str,
)
return out
@@ -3457,9 +3481,11 @@ def get_scalar_from_constant(expr):
isinstance(expr, _expr.Constant) and not expr.data.shape
), "Expr is not a constant scalar."
value = expr.data.asnumpy()
assert value.dtype == np.dtype(np.int32) or value.dtype == np.dtype(
np.float32
), "value must be float32/int32"
assert (
value.dtype == np.dtype(np.int32)
or value.dtype == np.dtype(np.float32)
or value.dtype == np.dtype(np.float64)
), "value must be float32/float64/int32"
return np.asscalar(value)


@@ -3577,7 +3603,7 @@ def _input_type(model):
return shape_dict, dtype_dict


def from_tflite(model, shape_dict=None, dtype_dict=None):
def from_tflite(model, shape_dict=None, dtype_dict=None, rounding="TFLITE"):
"""Convert from tflite model into compatible relay Function.

Parameters
@@ -3591,6 +3617,9 @@ def from_tflite(model, shape_dict=None, dtype_dict=None):
dtype_dict : dict of str to str
Input types of the model.

rounding : str, optional
Rounding mode passed to the quantized (QNN) operators, applied when a value
falls midway between two representable values; one of "UPWARD", "TONEAREST"
or "TFLITE". Defaults to "TFLITE".

Returns
-------
mod : tvm.IRModule
@@ -3637,7 +3666,7 @@ def from_tflite(model, shape_dict=None, dtype_dict=None):
exp_tab.set_expr(model_input_name, _expr.var(model_input_name, shape=shape, dtype=dtype))

# op code in model
op_converter = OperatorConverter(model, subgraph, exp_tab)
op_converter = OperatorConverter(model, subgraph, exp_tab, rounding)
op_converter.check_unsupported_ops()
op_converter.convert_op_to_relay()

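A minimal usage sketch of the extended entry point, assuming this change is applied; the model path and input name are hypothetical, and the tflite flatbuffers package must be installed:

import tflite
from tvm import relay

with open("model.tflite", "rb") as f:
    tflite_model = tflite.Model.GetRootAsModel(f.read(), 0)

mod, params = relay.frontend.from_tflite(
    tflite_model,
    shape_dict={"input": (1, 224, 224, 3)},  # hypothetical input name/shape
    dtype_dict={"input": "uint8"},
    rounding="TFLITE",  # new parameter; "TFLITE" is also the default here
)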
14 changes: 13 additions & 1 deletion python/tvm/relay/qnn/op/qnn.py
@@ -409,7 +409,15 @@ def conv2d_transpose(


def add(
lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale, output_zero_point
lhs,
rhs,
lhs_scale,
lhs_zero_point,
rhs_scale,
rhs_zero_point,
output_scale,
output_zero_point,
rounding="UPWARD",
):
"""Quantized addition with numpy-style broadcasting.

@@ -439,6 +447,9 @@ def add(
output_zero_point: relay.Expr
The zero point of output quantized expr.

rounding: str, optional
Rounding mode applied when a value falls midway between two representable
values; one of "UPWARD", "TONEAREST" or "TFLITE". Defaults to "UPWARD".

Returns
-------
result : relay.Expr
@@ -454,6 +465,7 @@ def add(
rhs_zero_point,
output_scale,
output_zero_point,
rounding,
)


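An illustrative construction of a quantized add with the new flag; the scales and zero points below are made-up values, not taken from a real model:

from tvm import relay

lhs = relay.var("lhs", shape=(1, 8), dtype="uint8")
rhs = relay.var("rhs", shape=(1, 8), dtype="uint8")
out = relay.qnn.op.add(
    lhs,
    rhs,
    lhs_scale=relay.const(0.02, "float32"),
    lhs_zero_point=relay.const(128, "int32"),
    rhs_scale=relay.const(0.03, "float32"),
    rhs_zero_point=relay.const(100, "int32"),
    output_scale=relay.const(0.05, "float32"),
    output_zero_point=relay.const(120, "int32"),
    rounding="TFLITE",  # added by this change; the default remains "UPWARD"
)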