Commit 9403ba6

Adding the fixed point compute handling for requantization.
1 parent f365ea7 commit 9403ba6

File tree

7 files changed: +858, -135 lines


include/tvm/relay/attrs/nn_quantize.h

Lines changed: 5 additions & 2 deletions
@@ -52,7 +52,8 @@ struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
   double input_scale;
   double kernel_scale;
   double output_scale;
-  bool use_integer_computation_for_scale_handling;
+  bool use_int_compute_for_requantize;
+  std::string rounding;

   TVM_DECLARE_ATTRS(QuantizedConv2DAttrs, "relay.attrs.QuantizedConv2DAttrs") {
     TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
@@ -107,8 +108,10 @@ struct QuantizedConv2DAttrs : public tvm::AttrsNode<QuantizedConv2DAttrs> {
         .describe("The scale of the kernel tensor.");
     TVM_ATTR_FIELD(output_scale)
         .describe("The scale of the output tensor.");
-    TVM_ATTR_FIELD(use_integer_computation_for_scale_handling).set_default(false)
+    TVM_ATTR_FIELD(use_int_compute_for_requantize).set_default(false)
         .describe("When true, the integer computation is used to handle output scale");
+    TVM_ATTR_FIELD(rounding).set_default("ceil")
+        .describe("The rounding that has to be used for handling scales.");


  }
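Background on the two new attributes (an editor's sketch, not code from this commit): requantization rescales the int32 accumulator by input_scale * kernel_scale / output_scale, and when use_int_compute_for_requantize is true that float ratio has to be replaced by integer arithmetic. The standard decomposition folds the scale into a Q31 multiplier plus a shift. Helper names below are hypothetical, and the sketch assumes 0 < scale < 1, the usual requantize case:

#include <cmath>
#include <cstdint>

// Decompose scale so that x * scale ~= (x * multiplier) >> (31 - shift).
void ScaleToFixedPoint(double scale, int32_t* multiplier, int* shift) {
  double mantissa = std::frexp(scale, shift);        // scale = mantissa * 2^shift, mantissa in [0.5, 1)
  int64_t q = std::llround(mantissa * (1LL << 31));  // mantissa as a Q31 integer
  if (q == (1LL << 31)) {                            // mantissa rounded up to exactly 1.0
    q /= 2;
    ++*shift;
  }
  *multiplier = static_cast<int32_t>(q);
}

// Apply the multiplier with round-half-up shown for illustration; the new
// `rounding` attribute is what selects this behaviour in the op (default "ceil").
int32_t FixedPointMultiply(int32_t x, int32_t multiplier, int shift) {
  int64_t prod = static_cast<int64_t>(x) * multiplier;  // fits in 62 bits
  int total_shift = 31 - shift;                         // >= 31 when scale < 1
  return static_cast<int32_t>((prod + (1LL << (total_shift - 1))) >> total_shift);
}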

include/tvm/relay/quantize_util.h

Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tvm/relay/quantize_util.h
+ * \brief Utility methods needed by quantized ops that can be shared
+ */
+
+#ifndef TVM_QUANTIZE_UTIL_H
+#define TVM_QUANTIZE_UTIL_H
+
+#include <tvm/expr.h>
+#include "./base.h"
+
+namespace tvm {
+namespace relay {
+
+inline bool is_Int8(const DataType& dtype) {
+  return dtype == Int(8);
+}
+
+inline bool is_UInt8(const DataType& dtype) {
+  return dtype == UInt(8);
+}
+
+inline bool is_Int16(const DataType& dtype) {
+  return dtype == Int(16);
+}
+
+inline bool is_UInt16(const DataType& dtype) {
+  return dtype == UInt(16);
+}
+
+inline bool is_Int32(const DataType& dtype) {
+  return dtype == Int(32);
+}
+
+inline bool is_UInt32(const DataType& dtype) {
+  return dtype == UInt(32);
+}
+
+inline bool is_Float32(const DataType& dtype) {
+  return dtype == Float(32);
+}
+
+inline bool is_quantized_type(const DataType& dtype) {
+  return is_Int8(dtype) || is_UInt8(dtype)
+      || is_Int16(dtype) || is_UInt16(dtype);
+}
+
+enum class QuantizeOpType : uint8_t {
+  Quantize_Requantize,
+  Dequantize
+};
+
+inline bool is_valid_quantized_op_input_type(const QuantizeOpType& op_type, const DataType& in_dtype) {
+  switch (op_type) {
+    case QuantizeOpType::Quantize_Requantize:
+      return is_Float32(in_dtype) || is_quantized_type(in_dtype);
+    case QuantizeOpType::Dequantize:
+      return is_quantized_type(in_dtype);
+    default:
+      return false;
+  }
+}
+
+inline bool is_valid_quantized_op_output_type(const QuantizeOpType& op_type, const DataType& in_dtype) {
+  switch (op_type) {
+    case QuantizeOpType::Quantize_Requantize:
+      return is_quantized_type(in_dtype);
+    case QuantizeOpType::Dequantize:
+      return is_Float32(in_dtype);
+    default:
+      return false;
+  }
+}
+
+inline const int32_t get_qmin(const DataType& dtype) {
+  if (is_Int8(dtype)) {
+    return std::numeric_limits<int8_t>::min();
+  } else if (is_UInt8(dtype)) {
+    return std::numeric_limits<uint8_t>::min();
+  } else if (is_Int16(dtype)) {
+    return std::numeric_limits<int16_t>::min();
+  } else if (is_UInt16(dtype)) {
+    return std::numeric_limits<uint16_t>::min();
+  } else if (is_Int32(dtype)) {
+    return std::numeric_limits<int32_t>::min();
+  } else if (is_UInt32(dtype)) {
+    return std::numeric_limits<uint32_t>::min();
+  }
+  LOG(FATAL) << "Type not supported\n";
+  return -1;
+}
+
+inline const int32_t get_qmax(const DataType& dtype) {
+  if (is_Int8(dtype)) {
+    return std::numeric_limits<int8_t>::max();
+  } else if (is_UInt8(dtype)) {
+    return std::numeric_limits<uint8_t>::max();
+  } else if (is_Int16(dtype)) {
+    return std::numeric_limits<int16_t>::max();
+  } else if (is_UInt16(dtype)) {
+    return std::numeric_limits<uint16_t>::max();
+  } else if (is_Int32(dtype)) {
+    return std::numeric_limits<int32_t>::max();
+  } else if (is_UInt32(dtype)) {
+    return std::numeric_limits<uint32_t>::max();
+  }
+  LOG(FATAL) << "Type not supported\n";
+  return -1;
+}
+
+}  // namespace relay
+}  // namespace tvm
+#endif  // TVM_QUANTIZE_UTIL_H
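A minimal usage sketch for the range helpers above (hypothetical function, assuming this header is included): the last step of requantization clamps the rescaled accumulator into the output dtype's representable range before narrowing.

#include <algorithm>
#include <cstdint>

// Clamp an int32 value into [get_qmin(dtype), get_qmax(dtype)],
// e.g. [-128, 127] when the output dtype is int8.
inline int32_t ClampToOutputRange(int32_t value, const DataType& out_dtype) {
  const int32_t q_min = get_qmin(out_dtype);
  const int32_t q_max = get_qmax(out_dtype);
  return std::min(std::max(value, q_min), q_max);
}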

python/tvm/relay/op/nn/_quantize.py

Lines changed: 11 additions & 2 deletions
@@ -37,7 +37,9 @@ def quantized_conv2d(quantized_data,
                      data_layout="NCHW",
                      kernel_layout="OIHW",
                      out_layout="",
-                     out_dtype=""):
+                     out_dtype="",
+                     rounding="ceil",
+                     use_int_compute_for_requantize=False):
     r"""Quantized 2D convolution.

     This operator takes the quantized_weight as the convolution kernel
@@ -119,6 +121,12 @@ def quantized_conv2d(quantized_data,
     out_dtype : str, optional
         Specifies the output quantized_data type for mixed precision conv2d.

+    rounding : str, optional
+        Specifies which rounding to use - floor, ceil, round, trunc.
+
+    use_int_compute_for_requantize : bool, optional
+        Use fully integer computation for requantizing.
+
     Returns
     -------
     result : tvm.relay.Expr
@@ -130,4 +138,5 @@ def quantized_conv2d(quantized_data,
                            strides, padding, dilation,
                            groups, channels, kernel_size,
                            data_layout, kernel_layout, out_layout,
-                           out_dtype)
+                           out_dtype, rounding,
+                           use_int_compute_for_requantize)
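The four rounding options documented above differ only in how the scaled value is pushed back to an integer. A standalone illustration (hypothetical helper, written over a power-of-two divisor for brevity; it assumes an arithmetic right shift for negative values and s >= 1):

#include <cstdint>
#include <string>

// Divide v by 2^s under each of the documented rounding modes.
int64_t ShiftRightRounded(int64_t v, int s, const std::string& rounding) {
  if (rounding == "floor") return v >> s;                       // toward -infinity
  if (rounding == "ceil")  return (v + (1LL << s) - 1) >> s;    // toward +infinity
  if (rounding == "round") return (v + (1LL << (s - 1))) >> s;  // nearest, half-up
  return v < 0 ? -((-v) >> s) : (v >> s);                       // "trunc": toward zero
}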

src/relay/op/nn/quantized_convolution.cc

Lines changed: 5 additions & 1 deletion
@@ -146,7 +146,9 @@ Expr MakeQuantizeConv2D(Expr quantized_data,
                         std::string data_layout,
                         std::string kernel_layout,
                         std::string out_layout,
-                        DataType out_dtype) {
+                        DataType out_dtype,
+                        std::string rounding,
+                        bool use_int_compute_for_requantize) {
   auto attrs = make_node<QuantizedConv2DAttrs>();
   attrs->strides = std::move(strides);
   attrs->padding = std::move(padding);
@@ -164,6 +166,8 @@ Expr MakeQuantizeConv2D(Expr quantized_data,
   attrs->input_scale = std::move(input_scale);
   attrs->kernel_scale = std::move(kernel_scale);
   attrs->output_scale = std::move(output_scale);
+  attrs->rounding = std::move(rounding);
+  attrs->use_int_compute_for_requantize = std::move(use_int_compute_for_requantize);
   static const Op& op = Op::Get("nn_quantized.quantized_conv2d");
   return CallNode::make(op, {quantized_data, quantized_weight}, Attrs(attrs), {});
 }

src/relay/pass/pattern_util.h

Lines changed: 19 additions & 0 deletions
@@ -399,6 +399,25 @@ inline Expr Conv2D(Expr data,
   return CallNode::make(op, {data, weight}, Attrs(attrs), {});
 }

+inline Expr Where(const Expr& condition, const Expr& x, const Expr& y) {
+  static const Op& op = Op::Get("where");
+  return CallNode::make(op, {condition, x, y});
+}
+
+inline Expr GreaterEqual(const Expr& lhs, const Expr& rhs) {
+  static const Op& op = Op::Get("greater_equal");
+  return CallNode::make(op, {lhs, rhs}, Attrs(), {});
+}
+
+inline Expr Full(Expr fill_value,
+                 Array<IndexExpr> shape,
+                 DataType dtype) {
+  auto attrs = make_node<InitOpAttrs>();
+  attrs->shape = std::move(shape);
+  attrs->dtype = std::move(dtype);
+  static const Op& op = Op::Get("full");
+  return CallNode::make(op, {fill_value}, Attrs(attrs), {});
+}
 Expr MakeConcatenate(Expr data, int axis);

 Expr MakeStridedSlice(Expr data, Array<Integer> begin, Array<Integer> end, Array<Integer> strides);
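These three helpers give the pattern rewriter elementwise select machinery, which is plausibly what a sign-aware rounding lowering needs: build a zero tensor, compare, and choose per element. A hypothetical composition, not code from this commit (MakeConstantScalar is assumed to be the scalar-constant helper already in pattern_util.h):

// Keep `if_non_negative` where data >= 0 and `if_negative` elsewhere.
inline Expr SelectBySign(const Expr& data,
                         const Expr& if_non_negative,
                         const Expr& if_negative,
                         Array<IndexExpr> shape,
                         DataType dtype) {
  // Zero tensor with the data's shape and dtype, built with Full above.
  Expr zero = Full(MakeConstantScalar(dtype, 0), shape, dtype);
  // Elementwise mask, true where data is non-negative.
  Expr mask = GreaterEqual(data, zero);
  // where(mask, x, y) takes x at true positions and y elsewhere.
  return Where(mask, if_non_negative, if_negative);
}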
