From 122d7ccd1e765c446de63a60b8c2f9bd120f8e3c Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Wed, 17 Jul 2024 15:04:01 +0800
Subject: [PATCH 1/7] Support calib_func on TF 3x API

Signed-off-by: zehao-intel
---
 docs/3x/TensorFlow.md                         |   9 +-
 .../algorithms/static_quant/keras.py          |  13 +-
 .../algorithms/static_quant/tensorflow.py     |  34 ++--
 .../quantization/algorithm_entry.py           |  16 +-
 .../tensorflow/quantization/autotune.py       |   5 +-
 test/3x/tensorflow/test_quantize_model.py     | 167 ++++++++++++++++++
 6 files changed, 217 insertions(+), 27 deletions(-)
 create mode 100644 test/3x/tensorflow/test_quantize_model.py

diff --git a/docs/3x/TensorFlow.md b/docs/3x/TensorFlow.md
index 5634a524f14..dd58c389699 100644
--- a/docs/3x/TensorFlow.md
+++ b/docs/3x/TensorFlow.md
@@ -23,7 +23,7 @@ Intel(R) Neural Compressor provides `quantize_model` and `autotune` as main inte
 
 **quantize_model**
 
-The design philosophy of the `quantize_model` interface is easy-of-use. With minimal parameters requirement, including `model`, `quant_config`, `calib_dataloader` and `calib_iteration`, it offers a straightforward choice of quantizing TF model in one-shot.
+The design philosophy of the `quantize_model` interface is ease of use. With a minimal set of parameters, including `model`, `quant_config`, `calib_dataloader`, `calib_iteration` and `calib_func`, it offers a straightforward way to quantize a TF model in one shot.
 
 ```python
 def quantize_model(
@@ -31,6 +31,7 @@ def quantize_model(
     quant_config: Union[BaseConfig, list],
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
 ```
 `model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -41,6 +42,9 @@ def quantize_model(
 
 `calib_iteration` is used to decide how many iterations the calibration process will be run.
 
+`calib_func` is a substitute for `calib_dataloader`: use it when the built-in calibration function of INC does not work for model inference.
+
+
 Here is a simple example of using `quantize_model` interface with a dummy calibration dataloader and the default `StaticQuantConfig`:
 ```python
 from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
@@ -68,6 +72,7 @@ def autotune(
     eval_args: Optional[Tuple[Any]] = None,
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ) -> Optional[BaseModel]:
 ```
 `model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -82,6 +87,8 @@ def autotune(
 
 `calib_iteration` is used to decide how many iterations the calibration process will be run.
 
+`calib_func` is a substitute for `calib_dataloader`: use it when the built-in calibration function of INC does not work for model inference.
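+
+As a minimal sketch, a user-defined calibration function only needs to run inference on a few representative batches. The snippet below uses a hypothetical `my_calib_func` fed by random data; it assumes the function receives the INC TF model wrapper, whose `input_tensor`, `output_tensor` and `sess` attributes drive inference (the same pattern as the unit test added by this patch), and the input shape follows that test's MNIST toy model:
+
+```python
+import numpy as np
+
+
+def my_calib_func(model):
+    # `model` is the INC TF model wrapper built from the fp32 model.
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0]
+    # Replace the random batches with a few real samples shaped like the model input.
+    for _ in range(10):
+        inputs = np.random.random((1, 28, 28)).astype(np.float32)
+        feed_dict = dict(zip(input_tensor, [inputs]))
+        model.sess.run(output_tensor, feed_dict)
+```
+
+The same callable can then be passed as the `calib_func` argument of either `quantize_model` or `autotune`.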
+
 Here is a simple example of using `autotune` interface with different quantization rules defined by a list of `StaticQuantConfig`:
 ```python
 from neural_compressor.common.base_tuning import TuningConfig
 from neural_compressor.tensorflow import autotune
diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py
index 004393c8c27..f0572c8f5e0 100644
--- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py
+++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py
@@ -314,16 +314,18 @@ def fuse_conv_bn(conv_weight, bn_weight, conv_type="Conv2D", eps=1.0e-5):
         return bn_fused_model
 
     @dump_elapsed_time("Pass quantize model")
-    def quantize(self, quant_config, model, dataloader, iteration, q_func=None):
+    def quantize(self, quant_config, model, dataloader, iteration, calib_func=None):
         """Execute the quantize process on the specified model.
 
         Args:
-            tune_cfg(dict): The user defined 'StaticQuantConfig' class.
+            quant_config(dict): The user defined 'StaticQuantConfig' class.
             model (object): The model to do quantization.
             dataloader(object): The calibration dataloader used to load quantization dataset.
             iteration(int): The iteration of calibration.
-            q_func (optional): training function for quantization aware training mode.
+            calib_func (optional): the function used for calibration, should be a substitute for the calibration
+                dataloader when the built-in calibration function of INC does not work for model inference.
         """
+        assert calib_func is None, "The calibration function is not supported on Keras backend yet"
         self.query_fw_capability(model)
         converter = KerasConfigConverter(quant_config, iteration)
         tune_cfg = converter.parse_to_tune_cfg()
@@ -363,19 +365,18 @@ def quantize(self, quant_config, model, dataloader, iteration, calib_func=None)
             calibration_model,
             dataloader,
             self.quantize_config["calib_iteration"],
+            calib_func,
         )
 
         return quantized_model
 
-    def _calibrate(self, model, dataloader, calib_interation):
+    def _calibrate(self, model, dataloader=None, calib_interation=None):
         """Apply calibration.
 
         Args:
             model (tf.keras.Model): The model inserted with FakeQuant layers for calibration.
             dataloader(object): The calibration dataloader used to load quantization dataset.
             iteration(int): The iteration of calibration.
-            fq_output_layers (dict): A dict mapping from names of FakeQuant layers to
-                names of their output layers.
         """
         # run eagerly to fetch the numpy min/max
         results = {}
diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
index 160cdb01e44..8c22183e5b4 100644
--- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
+++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
@@ -172,7 +172,7 @@ def quantize(
         model: BaseModel,
         calib_dataloader: Callable = None,
         calib_iteration: int = 100,
-        q_func=None,
+        calib_func: Callable = None,
     ):
         """Execute the quantize process on the specified model.
 
@@ -181,11 +181,11 @@ def quantize(
             model: the fp32 model to be quantized.
             calib_dataloader: a data loader for calibration.
             calib_iteration: the iteration of calibration.
-            q_func: training function for quantization aware training mode,
-                which not enabled for tensorflow yet.
+            calib_func: the function used for calibration, should be a substitution for calib_dataloader
+                when the built-in calibration function of INC does not work for model inference.
Returns: - tf.compat.v1.GraphDef: the quantized model + converted_model: the quantized INC model wrapper. """ assert ( self.approach != "post_training_dynamic_quant" @@ -228,7 +228,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -251,7 +251,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -275,7 +275,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -750,21 +750,21 @@ def quantize( model: BaseModel, calib_dataloader: Callable = None, calib_iteration: int = 100, - q_func=None, + calib_func: Callable = None, ): """Execute the quantize process on the specified model. Args: - tune_cfg (dict): quantization configuration - model (tf.compat.v1.GraphDef): fp32 model - data_loader (generator): generator the data and labels - q_func (optional): training function for quantization aware training mode, - which not enabled for tensorflow yet. + quant_config: a quantization configuration. + model: the fp32 model to be quantized. + calib_dataloader: a data loader for calibration. + calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: - tf.compat.v1.GraphDef: the quantized model + converted_model: the quantized INC model wrapper. """ - assert q_func is None, "quantization aware training mode is not support on tensorflow" self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) self._tuning_cfg_to_fw(tune_cfg) @@ -798,7 +798,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, itex_mode=self.itex_mode, qdq_enabled=self.qdq_enabled, new_api=self.new_api, @@ -846,7 +846,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, itex_mode=self.itex_mode, qdq_enabled=self.qdq_enabled, new_api=self.new_api, diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py index 4b40a2f39a1..9ce0ff1a066 100644 --- a/neural_compressor/tensorflow/quantization/algorithm_entry.py +++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py @@ -28,6 +28,7 @@ def static_quant_entry( quant_config: BaseConfig, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ): """The main entry to apply static quantization. @@ -36,6 +37,8 @@ def static_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. 
@@ -49,7 +52,7 @@ def static_quant_entry( framework = TensorFlowAdaptor quantizer = framework(TFConfig.global_config) - q_model = quantizer.quantize(quant_config, model, calib_dataloader, calib_iteration) + q_model = quantizer.quantize(quant_config, model, calib_dataloader, calib_iteration, calib_func) TFConfig.reset_global_config() return q_model @@ -62,6 +65,17 @@ def smooth_quant_entry( calib_dataloader: Callable = None, calib_iteration: int = 100, ): + """The main entry to apply smooth quantization. + + Args: + model: a fp32 model to be quantized. + quant_config: a quantization configuration. + calib_dataloader: a data loader for calibration. + calib_iteration: the iteration of calibration. + + Returns: + q_model: the quantized model. + """ assert not isinstance(model, KerasModel), "INC don't support smooth quantization for Keras models now." from neural_compressor.tensorflow.algorithms import SmoothQuant diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index 55b089b923c..a13d6066095 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -44,6 +44,7 @@ def autotune( eval_args: Optional[Tuple[Any]] = None, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ) -> Optional[BaseModel]: """The main entry of auto-tune.""" model = Model(model) @@ -57,7 +58,7 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.execution_start() logger.info(quant_config.to_dict()) - q_model = quantize_model(model, quant_config, calib_dataloader, calib_iteration) + q_model = quantize_model(model, quant_config, calib_dataloader, calib_iteration, calib_func) tuning_logger.execution_end() tuning_logger.evaluation_start() eval_result: float = eval_func_wrapper.evaluate(q_model) @@ -71,7 +72,7 @@ def autotune( logger.info("Re-quantizing with best quantization config...") del q_model best_quant_config: BaseConfig = best_trial_record.quant_config - best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration) + best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration, calib_func) else: best_quant_model = q_model break diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py new file mode 100644 index 00000000000..b5763939395 --- /dev/null +++ b/test/3x/tensorflow/test_quantize_model.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import shutil +import time +import unittest + +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from neural_compressor.common import Logger +from neural_compressor.tensorflow.utils import version1_gte_version2 + +def build_model(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu", name="conv2d"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model.keras") + else: + model.save("baseline_model") + + +class Dataset(object): + def __init__(self, batch_size=1): + self.batch_size = batch_size + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + # Normalize the input image so that each pixel value is between 0 to 1. 
+ self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + def __len__(self): + return len(self.test_images) + + def __getitem__(self, idx): + return self.test_images[idx], self.test_labels[idx] + + +class MyDataloader: + def __init__(self, dataset, batch_size=1): + self.dataset = dataset + self.batch_size = batch_size + self.length = math.ceil(len(dataset) / self.batch_size) + + def __iter__(self): + for _, (images, labels) in enumerate(self.dataset): + images = np.expand_dims(images, axis=0) + labels = np.expand_dims(labels, axis=0) + yield (images, labels) + + def __len__(self): + return self.length + + +def evaluate(model): + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + iteration = -1 + calib_dataloader = MyDataloader(dataset=Dataset()) + for idx, (inputs, labels) in enumerate(calib_dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + if idx + 1 == iteration: + break + + +class TestQuantizeModel(unittest.TestCase): + @classmethod + def setUpClass(self): + build_model() + self.fp32_model_path = ( + "baseline_model.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model" + ) + + @classmethod + def tearDownClass(self): + if self.fp32_model_path.endswith(".keras"): + os.remove(self.fp32_model_path) + else: + shutil.rmtree(self.fp32_model_path, ignore_errors=True) + + def test_calib_func(self): + logger.info("Run test_calib_func case...") + + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) + conv2d_quantized = False + for node in qmodel.graph_def.node: + if "Quantized" in node.op: + conv2d_quantized = True + break + + self.assertEqual(conv2d_quantized, True) + + +if __name__ == "__main__": + unittest.main() From 29404385714713e67438937f0ea491d8432b4f5c Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 15:42:18 +0800 Subject: [PATCH 2/7] fix param Signed-off-by: zehao-intel --- docs/3x/TF_Quant.md | 13 +++++++------ .../tensorflow/algorithms/smoother/core.py | 12 ++++++++---- .../tensorflow/quantization/algorithm_entry.py | 5 ++++- .../tensorflow/quantization/quantize.py | 12 +++++++++--- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/docs/3x/TF_Quant.md b/docs/3x/TF_Quant.md index d80c25ecada..1dc91a1830d 100644 --- a/docs/3x/TF_Quant.md +++ b/docs/3x/TF_Quant.md @@ -2,12 +2,13 @@ TensorFlow Quantization =============== -1. [Introduction](#introduction) -2. [Usage](#usage) - 2.1 [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) - 2.2 [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) - 2.3 [Specify Quantization Rules](#specify-quantization-rules) -3. 
[Examples](#examples)
+- [TensorFlow Quantization](#tensorflow-quantization)
+  - [Introduction](#introduction)
+  - [Get Started](#get-started)
+    - [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning)
+    - [With Accuracy Aware Tuning](#with-accuracy-aware-tuning)
+    - [Specify Quantization Rules](#specify-quantization-rules)
+  - [Examples](#examples)
 
 ## Introduction
 
diff --git a/neural_compressor/tensorflow/algorithms/smoother/core.py b/neural_compressor/tensorflow/algorithms/smoother/core.py
index d8c3af164f5..74719a3770f 100644
--- a/neural_compressor/tensorflow/algorithms/smoother/core.py
+++ b/neural_compressor/tensorflow/algorithms/smoother/core.py
@@ -37,19 +37,23 @@ class SmoothQuant:
     def __init__(
         self,
         config: SmoothQuantConfig,
-        calib_dataloader: Callable,
+        calib_dataloader: Callable=None,
         calib_iteration: int = 1,
+        calib_func: Callable=None,
     ):
         """Convert the model by smooth quant.
 
         Args:
-            config: the SmoothQuantConfig class used to set this class
-            calibdataloader: the calibration dataloader
-            calib_iteration: how many steps of iterations on the dataloader to move forward
+            config: the SmoothQuantConfig class used to set this class.
+            calib_dataloader: the calibration dataloader.
+            calib_iteration: how many steps of iterations on the dataloader to move forward.
+            calib_func: the function used for calibration, should be a substitution for calib_dataloader
+                when the built-in calibration function of INC does not work for model inference.
 
         Returns:
             model: A smoothed Tensorflow model
         """
+        assert calib_func is None, "calibration function is not supported for smooth quant."
         self.config = config
         self.calib_dataloader = calib_dataloader
         self.calib_iteration = calib_iteration
diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py
index 9ce0ff1a066..ff425e8db9c 100644
--- a/neural_compressor/tensorflow/quantization/algorithm_entry.py
+++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py
@@ -64,6 +64,7 @@ def smooth_quant_entry(
     smooth_quant_config: SmoothQuantConfig,
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
     """The main entry to apply smooth quantization.
 
@@ -72,6 +73,8 @@ def smooth_quant_entry(
         quant_config: a quantization configuration.
         calib_dataloader: a data loader for calibration.
         calib_iteration: the iteration of calibration.
+        calib_func: the function used for calibration, should be a substitution for calib_dataloader
+            when the built-in calibration function of INC does not work for model inference.
 
     Returns:
         q_model: the quantized model.
@@ -80,7 +83,7 @@ def smooth_quant_entry(
     assert not isinstance(model, KerasModel), "INC don't support smooth quantization for Keras models now."
 
     from neural_compressor.tensorflow.algorithms import SmoothQuant
 
-    converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration)
+    converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration, calib_func)
     sq_model = converter(model)
 
     return sq_model
diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py
index fa613759515..0ea590a8de3 100644
--- a/neural_compressor/tensorflow/quantization/quantize.py
+++ b/neural_compressor/tensorflow/quantization/quantize.py
@@ -32,6 +32,7 @@ def quantize_model(
     quant_config: Union[BaseConfig, list],
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
     """The main entry to quantize model.
@@ -40,6 +41,8 @@ def quantize_model( quant_config: single or lists of quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. @@ -47,9 +50,9 @@ def quantize_model( q_model = Model(model) if isinstance(quant_config, list): for config in quant_config: - q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration) + q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration, calib_func) else: - q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration) + q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration, calib_func) return q_model @@ -59,6 +62,7 @@ def quantize_model_with_single_config( quant_config: BaseConfig, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ): """Quantize model using single config. @@ -67,6 +71,8 @@ def quantize_model_with_single_config( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. @@ -89,5 +95,5 @@ def quantize_model_with_single_config( for algo_name, algo_func in algos_mapping.items(): if need_apply(configs_mapping, algo_name): logger.info(f"Start to apply {algo_name} on the model.") - q_model = algo_func(q_model, configs_mapping, calib_dataloader, calib_iteration) + q_model = algo_func(q_model, configs_mapping, calib_dataloader, calib_iteration, calib_func) return q_model From 2122b8e6ab8155643e3ea1bbd59f820f04de92a7 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:06:44 +0800 Subject: [PATCH 3/7] fix adptor Signed-off-by: zehao-intel --- .../algorithms/static_quant/tensorflow.py | 2 +- .../quantization/utils/graph_converter.py | 4 +++ test/3x/tensorflow/test_quantize_model.py | 29 +++++++------------ 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index 8c22183e5b4..a1da41922cd 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -195,7 +195,7 @@ def quantize( self.approach != "quant_aware_training" ), "Quantize Aware Training is not supported on TensorFlow framework now!" 
- self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration + self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration if calib_dataloader else 100 tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) self._tuning_cfg_to_fw(tune_cfg) self.bf16_ops.extend(self.smooth_quant_mul_ops) diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 302bfe13717..23e349cf168 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -231,6 +231,10 @@ def _inference(self, model): Args: model(TensorflowBaseModel): input TensorflowBaseModel """ + if self.calib_func: + self.calib_func(model) + return + if model.model_type == "llm_saved_model": self._inference_llm(model) return diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py index b5763939395..3fd4aad4398 100644 --- a/test/3x/tensorflow/test_quantize_model.py +++ b/test/3x/tensorflow/test_quantize_model.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import math import shutil import time import unittest @@ -24,7 +24,7 @@ import tensorflow as tf from tensorflow import keras -from neural_compressor.common import Logger +from neural_compressor.common import logger from neural_compressor.tensorflow.utils import version1_gte_version2 def build_model(): @@ -67,7 +67,7 @@ def build_model(): print("Baseline test accuracy:", baseline_model_accuracy) if version1_gte_version2(tf.__version__, "2.16.1"): - model.save("baseline_model.keras") + model.export("baseline_model") else: model.save("baseline_model") @@ -109,8 +109,6 @@ def __len__(self): def evaluate(model): - from neural_compressor.tensorflow import Model - model = Model(model) input_tensor = model.input_tensor output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] @@ -134,16 +132,11 @@ class TestQuantizeModel(unittest.TestCase): @classmethod def setUpClass(self): build_model() - self.fp32_model_path = ( - "baseline_model.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model" - ) + self.fp32_model_path = "baseline_model" @classmethod def tearDownClass(self): - if self.fp32_model_path.endswith(".keras"): - os.remove(self.fp32_model_path) - else: - shutil.rmtree(self.fp32_model_path, ignore_errors=True) + shutil.rmtree(self.fp32_model_path, ignore_errors=True) def test_calib_func(self): logger.info("Run test_calib_func case...") @@ -154,14 +147,14 @@ def test_calib_func(self): set_random_seed(9527) quant_config = StaticQuantConfig() q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) - conv2d_quantized = False - for node in qmodel.graph_def.node: + quantized = False + for node in q_model.graph_def.node: if "Quantized" in node.op: - conv2d_quantized = True + quantized = True break - self.assertEqual(conv2d_quantized, True) + self.assertEqual(quantized, True) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file From 
fe45c03ad78e8c63fd5b081412fb1978f13c72c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 08:10:51 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/tensorflow/algorithms/smoother/core.py | 4 ++-- .../tensorflow/algorithms/static_quant/tensorflow.py | 4 ++-- .../tensorflow/quantization/algorithm_entry.py | 4 ++-- neural_compressor/tensorflow/quantization/autotune.py | 4 +++- neural_compressor/tensorflow/quantization/quantize.py | 8 +++++--- .../tensorflow/quantization/utils/graph_converter.py | 2 +- test/3x/tensorflow/test_quantize_model.py | 8 ++++---- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/neural_compressor/tensorflow/algorithms/smoother/core.py b/neural_compressor/tensorflow/algorithms/smoother/core.py index 74719a3770f..187539ee6eb 100644 --- a/neural_compressor/tensorflow/algorithms/smoother/core.py +++ b/neural_compressor/tensorflow/algorithms/smoother/core.py @@ -37,9 +37,9 @@ class SmoothQuant: def __init__( self, config: SmoothQuantConfig, - calib_dataloader: Callable=None, + calib_dataloader: Callable = None, calib_iteration: int = 1, - calib_func: Callable=None, + calib_func: Callable = None, ): """Convert the model by smooth quant. diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index a1da41922cd..3bf9cff80af 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -181,7 +181,7 @@ def quantize( model: the fp32 model to be quantized. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -759,7 +759,7 @@ def quantize( model: the fp32 model to be quantized. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py index ff425e8db9c..e3530bc5e28 100644 --- a/neural_compressor/tensorflow/quantization/algorithm_entry.py +++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py @@ -37,7 +37,7 @@ def static_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -73,7 +73,7 @@ def smooth_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. 
- calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index a13d6066095..847557b0b8a 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -72,7 +72,9 @@ def autotune( logger.info("Re-quantizing with best quantization config...") del q_model best_quant_config: BaseConfig = best_trial_record.quant_config - best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration, calib_func) + best_quant_model = quantize_model( + model, best_quant_config, calib_dataloader, calib_iteration, calib_func + ) else: best_quant_model = q_model break diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py index 0ea590a8de3..6cfd24225b7 100644 --- a/neural_compressor/tensorflow/quantization/quantize.py +++ b/neural_compressor/tensorflow/quantization/quantize.py @@ -41,7 +41,7 @@ def quantize_model( quant_config: single or lists of quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -52,7 +52,9 @@ def quantize_model( for config in quant_config: q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration, calib_func) else: - q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration, calib_func) + q_model = quantize_model_with_single_config( + q_model, quant_config, calib_dataloader, calib_iteration, calib_func + ) return q_model @@ -71,7 +73,7 @@ def quantize_model_with_single_config( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. 
Returns: diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 23e349cf168..e3c1c640c86 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -234,7 +234,7 @@ def _inference(self, model): if self.calib_func: self.calib_func(model) return - + if model.model_type == "llm_saved_model": self._inference_llm(model) return diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py index 3fd4aad4398..383bb80c0ab 100644 --- a/test/3x/tensorflow/test_quantize_model.py +++ b/test/3x/tensorflow/test_quantize_model.py @@ -27,6 +27,7 @@ from neural_compressor.common import logger from neural_compressor.tensorflow.utils import version1_gte_version2 + def build_model(): # Load MNIST dataset mnist = keras.datasets.mnist @@ -110,8 +111,7 @@ def __len__(self): def evaluate(model): input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] + output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] iteration = -1 calib_dataloader = MyDataloader(dataset=Dataset()) @@ -152,9 +152,9 @@ def test_calib_func(self): if "Quantized" in node.op: quantized = True break - + self.assertEqual(quantized, True) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 7d8f5b60f112d566aef10efedd8da7ae4b5831bf Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:11:09 +0800 Subject: [PATCH 5/7] fix doc Signed-off-by: zehao-intel --- docs/3x/TF_Quant.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/3x/TF_Quant.md b/docs/3x/TF_Quant.md index 1dc91a1830d..d80c25ecada 100644 --- a/docs/3x/TF_Quant.md +++ b/docs/3x/TF_Quant.md @@ -2,13 +2,12 @@ TensorFlow Quantization =============== -- [TensorFlow Quantization](#tensorflow-quantization) - - [Introduction](#introduction) - - [Get Started](#get-started) - - [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) - - [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) - - [Specify Quantization Rules](#specify-quantization-rules) - - [Examples](#examples) +1. [Introduction](#introduction) +2. [Usage](#usage) + 2.1 [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) + 2.2 [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) + 2.3 [Specify Quantization Rules](#specify-quantization-rules) +3. 
[Examples](#examples) ## Introduction From e07f2395547eb730bd77d6094bc1ed0ccc6d8b9a Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:54:21 +0800 Subject: [PATCH 6/7] fix pylint Signed-off-by: zehao-intel --- neural_compressor/tensorflow/algorithms/static_quant/keras.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index f0572c8f5e0..83d9a54609d 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -365,7 +365,6 @@ def quantize(self, quant_config, model, dataloader, iteration, calib_func=None): calibration_model, dataloader, self.quantize_config["calib_iteration"], - calib_func, ) return quantized_model From fbfc54670d4fbd121ed8081350b04055d86a364a Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 18:04:57 +0800 Subject: [PATCH 7/7] remove ut Signed-off-by: zehao-intel --- test/3x/tensorflow/test_quantize_model.py | 160 ---------------------- 1 file changed, 160 deletions(-) delete mode 100644 test/3x/tensorflow/test_quantize_model.py diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py deleted file mode 100644 index 383bb80c0ab..00000000000 --- a/test/3x/tensorflow/test_quantize_model.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import shutil -import time -import unittest - -import numpy as np -import tensorflow as tf -from tensorflow import keras - -from neural_compressor.common import logger -from neural_compressor.tensorflow.utils import version1_gte_version2 - - -def build_model(): - # Load MNIST dataset - mnist = keras.datasets.mnist - - # 60000 images in train and 10000 images in test, but we don't need so much for ut - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - train_images, train_labels = train_images[:1000], train_labels[:1000] - test_images, test_labels = test_images[:200], test_labels[:200] - - # Normalize the input image so that each pixel value is between 0 to 1. - train_images = train_images / 255.0 - test_images = test_images / 255.0 - - # Define the model architecture. 
- model = keras.Sequential( - [ - keras.layers.InputLayer(input_shape=(28, 28)), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu", name="conv2d"), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10, name="dense"), - ] - ) - # Train the digit classification model - model.compile( - optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] - ) - - model.fit( - train_images, - train_labels, - epochs=1, - validation_split=0.1, - ) - - _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) - - print("Baseline test accuracy:", baseline_model_accuracy) - if version1_gte_version2(tf.__version__, "2.16.1"): - model.export("baseline_model") - else: - model.save("baseline_model") - - -class Dataset(object): - def __init__(self, batch_size=1): - self.batch_size = batch_size - mnist = keras.datasets.mnist - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - train_images, train_labels = train_images[:1000], train_labels[:1000] - test_images, test_labels = test_images[:200], test_labels[:200] - # Normalize the input image so that each pixel value is between 0 to 1. - self.train_images = train_images / 255.0 - self.test_images = test_images / 255.0 - self.train_labels = train_labels - self.test_labels = test_labels - - def __len__(self): - return len(self.test_images) - - def __getitem__(self, idx): - return self.test_images[idx], self.test_labels[idx] - - -class MyDataloader: - def __init__(self, dataset, batch_size=1): - self.dataset = dataset - self.batch_size = batch_size - self.length = math.ceil(len(dataset) / self.batch_size) - - def __iter__(self): - for _, (images, labels) in enumerate(self.dataset): - images = np.expand_dims(images, axis=0) - labels = np.expand_dims(labels, axis=0) - yield (images, labels) - - def __len__(self): - return self.length - - -def evaluate(model): - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] - - iteration = -1 - calib_dataloader = MyDataloader(dataset=Dataset()) - for idx, (inputs, labels) in enumerate(calib_dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - - start = time.time() - predictions = model.sess.run(output_tensor, feed_dict) - end = time.time() - - if idx + 1 == iteration: - break - - -class TestQuantizeModel(unittest.TestCase): - @classmethod - def setUpClass(self): - build_model() - self.fp32_model_path = "baseline_model" - - @classmethod - def tearDownClass(self): - shutil.rmtree(self.fp32_model_path, ignore_errors=True) - - def test_calib_func(self): - logger.info("Run test_calib_func case...") - - from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - - set_random_seed(9527) - quant_config = StaticQuantConfig() - q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) - quantized = False - for node in q_model.graph_def.node: - if "Quantized" in node.op: - quantized = True - break - - self.assertEqual(quantized, True) - - -if __name__ == "__main__": - unittest.main()