From 122d7ccd1e765c446de63a60b8c2f9bd120f8e3c Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Wed, 17 Jul 2024 15:04:01 +0800
Subject: [PATCH 1/7] Support calib_func on TF 3x API

Signed-off-by: zehao-intel
---
 docs/3x/TensorFlow.md                         |   9 +-
 .../algorithms/static_quant/keras.py          |  13 +-
 .../algorithms/static_quant/tensorflow.py     |  34 ++--
 .../quantization/algorithm_entry.py           |  16 +-
 .../tensorflow/quantization/autotune.py       |   5 +-
 test/3x/tensorflow/test_quantize_model.py     | 167 ++++++++++++++++++
 6 files changed, 217 insertions(+), 27 deletions(-)
 create mode 100644 test/3x/tensorflow/test_quantize_model.py

diff --git a/docs/3x/TensorFlow.md b/docs/3x/TensorFlow.md
index 5634a524f14..dd58c389699 100644
--- a/docs/3x/TensorFlow.md
+++ b/docs/3x/TensorFlow.md
@@ -23,7 +23,7 @@ Intel(R) Neural Compressor provides `quantize_model` and `autotune` as main inte
 
 **quantize_model**
 
-The design philosophy of the `quantize_model` interface is easy-of-use. With minimal parameters requirement, including `model`, `quant_config`, `calib_dataloader` and `calib_iteration`, it offers a straightforward choice of quantizing TF model in one-shot.
+The design philosophy of the `quantize_model` interface is ease of use. With a minimal set of parameters, including `model`, `quant_config`, `calib_dataloader`, `calib_iteration` and `calib_func`, it offers a straightforward way to quantize a TF model in one shot.
 
 ```python
 def quantize_model(
@@ -31,6 +31,7 @@ def quantize_model(
     quant_config: Union[BaseConfig, list],
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
 ```
 `model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -41,6 +42,9 @@ def quantize_model(
 
 `calib_iteration` is used to decide how many iterations the calibration process will be run.
 
+`calib_func` is a substitute for `calib_dataloader`: use it when the built-in calibration function of INC does not work for model inference.
+
+
 Here is a simple example of using `quantize_model` interface with a dummy calibration dataloader and the default `StaticQuantConfig`:
 ```python
 from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
@@ -68,6 +72,7 @@ def autotune(
     eval_args: Optional[Tuple[Any]] = None,
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ) -> Optional[BaseModel]:
 ```
 `model` should be a string of the model's location, the object of Keras model or INC TF model wrapper class.
@@ -82,6 +87,8 @@ def autotune(
 
 `calib_iteration` is used to decide how many iterations the calibration process will be run.
 
+`calib_func` is a substitute for `calib_dataloader`: use it when the built-in calibration function of INC does not work for model inference.
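+
+As a minimal sketch, a user-defined calibration function only needs to run inference on a few representative batches. The snippet below uses a hypothetical `my_calib_func` fed by random data; it assumes the function receives the INC TF model wrapper, whose `input_tensor`, `output_tensor` and `sess` attributes drive inference (the same pattern as the unit test added by this patch), and the input shape follows that test's MNIST toy model:
+
+```python
+import numpy as np
+
+
+def my_calib_func(model):
+    # `model` is the INC TF model wrapper built from the fp32 model.
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0]
+    # Replace the random batches with a few real samples shaped like the model input.
+    for _ in range(10):
+        inputs = np.random.random((1, 28, 28)).astype(np.float32)
+        feed_dict = dict(zip(input_tensor, [inputs]))
+        model.sess.run(output_tensor, feed_dict)
+```
+
+The same callable can then be passed as the `calib_func` argument of either `quantize_model` or `autotune`.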
+
 Here is a simple example of using `autotune` interface with different quantization rules defined by a list of `StaticQuantConfig`:
 ```python
 from neural_compressor.common.base_tuning import TuningConfig
 from neural_compressor.tensorflow import autotune
diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py
index 004393c8c27..f0572c8f5e0 100644
--- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py
+++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py
@@ -314,16 +314,18 @@ def fuse_conv_bn(conv_weight, bn_weight, conv_type="Conv2D", eps=1.0e-5):
         return bn_fused_model
 
     @dump_elapsed_time("Pass quantize model")
-    def quantize(self, quant_config, model, dataloader, iteration, q_func=None):
+    def quantize(self, quant_config, model, dataloader, iteration, calib_func=None):
         """Execute the quantize process on the specified model.
 
         Args:
-            tune_cfg(dict): The user defined 'StaticQuantConfig' class.
+            quant_config(dict): The user defined 'StaticQuantConfig' class.
             model (object): The model to do quantization.
             dataloader(object): The calibration dataloader used to load quantization dataset.
             iteration(int): The iteration of calibration.
-            q_func (optional): training function for quantization aware training mode.
+            calib_func (optional): the function used for calibration, should be a substitute for the calibration
+                dataloader when the built-in calibration function of INC does not work for model inference.
         """
+        assert calib_func is None, "The calibration function is not supported on Keras backend yet"
         self.query_fw_capability(model)
         converter = KerasConfigConverter(quant_config, iteration)
         tune_cfg = converter.parse_to_tune_cfg()
@@ -363,19 +365,18 @@ def quantize(self, quant_config, model, dataloader, iteration, calib_func=None)
             calibration_model,
             dataloader,
             self.quantize_config["calib_iteration"],
+            calib_func,
         )
 
         return quantized_model
 
-    def _calibrate(self, model, dataloader, calib_interation):
+    def _calibrate(self, model, dataloader=None, calib_interation=None):
         """Apply calibration.
 
         Args:
             model (tf.keras.Model): The model inserted with FakeQuant layers for calibration.
             dataloader(object): The calibration dataloader used to load quantization dataset.
             iteration(int): The iteration of calibration.
-            fq_output_layers (dict): A dict mapping from names of FakeQuant layers to
-                names of their output layers.
         """
         # run eagerly to fetch the numpy min/max
         results = {}
diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
index 160cdb01e44..8c22183e5b4 100644
--- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
+++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py
@@ -172,7 +172,7 @@ def quantize(
         model: BaseModel,
         calib_dataloader: Callable = None,
         calib_iteration: int = 100,
-        q_func=None,
+        calib_func: Callable = None,
     ):
         """Execute the quantize process on the specified model.
 
@@ -181,11 +181,11 @@ def quantize(
             model: the fp32 model to be quantized.
             calib_dataloader: a data loader for calibration.
             calib_iteration: the iteration of calibration.
-            q_func: training function for quantization aware training mode,
-                which not enabled for tensorflow yet.
+            calib_func: the function used for calibration, should be a substitution for calib_dataloader
+                when the built-in calibration function of INC does not work for model inference.
Returns: - tf.compat.v1.GraphDef: the quantized model + converted_model: the quantized INC model wrapper. """ assert ( self.approach != "post_training_dynamic_quant" @@ -228,7 +228,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -251,7 +251,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -275,7 +275,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, qdq_enabled=self.qdq_enabled, new_api=self.new_api, performance_only=self.performance_only, @@ -750,21 +750,21 @@ def quantize( model: BaseModel, calib_dataloader: Callable = None, calib_iteration: int = 100, - q_func=None, + calib_func: Callable = None, ): """Execute the quantize process on the specified model. Args: - tune_cfg (dict): quantization configuration - model (tf.compat.v1.GraphDef): fp32 model - data_loader (generator): generator the data and labels - q_func (optional): training function for quantization aware training mode, - which not enabled for tensorflow yet. + quant_config: a quantization configuration. + model: the fp32 model to be quantized. + calib_dataloader: a data loader for calibration. + calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: - tf.compat.v1.GraphDef: the quantized model + converted_model: the quantized INC model wrapper. """ - assert q_func is None, "quantization aware training mode is not support on tensorflow" self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) self._tuning_cfg_to_fw(tune_cfg) @@ -798,7 +798,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, itex_mode=self.itex_mode, qdq_enabled=self.qdq_enabled, new_api=self.new_api, @@ -846,7 +846,7 @@ def quantize( fp32_ops=self.fp32_ops, bf16_ops=self.bf16_ops, data_loader=calib_dataloader, - calib_func=q_func, + calib_func=calib_func, itex_mode=self.itex_mode, qdq_enabled=self.qdq_enabled, new_api=self.new_api, diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py index 4b40a2f39a1..9ce0ff1a066 100644 --- a/neural_compressor/tensorflow/quantization/algorithm_entry.py +++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py @@ -28,6 +28,7 @@ def static_quant_entry( quant_config: BaseConfig, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ): """The main entry to apply static quantization. @@ -36,6 +37,8 @@ def static_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. 
@@ -49,7 +52,7 @@ def static_quant_entry( framework = TensorFlowAdaptor quantizer = framework(TFConfig.global_config) - q_model = quantizer.quantize(quant_config, model, calib_dataloader, calib_iteration) + q_model = quantizer.quantize(quant_config, model, calib_dataloader, calib_iteration, calib_func) TFConfig.reset_global_config() return q_model @@ -62,6 +65,17 @@ def smooth_quant_entry( calib_dataloader: Callable = None, calib_iteration: int = 100, ): + """The main entry to apply smooth quantization. + + Args: + model: a fp32 model to be quantized. + quant_config: a quantization configuration. + calib_dataloader: a data loader for calibration. + calib_iteration: the iteration of calibration. + + Returns: + q_model: the quantized model. + """ assert not isinstance(model, KerasModel), "INC don't support smooth quantization for Keras models now." from neural_compressor.tensorflow.algorithms import SmoothQuant diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index 55b089b923c..a13d6066095 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -44,6 +44,7 @@ def autotune( eval_args: Optional[Tuple[Any]] = None, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ) -> Optional[BaseModel]: """The main entry of auto-tune.""" model = Model(model) @@ -57,7 +58,7 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.execution_start() logger.info(quant_config.to_dict()) - q_model = quantize_model(model, quant_config, calib_dataloader, calib_iteration) + q_model = quantize_model(model, quant_config, calib_dataloader, calib_iteration, calib_func) tuning_logger.execution_end() tuning_logger.evaluation_start() eval_result: float = eval_func_wrapper.evaluate(q_model) @@ -71,7 +72,7 @@ def autotune( logger.info("Re-quantizing with best quantization config...") del q_model best_quant_config: BaseConfig = best_trial_record.quant_config - best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration) + best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration, calib_func) else: best_quant_model = q_model break diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py new file mode 100644 index 00000000000..b5763939395 --- /dev/null +++ b/test/3x/tensorflow/test_quantize_model.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import shutil +import time +import unittest + +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from neural_compressor.common import Logger +from neural_compressor.tensorflow.utils import version1_gte_version2 + +def build_model(): + # Load MNIST dataset + mnist = keras.datasets.mnist + + # 60000 images in train and 10000 images in test, but we don't need so much for ut + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. + model = keras.Sequential( + [ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu", name="conv2d"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10, name="dense"), + ] + ) + # Train the digit classification model + model.compile( + optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] + ) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) + + print("Baseline test accuracy:", baseline_model_accuracy) + if version1_gte_version2(tf.__version__, "2.16.1"): + model.save("baseline_model.keras") + else: + model.save("baseline_model") + + +class Dataset(object): + def __init__(self, batch_size=1): + self.batch_size = batch_size + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + train_images, train_labels = train_images[:1000], train_labels[:1000] + test_images, test_labels = test_images[:200], test_labels[:200] + # Normalize the input image so that each pixel value is between 0 to 1. 
+ self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + def __len__(self): + return len(self.test_images) + + def __getitem__(self, idx): + return self.test_images[idx], self.test_labels[idx] + + +class MyDataloader: + def __init__(self, dataset, batch_size=1): + self.dataset = dataset + self.batch_size = batch_size + self.length = math.ceil(len(dataset) / self.batch_size) + + def __iter__(self): + for _, (images, labels) in enumerate(self.dataset): + images = np.expand_dims(images, axis=0) + labels = np.expand_dims(labels, axis=0) + yield (images, labels) + + def __len__(self): + return self.length + + +def evaluate(model): + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + + iteration = -1 + calib_dataloader = MyDataloader(dataset=Dataset()) + for idx, (inputs, labels) in enumerate(calib_dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + if idx + 1 == iteration: + break + + +class TestQuantizeModel(unittest.TestCase): + @classmethod + def setUpClass(self): + build_model() + self.fp32_model_path = ( + "baseline_model.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model" + ) + + @classmethod + def tearDownClass(self): + if self.fp32_model_path.endswith(".keras"): + os.remove(self.fp32_model_path) + else: + shutil.rmtree(self.fp32_model_path, ignore_errors=True) + + def test_calib_func(self): + logger.info("Run test_calib_func case...") + + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) + conv2d_quantized = False + for node in qmodel.graph_def.node: + if "Quantized" in node.op: + conv2d_quantized = True + break + + self.assertEqual(conv2d_quantized, True) + + +if __name__ == "__main__": + unittest.main() From 29404385714713e67438937f0ea491d8432b4f5c Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 15:42:18 +0800 Subject: [PATCH 2/7] fix param Signed-off-by: zehao-intel --- docs/3x/TF_Quant.md | 13 +++++++------ .../tensorflow/algorithms/smoother/core.py | 12 ++++++++---- .../tensorflow/quantization/algorithm_entry.py | 5 ++++- .../tensorflow/quantization/quantize.py | 12 +++++++++--- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/docs/3x/TF_Quant.md b/docs/3x/TF_Quant.md index d80c25ecada..1dc91a1830d 100644 --- a/docs/3x/TF_Quant.md +++ b/docs/3x/TF_Quant.md @@ -2,12 +2,13 @@ TensorFlow Quantization =============== -1. [Introduction](#introduction) -2. [Usage](#usage) - 2.1 [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) - 2.2 [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) - 2.3 [Specify Quantization Rules](#specify-quantization-rules) -3. 
[Examples](#examples)
+- [TensorFlow Quantization](#tensorflow-quantization)
+  - [Introduction](#introduction)
+  - [Get Started](#get-started)
+    - [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning)
+    - [With Accuracy Aware Tuning](#with-accuracy-aware-tuning)
+    - [Specify Quantization Rules](#specify-quantization-rules)
+  - [Examples](#examples)
 
 ## Introduction
 
diff --git a/neural_compressor/tensorflow/algorithms/smoother/core.py b/neural_compressor/tensorflow/algorithms/smoother/core.py
index d8c3af164f5..74719a3770f 100644
--- a/neural_compressor/tensorflow/algorithms/smoother/core.py
+++ b/neural_compressor/tensorflow/algorithms/smoother/core.py
@@ -37,19 +37,23 @@ class SmoothQuant:
     def __init__(
         self,
         config: SmoothQuantConfig,
-        calib_dataloader: Callable,
+        calib_dataloader: Callable=None,
         calib_iteration: int = 1,
+        calib_func: Callable=None,
     ):
         """Convert the model by smooth quant.
 
         Args:
-            config: the SmoothQuantConfig class used to set this class
-            calibdataloader: the calibration dataloader
-            calib_iteration: how many steps of iterations on the dataloader to move forward
+            config: the SmoothQuantConfig class used to set this class.
+            calib_dataloader: the calibration dataloader.
+            calib_iteration: how many steps of iterations on the dataloader to move forward.
+            calib_func: the function used for calibration, should be a substitution for calib_dataloader
+                when the built-in calibration function of INC does not work for model inference.
 
         Returns:
             model: A smoothed Tensorflow model
         """
+        assert calib_func is None, "calibration function is not supported for smooth quant."
         self.config = config
         self.calib_dataloader = calib_dataloader
         self.calib_iteration = calib_iteration
diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py
index 9ce0ff1a066..ff425e8db9c 100644
--- a/neural_compressor/tensorflow/quantization/algorithm_entry.py
+++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py
@@ -64,6 +64,7 @@ def smooth_quant_entry(
     smooth_quant_config: SmoothQuantConfig,
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
     """The main entry to apply smooth quantization.
 
@@ -72,6 +73,8 @@ def smooth_quant_entry(
         quant_config: a quantization configuration.
         calib_dataloader: a data loader for calibration.
         calib_iteration: the iteration of calibration.
+        calib_func: the function used for calibration, should be a substitution for calib_dataloader
+            when the built-in calibration function of INC does not work for model inference.
 
     Returns:
         q_model: the quantized model.
@@ -80,7 +83,7 @@ def smooth_quant_entry(
     assert not isinstance(model, KerasModel), "INC don't support smooth quantization for Keras models now."
 
     from neural_compressor.tensorflow.algorithms import SmoothQuant
 
-    converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration)
+    converter = SmoothQuant(smooth_quant_config, calib_dataloader, calib_iteration, calib_func)
     sq_model = converter(model)
 
     return sq_model
diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py
index fa613759515..0ea590a8de3 100644
--- a/neural_compressor/tensorflow/quantization/quantize.py
+++ b/neural_compressor/tensorflow/quantization/quantize.py
@@ -32,6 +32,7 @@ def quantize_model(
     quant_config: Union[BaseConfig, list],
     calib_dataloader: Callable = None,
     calib_iteration: int = 100,
+    calib_func: Callable = None,
 ):
     """The main entry to quantize model.
@@ -40,6 +41,8 @@ def quantize_model( quant_config: single or lists of quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. @@ -47,9 +50,9 @@ def quantize_model( q_model = Model(model) if isinstance(quant_config, list): for config in quant_config: - q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration) + q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration, calib_func) else: - q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration) + q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration, calib_func) return q_model @@ -59,6 +62,7 @@ def quantize_model_with_single_config( quant_config: BaseConfig, calib_dataloader: Callable = None, calib_iteration: int = 100, + calib_func: Callable = None, ): """Quantize model using single config. @@ -67,6 +71,8 @@ def quantize_model_with_single_config( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. + calib_func: the function used for calibration, should be a substitution for calib_dataloader + when the built-in calibration function of INC does not work for model inference. Returns: q_model: the quantized model. @@ -89,5 +95,5 @@ def quantize_model_with_single_config( for algo_name, algo_func in algos_mapping.items(): if need_apply(configs_mapping, algo_name): logger.info(f"Start to apply {algo_name} on the model.") - q_model = algo_func(q_model, configs_mapping, calib_dataloader, calib_iteration) + q_model = algo_func(q_model, configs_mapping, calib_dataloader, calib_iteration, calib_func) return q_model From 2122b8e6ab8155643e3ea1bbd59f820f04de92a7 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:06:44 +0800 Subject: [PATCH 3/7] fix adptor Signed-off-by: zehao-intel --- .../algorithms/static_quant/tensorflow.py | 2 +- .../quantization/utils/graph_converter.py | 4 +++ test/3x/tensorflow/test_quantize_model.py | 29 +++++++------------ 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index 8c22183e5b4..a1da41922cd 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -195,7 +195,7 @@ def quantize( self.approach != "quant_aware_training" ), "Quantize Aware Training is not supported on TensorFlow framework now!" 
- self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration + self.calib_sampling_size = calib_dataloader.batch_size * calib_iteration if calib_dataloader else 100 tune_cfg = self.parse_quant_config(quant_config, model, calib_iteration) self._tuning_cfg_to_fw(tune_cfg) self.bf16_ops.extend(self.smooth_quant_mul_ops) diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 302bfe13717..23e349cf168 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -231,6 +231,10 @@ def _inference(self, model): Args: model(TensorflowBaseModel): input TensorflowBaseModel """ + if self.calib_func: + self.calib_func(model) + return + if model.model_type == "llm_saved_model": self._inference_llm(model) return diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py index b5763939395..3fd4aad4398 100644 --- a/test/3x/tensorflow/test_quantize_model.py +++ b/test/3x/tensorflow/test_quantize_model.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os +import math import shutil import time import unittest @@ -24,7 +24,7 @@ import tensorflow as tf from tensorflow import keras -from neural_compressor.common import Logger +from neural_compressor.common import logger from neural_compressor.tensorflow.utils import version1_gte_version2 def build_model(): @@ -67,7 +67,7 @@ def build_model(): print("Baseline test accuracy:", baseline_model_accuracy) if version1_gte_version2(tf.__version__, "2.16.1"): - model.save("baseline_model.keras") + model.export("baseline_model") else: model.save("baseline_model") @@ -109,8 +109,6 @@ def __len__(self): def evaluate(model): - from neural_compressor.tensorflow import Model - model = Model(model) input_tensor = model.input_tensor output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] @@ -134,16 +132,11 @@ class TestQuantizeModel(unittest.TestCase): @classmethod def setUpClass(self): build_model() - self.fp32_model_path = ( - "baseline_model.keras" if version1_gte_version2(tf.__version__, "2.16.1") else "baseline_model" - ) + self.fp32_model_path = "baseline_model" @classmethod def tearDownClass(self): - if self.fp32_model_path.endswith(".keras"): - os.remove(self.fp32_model_path) - else: - shutil.rmtree(self.fp32_model_path, ignore_errors=True) + shutil.rmtree(self.fp32_model_path, ignore_errors=True) def test_calib_func(self): logger.info("Run test_calib_func case...") @@ -154,14 +147,14 @@ def test_calib_func(self): set_random_seed(9527) quant_config = StaticQuantConfig() q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) - conv2d_quantized = False - for node in qmodel.graph_def.node: + quantized = False + for node in q_model.graph_def.node: if "Quantized" in node.op: - conv2d_quantized = True + quantized = True break - self.assertEqual(conv2d_quantized, True) + self.assertEqual(quantized, True) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file From 
fe45c03ad78e8c63fd5b081412fb1978f13c72c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jul 2024 08:10:51 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/tensorflow/algorithms/smoother/core.py | 4 ++-- .../tensorflow/algorithms/static_quant/tensorflow.py | 4 ++-- .../tensorflow/quantization/algorithm_entry.py | 4 ++-- neural_compressor/tensorflow/quantization/autotune.py | 4 +++- neural_compressor/tensorflow/quantization/quantize.py | 8 +++++--- .../tensorflow/quantization/utils/graph_converter.py | 2 +- test/3x/tensorflow/test_quantize_model.py | 8 ++++---- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/neural_compressor/tensorflow/algorithms/smoother/core.py b/neural_compressor/tensorflow/algorithms/smoother/core.py index 74719a3770f..187539ee6eb 100644 --- a/neural_compressor/tensorflow/algorithms/smoother/core.py +++ b/neural_compressor/tensorflow/algorithms/smoother/core.py @@ -37,9 +37,9 @@ class SmoothQuant: def __init__( self, config: SmoothQuantConfig, - calib_dataloader: Callable=None, + calib_dataloader: Callable = None, calib_iteration: int = 1, - calib_func: Callable=None, + calib_func: Callable = None, ): """Convert the model by smooth quant. diff --git a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py index a1da41922cd..3bf9cff80af 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/tensorflow.py @@ -181,7 +181,7 @@ def quantize( model: the fp32 model to be quantized. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -759,7 +759,7 @@ def quantize( model: the fp32 model to be quantized. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: diff --git a/neural_compressor/tensorflow/quantization/algorithm_entry.py b/neural_compressor/tensorflow/quantization/algorithm_entry.py index ff425e8db9c..e3530bc5e28 100644 --- a/neural_compressor/tensorflow/quantization/algorithm_entry.py +++ b/neural_compressor/tensorflow/quantization/algorithm_entry.py @@ -37,7 +37,7 @@ def static_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -73,7 +73,7 @@ def smooth_quant_entry( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. 
- calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: diff --git a/neural_compressor/tensorflow/quantization/autotune.py b/neural_compressor/tensorflow/quantization/autotune.py index a13d6066095..847557b0b8a 100644 --- a/neural_compressor/tensorflow/quantization/autotune.py +++ b/neural_compressor/tensorflow/quantization/autotune.py @@ -72,7 +72,9 @@ def autotune( logger.info("Re-quantizing with best quantization config...") del q_model best_quant_config: BaseConfig = best_trial_record.quant_config - best_quant_model = quantize_model(model, best_quant_config, calib_dataloader, calib_iteration, calib_func) + best_quant_model = quantize_model( + model, best_quant_config, calib_dataloader, calib_iteration, calib_func + ) else: best_quant_model = q_model break diff --git a/neural_compressor/tensorflow/quantization/quantize.py b/neural_compressor/tensorflow/quantization/quantize.py index 0ea590a8de3..6cfd24225b7 100644 --- a/neural_compressor/tensorflow/quantization/quantize.py +++ b/neural_compressor/tensorflow/quantization/quantize.py @@ -41,7 +41,7 @@ def quantize_model( quant_config: single or lists of quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. Returns: @@ -52,7 +52,9 @@ def quantize_model( for config in quant_config: q_model = quantize_model_with_single_config(q_model, config, calib_dataloader, calib_iteration, calib_func) else: - q_model = quantize_model_with_single_config(q_model, quant_config, calib_dataloader, calib_iteration, calib_func) + q_model = quantize_model_with_single_config( + q_model, quant_config, calib_dataloader, calib_iteration, calib_func + ) return q_model @@ -71,7 +73,7 @@ def quantize_model_with_single_config( quant_config: a quantization configuration. calib_dataloader: a data loader for calibration. calib_iteration: the iteration of calibration. - calib_func: the function used for calibration, should be a substitution for calib_dataloader + calib_func: the function used for calibration, should be a substitution for calib_dataloader when the built-in calibration function of INC does not work for model inference. 
Returns: diff --git a/neural_compressor/tensorflow/quantization/utils/graph_converter.py b/neural_compressor/tensorflow/quantization/utils/graph_converter.py index 23e349cf168..e3c1c640c86 100644 --- a/neural_compressor/tensorflow/quantization/utils/graph_converter.py +++ b/neural_compressor/tensorflow/quantization/utils/graph_converter.py @@ -234,7 +234,7 @@ def _inference(self, model): if self.calib_func: self.calib_func(model) return - + if model.model_type == "llm_saved_model": self._inference_llm(model) return diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py index 3fd4aad4398..383bb80c0ab 100644 --- a/test/3x/tensorflow/test_quantize_model.py +++ b/test/3x/tensorflow/test_quantize_model.py @@ -27,6 +27,7 @@ from neural_compressor.common import logger from neural_compressor.tensorflow.utils import version1_gte_version2 + def build_model(): # Load MNIST dataset mnist = keras.datasets.mnist @@ -110,8 +111,7 @@ def __len__(self): def evaluate(model): input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] + output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] iteration = -1 calib_dataloader = MyDataloader(dataset=Dataset()) @@ -152,9 +152,9 @@ def test_calib_func(self): if "Quantized" in node.op: quantized = True break - + self.assertEqual(quantized, True) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 7d8f5b60f112d566aef10efedd8da7ae4b5831bf Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:11:09 +0800 Subject: [PATCH 5/7] fix doc Signed-off-by: zehao-intel --- docs/3x/TF_Quant.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/3x/TF_Quant.md b/docs/3x/TF_Quant.md index 1dc91a1830d..d80c25ecada 100644 --- a/docs/3x/TF_Quant.md +++ b/docs/3x/TF_Quant.md @@ -2,13 +2,12 @@ TensorFlow Quantization =============== -- [TensorFlow Quantization](#tensorflow-quantization) - - [Introduction](#introduction) - - [Get Started](#get-started) - - [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) - - [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) - - [Specify Quantization Rules](#specify-quantization-rules) - - [Examples](#examples) +1. [Introduction](#introduction) +2. [Usage](#usage) + 2.1 [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning) + 2.2 [With Accuracy Aware Tuning](#with-accuracy-aware-tuning) + 2.3 [Specify Quantization Rules](#specify-quantization-rules) +3. 
[Examples](#examples) ## Introduction From e07f2395547eb730bd77d6094bc1ed0ccc6d8b9a Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 16:54:21 +0800 Subject: [PATCH 6/7] fix pylint Signed-off-by: zehao-intel --- neural_compressor/tensorflow/algorithms/static_quant/keras.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index f0572c8f5e0..83d9a54609d 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -365,7 +365,6 @@ def quantize(self, quant_config, model, dataloader, iteration, calib_func=None): calibration_model, dataloader, self.quantize_config["calib_iteration"], - calib_func, ) return quantized_model From fbfc54670d4fbd121ed8081350b04055d86a364a Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 17 Jul 2024 18:04:57 +0800 Subject: [PATCH 7/7] remove ut Signed-off-by: zehao-intel --- test/3x/tensorflow/test_quantize_model.py | 160 ---------------------- 1 file changed, 160 deletions(-) delete mode 100644 test/3x/tensorflow/test_quantize_model.py diff --git a/test/3x/tensorflow/test_quantize_model.py b/test/3x/tensorflow/test_quantize_model.py deleted file mode 100644 index 383bb80c0ab..00000000000 --- a/test/3x/tensorflow/test_quantize_model.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import shutil -import time -import unittest - -import numpy as np -import tensorflow as tf -from tensorflow import keras - -from neural_compressor.common import logger -from neural_compressor.tensorflow.utils import version1_gte_version2 - - -def build_model(): - # Load MNIST dataset - mnist = keras.datasets.mnist - - # 60000 images in train and 10000 images in test, but we don't need so much for ut - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - train_images, train_labels = train_images[:1000], train_labels[:1000] - test_images, test_labels = test_images[:200], test_labels[:200] - - # Normalize the input image so that each pixel value is between 0 to 1. - train_images = train_images / 255.0 - test_images = test_images / 255.0 - - # Define the model architecture. 
- model = keras.Sequential( - [ - keras.layers.InputLayer(input_shape=(28, 28)), - keras.layers.Reshape(target_shape=(28, 28, 1)), - keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu", name="conv2d"), - keras.layers.MaxPooling2D(pool_size=(2, 2)), - keras.layers.Flatten(), - keras.layers.Dense(10, name="dense"), - ] - ) - # Train the digit classification model - model.compile( - optimizer="adam", loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=["accuracy"] - ) - - model.fit( - train_images, - train_labels, - epochs=1, - validation_split=0.1, - ) - - _, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0) - - print("Baseline test accuracy:", baseline_model_accuracy) - if version1_gte_version2(tf.__version__, "2.16.1"): - model.export("baseline_model") - else: - model.save("baseline_model") - - -class Dataset(object): - def __init__(self, batch_size=1): - self.batch_size = batch_size - mnist = keras.datasets.mnist - (train_images, train_labels), (test_images, test_labels) = mnist.load_data() - train_images, train_labels = train_images[:1000], train_labels[:1000] - test_images, test_labels = test_images[:200], test_labels[:200] - # Normalize the input image so that each pixel value is between 0 to 1. - self.train_images = train_images / 255.0 - self.test_images = test_images / 255.0 - self.train_labels = train_labels - self.test_labels = test_labels - - def __len__(self): - return len(self.test_images) - - def __getitem__(self, idx): - return self.test_images[idx], self.test_labels[idx] - - -class MyDataloader: - def __init__(self, dataset, batch_size=1): - self.dataset = dataset - self.batch_size = batch_size - self.length = math.ceil(len(dataset) / self.batch_size) - - def __iter__(self): - for _, (images, labels) in enumerate(self.dataset): - images = np.expand_dims(images, axis=0) - labels = np.expand_dims(labels, axis=0) - yield (images, labels) - - def __len__(self): - return self.length - - -def evaluate(model): - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor) > 1 else model.output_tensor[0] - - iteration = -1 - calib_dataloader = MyDataloader(dataset=Dataset()) - for idx, (inputs, labels) in enumerate(calib_dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - - start = time.time() - predictions = model.sess.run(output_tensor, feed_dict) - end = time.time() - - if idx + 1 == iteration: - break - - -class TestQuantizeModel(unittest.TestCase): - @classmethod - def setUpClass(self): - build_model() - self.fp32_model_path = "baseline_model" - - @classmethod - def tearDownClass(self): - shutil.rmtree(self.fp32_model_path, ignore_errors=True) - - def test_calib_func(self): - logger.info("Run test_calib_func case...") - - from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - - set_random_seed(9527) - quant_config = StaticQuantConfig() - q_model = quantize_model(self.fp32_model_path, quant_config, calib_func=evaluate) - quantized = False - for node in q_model.graph_def.node: - if "Quantized" in node.op: - quantized = True - break - - self.assertEqual(quantized, True) - - -if __name__ == "__main__": - unittest.main()