From d0a1be95e304ece1ef8364bb7c4229d43657a885 Mon Sep 17 00:00:00 2001
From: Arseniy Obolenskiy
Date: Wed, 1 Oct 2025 12:54:40 +0200
Subject: [PATCH 1/3] Move is_weight_compression_needed function to common

---
 .../fx/quantization/backend_parameters.py    | 31 -------------------
 .../torch/fx/quantization/quantize_model.py  |  2 +-
 .../quantization/backend_parameters.py       | 18 -----------
 .../openvino/quantization/quantize_model.py  |  9 +++---
 src/nncf/quantization/advanced_parameters.py | 17 ++++++++++
 tests/torch2/fx/test_models.py               |  4 +--
 6 files changed, 24 insertions(+), 57 deletions(-)
 delete mode 100644 src/nncf/experimental/torch/fx/quantization/backend_parameters.py

diff --git a/src/nncf/experimental/torch/fx/quantization/backend_parameters.py b/src/nncf/experimental/torch/fx/quantization/backend_parameters.py
deleted file mode 100644
index eca6c1d8bba..00000000000
--- a/src/nncf/experimental/torch/fx/quantization/backend_parameters.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright (c) 2025 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Optional
-
-from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
-
-
-class FXBackendParameters:
-    COMPRESS_WEIGHTS = "compress_weights"
-
-
-def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
-    """
-    Determines whether weight compression is needed based on the provided
-    advanced quantization parameters.
-
-    :param advanced_parameters: Advanced quantization parameters.
-    :return: True if weight compression is needed, False otherwise.
-    """
-    if advanced_parameters is not None and advanced_parameters.backend_params is not None:
-        return advanced_parameters.backend_params.get(FXBackendParameters.COMPRESS_WEIGHTS, True)
-    return True
diff --git a/src/nncf/experimental/torch/fx/quantization/quantize_model.py b/src/nncf/experimental/torch/fx/quantization/quantize_model.py
index 6daa6b5bc34..8a491c8514c 100644
--- a/src/nncf/experimental/torch/fx/quantization/quantize_model.py
+++ b/src/nncf/experimental/torch/fx/quantization/quantize_model.py
@@ -25,7 +25,6 @@
 from nncf.common.logging import nncf_logger
 from nncf.common.quantization.structs import QuantizationPreset
 from nncf.data import Dataset
-from nncf.experimental.torch.fx.quantization.backend_parameters import is_weight_compression_needed
 from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations
 from nncf.experimental.torch.fx.transformations import apply_quantization_transformations
 from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
@@ -39,6 +38,7 @@
 from nncf.parameters import TargetDevice
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
+from nncf.quantization.advanced_parameters import is_weight_compression_needed
 from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
 from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
 from nncf.scopes import IgnoredScope
diff --git a/src/nncf/openvino/quantization/backend_parameters.py b/src/nncf/openvino/quantization/backend_parameters.py
index cd9dba8669e..ec7ab102791 100644
--- a/src/nncf/openvino/quantization/backend_parameters.py
+++ b/src/nncf/openvino/quantization/backend_parameters.py
@@ -9,29 +9,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
-
-from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
-
 
 class BackendParameters:
-    COMPRESS_WEIGHTS = "compress_weights"
     STAT_REQUESTS_NUMBER = "stat_requests_number"
     EVAL_REQUESTS_NUMBER = "eval_requests_number"
     ACTIVATIONS = "activations"
     WEIGHTS = "weights"
     LEVEL_LOW = "level_low"
     LEVEL_HIGH = "level_high"
-
-
-def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
-    """
-    Determines whether weight compression is needed based on the provided
-    advanced quantization parameters.
-
-    :param advanced_parameters: Advanced quantization parameters.
-    :return: True if weight compression is needed, False otherwise.
-    """
-    if advanced_parameters is not None and advanced_parameters.backend_params is not None:
-        return advanced_parameters.backend_params.get(BackendParameters.COMPRESS_WEIGHTS, True)
-    return True
diff --git a/src/nncf/openvino/quantization/quantize_model.py b/src/nncf/openvino/quantization/quantize_model.py
index 4ac077a17d7..31420651c78 100644
--- a/src/nncf/openvino/quantization/quantize_model.py
+++ b/src/nncf/openvino/quantization/quantize_model.py
@@ -27,8 +27,6 @@
 from nncf.openvino.graph.model_utils import remove_friendly_name_duplicates
 from nncf.openvino.graph.nncf_graph_builder import GraphConverter
 from nncf.openvino.graph.node_utils import get_number_if_op
-from nncf.openvino.quantization.backend_parameters import BackendParameters
-from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed
 from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies
 from nncf.openvino.rt_info import dump_parameters
 from nncf.parameters import BackupMode
@@ -43,6 +41,7 @@
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.advanced_parameters import convert_to_dict_recursively
+from nncf.quantization.advanced_parameters import is_weight_compression_needed
 from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer
 from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop
 from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator
@@ -211,13 +210,13 @@ def quantize_with_accuracy_control_impl(
     if advanced_accuracy_restorer_parameters is None:
         advanced_accuracy_restorer_parameters = AdvancedAccuracyRestorerParameters()
 
-    compress_weights = is_weight_compression_needed(advanced_quantization_parameters)
-
     if advanced_quantization_parameters is None:
         copied_parameters = AdvancedQuantizationParameters()
     else:
         copied_parameters = deepcopy(advanced_quantization_parameters)
-    copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False
+
+    compress_weights = is_weight_compression_needed(copied_parameters)
+    copied_parameters.compress_weights = False
 
     quantized_model = quantize_impl(
         model=model,
diff --git a/src/nncf/quantization/advanced_parameters.py b/src/nncf/quantization/advanced_parameters.py
index a7b42fd7209..264e61785ad 100644
--- a/src/nncf/quantization/advanced_parameters.py
+++ b/src/nncf/quantization/advanced_parameters.py
@@ -256,6 +256,8 @@ class AdvancedQuantizationParameters:
     :type smooth_quant_alpha: float
     :param backend_params: Backend-specific parameters.
     :type backend_params: dict[str, Any]
+    :param compress_weights: Indicates whether to apply weight compression after quantization.
+    :type compress_weights: bool
     """
 
     # General parameters
@@ -270,6 +272,7 @@ class AdvancedQuantizationParameters:
     activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None
     weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None
     quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE
+    compress_weights: bool = True
 
     # Range estimator parameters
     activations_range_estimator_params: RangeEstimatorParameters = field(default_factory=RangeEstimatorParameters)
@@ -287,6 +290,20 @@ class AdvancedQuantizationParameters:
     backend_params: dict[str, Any] = field(default_factory=dict)
 
 
+def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
+    """
+    Determine whether weight compression is needed based on advanced quantization parameters.
+
+    If `advanced_parameters` is not provided, defaults to True.
+
+    :param advanced_parameters: Advanced quantization parameters.
+    :return: True if weight compression is needed, False otherwise.
+    """
+    if advanced_parameters is not None:
+        return advanced_parameters.compress_weights
+    return True
+
+
 @api()
 @dataclass
 class AdvancedAWQParameters:
diff --git a/tests/torch2/fx/test_models.py b/tests/torch2/fx/test_models.py
index 2977953a0e6..d23ad2140b8 100644
--- a/tests/torch2/fx/test_models.py
+++ b/tests/torch2/fx/test_models.py
@@ -33,7 +33,6 @@
 from nncf.common.utils.os import safe_open
 from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
 from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node
-from nncf.experimental.torch.fx.quantization.backend_parameters import FXBackendParameters
 from nncf.experimental.torch.fx.transformations import DEQUANTIZE_NODE_TARGETS
 from nncf.experimental.torch.fx.transformations import _get_node_inputs
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
@@ -219,7 +218,8 @@ def transform_fn(data_item):
     calibration_dataset = nncf.Dataset([example_input], transform_fn)
 
     quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(
-        disable_bias_correction=True, backend_params={FXBackendParameters.COMPRESS_WEIGHTS: compress_weights}
+        disable_bias_correction=True,
+        compress_weights=compress_weights,
     )
     quantization_parameters["subset_size"] = 1
 

From b34c575176ecb06be4ab862f576f0ae4bed4f7e9 Mon Sep 17 00:00:00 2001
From: Arseniy Obolenskiy
Date: Mon, 6 Oct 2025 10:06:43 +0200
Subject: [PATCH 2/3] Apply changes for ONNX

---
 src/nncf/onnx/quantization/backend_parameters.py | 14 --------------
 src/nncf/onnx/quantization/quantize_model.py     |  5 ++---
 tests/onnx/test_passes.py                        |  3 +--
 3 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/src/nncf/onnx/quantization/backend_parameters.py b/src/nncf/onnx/quantization/backend_parameters.py
index 7673b8e452c..31259b6a0c3 100644
--- a/src/nncf/onnx/quantization/backend_parameters.py
+++ b/src/nncf/onnx/quantization/backend_parameters.py
@@ -24,23 +24,9 @@ class BackendParameters:
         is skipped.
     """
 
-    COMPRESS_WEIGHTS = "compress_weights"
     EXTERNAL_DATA_DIR = "external_data_dir"
 
 
-def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
-    """
-    Determines whether weight compression is needed based on the provided
-    advanced quantization parameters.
-
-    :param advanced_parameters: Advanced quantization parameters.
-    :return: `True` if weight compression is needed, `False` otherwise.
-    """
-    if advanced_parameters is not None and advanced_parameters.backend_params is not None:
-        return advanced_parameters.backend_params.get(BackendParameters.COMPRESS_WEIGHTS, True)
-    return True
-
-
 def get_external_data_dir(
     advanced_parameters: Optional[Union[AdvancedQuantizationParameters, AdvancedCompressionParameters]],
 ) -> Optional[str]:
diff --git a/src/nncf/onnx/quantization/quantize_model.py b/src/nncf/onnx/quantization/quantize_model.py
index 40321d66101..073ec299392 100644
--- a/src/nncf/onnx/quantization/quantize_model.py
+++ b/src/nncf/onnx/quantization/quantize_model.py
@@ -32,9 +32,7 @@
 from nncf.onnx.graph.nncf_graph_builder import GraphConverter
 from nncf.onnx.graph.passes import apply_preprocess_passes
 from nncf.onnx.graph.passes import compress_quantize_weights_transformation
-from nncf.onnx.quantization.backend_parameters import BackendParameters
 from nncf.onnx.quantization.backend_parameters import get_external_data_dir
-from nncf.onnx.quantization.backend_parameters import is_weight_compression_needed
 from nncf.parameters import BackupMode
 from nncf.parameters import CompressionFormat
 from nncf.parameters import CompressWeightsMode
@@ -47,6 +45,7 @@
 from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
 from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
 from nncf.quantization.advanced_parameters import QuantizationParameters
+from nncf.quantization.advanced_parameters import is_weight_compression_needed
 from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer
 from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop
 from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator
@@ -215,7 +214,7 @@ def quantize_with_accuracy_control_impl(
         copied_parameters = AdvancedQuantizationParameters()
     else:
         copied_parameters = deepcopy(advanced_quantization_parameters)
-    copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False
+    copied_parameters.compress_weights = False
 
     quantized_model = quantize_impl(
         model=model,
diff --git a/tests/onnx/test_passes.py b/tests/onnx/test_passes.py
index 262fda4b68e..190dfae898a 100644
--- a/tests/onnx/test_passes.py
+++ b/tests/onnx/test_passes.py
@@ -14,7 +14,6 @@
 import nncf
 from nncf.onnx.graph.passes import apply_preprocess_passes
 from nncf.onnx.graph.passes import compress_quantize_weights_transformation
-from nncf.onnx.quantization.backend_parameters import BackendParameters
 from tests.onnx.common import ModelBuilder
 from tests.onnx.models import build_matmul_model_with_nop_cast
 
@@ -60,7 +59,7 @@ def test_compress_quantize_weights_transformation():
         model,
         calibration_dataset,
         advanced_parameters=nncf.AdvancedQuantizationParameters(
-            backend_params={BackendParameters.COMPRESS_WEIGHTS: False}
+            compress_weights=False,
         ),
     )
 

From f3630f54105f8a042ac4a35dd7922a8b7c2186ba Mon Sep 17 00:00:00 2001
From: Arseniy Obolenskiy
Date: Mon, 6 Oct 2025 11:07:44 +0200
Subject: [PATCH 3/3] Actualize docstring

---
 src/nncf/onnx/quantization/backend_parameters.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/nncf/onnx/quantization/backend_parameters.py b/src/nncf/onnx/quantization/backend_parameters.py
index 31259b6a0c3..6e1a5c095df 100644
--- a/src/nncf/onnx/quantization/backend_parameters.py
+++ b/src/nncf/onnx/quantization/backend_parameters.py
@@ -19,9 +19,6 @@ class BackendParameters:
     """
     :param EXTERNAL_DATA_DIR: An absolute path to the directory where the external data files
        are stored. All external data files must be located in the same folder.
-    :param COMPRESS_WEIGHTS: If `True` compresses constant quantized weights by folding
-        `QuantizeLinear` nodes into pre-quantized initializers. If `False`, this transformation
-        is skipped.
     """
 
     EXTERNAL_DATA_DIR = "external_data_dir"
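
Usage sketch: after this series, weight compression following post-training quantization is controlled by the compress_weights field of AdvancedQuantizationParameters instead of the backend-specific COMPRESS_WEIGHTS key in backend_params, and is_weight_compression_needed now lives in nncf.quantization.advanced_parameters. The snippet below is illustrative only and assumes a caller-provided model and calibration_dataset; those names are placeholders, not part of the patches.

import nncf
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import is_weight_compression_needed

# Keep quantized weights uncompressed, e.g. while accuracy control re-runs quantization.
advanced_parameters = AdvancedQuantizationParameters(compress_weights=False)

quantized_model = nncf.quantize(
    model,                # placeholder: an OpenVINO / ONNX / torch FX model prepared by the caller
    calibration_dataset,  # placeholder: an nncf.Dataset built from representative inputs
    advanced_parameters=advanced_parameters,
)

# The relocated helper reads the new field; with no parameters it defaults to True.
assert is_weight_compression_needed(advanced_parameters) is False
assert is_weight_compression_needed(None) is True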