Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from nncf.common.logging import nncf_logger
from nncf.common.quantization.structs import QuantizationPreset
from nncf.data import Dataset
from nncf.experimental.torch.fx.quantization.backend_parameters import is_weight_compression_needed
from nncf.experimental.torch.fx.transformations import DuplicateDQPassNoAnnotations
from nncf.experimental.torch.fx.transformations import apply_quantization_transformations
from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
Expand All @@ -39,6 +38,7 @@
from nncf.parameters import TargetDevice
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import is_weight_compression_needed
from nncf.quantization.algorithms.post_training.algorithm import PostTrainingQuantization
from nncf.quantization.algorithms.weight_compression.algorithm import WeightCompression
from nncf.scopes import IgnoredScope
Expand Down
17 changes: 0 additions & 17 deletions src/nncf/onnx/quantization/backend_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,11 @@ class BackendParameters:
"""
:param EXTERNAL_DATA_DIR: An absolute path to the directory where the external data
files are stored. All external data files must be located in the same folder.
:param COMPRESS_WEIGHTS: If `True` compresses constant quantized weights by folding
`QuantizeLinear` nodes into pre-quantized initializers. If `False`, this transformation
is skipped.
"""

COMPRESS_WEIGHTS = "compress_weights"
EXTERNAL_DATA_DIR = "external_data_dir"


def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
    """
    Checks whether constant quantized weights should be compressed.

    Compression is on by default; it is turned off only when the advanced
    parameters carry a `backend_params` mapping whose COMPRESS_WEIGHTS entry
    is explicitly falsy.

    :param advanced_parameters: Advanced quantization parameters.
    :return: `True` if weight compression is needed, `False` otherwise.
    """
    if advanced_parameters is None or advanced_parameters.backend_params is None:
        return True
    return advanced_parameters.backend_params.get(BackendParameters.COMPRESS_WEIGHTS, True)


def get_external_data_dir(
advanced_parameters: Optional[Union[AdvancedQuantizationParameters, AdvancedCompressionParameters]],
) -> Optional[str]:
Expand Down
5 changes: 2 additions & 3 deletions src/nncf/onnx/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
from nncf.onnx.graph.nncf_graph_builder import GraphConverter
from nncf.onnx.graph.passes import apply_preprocess_passes
from nncf.onnx.graph.passes import compress_quantize_weights_transformation
from nncf.onnx.quantization.backend_parameters import BackendParameters
from nncf.onnx.quantization.backend_parameters import get_external_data_dir
from nncf.onnx.quantization.backend_parameters import is_weight_compression_needed
from nncf.parameters import BackupMode
from nncf.parameters import CompressionFormat
from nncf.parameters import CompressWeightsMode
Expand All @@ -47,6 +45,7 @@
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import QuantizationParameters
from nncf.quantization.advanced_parameters import is_weight_compression_needed
from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer
from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop
from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator
Expand Down Expand Up @@ -215,7 +214,7 @@ def quantize_with_accuracy_control_impl(
copied_parameters = AdvancedQuantizationParameters()
else:
copied_parameters = deepcopy(advanced_quantization_parameters)
copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False
copied_parameters.compress_weights = False

quantized_model = quantize_impl(
model=model,
Expand Down
18 changes: 0 additions & 18 deletions src/nncf/openvino/quantization/backend_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters


class BackendParameters:
    """
    String keys recognized in `AdvancedQuantizationParameters.backend_params`
    for the OpenVINO backend.
    """

    # Controls whether quantized weights are compressed after quantization
    # (read by is_weight_compression_needed below; defaults to enabled).
    COMPRESS_WEIGHTS = "compress_weights"
    # NOTE(review): the remaining keys are not referenced in this view;
    # their semantics are presumably defined by the consumers of
    # backend_params — confirm against the calling code.
    STAT_REQUESTS_NUMBER = "stat_requests_number"
    EVAL_REQUESTS_NUMBER = "eval_requests_number"
    ACTIVATIONS = "activations"
    WEIGHTS = "weights"
    LEVEL_LOW = "level_low"
    LEVEL_HIGH = "level_high"


def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
    """
    Reports whether weight compression should be applied, based on the
    provided advanced quantization parameters.

    :param advanced_parameters: Advanced quantization parameters.
    :return: True if weight compression is needed, False otherwise.
    """
    params = None if advanced_parameters is None else advanced_parameters.backend_params
    if params is None:
        return True
    return params.get(BackendParameters.COMPRESS_WEIGHTS, True)
9 changes: 4 additions & 5 deletions src/nncf/openvino/quantization/quantize_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
from nncf.openvino.graph.model_utils import remove_friendly_name_duplicates
from nncf.openvino.graph.nncf_graph_builder import GraphConverter
from nncf.openvino.graph.node_utils import get_number_if_op
from nncf.openvino.quantization.backend_parameters import BackendParameters
from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed
from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies
from nncf.openvino.rt_info import dump_parameters
from nncf.parameters import BackupMode
Expand All @@ -43,6 +41,7 @@
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
from nncf.quantization.advanced_parameters import convert_to_dict_recursively
from nncf.quantization.advanced_parameters import is_weight_compression_needed
from nncf.quantization.algorithms.accuracy_control.algorithm import QuantizationAccuracyRestorer
from nncf.quantization.algorithms.accuracy_control.algorithm import calculate_accuracy_drop
from nncf.quantization.algorithms.accuracy_control.evaluator import Evaluator
Expand Down Expand Up @@ -211,13 +210,13 @@ def quantize_with_accuracy_control_impl(
if advanced_accuracy_restorer_parameters is None:
advanced_accuracy_restorer_parameters = AdvancedAccuracyRestorerParameters()

compress_weights = is_weight_compression_needed(advanced_quantization_parameters)

if advanced_quantization_parameters is None:
copied_parameters = AdvancedQuantizationParameters()
else:
copied_parameters = deepcopy(advanced_quantization_parameters)
copied_parameters.backend_params[BackendParameters.COMPRESS_WEIGHTS] = False

compress_weights = is_weight_compression_needed(copied_parameters)
copied_parameters.compress_weights = False

quantized_model = quantize_impl(
model=model,
Expand Down
17 changes: 17 additions & 0 deletions src/nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ class AdvancedQuantizationParameters:
:type smooth_quant_alpha: float
:param backend_params: Backend-specific parameters.
:type backend_params: dict[str, Any]
:param compress_weights: Indicates whether to apply weight compression after quantization.
:type compress_weights: bool
"""

# General parameters
Expand All @@ -270,6 +272,7 @@ class AdvancedQuantizationParameters:
activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None
weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None
quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE
compress_weights: bool = True

# Range estimator parameters
activations_range_estimator_params: RangeEstimatorParameters = field(default_factory=RangeEstimatorParameters)
Expand All @@ -287,6 +290,20 @@ class AdvancedQuantizationParameters:
backend_params: dict[str, Any] = field(default_factory=dict)


def is_weight_compression_needed(advanced_parameters: Optional[AdvancedQuantizationParameters]) -> bool:
    """
    Determine whether weight compression is needed based on advanced quantization parameters.

    If `advanced_parameters` is not provided, defaults to True; otherwise the
    value of its `compress_weights` flag decides.

    :param advanced_parameters: Advanced quantization parameters.
    :return: True if weight compression is needed, False otherwise.
    """
    if advanced_parameters is None:
        return True
    return advanced_parameters.compress_weights


@api()
@dataclass
class AdvancedAWQParameters:
Expand Down
3 changes: 1 addition & 2 deletions tests/onnx/test_passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import nncf
from nncf.onnx.graph.passes import apply_preprocess_passes
from nncf.onnx.graph.passes import compress_quantize_weights_transformation
from nncf.onnx.quantization.backend_parameters import BackendParameters
from tests.onnx.common import ModelBuilder
from tests.onnx.models import build_matmul_model_with_nop_cast

Expand Down Expand Up @@ -60,7 +59,7 @@ def test_compress_quantize_weights_transformation():
model,
calibration_dataset,
advanced_parameters=nncf.AdvancedQuantizationParameters(
backend_params={BackendParameters.COMPRESS_WEIGHTS: False}
compress_weights=False,
),
)

Expand Down
4 changes: 2 additions & 2 deletions tests/torch2/fx/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from nncf.common.utils.os import safe_open
from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
from nncf.experimental.torch.fx.node_utils import get_tensor_constant_from_node
from nncf.experimental.torch.fx.quantization.backend_parameters import FXBackendParameters
from nncf.experimental.torch.fx.transformations import DEQUANTIZE_NODE_TARGETS
from nncf.experimental.torch.fx.transformations import _get_node_inputs
from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
Expand Down Expand Up @@ -219,7 +218,8 @@ def transform_fn(data_item):
calibration_dataset = nncf.Dataset([example_input], transform_fn)

quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(
disable_bias_correction=True, backend_params={FXBackendParameters.COMPRESS_WEIGHTS: compress_weights}
disable_bias_correction=True,
compress_weights=compress_weights,
)
quantization_parameters["subset_size"] = 1

Expand Down