From b920d77fcddb6040191e1ea409ee7c49e053f408 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Mon, 15 Jul 2024 14:57:01 +0800 Subject: [PATCH 1/5] support woq tuning Signed-off-by: Kaihui-intel --- .../torch/algorithms/weight_only/utility.py | 6 +++++- neural_compressor/torch/quantization/autotune.py | 1 - neural_compressor/torch/quantization/config.py | 9 ++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py index 0cb6d6d938d..8f46b778ec5 100644 --- a/neural_compressor/torch/algorithms/weight_only/utility.py +++ b/neural_compressor/torch/algorithms/weight_only/utility.py @@ -1105,7 +1105,11 @@ def __iter__(self): if not args: yield kwargs elif not kwargs: - yield args + # case: tensor + if len(args) == 1: + yield args[0] + else: + yield args else: yield args, kwargs diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index bdcbf642e47..f68144c1339 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -37,7 +37,6 @@ def get_rtn_double_quant_config_set() -> List[RTNConfig]: rtn_double_quant_config_set.append(RTNConfig.from_dict(double_quant_config)) return rtn_double_quant_config_set - def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: return get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 9014f1576a3..7d5148ef8df 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -723,7 +723,7 @@ def __init__( minmax_lr: float = None, low_gpu_mem_usage: bool = True, iters: int = 200, - seqlen: int = 2048, + seqlen: int = 512, n_samples: int = 512, sampler: str = "rand", seed: int = 42, @@ -1485,13 +1485,12 @@ def get_all_registered_configs() -> Dict[str, BaseConfig]: ######################## WOQ Tuning Config ############################### def get_woq_tuning_config() -> list: """Generate the config set for WOQ tuning. - + Returns: the list of WOQ quant config. 
""" RTN_G32ASYM = RTNConfig(use_sym=False, group_size=32) + AUTO_ROUND_CONFIG = AutoRoundConfig(use_sym=False, group_size=32) GPTQ_G32ASYM = GPTQConfig(use_sym=False, group_size=32) - GPTQ_G32ASYM_DISABLE_LAST_LINEAR = GPTQConfig(use_sym=False).set_local("*.lm_head", GPTQConfig(dtype="fp32")) - GPTQ_G128ASYM = GPTQConfig(group_size=128, use_sym=False) AWQ_G32ASYM = AWQConfig(use_sym=False, group_size=32) - return [RTN_G32ASYM, GPTQ_G32ASYM, GPTQ_G32ASYM_DISABLE_LAST_LINEAR, GPTQ_G128ASYM, AWQ_G32ASYM] + return [RTN_G32ASYM, AUTO_ROUND_CONFIG, GPTQ_G32ASYM, AWQ_G32ASYM] \ No newline at end of file From 2ba9e1d0c9ad21b2ffa8aeecb9a0d8a9c4533a21 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 07:02:33 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/torch/quantization/autotune.py | 1 + neural_compressor/torch/quantization/config.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index f68144c1339..bdcbf642e47 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -37,6 +37,7 @@ def get_rtn_double_quant_config_set() -> List[RTNConfig]: rtn_double_quant_config_set.append(RTNConfig.from_dict(double_quant_config)) return rtn_double_quant_config_set + def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: return get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 7d5148ef8df..da93dff09c1 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -1485,7 +1485,7 @@ def get_all_registered_configs() -> Dict[str, BaseConfig]: ######################## WOQ Tuning Config ############################### def get_woq_tuning_config() -> list: """Generate the config set for WOQ tuning. - + Returns: the list of WOQ quant config. 
""" @@ -1493,4 +1493,4 @@ def get_woq_tuning_config() -> list: AUTO_ROUND_CONFIG = AutoRoundConfig(use_sym=False, group_size=32) GPTQ_G32ASYM = GPTQConfig(use_sym=False, group_size=32) AWQ_G32ASYM = AWQConfig(use_sym=False, group_size=32) - return [RTN_G32ASYM, AUTO_ROUND_CONFIG, GPTQ_G32ASYM, AWQ_G32ASYM] \ No newline at end of file + return [RTN_G32ASYM, AUTO_ROUND_CONFIG, GPTQ_G32ASYM, AWQ_G32ASYM] From 65cde923c2b8404d692b62544a1285e9dda0a7f2 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Mon, 15 Jul 2024 16:56:43 +0800 Subject: [PATCH 3/5] update ut Signed-off-by: Kaihui-intel --- test/3x/torch/quantization/weight_only/test_woq_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/3x/torch/quantization/weight_only/test_woq_utils.py b/test/3x/torch/quantization/weight_only/test_woq_utils.py index c31d94b823d..69d5545e2ef 100644 --- a/test/3x/torch/quantization/weight_only/test_woq_utils.py +++ b/test/3x/torch/quantization/weight_only/test_woq_utils.py @@ -169,7 +169,7 @@ def test_captured_dataloader_iteration(self): result = list(dataloader) - assert result == [(1,), (2,), (3,)] + assert result == [1, 2, 3] # Test case when both args and kwargs are present args_list = [(1,), (2,), (3,)] From 8d7dab56b19cce52af12affb4610cbf10d17985f Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 16 Jul 2024 13:15:21 +0800 Subject: [PATCH 4/5] improve coverage Signed-off-by: Kaihui-intel --- test/3x/torch/quantization/weight_only/test_woq_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/3x/torch/quantization/weight_only/test_woq_utils.py b/test/3x/torch/quantization/weight_only/test_woq_utils.py index 69d5545e2ef..45e29553fef 100644 --- a/test/3x/torch/quantization/weight_only/test_woq_utils.py +++ b/test/3x/torch/quantization/weight_only/test_woq_utils.py @@ -170,6 +170,15 @@ def test_captured_dataloader_iteration(self): result = list(dataloader) assert result == [1, 2, 3] + + # Test case when kwargs is empty + args_list = [(1, 2), (2, 3), (3, 4)] + kwargs_list = [{}, {}, {}] + dataloader = CapturedDataloader(args_list, kwargs_list) + + result = list(dataloader) + + assert result == [(1, 2), (2, 3), (3, 4)] # Test case when both args and kwargs are present args_list = [(1,), (2,), (3,)] From 26d6c8ea75a6f3c2e15a3d4734fab897bebbdbe3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 05:22:08 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/3x/torch/quantization/weight_only/test_woq_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/3x/torch/quantization/weight_only/test_woq_utils.py b/test/3x/torch/quantization/weight_only/test_woq_utils.py index 45e29553fef..3bee40696c8 100644 --- a/test/3x/torch/quantization/weight_only/test_woq_utils.py +++ b/test/3x/torch/quantization/weight_only/test_woq_utils.py @@ -170,7 +170,7 @@ def test_captured_dataloader_iteration(self): result = list(dataloader) assert result == [1, 2, 3] - + # Test case when kwargs is empty args_list = [(1, 2), (2, 3), (3, 4)] kwargs_list = [{}, {}, {}]