From 2609498a1316072d72ebe57244cb04d4b936550d Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 1 Apr 2024 14:10:21 +0800
Subject: [PATCH 1/3] fix supported layer checking

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 758533e5356..98e2f45ebcc 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -81,7 +81,7 @@ def rtn_quantize(
     model.to(device)
     assert isinstance(model, torch.nn.Module), "only support torch module"
-    supported_layers = ["Linear"]
+    supported_layers = [torch.nn.Linear]
     # initialize global configuration
     double_quant_config = {
         "double_quant": kwargs.get("use_double_quant", False),
@@ -93,7 +93,7 @@ def rtn_quantize(
     if export_compressed_model:
         use_optimum_format = kwargs.get("use_optimum_format", True)
     for name, m in model.named_modules():
-        if m.__class__.__name__ not in supported_layers:
+        if not any(isinstance(m, layer) for layer in supported_layers):
             continue
         if name in weight_config:  # pragma: no cover
             # initialize op configuration

From fe7a319ec18aa83f902f39ed87e3035c45a0064d Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 1 Apr 2024 15:04:47 +0800
Subject: [PATCH 2/3] change supported layers list to tuple

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 98e2f45ebcc..c24dd531a23 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -81,7 +81,7 @@ def rtn_quantize(
     model.to(device)
     assert isinstance(model, torch.nn.Module), "only support torch module"
-    supported_layers = [torch.nn.Linear]
+    supported_layers = (torch.nn.Linear, )
     # initialize global configuration
     double_quant_config = {
         "double_quant": kwargs.get("use_double_quant", False),
@@ -93,7 +93,7 @@ def rtn_quantize(
     if export_compressed_model:
         use_optimum_format = kwargs.get("use_optimum_format", True)
     for name, m in model.named_modules():
-        if not any(isinstance(m, layer) for layer in supported_layers):
+        if not isinstance(m, supported_layers):
             continue
         if name in weight_config:  # pragma: no cover
             # initialize op configuration

From 54bbca3e6e1bdeecd697ddfcc2bab8f49353edf0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Apr 2024 08:00:16 +0000
Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index c24dd531a23..e08d59a563d 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -81,7 +81,7 @@ def rtn_quantize(
     model.to(device)
     assert isinstance(model, torch.nn.Module), "only support torch module"
-    supported_layers = (torch.nn.Linear, )
+    supported_layers = (torch.nn.Linear,)
     # initialize global configuration
     double_quant_config = {
         "double_quant": kwargs.get("use_double_quant", False),
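
Note (illustrative, not part of the patch series): the change from comparing m.__class__.__name__ against the string "Linear" to isinstance(m, supported_layers) also matches subclasses of torch.nn.Linear, and isinstance accepts the whole tuple directly. A minimal sketch of the behavioral difference, using a hypothetical subclass MyLinear:

    import torch

    class MyLinear(torch.nn.Linear):
        """Hypothetical subclass used only to illustrate the check."""
        pass

    m = MyLinear(4, 4)

    # Old check: compares the class name string, so subclasses are skipped.
    print(m.__class__.__name__ in ["Linear"])   # False -> layer would not be quantized

    # New check: isinstance takes a tuple of types and matches subclasses.
    supported_layers = (torch.nn.Linear,)
    print(isinstance(m, supported_layers))      # True  -> layer is quantized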