Skip to content

Commit dfd083d

Browse files
authored
Support export compressed model for AutoRound [2.x] (#1648)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent 0a3d4bd commit dfd083d

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

neural_compressor/model/torch_model.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,9 @@ def export_compressed_model(
496496
gptq_config = json.load(f)
497497
else:
498498
gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
499+
500+
autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
501+
499502
if gptq_config:
500503
for k, v in weight_config.items():
501504
logger.debug(f"Compressing {k} on device {device}")
@@ -555,6 +558,19 @@ def export_compressed_model(
555558
)
556559
new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
557560
set_module(self.model, k, new_module)
561+
elif autoround_config:
562+
from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401
563+
564+
self.model = compress_model(
565+
self.model,
566+
weight_config=autoround_config,
567+
enable_full_range=enable_full_range,
568+
compression_dtype=compression_dtype,
569+
compression_dim=compression_dim,
570+
device=device,
571+
use_optimum_format=use_optimum_format,
572+
inplace=True,
573+
)
558574
else:
559575
for k, v in weight_config.items():
560576
logger.debug(f"Compressing {k} on device {device}")

test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,14 @@ def test_AutoRound_quant(self):
801801
self.assertTrue("scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys())
802802
self.assertTrue(torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"])
803803

804+
export_model = q_model.export_compressed_model()
805+
export_out = export_model(input)
806+
self.assertTrue(torch.allclose(out2[0], export_out[0]))
807+
from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear
808+
809+
self.assertTrue(isinstance(q_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
810+
self.assertTrue(isinstance(export_model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
811+
804812
fp32_model = copy.deepcopy(self.gptj)
805813

806814
conf = PostTrainingQuantConfig(
@@ -852,8 +860,6 @@ def test_AutoRound_quant(self):
852860
)
853861
out2 = export_model.model(input)
854862
self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-01))
855-
from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear
856-
857863
self.assertTrue(isinstance(export_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
858864

859865

0 commit comments

Comments (0)