
Commit e33264a

Rename to StretchedIntxWeightConfig

1 parent 1ccb298 commit e33264a

This commit renames Int8DynamicActivationStretchedIntxWeightConfig to StretchedIntxWeightConfig throughout the PARQ prototype: the config dataclass and its handler registration in config_torchao.py, the re-export in __init__.py, and both test files. Along the way, the tests now pass version=1 explicitly, test_parq.py folds two skip decorators into a single CUDA-only condition, and _get_config_from_quantizer moves its CPU version=1 override after the branch logic so it applies to the final config.

File tree: 4 files changed, +23 −19 lines

  test/prototype/test_dynamic_activation_lut.py
  test/prototype/test_parq.py
  torchao/prototype/parq/quant/__init__.py
  torchao/prototype/parq/quant/config_torchao.py
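
For callers, the change is mechanical: only the class name moves, its module does not. A minimal before/after sketch using just the names in this diff:

# Before this commit:
# from torchao.prototype.parq.quant import Int8DynamicActivationStretchedIntxWeightConfig

# After this commit:
from torchao.prototype.parq.quant import StretchedIntxWeightConfig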

test/prototype/test_dynamic_activation_lut.py

Lines changed: 5 additions & 3 deletions

@@ -12,7 +12,7 @@
 import torch

 from torchao.prototype.parq.quant import (
-    Int8DynamicActivationStretchedIntxWeightConfig,
+    StretchedIntxWeightConfig,
     StretchedUnifTorchaoQuantizer,
 )
 from torchao.prototype.quantization.dynamic_activation_lut import (

@@ -63,12 +63,13 @@ def run_before_and_after_tests():
 def test_parq_conversion(dtype, granularity, bit_width, lead_dim):
     torch.manual_seed(0)
     quantizer = StretchedUnifTorchaoQuantizer(bit_width)
-    config = Int8DynamicActivationStretchedIntxWeightConfig(
+    config = StretchedIntxWeightConfig(
         b=bit_width,
         quant_min=quantizer.quant_min,
         quant_max=quantizer.quant_max,
         granularity=granularity,
         activation_quantization=None,
+        version=1,
     )

     parq_model = ToyLinearModel(128, 256, 128, 1).to(dtype)

@@ -114,12 +115,13 @@ def test_parq_conversion(dtype, granularity, bit_width, lead_dim):
 @pytest.mark.skipif(not is_arm64_mac, reason="requires arm64 mac")
 def test_export(dtype, granularity, bit_width, lead_dim):
     quantizer = StretchedUnifTorchaoQuantizer(bit_width)
-    config = Int8DynamicActivationStretchedIntxWeightConfig(
+    config = StretchedIntxWeightConfig(
         b=bit_width,
         quant_min=quantizer.quant_min,
         quant_max=quantizer.quant_max,
         granularity=granularity,
         activation_quantization=None,
+        version=1,
     )

     parq_model = ToyLinearModel(128, 256, 128, 8).to(dtype)
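
Condensed, the updated test flow builds the renamed config and applies it to a model. A hedged sketch of that flow: quantize_'s import path does not appear in this diff and is an assumption, and a plain nn.Linear stands in for the test helper ToyLinearModel so the snippet is self-contained.

import torch
from torchao.prototype.parq.quant import (
    StretchedIntxWeightConfig,
    StretchedUnifTorchaoQuantizer,
)
from torchao.quantization import quantize_  # assumed import path

bit_width = 2
quantizer = StretchedUnifTorchaoQuantizer(bit_width)
config = StretchedIntxWeightConfig(
    b=bit_width,
    quant_min=quantizer.quant_min,
    quant_max=quantizer.quant_max,
    activation_quantization=None,  # weight-only, matching the updated tests
    version=1,  # now pinned explicitly by the tests
)

model = torch.nn.Sequential(torch.nn.Linear(128, 256))
quantize_(model, config)  # dispatches to the handler registered for the config's type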

test/prototype/test_parq.py

Lines changed: 10 additions & 7 deletions

@@ -19,9 +19,9 @@
 )
 from torchao.prototype.parq.quant import (
     Int4UnifTorchaoQuantizer,
-    Int8DynamicActivationStretchedIntxWeightConfig,
     LSBQuantizer,
     Quantizer,
+    StretchedIntxWeightConfig,
     StretchedUnifTorchaoQuantizer,
     TernaryUnifQuantizer,
     UnifQuantizer,

@@ -237,11 +237,14 @@ class TestUnifTorchaoQuantizer(common_utils.TestCase):
     def setUp(self):
         torch.manual_seed(123)

-    @unittest.skipIf(not torch_version_at_least("2.8.0"), "Need pytorch >= 2.8.0")
     @unittest.skipIf(
-        torch.cuda.is_available()
-        and (not is_sm_at_least_90() or not _is_fbgemm_genai_gpu_available()),
-        "Requires sm90+ and fbgemm-gpu-genai >= 1.2.0 if GPU available",
+        _DEVICE == "cuda"
+        and (
+            not torch_version_at_least("2.8.0")
+            or not is_sm_at_least_90()
+            or not _is_fbgemm_genai_gpu_available()
+        ),
+        "Requires pytorch >= 2.8.0, sm90+ and fbgemm-gpu-genai >= 1.2.0 on GPU",
     )
     @common_utils.parametrize("group_size", [32, 256])
     def test_int4_weight_only(self, group_size: int = 32):

@@ -361,7 +364,7 @@ def test_intx_weight_only(self, b: int = 2, group_size: int = 32):
         m_ref = copy.deepcopy(model).eval().to(_DEVICE)
         quantize_(
             m_ref,
-            Int8DynamicActivationStretchedIntxWeightConfig(
+            StretchedIntxWeightConfig(
                 b=b,
                 quant_min=quantizer.quant_min,
                 quant_max=quantizer.quant_max,

@@ -381,7 +384,7 @@ def test_intx_weight_only_e2e(self, b: int = 2, group_size: int = 32):
         quantizer = StretchedUnifTorchaoQuantizer(b)

         m_ref = copy.deepcopy(model).eval().to(_DEVICE)
-        config = Int8DynamicActivationStretchedIntxWeightConfig(
+        config = StretchedIntxWeightConfig(
             b=b,
             quant_min=quantizer.quant_min,
             quant_max=quantizer.quant_max,
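
The two decorators collapse into one, and the gate changes from torch.cuda.is_available() to _DEVICE == "cuda", so the pytorch >= 2.8.0 requirement no longer skips CPU-only runs. A minimal sketch of the resulting pattern; the helper import location is an assumption (the diff does not show it), and the private fbgemm check is omitted to keep the snippet self-contained:

import unittest

import torch
from torchao.utils import is_sm_at_least_90, torch_version_at_least  # assumed location

_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class TestInt4WeightOnly(unittest.TestCase):
    @unittest.skipIf(
        _DEVICE == "cuda"
        and (not torch_version_at_least("2.8.0") or not is_sm_at_least_90()),
        "Requires pytorch >= 2.8.0 and sm90+ on GPU",
    )
    def test_int4_weight_only(self):
        ...  # the decorator, not the body, is the point here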

torchao/prototype/parq/quant/__init__.py

Lines changed: 1 addition & 3 deletions

@@ -4,9 +4,7 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.

-from .config_torchao import (  # noqa: F401
-    Int8DynamicActivationStretchedIntxWeightConfig,
-)
+from .config_torchao import StretchedIntxWeightConfig  # noqa: F401
 from .lsbq import LSBQuantizer  # noqa: F401
 from .quantizer import Quantizer  # noqa: F401
 from .uniform import (  # noqa: F401

torchao/prototype/parq/quant/config_torchao.py

Lines changed: 7 additions & 6 deletions

@@ -42,7 +42,7 @@


 @dataclass
-class Int8DynamicActivationStretchedIntxWeightConfig(AOBaseConfig):
+class StretchedIntxWeightConfig(AOBaseConfig):
     granularity: Granularity = PerAxis(0)
     scale_dtype: Optional[torch.dtype] = None
     layout: Layout = QDQLayout()

@@ -53,16 +53,16 @@ class Int8DynamicActivationStretchedIntxWeightConfig(AOBaseConfig):
     activation_quantization: Optional[str] = "int8_asym_per_token"


-@register_quantize_module_handler(Int8DynamicActivationStretchedIntxWeightConfig)
+@register_quantize_module_handler(StretchedIntxWeightConfig)
 def _int8_dynamic_activation_stretched_intx_transform(
-    module: nn.Module, config: Int8DynamicActivationStretchedIntxWeightConfig
+    module: nn.Module, config: StretchedIntxWeightConfig
 ) -> nn.Module:
     weight = module.weight
     granularity = config.granularity
     mapping_type = MappingType.ASYMMETRIC

     assert weight.dim() == 2, (
-        f"Int8DynamicActivationStretchedIntxWeightConfig only works for 2-d Tensor, got: {weight.dim()}"
+        f"StretchedIntxWeightConfig only works for 2-d Tensor, got: {weight.dim()}"
     )
     if isinstance(granularity, PerGroup):
         group_size = granularity.group_size

@@ -138,9 +138,8 @@ def _get_config_from_quantizer(
        )
        if check_cpu_version(device):
            config.layout = Int4CPULayout()
-            config.version = 1
    elif isinstance(quantizer, StretchedUnifTorchaoQuantizer):
-        config = Int8DynamicActivationStretchedIntxWeightConfig(
+        config = StretchedIntxWeightConfig(
            b=b,
            quant_min=quantizer.quant_min,
            quant_max=quantizer.quant_max,

@@ -164,6 +163,8 @@ def _get_config_from_quantizer(
            act_mapping_type=MappingType.ASYMMETRIC,
            version=version,
        )
+    if check_cpu_version(device):
+        config.version = 1
    return config


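Two things are worth noting in this file. First, register_quantize_module_handler keys quantize_'s dispatch on the config class, which is why the rename touches the decorator and the type annotation while the transform keeps its old int8-flavored function name. Second, the CPU version=1 override moves below the branch logic, so it now applies to whichever config was constructed rather than only the Int4 CPU path. An illustrative sketch of the dispatch pattern, not torchao's actual implementation:

from typing import Callable, Dict

import torch.nn as nn

# Hypothetical registry standing in for torchao's real one.
_HANDLERS: Dict[type, Callable[[nn.Module, object], nn.Module]] = {}

def register_quantize_module_handler(config_cls: type) -> Callable:
    def decorator(fn: Callable) -> Callable:
        _HANDLERS[config_cls] = fn  # config class -> module transform
        return fn
    return decorator

def apply_handler(module: nn.Module, config: object) -> nn.Module:
    # Lookup is by the config's type, so renaming the config class changes
    # the registry key while the handler body stays untouched.
    return _HANDLERS[type(config)](module, config)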