File tree — 3 files changed, +5 −5 lines changed

Original file line number | Diff line number | Diff line change
33 33    version = 2,
3434)
3535
36+ # only group_size 128 is supported
3637FP8_ACT_CONFIG = Float8DynamicActivationInt4WeightConfig (
37- group_size = 128 ,
3838 packing_format = "preshuffled" ,
3939)
4040
Original file line number Diff line number Diff line change @@ -1927,7 +1927,7 @@ def test_quantize_api_fp8_int4(self):
19271927 quantize_(model, QATConfig(Float8DynamicActivationInt4WeightConfig(), step="convert"))
19281928 """
19291929 self ._test_quantize_api_against_ptq (
1930- Float8DynamicActivationInt4WeightConfig (group_size = 128 ),
1930+ Float8DynamicActivationInt4WeightConfig (),
19311931 target_prepare_sqnr = 15 ,
19321932 target_convert_sqnr = float ("inf" ),
19331933 )
Original file line number Diff line number Diff line change @@ -1156,13 +1156,13 @@ def _int4_weight_only_transform(
11561156class Float8DynamicActivationInt4WeightConfig (AOBaseConfig ):
11571157 """Configuration for apply float8 dynamic per row quantization and int4
11581158 per group weight quantization to linear
1159+ (only group_size 128 is supported right now, since the underlying kernel
1160+ only supports group sizes of 128 and above, and there is no benefit to making it larger)
11591161
11601162 Args:
1161- `group_size`: group size for groupwise quantization for weight
11621163 `packing_format`: how the weight is packed, only preshuffled is supported
11631164 """
11641165
1165- group_size : int = 128
11661166 packing_format : PackingFormat = "preshuffled"
11671167
11681168
@@ -1174,13 +1174,13 @@ def _float8_dynamic_activation_int4_weight_transform(
11741174 "applying int8 weight only quant requires module to have weight attribute"
11751175 + " but {module} does not have one"
11761176 )
1177- group_size = config .group_size
11781177 packing_format = config .packing_format
11791178
11801179 assert packing_format == "preshuffled" , (
11811180 f"only preshuffled packing_format supported right now, got: { packing_format } "
11821181 )
11831182 weight = module .weight
1183+ group_size = 128
11841184 block_size = tuple ([1 for _ in range (weight .ndim - 1 )] + [group_size ])
11851185 new_weight = Int4PreshuffledTensor .from_hp (
11861186 module .weight ,
You can’t perform that action at this time.
0 commit comments