1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/aqlm.py
@@ -169,6 +169,7 @@ def __init__(
num_codebooks: int,
out_group_size: int,
) -> None:
+ super().__init__()
self.in_group_size = in_group_size
self.nbits_per_codebook = nbits_per_codebook
self.num_codebooks = num_codebooks
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/awq.py
@@ -26,6 +26,7 @@ def __init__(
zero_point: bool,
modules_to_not_convert: Optional[List[str]] = None,
) -> None:
+ super().__init__()
self.weight_bits = weight_bits
self.group_size = group_size
self.zero_point = zero_point
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/awq_marlin.py
@@ -47,6 +47,7 @@ def __init__(self, weight_bits: int, group_size: int, zero_point: bool,
lm_head_quantized: bool,
modules_to_not_convert: Optional[List[str]],
full_config: Dict[str, Any]) -> None:
+ super().__init__()
self.pack_factor = 32 // weight_bits # packed into int32
self.group_size = group_size
self.zero_point = zero_point
7 changes: 5 additions & 2 deletions vllm/model_executor/layers/quantization/base_config.py
@@ -2,7 +2,7 @@

import inspect
from abc import ABC, abstractmethod
- from typing import Any, Dict, List, Mapping, Optional, Type
+ from typing import Any, Dict, List, Optional, Type

import torch
from torch import nn
@@ -59,7 +59,10 @@ def method_has_implemented_embedding(

class QuantizationConfig(ABC):
"""Base class for quantization configs."""
- packed_modules_mapping: Mapping[str, List[str]] = dict()
+
+ def __init__(self):
+     super().__init__()
+     self.packed_modules_mapping: Dict[str, List[str]] = dict()

@abstractmethod
def get_name(self) -> str:
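For context, a minimal standalone sketch of the behavioral difference this base_config.py change targets (the class names and sample mapping entries below are illustrative stand-ins, not vLLM code): a mutable mapping declared at class level is shared by every instance, so an entry registered through one config leaks into all others, while a dict created in __init__ is private to each instance. This is also why every config subclass in the remaining files now calls super().__init__().

from typing import Dict, List


class SharedMappingConfig:
    # Old pattern: one dict object attached to the class, shared by all instances.
    packed_modules_mapping: Dict[str, List[str]] = {}


class PerInstanceConfig:
    # New pattern: each instance builds its own dict inside __init__.
    def __init__(self) -> None:
        super().__init__()
        self.packed_modules_mapping: Dict[str, List[str]] = {}


class ExampleQuantConfig(PerInstanceConfig):
    # Subclasses must call super().__init__() so the attribute exists on self.
    def __init__(self, weight_bits: int) -> None:
        super().__init__()
        self.weight_bits = weight_bits


a, b = SharedMappingConfig(), SharedMappingConfig()
a.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
print(b.packed_modules_mapping)  # {'qkv_proj': [...]}: the mutation leaked into b

c, d = ExampleQuantConfig(4), ExampleQuantConfig(8)
c.packed_modules_mapping["qkv_proj"] = ["q_proj", "k_proj", "v_proj"]
print(d.packed_modules_mapping)  # {}: each instance keeps its own mapping

Moving the mapping onto the instance means any per-model customization of packed_modules_mapping stays local to that config object rather than mutating state shared across all QuantizationConfig subclasses.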
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/bitsandbytes.py
@@ -30,7 +30,7 @@ def __init__(
llm_int8_skip_modules: Optional[List[str]] = None,
llm_int8_threshold: float = 6.0,
) -> None:
-
+ super().__init__()
self.load_in_8bit = load_in_8bit
self.load_in_4bit = load_in_4bit
self.bnb_4bit_compute_dtype = bnb_4bit_compute_dtype
@@ -51,7 +51,7 @@ def __init__(
kv_cache_scheme: Optional[Dict[str, Any]] = None,
config: Optional[Dict[str, Any]] = None,
):
-
+ super().__init__()
self.ignore = ignore
self.quant_format = quant_format
# Map from [target -> scheme]
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/deepspeedfp.py
@@ -25,6 +25,7 @@ def __init__(
weight_bits: int = 8,
group_size: int = 512,
) -> None:
+ super().__init__()
self.weight_bits = weight_bits
self.group_size = group_size
self.valid_types = [torch.bfloat16, torch.float16]
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/experts_int8.py
@@ -17,7 +17,7 @@ class ExpertsInt8Config(QuantizationConfig):
"""Config class for Int8 experts quantization."""

def __init__(self) -> None:
- pass
+ super().__init__()

@classmethod
def get_name(cls) -> str:
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/fbgemm_fp8.py
@@ -29,6 +29,7 @@ class FBGEMMFp8Config(QuantizationConfig):
"""Config class for FBGEMM Fp8."""

def __init__(self, ignore_list: List[str], input_scale_ub: float):
+ super().__init__()
self.ignore_list = ignore_list if ignore_list else []
self.input_scale_ub = input_scale_ub

1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/fp8.py
@@ -47,6 +47,7 @@ def __init__(
ignored_layers: Optional[List[str]] = None,
weight_block_size: Optional[List[int]] = None,
) -> None:
+ super().__init__()
self.is_checkpoint_fp8_serialized = is_checkpoint_fp8_serialized
if is_checkpoint_fp8_serialized:
logger.warning("Detected fp8 checkpoint. Please note that the "
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/gguf.py
@@ -20,7 +20,7 @@ class GGUFConfig(QuantizationConfig):
"""Config class for GGUF."""

def __init__(self, ) -> None:
- pass
+ super().__init__()

def __repr__(self) -> str:
return ("GGUFConfig()")
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/gptq.py
@@ -58,6 +58,7 @@ def __init__(
# r"+:.*\.(?:1[6-9]|20|21)\..*": {"bits": 8, "group_size": 64,},
# r"-:.*\.moe\..*": {}, # negative match (skip) all `moe` layers
# }
+ super().__init__()
self.dynamic = dynamic

self.weight_bits = weight_bits
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -46,6 +46,7 @@ def __init__(self, weight_bits: int, group_size: int, desc_act: bool,
is_sym: bool, lm_head_quantized: bool,
dynamic: Dict[str, Dict[str, Union[int, bool]]],
full_config: Dict[str, Any]) -> None:
+ super().__init__()
if desc_act and group_size == -1:
# In this case, act_order == True is the same as act_order == False
# (since we have only one group per output channel)
@@ -38,6 +38,7 @@ def __init__(
weight_bits: int,
group_size: int,
) -> None:
+ super().__init__()
quant_type = {
4: scalar_types.uint4b8,
8: scalar_types.uint8b128,
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/hqq_marlin.py
@@ -33,6 +33,7 @@ def __init__(
group_size: int,
skip_modules: Optional[List[str]] = None,
) -> None:
+ super().__init__()
assert group_size == 64, ("The only supported HQQ group size is "
"currently 64.")
assert weight_bits == 4, ("The only supported HQQ quantization "
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/ipex_quant.py
@@ -35,6 +35,7 @@ def __init__(
desc_act: Optional[bool] = None,
lm_head_quantized: Optional[bool] = None,
) -> None:
+ super().__init__()
self.method = method
self.weight_bits = weight_bits
self.group_size = group_size
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/modelopt.py
@@ -28,6 +28,7 @@ def __init__(
self,
is_checkpoint_fp8_serialized: bool = False,
) -> None:
+ super().__init__()
self.is_checkpoint_fp8_serialized = is_checkpoint_fp8_serialized
if is_checkpoint_fp8_serialized:
logger.warning("Detected ModelOpt fp8 checkpoint. Please note that"
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/moe_wna16.py
@@ -24,6 +24,7 @@ def __init__(self, linear_quant_method: str, weight_bits: int,
group_size: int, has_zp: bool, lm_head_quantized: bool,
modules_to_not_convert: Optional[List[str]],
full_config: Dict[str, Any]) -> None:
+ super().__init__()
self.weight_bits = weight_bits
self.group_size = group_size
self.has_zp = has_zp
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/neuron_quant.py
@@ -20,6 +20,7 @@ def __init__(
dequant_dtype: str = "f16",
quantize_method: str = "vector_dynamic",
) -> None:
+ super().__init__()
self.quant_dtype = os.getenv("NEURON_QUANT_DTYPE", "s8")
if self.quant_dtype not in SUPPORTED_QUANT_DTYPE_LIST:
raise ValueError(
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/qqq.py
@@ -39,6 +39,7 @@ def __init__(
group_size: int,
is_sym: bool = True,
) -> None:
+ super().__init__()
self.weight_bits = weight_bits
self.group_size = group_size
self.is_sym = is_sym
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/quark/quark.py
@@ -30,6 +30,7 @@ def __init__(self,
kv_cache_group: Optional[List[str]] = None,
kv_cache_config: Optional[Dict[str, Any]] = None,
pack_method: str = "reorder"):
+ super().__init__()
if kv_cache_group is None:
kv_cache_group = []
self.quant_config = quant_config
1 change: 1 addition & 0 deletions vllm/model_executor/layers/quantization/tpu_int8.py
@@ -21,6 +21,7 @@ def __init__(
self,
activation_scheme: str = "none",
) -> None:
+ super().__init__()
if activation_scheme not in ACTIVATION_SCHEMES:
raise ValueError(
f"Unsupported activation scheme {activation_scheme}")