From e48d86c84d808195181df1b285208aa20c33a592 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Thu, 13 Feb 2025 21:45:52 +0000
Subject: [PATCH] Remove unused padding_idx variables

Signed-off-by: Tyler Michael Smith
---
 vllm/model_executor/models/arctic.py        | 1 -
 vllm/model_executor/models/baichuan.py      | 1 -
 vllm/model_executor/models/bart.py          | 1 -
 vllm/model_executor/models/chameleon.py     | 1 -
 vllm/model_executor/models/deepseek.py      | 1 -
 vllm/model_executor/models/deepseek_v2.py   | 1 -
 vllm/model_executor/models/exaone.py        | 1 -
 vllm/model_executor/models/florence2.py     | 1 -
 vllm/model_executor/models/fuyu.py          | 1 -
 vllm/model_executor/models/granite.py       | 1 -
 vllm/model_executor/models/granitemoe.py    | 1 -
 vllm/model_executor/models/idefics3.py      | 1 -
 vllm/model_executor/models/internlm2.py     | 1 -
 vllm/model_executor/models/jamba.py         | 1 -
 vllm/model_executor/models/llama.py         | 1 -
 vllm/model_executor/models/mamba.py         | 1 -
 vllm/model_executor/models/minicpm.py       | 1 -
 vllm/model_executor/models/mixtral.py       | 1 -
 vllm/model_executor/models/mixtral_quant.py | 1 -
 vllm/model_executor/models/mllama.py        | 1 -
 vllm/model_executor/models/nemotron.py      | 1 -
 vllm/model_executor/models/olmoe.py         | 1 -
 vllm/model_executor/models/opt.py           | 1 -
 vllm/model_executor/models/orion.py         | 1 -
 vllm/model_executor/models/phimoe.py        | 1 -
 vllm/model_executor/models/qwen2.py         | 1 -
 vllm/model_executor/models/qwen2_moe.py     | 1 -
 vllm/model_executor/models/solar.py         | 1 -
 vllm/model_executor/models/starcoder2.py    | 2 --
 vllm/model_executor/models/whisper.py       | 6 +-----
 30 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py
index d015682aab47..75306b341ab2 100644
--- a/vllm/model_executor/models/arctic.py
+++ b/vllm/model_executor/models/arctic.py
@@ -375,7 +375,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.embed_tokens = VocabParallelEmbedding(
             self.vocab_size,
diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py
index 5dfaa727b75a..6f8f0832e9bd 100644
--- a/vllm/model_executor/models/baichuan.py
+++ b/vllm/model_executor/models/baichuan.py
@@ -273,7 +273,6 @@ def __init__(
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/bart.py b/vllm/model_executor/models/bart.py
index 204c48d0d896..af6f78b3cded 100644
--- a/vllm/model_executor/models/bart.py
+++ b/vllm/model_executor/models/bart.py
@@ -751,7 +751,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
 
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py
index b29dd65a8e35..be01a84559d0 100644
--- a/vllm/model_executor/models/chameleon.py
+++ b/vllm/model_executor/models/chameleon.py
@@ -861,7 +861,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.embed_tokens = VocabParallelEmbedding(
             self.vocab_size,
diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py
index 9599e1df6a3c..0f59335ef1be 100644
--- a/vllm/model_executor/models/deepseek.py
+++ b/vllm/model_executor/models/deepseek.py
@@ -345,7 +345,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py
index fd0e58fa1458..3d27a7abed8f 100644
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@@ -574,7 +574,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         if get_pp_group().is_first_rank:
diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py
index 2eb91a682242..d142a67d710e 100644
--- a/vllm/model_executor/models/exaone.py
+++ b/vllm/model_executor/models/exaone.py
@@ -323,7 +323,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = ((lora_config.lora_extra_vocab_size *
                        (lora_config.max_loras or 1)) if lora_config else 0)
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/florence2.py b/vllm/model_executor/models/florence2.py
index 4a1ad5f4ee0c..4cf219f19f18 100644
--- a/vllm/model_executor/models/florence2.py
+++ b/vllm/model_executor/models/florence2.py
@@ -31,7 +31,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
         self.config = config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.shared = BartScaledWordEmbedding(self.vocab_size, config.d_model)
diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py
index 50b5ef35d2cd..f80fe8a92f74 100644
--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
@@ -254,7 +254,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config = config
         self.multimodal_config = multimodal_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.text_config.vocab_size
         self.image_token_id = _IMAGE_TOKEN_ID
         self.image_feature_size = config.patch_size**2 * config.num_channels
diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py
index 85911a0f41c2..2a540aa997a4 100644
--- a/vllm/model_executor/models/granite.py
+++ b/vllm/model_executor/models/granite.py
@@ -266,7 +266,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py
index 8ae661bf15c4..d06f28732179 100644
--- a/vllm/model_executor/models/granitemoe.py
+++ b/vllm/model_executor/models/granitemoe.py
@@ -258,7 +258,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         lora_config = vllm_config.lora_config
 
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py
index fdfabbaafce3..869d7da884f1 100644
--- a/vllm/model_executor/models/idefics3.py
+++ b/vllm/model_executor/models/idefics3.py
@@ -403,7 +403,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = self.config.text_config.pad_token_id
         self.vocab_size = self.config.text_config.vocab_size
         self.vision_model = Idefics3VisionTransformer(
             config.vision_config,
diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py
index c211ca5f4f8e..1804e16746bc 100644
--- a/vllm/model_executor/models/internlm2.py
+++ b/vllm/model_executor/models/internlm2.py
@@ -267,7 +267,6 @@ def __init__(
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.tok_embeddings = VocabParallelEmbedding(
             config.vocab_size,
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index f307f279dad4..2a3ec5cf234b 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -279,7 +279,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = ((lora_config.lora_extra_vocab_size *
                        (lora_config.max_loras or 1)) if lora_config else 0)
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 2ff52dd78912..c72c192992b9 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -308,7 +308,6 @@ def __init__(self,
         self.config = config
         self.quant_config = quant_config
 
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py
index 3bbc219e92a6..9ccd4c54064d 100644
--- a/vllm/model_executor/models/mamba.py
+++ b/vllm/model_executor/models/mamba.py
@@ -92,7 +92,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         is_lora_enabled = bool(lora_config)
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = ((lora_config.lora_extra_vocab_size *
                        (lora_config.max_loras or 1)) if lora_config else 0)
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py
index 29473f5bbaa0..d4d0881c6dee 100644
--- a/vllm/model_executor/models/minicpm.py
+++ b/vllm/model_executor/models/minicpm.py
@@ -370,7 +370,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config = config
         self.cache_config = cache_config
         self.quant_config = quant_config
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py
index 70880eb75224..cf431b06d4b0 100644
--- a/vllm/model_executor/models/mixtral.py
+++ b/vllm/model_executor/models/mixtral.py
@@ -260,7 +260,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         lora_config = vllm_config.lora_config
 
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/mixtral_quant.py b/vllm/model_executor/models/mixtral_quant.py
index fdc438917542..9a938f993baf 100644
--- a/vllm/model_executor/models/mixtral_quant.py
+++ b/vllm/model_executor/models/mixtral_quant.py
@@ -308,7 +308,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/mllama.py b/vllm/model_executor/models/mllama.py
index 3ca22d346b79..1ece01687b13 100644
--- a/vllm/model_executor/models/mllama.py
+++ b/vllm/model_executor/models/mllama.py
@@ -1016,7 +1016,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.embed_tokens = VocabParallelEmbedding(config.vocab_size + 8,
                                                    config.hidden_size)
diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py
index 6f0b831ac272..aea3423b73df 100644
--- a/vllm/model_executor/models/nemotron.py
+++ b/vllm/model_executor/models/nemotron.py
@@ -306,7 +306,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = (lora_config.lora_extra_vocab_size *
                       (lora_config.max_loras or 1)) if lora_config else 0
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py
index d6e24c6d67f3..66bffefb0561 100644
--- a/vllm/model_executor/models/olmoe.py
+++ b/vllm/model_executor/models/olmoe.py
@@ -258,7 +258,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py
index ad1d66902435..e2ef9ff24fb9 100644
--- a/vllm/model_executor/models/opt.py
+++ b/vllm/model_executor/models/opt.py
@@ -206,7 +206,6 @@ def __init__(
     ):
         super().__init__()
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.max_target_positions = config.max_position_embeddings
         self.vocab_size = config.vocab_size
 
diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py
index f4f5cdff6437..7eeda764b6f1 100644
--- a/vllm/model_executor/models/orion.py
+++ b/vllm/model_executor/models/orion.py
@@ -223,7 +223,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py
index aa4bb52c444f..34c018cf3dcd 100644
--- a/vllm/model_executor/models/phimoe.py
+++ b/vllm/model_executor/models/phimoe.py
@@ -447,7 +447,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         lora_config = vllm_config.lora_config
 
-        self.padding_idx = config.pad_token_id
         lora_vocab = ((lora_config.lora_extra_vocab_size *
                        (lora_config.max_loras or 1)) if lora_config else 0)
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index e3de6b64fbb3..00c15b22cd34 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -290,7 +290,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.config = config
         self.quant_config = quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         if get_pp_group().is_first_rank or (config.tie_word_embeddings
diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py
index 35d9854a55d6..2fdf7c3f40a5 100644
--- a/vllm/model_executor/models/qwen2_moe.py
+++ b/vllm/model_executor/models/qwen2_moe.py
@@ -331,7 +331,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         cache_config = vllm_config.cache_config
         quant_config = vllm_config.quant_config
 
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
diff --git a/vllm/model_executor/models/solar.py b/vllm/model_executor/models/solar.py
index 6215ed814bf4..3e57dfbf3ac7 100644
--- a/vllm/model_executor/models/solar.py
+++ b/vllm/model_executor/models/solar.py
@@ -275,7 +275,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         lora_vocab = ((lora_config.lora_extra_vocab_size *
                        (lora_config.max_loras or 1)) if lora_config else 0)
         self.vocab_size = config.vocab_size + lora_vocab
diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py
index 01ea43666482..f208c22ffba8 100644
--- a/vllm/model_executor/models/starcoder2.py
+++ b/vllm/model_executor/models/starcoder2.py
@@ -218,10 +218,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
 
         self.config = config
-        self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
 
-        # TODO: consider padding_idx (currently removed)
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py
index 0b506072094e..def63e8f1ac9 100644
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -44,10 +44,7 @@ class WhisperAudioInputs(TypedDict):
 
 class WhisperPositionalEmbedding(nn.Embedding):
 
-    def __init__(self,
-                 num_positions: int,
-                 embedding_dim: int,
-                 padding_idx: Optional[int] = None):
+    def __init__(self, num_positions: int, embedding_dim: int):
         super().__init__(num_positions, embedding_dim)
 
     def forward(self, position_ids):
@@ -380,7 +377,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         config = vllm_config.model_config.hf_config
         embed_dim = config.d_model
         self.num_mel_bins = config.num_mel_bins
-        self.padding_idx = config.pad_token_id
         self.max_source_positions = config.max_source_positions
         self.embed_scale = (math.sqrt(embed_dim)
                             if config.scale_embedding else 1.0)
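
For readers skimming the hunks above, the removed lines all follow one pattern: each model's
__init__ stored config.pad_token_id on a padding_idx attribute that nothing else in the file
reads, while the embedding layer is constructed from the vocabulary size and hidden size alone.
Below is a minimal, illustrative sketch of that pattern; it uses simplified names and a plain
nn.Embedding in place of vLLM's VocabParallelEmbedding and is not code taken from any one
model file in this patch.

    # Illustrative sketch only; names (ToyModel, pad_token_id) are hypothetical.
    import torch.nn as nn

    class ToyModel(nn.Module):
        def __init__(self, vocab_size: int, hidden_size: int, pad_token_id: int):
            super().__init__()
            # Before this patch: the pad token id was stored but never read again.
            # self.padding_idx = pad_token_id
            self.vocab_size = vocab_size
            # The embedding is built from vocab_size and hidden_size alone,
            # so dropping the unused attribute does not change behaviour.
            self.embed_tokens = nn.Embedding(vocab_size, hidden_size)

        def forward(self, input_ids):
            return self.embed_tokens(input_ids)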