2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/speech_to_text.py
@@ -24,7 +24,6 @@
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
-from vllm.model_executor.model_loader import get_model_cls
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
 from vllm.utils import PlaceholderModule
@@ -78,6 +77,7 @@ def __init__(
 
     @cached_property
     def model_cls(self) -> type[SupportsTranscription]:
+        from vllm.model_executor.model_loader import get_model_cls
         model_cls = get_model_cls(self.model_config)
         return cast(type[SupportsTranscription], model_cls)
 
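The diff above defers importing `get_model_cls` until `model_cls` is first accessed, so importing the entrypoint module no longer pulls in the model loader. A minimal, self-contained sketch of the same deferred-import-plus-`cached_property` pattern (the `LazyResolver` class and the `decimal` stand-in module are illustrative, not vLLM code):

from functools import cached_property


class LazyResolver:
    """Sketch: the heavy import runs on first access, not at module import."""

    def __init__(self, config_name: str) -> None:
        self.config_name = config_name

    @cached_property
    def model_cls(self) -> type:
        # Deferred import: `decimal` stands in for an expensive module such
        # as a model loader; it is imported only when this property is first
        # evaluated, and cached_property stores the result for later accesses.
        from decimal import Context
        return Context


resolver = LazyResolver("example-config")
print(resolver.model_cls)  # first access triggers the import
print(resolver.model_cls)  # later accesses reuse the cached class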
20 changes: 12 additions & 8 deletions vllm/lora/utils.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import os
-from typing import Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 import huggingface_hub
 import regex as re
@@ -31,10 +31,14 @@
                               RowParallelLinearWithLoRA,
                               VocabParallelEmbeddingWithLoRA)
 from vllm.model_executor.layers.linear import LinearBase
 
 # yapf: enable
-from vllm.model_executor.layers.logits_processor import LogitsProcessor
-from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
-from vllm.model_executor.models.utils import WeightsMapper
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.logits_processor import LogitsProcessor
+    from vllm.model_executor.layers.vocab_parallel_embedding import (
+        ParallelLMHead)
+    from vllm.model_executor.models.utils import WeightsMapper
+
 logger = init_logger(__name__)
 
@@ -75,8 +79,8 @@ def from_layer(layer: nn.Module,
 
 
 def from_layer_logits_processor(
-        layer: LogitsProcessor,
-        lm_head: ParallelLMHead,
+        layer: "LogitsProcessor",
+        lm_head: "ParallelLMHead",
         max_loras: int,
         lora_config: LoRAConfig,
         model_config: Optional[PretrainedConfig] = None,
@@ -98,8 +102,8 @@ def replace_submodule(model: nn.Module, module_name: str,
 
 
 def parse_fine_tuned_lora_name(
-        name: str,
-        weights_mapper: Optional[WeightsMapper] = None
+        name: str,
+        weights_mapper: Optional["WeightsMapper"] = None
 ) -> tuple[str, bool, bool]:
     """Parse the name of lora weights.
 
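The `vllm/lora/utils.py` change keeps `LogitsProcessor`, `ParallelLMHead`, and `WeightsMapper` as type-checking-only imports and switches the annotations to quoted forward references, so those modules are never imported at runtime just to annotate signatures. A small sketch of that pattern, using `decimal.Context` as a stand-in for a heavy class (names here are illustrative, not the vLLM ones):

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only static type checkers execute this block; at runtime the stand-in
    # module is not imported merely to satisfy the annotation below.
    from decimal import Context


def describe_context(ctx: Optional["Context"] = None) -> str:
    # "Context" is a quoted forward reference: it is stored as a string and
    # only resolved if something like typing.get_type_hints() asks for it.
    if ctx is None:
        return "no context supplied"
    return f"context with precision {ctx.prec}"


if __name__ == "__main__":
    print(describe_context())  # runs without importing decimal eagerly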