From 547781a1b70a56d2a31015e93c587b15ebf5693f Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Fri, 7 Feb 2025 06:33:44 +0000 Subject: [PATCH 1/2] Remove unnecessary detokenization in multimodal processing Signed-off-by: DarkLight1337 --- tests/entrypoints/openai/test_audio.py | 6 +++--- tests/entrypoints/openai/test_vision.py | 2 +- tests/entrypoints/openai/test_vision_embedding.py | 4 ++-- vllm/inputs/preprocess.py | 3 --- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index 6e206dfd99b6..3459f24834db 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -83,7 +83,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI, choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert chat_completion.usage == openai.types.CompletionUsage( - completion_tokens=10, prompt_tokens=202, total_tokens=212) + completion_tokens=10, prompt_tokens=201, total_tokens=211) message = choice.message message = chat_completion.choices[0].message @@ -140,7 +140,7 @@ async def test_single_chat_session_audio_base64encoded( choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert chat_completion.usage == openai.types.CompletionUsage( - completion_tokens=10, prompt_tokens=202, total_tokens=212) + completion_tokens=10, prompt_tokens=201, total_tokens=211) message = choice.message message = chat_completion.choices[0].message @@ -196,7 +196,7 @@ async def test_single_chat_session_input_audio( choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert chat_completion.usage == openai.types.CompletionUsage( - completion_tokens=10, prompt_tokens=202, total_tokens=212) + completion_tokens=10, prompt_tokens=201, total_tokens=211) message = choice.message message = chat_completion.choices[0].message diff --git a/tests/entrypoints/openai/test_vision.py 
b/tests/entrypoints/openai/test_vision.py index 029c9b038b04..3cb470015894 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -185,7 +185,7 @@ async def test_single_chat_session_image_base64encoded( choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert chat_completion.usage == openai.types.CompletionUsage( - completion_tokens=10, prompt_tokens=775, total_tokens=785) + completion_tokens=10, prompt_tokens=774, total_tokens=784) message = choice.message message = chat_completion.choices[0].message diff --git a/tests/entrypoints/openai/test_vision_embedding.py b/tests/entrypoints/openai/test_vision_embedding.py index f2ff4a0b07a5..cee5274561f4 100644 --- a/tests/entrypoints/openai/test_vision_embedding.py +++ b/tests/entrypoints/openai/test_vision_embedding.py @@ -93,5 +93,5 @@ async def test_image_embedding(server: RemoteOpenAIServer, model_name: str, assert len(embeddings.data) == 1 assert len(embeddings.data[0].embedding) == 3072 assert embeddings.usage.completion_tokens == 0 - assert embeddings.usage.prompt_tokens == 764 - assert embeddings.usage.total_tokens == 764 + assert embeddings.usage.prompt_tokens == 763 + assert embeddings.usage.total_tokens == 763 diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index 035e84cc0633..53f89996f0fe 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -260,9 +260,6 @@ def _process_multimodal( mm_processor = self.mm_registry.create_processor( self.model_config, tokenizer) - if isinstance(prompt, list): - prompt = tokenizer.decode(prompt) - if mm_processor_kwargs is None: mm_processor_kwargs = {} From 971c34003d4cc887a4470dfce4806748ea93d83f Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Fri, 7 Feb 2025 09:03:38 +0000 Subject: [PATCH 2/2] Fix expected token counts in test_single_chat_session_image Signed-off-by: DarkLight1337 --- tests/entrypoints/openai/test_vision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index 3cb470015894..c954fca696ff 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -92,7 +92,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI, choice = chat_completion.choices[0] assert choice.finish_reason == "length" assert chat_completion.usage == openai.types.CompletionUsage( - completion_tokens=10, prompt_tokens=775, total_tokens=785) + completion_tokens=10, prompt_tokens=774, total_tokens=784) message = choice.message message = chat_completion.choices[0].message