Skip to content
3 changes: 3 additions & 0 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ steps:
source_file_dependencies:
- vllm/entrypoints
- examples/
- tests/examples/
commands:
- pip install tensorizer # for tensorizer test
- python3 offline_inference.py
Expand All @@ -200,6 +201,8 @@ steps:
- python3 offline_inference_embedding.py
- python3 offline_inference_scoring.py
- python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
- cd /vllm-workspace/tests
- pytest -v -s examples

- label: Prefix Caching Test # 9min
mirror_hardwares: [amd]
Expand Down
1 change: 1 addition & 0 deletions examples/template_alpaca.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{% for message in messages %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_baichuan.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{%- for message in messages -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_blip2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token }}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'Question: ' + message['content'] + ' ' -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=0) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=1) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatml.jinja
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
{{ bos_token -}}
{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
1 change: 1 addition & 0 deletions examples/template_dse_qwen2_vl.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{% raw %}<|im_start|>system
You are a helpful assistant.<|im_end|>
{% endraw %}{% endif %}<|im_start|>{{ message['role'] }}{% raw %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'User: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon_180b.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{{- 'System: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_inkbot.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
<#meta#>
- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
Expand Down
1 change: 1 addition & 0 deletions examples/template_vlm2vec.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- if messages | length > 1 -%}
{{ raise_exception('Embedding models should only embed one message at a time') }}
{%- endif -%}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- if tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>
' }}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite_20b_fc.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- macro json_to_python_type(json_spec) %}
{%- set basic_type_map = {
"string": "str",
Expand Down
12 changes: 8 additions & 4 deletions tests/entrypoints/openai/test_chat_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,24 @@

# Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, True, False,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
"""),
("facebook/opt-125m", chatml_jinja_path, False, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, False,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of"""),
("facebook/opt-125m", chatml_jinja_path, False, True, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, True,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
Expand Down Expand Up @@ -63,7 +66,8 @@ def test_load_chat_template():
# Test assertions
assert template_content is not None
# Hard coded value for template_chatml.jinja
assert template_content == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
assert template_content == """{{ bos_token -}}
{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501


Expand Down
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_vision_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ async def test_image_embedding(server: RemoteOpenAIServer, model_name: str,
assert len(embeddings.data) == 1
assert len(embeddings.data[0].embedding) == 3072
assert embeddings.usage.completion_tokens == 0
assert embeddings.usage.prompt_tokens == 764
assert embeddings.usage.total_tokens == 764
assert embeddings.usage.prompt_tokens == 766
assert embeddings.usage.total_tokens == 766
Empty file added tests/examples/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions tests/examples/test_jinja.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import pytest
import transformers

from ..utils import VLLM_PATH

# Directory containing the example chat templates shipped with the repo.
EXAMPLES_DIR = VLLM_PATH / "examples"

# One pytest param per *.jinja chat template under examples/, labelled by
# file stem so a failing case names the offending template. Sorted for a
# deterministic test collection order.
jinja_paths = [
pytest.param(path, id=path.stem)
for path in sorted(EXAMPLES_DIR.glob("*.jinja"))
]


@pytest.mark.parametrize("path", jinja_paths)
@pytest.mark.parametrize("num_messages", [1, 3])
def test_bos(path: Path, num_messages: int) -> None:
    """Check that an example chat template emits the BOS token exactly once.

    Renders the template through ``apply_chat_template`` with 1 or 3
    messages and asserts the prompt starts with BOS and contains no
    duplicate BOS.
    """
    chat_template = path.read_text(encoding="utf-8")
    # We might guess an appropriate tokenizer model from the file name, but
    # we don't maintain such a list. Use an arbitrary sentinel BOS string for
    # testing; it doesn't have to match the real tokenizer's BOS token.
    bos_token = "=BOS="
    tokenizer = transformers.PreTrainedTokenizerBase(
        chat_template=chat_template,
        bos_token=bos_token,
        eos_token="=EOS=",
    )
    full_conversation = [
        {"role": "user", "content": "1"},
        {"role": "assistant", "content": "2"},
        {"role": "user", "content": "3"},
    ]
    conversation = full_conversation[:num_messages]
    try:
        prompt: str = tokenizer.apply_chat_template(conversation=conversation,
                                                    tokenize=False)
    except Exception as e:
        # Embedding-only templates deliberately reject multi-message input
        # via raise_exception; skip instead of failing in that case.
        if str(e
               ) == "Embedding models should only embed one message at a time":
            pytest.skip(reason=str(e))
        raise
    assert prompt.startswith(bos_token)
    assert prompt.count(bos_token) == 1
1 change: 1 addition & 0 deletions vllm/entrypoints/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ def chat(
add_generation_prompt=add_generation_prompt,
continue_final_message=continue_final_message,
tools=tools,
tokenize=True,
)

prompt: Union[TokensPrompt, TextPrompt]
Expand Down
Loading