Skip to content
3 changes: 3 additions & 0 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ steps:
source_file_dependencies:
- vllm/entrypoints
- examples/
- tests/examples/
commands:
- pip install tensorizer # for tensorizer test
- python3 offline_inference.py
Expand All @@ -200,6 +201,8 @@ steps:
- python3 offline_inference_embedding.py
- python3 offline_inference_scoring.py
- python3 offline_profile.py --model facebook/opt-125m run_num_steps --num-steps 2
- cd /vllm-workspace/tests
- pytest -v -s examples

- label: Prefix Caching Test # 9min
mirror_hardwares: [amd]
Expand Down
1 change: 1 addition & 0 deletions examples/template_alpaca.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{% for message in messages %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_baichuan.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}

{%- for message in messages -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_blip2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token }}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'Question: ' + message['content'] + ' ' -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=0) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatglm2.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- set counter = namespace(index=1) -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
Expand Down
1 change: 1 addition & 0 deletions examples/template_chatml.jinja
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
{{ bos_token -}}
{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}
1 change: 1 addition & 0 deletions examples/template_dse_qwen2_vl.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{% raw %}<|im_start|>system
You are a helpful assistant.<|im_end|>
{% endraw %}{% endif %}<|im_start|>{{ message['role'] }}{% raw %}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{- 'User: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_falcon_180b.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{{- 'System: ' + message['content'] -}}
Expand Down
1 change: 1 addition & 0 deletions examples/template_inkbot.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ bos_token -}}
<#meta#>
- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
Expand Down
1 change: 1 addition & 0 deletions examples/template_vlm2vec.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token -}}
{%- if messages | length > 1 -%}
{{ raise_exception('Embedding models should only embed one message at a time') }}
{%- endif -%}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- if tools %}
{{- '<|start_of_role|>available_tools<|end_of_role|>
' }}
Expand Down
1 change: 1 addition & 0 deletions examples/tool_chat_template_granite_20b_fc.jinja
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- bos_token }}
{%- macro json_to_python_type(json_spec) %}
{%- set basic_type_map = {
"string": "str",
Expand Down
12 changes: 8 additions & 4 deletions tests/entrypoints/openai/test_chat_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,24 @@

# Define models, templates, and their corresponding expected outputs
MODEL_TEMPLATE_GENERATON_OUTPUT = [
("facebook/opt-125m", chatml_jinja_path, True, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, True, False,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of<|im_end|>
<|im_start|>assistant
"""),
("facebook/opt-125m", chatml_jinja_path, False, False, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, False,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
<|im_start|>user
What is the capital of"""),
("facebook/opt-125m", chatml_jinja_path, False, True, """<|im_start|>user
("facebook/opt-125m", chatml_jinja_path, False, True,
"""</s><|im_start|>user
Hello<|im_end|>
<|im_start|>assistant
Hi there!<|im_end|>
Expand Down Expand Up @@ -63,7 +66,8 @@ def test_load_chat_template():
# Test assertions
assert template_content is not None
# Hard coded value for template_chatml.jinja
assert template_content == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
assert template_content == """{{ bos_token -}}
{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501


Expand Down
4 changes: 2 additions & 2 deletions tests/entrypoints/openai/test_vision_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ async def test_image_embedding(server: RemoteOpenAIServer, model_name: str,
assert len(embeddings.data) == 1
assert len(embeddings.data[0].embedding) == 3072
assert embeddings.usage.completion_tokens == 0
assert embeddings.usage.prompt_tokens == 764
assert embeddings.usage.total_tokens == 764
assert embeddings.usage.prompt_tokens == 766
assert embeddings.usage.total_tokens == 766
Empty file added tests/examples/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions tests/examples/test_jinja.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import pytest
import transformers

from ..utils import VLLM_PATH

# Directory containing the example chat templates shipped with the repo.
EXAMPLES_DIR = VLLM_PATH / "examples"

# One pytest param per *.jinja chat template under examples/, labelled by
# file stem so a failing case names the offending template. Sorted for a
# deterministic test collection order.
jinja_paths = [
pytest.param(path, id=path.stem)
for path in sorted(EXAMPLES_DIR.glob("*.jinja"))
]


@pytest.mark.parametrize("path", jinja_paths)
@pytest.mark.parametrize("num_messages", [1, 3])
def test_bos(path: Path, num_messages: int) -> None:
    """Check that an example chat template emits the BOS token exactly once.

    Renders the template through ``apply_chat_template`` with 1 or 3
    messages and asserts the prompt starts with BOS and contains no
    duplicate BOS.
    """
    chat_template = path.read_text(encoding="utf-8")
    # We might guess an appropriate tokenizer model from the file name, but
    # we don't maintain such a list. Use an arbitrary sentinel BOS string for
    # testing; it doesn't have to match the real tokenizer's BOS token.
    bos_token = "=BOS="
    tokenizer = transformers.PreTrainedTokenizerBase(
        chat_template=chat_template,
        bos_token=bos_token,
        eos_token="=EOS=",
    )
    full_conversation = [
        {"role": "user", "content": "1"},
        {"role": "assistant", "content": "2"},
        {"role": "user", "content": "3"},
    ]
    conversation = full_conversation[:num_messages]
    try:
        prompt: str = tokenizer.apply_chat_template(conversation=conversation,
                                                    tokenize=False)
    except Exception as e:
        # Embedding-only templates deliberately reject multi-message input
        # via raise_exception; skip instead of failing in that case.
        if str(e
               ) == "Embedding models should only embed one message at a time":
            pytest.skip(reason=str(e))
        raise
    assert prompt.startswith(bos_token)
    assert prompt.count(bos_token) == 1
1 change: 1 addition & 0 deletions vllm/entrypoints/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ def chat(
add_generation_prompt=add_generation_prompt,
continue_final_message=continue_final_message,
tools=tools,
tokenize=True,
)

prompt: Union[TokensPrompt, TextPrompt]
Expand Down
Loading