From 7fe0d40de4fc61810afbf7e16da988c33a8a2bd6 Mon Sep 17 00:00:00 2001 From: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> Date: Tue, 12 Aug 2025 17:13:43 +0000 Subject: [PATCH 1/5] [https://nvbugs/5375594][fix] fix oom issue on structural_tag test case Signed-off-by: nv-guomingz <137257613+nv-guomingz@users.noreply.github.com> --- tests/integration/test_lists/waives.txt | 1 - .../unittest/llmapi/apps/_test_openai_chat_structural_tag.py | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 2e0037bcfc5..3597a6baa3e 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -43,7 +43,6 @@ examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bf examples/test_whisper.py::test_llm_whisper_general[large-v3-enable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime] SKIP (https://nvbugs/4866931) examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (https://nvbugs/4961624) examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long] SKIP (https://nvbugs/5321371) -test_e2e.py::test_openai_chat_structural_tag_example SKIP (https://nvbugspro.nvidia.com/bug/5375594) cpp/test_e2e.py::test_model[fp8-chatglm-90] SKIP (https://nvbugs/5034830) full:B200_PCIe/unittest/trt/functional SKIP (Disable for Blackwell) full:B200_PCIe/unittest/trt/quantization SKIP (Disable for Blackwell) diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py index e3411404947..e1bf48f31c6 100644 --- a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py +++ b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py @@ -23,7 +23,10 @@ def temp_extra_llm_api_options_file(request): temp_dir = 
tempfile.gettempdir() temp_file_path = os.path.join(temp_dir, "extra_llm_api_options.yaml") try: - extra_llm_api_options_dict = {"guided_decoding_backend": "xgrammar"} + extra_llm_api_options_dict = { + "guided_decoding_backend": "xgrammar", + "max_batch_size": 32 + } with open(temp_file_path, 'w') as f: yaml.dump(extra_llm_api_options_dict, f) From 0e2a5c13efe0966bcb7483765ff79e03cab93872 Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:07:07 +0000 Subject: [PATCH 2/5] fix test Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --- .../apps/_test_openai_chat_structural_tag.py | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py index e1bf48f31c6..c35aa182f00 100644 --- a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py +++ b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py @@ -1,32 +1,32 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/aae6927be06dedbda39c6b0c30f6aa3242b84388/tests/entrypoints/openai/test_chat.py +import json import os +import re import tempfile +import jsonschema import openai import pytest import yaml -from ..test_llm import get_model_path, similar +from ..test_llm import get_model_path from .openai_server import RemoteOpenAIServer pytestmark = pytest.mark.threadleak(enabled=False) -@pytest.fixture(scope="module", ids=["TinyLlama-1.1B-Chat"]) +@pytest.fixture(scope="module") def model_name(): return "llama-3.1-model/Llama-3.1-8B-Instruct" @pytest.fixture(scope="module") -def temp_extra_llm_api_options_file(request): +def temp_extra_llm_api_options_file(): temp_dir = tempfile.gettempdir() temp_file_path = os.path.join(temp_dir, "extra_llm_api_options.yaml") try: - extra_llm_api_options_dict = { - "guided_decoding_backend": "xgrammar", - "max_batch_size": 32 - } 
+ extra_llm_api_options_dict = {"guided_decoding_backend": "xgrammar"} with open(temp_file_path, 'w') as f: yaml.dump(extra_llm_api_options_dict, f) @@ -40,7 +40,11 @@ def temp_extra_llm_api_options_file(request): @pytest.fixture(scope="module") def server(model_name: str, temp_extra_llm_api_options_file: str): model_path = get_model_path(model_name) - args = ["--extra_llm_api_options", temp_extra_llm_api_options_file] + + args = [ + "--max_batch_size=32", + f"--extra_llm_api_options={temp_extra_llm_api_options_file}" + ] with RemoteOpenAIServer(model_path, args) as remote_server: yield remote_server @@ -115,12 +119,7 @@ def tool_get_current_date(): def test_chat_structural_tag(client: openai.OpenAI, model_name: str, tool_get_current_weather, tool_get_current_date): - messages = [ - { - "role": - "system", - "content": - f""" + system_prompt = f""" # Tool Instructions - Always execute python code in messages that you share. - When looking for real time information use relevant functions if available else fallback to brave_search @@ -143,13 +142,17 @@ def test_chat_structural_tag(client: openai.OpenAI, model_name: str, - Only call one function at a time - Put the entire function call reply on one line - Always add your sources when using search results to answer the user query -You are a helpful assistant.""", +You are a helpful assistant.""" + user_prompt = "You are in New York. Please get the current date and time, and the weather." + + messages = [ + { + "role": "system", + "content": system_prompt, }, { - "role": - "user", - "content": - "You are in New York. 
Please get the current date and time, and the weather.", + "role": "user", + "content": user_prompt, }, ] @@ -176,11 +179,18 @@ def test_chat_structural_tag(client: openai.OpenAI, model_name: str, "triggers": ["<function="] + match = re.search(r'<function=get_current_weather>([\S\s]+?)</function>', + message.content) + params = json.loads(match.group(1)) + jsonschema.validate(params, + tool_get_current_weather["function"]["parameters"]) + + match = re.search(r'<function=get_current_date>([\S\s]+?)</function>', + message.content) + params = json.loads(match.group(1)) + jsonschema.validate(params, tool_get_current_date["function"]["parameters"]) From 671ac9bd3442ac82027742e0317228a8db10aba2 Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Wed, 13 Aug 2025 02:17:03 +0000 Subject: [PATCH 3/5] add req Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index c8293761eaa..986026e5303 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -24,6 +24,7 @@ cloudpickle typing-extensions==4.12.2 bandit==1.7.7 jsonlines==4.0.0 +jsonschema jieba==0.42.1 rouge==1.0.1 pytest-rerunfailures From c597d603dc78e5e51a53cf6a06cb8db0b8dab9fb Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Wed, 13 Aug 2025 06:05:05 +0000 Subject: [PATCH 4/5] fix oom Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --- .../unittest/llmapi/apps/_test_openai_chat_structural_tag.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py index c35aa182f00..022b5a89863 100644 --- a/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py +++ b/tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py @@ -41,8 +41,9 @@ def temp_extra_llm_api_options_file(): def server(model_name: str, temp_extra_llm_api_options_file: str): 
model_path = get_model_path(model_name) + # Use small max_batch_size/max_seq_len/max_num_tokens to avoid OOM on A10/A30 GPUs. args = [ - "--max_batch_size=32", + "--max_batch_size=8", "--max_seq_len=1024", "--max_num_tokens=1024", f"--extra_llm_api_options={temp_extra_llm_api_options_file}" ] with RemoteOpenAIServer(model_path, args) as remote_server: @@ -159,7 +160,7 @@ def test_chat_structural_tag(client: openai.OpenAI, model_name: str, chat_completion = client.chat.completions.create( model=model_name, messages=messages, - max_completion_tokens=100, + max_completion_tokens=256, response_format={ "type": "structural_tag", From 64123876dace0aabe2ef681baa630cf0c39294db Mon Sep 17 00:00:00 2001 From: Enwei Zhu <21126786+syuoni@users.noreply.github.com> Date: Wed, 13 Aug 2025 07:34:52 +0000 Subject: [PATCH 5/5] clean Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com> --- requirements-dev.txt | 1 - tests/unittest/llmapi/apps/_test_openai_chat_json.py | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 986026e5303..c8293761eaa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -24,7 +24,6 @@ cloudpickle typing-extensions==4.12.2 bandit==1.7.7 jsonlines==4.0.0 -jsonschema jieba==0.42.1 rouge==1.0.1 pytest-rerunfailures diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_json.py b/tests/unittest/llmapi/apps/_test_openai_chat_json.py index a444b5566b8..53651828507 100644 --- a/tests/unittest/llmapi/apps/_test_openai_chat_json.py +++ b/tests/unittest/llmapi/apps/_test_openai_chat_json.py @@ -26,11 +26,7 @@ def temp_extra_llm_api_options_file(request): temp_dir = tempfile.gettempdir() temp_file_path = os.path.join(temp_dir, "extra_llm_api_options.yaml") try: - extra_llm_api_options_dict = { - "guided_decoding_backend": "xgrammar", - "disable_overlap_scheduler": - True, # Guided decoding is not supported with overlap scheduler - } + extra_llm_api_options_dict = 
{"guided_decoding_backend": "xgrammar"} with open(temp_file_path, "w") as f: yaml.dump(extra_llm_api_options_dict, f)