From 1150de57773db12b2bf1e16f38dca6715dc10024 Mon Sep 17 00:00:00 2001 From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Date: Mon, 4 Aug 2025 09:28:37 +0000 Subject: [PATCH 1/3] Extra options for guided decoding example Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --- .../llmapi/apps/_test_trtllm_serve_example.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py index 262eafa820f..c9813fb11bd 100644 --- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py +++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py @@ -1,8 +1,10 @@ import os import subprocess import sys +import tempfile import pytest +import yaml from .openai_server import RemoteOpenAIServer @@ -16,10 +18,26 @@ def model_name(): @pytest.fixture(scope="module") -def server(model_name: str): +def temp_extra_llm_api_options_file(): + temp_dir = tempfile.gettempdir() + temp_file_path = os.path.join(temp_dir, "extra_llm_api_options.yaml") + try: + extra_llm_api_options_dict = {"guided_decoding_backend": "xgrammar"} + with open(temp_file_path, 'w') as f: + yaml.dump(extra_llm_api_options_dict, f) + + yield temp_file_path + finally: + if os.path.exists(temp_file_path): + os.remove(temp_file_path) + + +@pytest.fixture(scope="module") +def server(model_name: str, temp_extra_llm_api_options_file: str): model_path = get_model_path(model_name) # fix port to facilitate concise trtllm-serve examples - with RemoteOpenAIServer(model_path, port=8000) as remote_server: + args = ["--extra_llm_api_options", temp_extra_llm_api_options_file] + with RemoteOpenAIServer(model_path, args, port=8000) as remote_server: yield remote_server From f3135163675ca88d4c70e9a8d10b04417a603d0a Mon Sep 17 00:00:00 2001 From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Date: Mon, 4 Aug 2025 12:39:30 +0000 Subject: [PATCH 2/3] 
Enhance curl & json tests Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --- .../openai_completion_client_json_schema.py | 14 ++++++++++++-- .../llmapi/apps/_test_trtllm_serve_example.py | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/examples/serve/openai_completion_client_json_schema.py b/examples/serve/openai_completion_client_json_schema.py index 2f110270f55..56e5a351a08 100644 --- a/examples/serve/openai_completion_client_json_schema.py +++ b/examples/serve/openai_completion_client_json_schema.py @@ -1,5 +1,9 @@ ### :title OpenAI Completion Client with JSON Schema +# This example requires `guided_decoding_backend` to be set to +# `xgrammar` or `llguidance` in the extra_llm_api_options.yaml file. +import json + from openai import OpenAI client = OpenAI( @@ -18,7 +22,6 @@ "content": f"Give me the information of the biggest city of China in the JSON format.", }], - max_tokens=100, temperature=0, response_format={ "type": "json", @@ -39,4 +42,11 @@ } }, ) -print(response.choices[0].message.content) + +content = response.choices[0].message.content +try: + response_json = json.loads(content) + assert "name" in response_json and "population" in response_json + print(content) +except json.JSONDecodeError: + print("Failed to decode JSON response") diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py index c9813fb11bd..541feb1dc71 100644 --- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py +++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py @@ -1,3 +1,4 @@ +import json import os import subprocess import sys @@ -58,8 +59,14 @@ def test_trtllm_serve_examples(exe: str, script: str, server: RemoteOpenAIServer, example_root: str): client_script = os.path.join(example_root, script) # CalledProcessError will be raised if any errors occur - subprocess.run([exe, client_script], - stdout=subprocess.PIPE, - 
stderr=subprocess.PIPE, - text=True, - check=True) + result = subprocess.run([exe, client_script], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True) + if script.startswith("curl"): + # For curl scripts, we expect a JSON response + result_stdout = result.stdout.strip() + data = json.loads(result_stdout) + assert "code" not in data or data[ + "code"] == 200, f"Unexpected response: {data}" From 0472f566406c335c4e24d6139f2a4a9a4862ed50 Mon Sep 17 00:00:00 2001 From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Date: Tue, 5 Aug 2025 07:56:49 +0000 Subject: [PATCH 3/3] Improve error handling Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --- .../llmapi/apps/_test_trtllm_serve_example.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py index 541feb1dc71..6921c024d54 100644 --- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py +++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py @@ -67,6 +67,11 @@ def test_trtllm_serve_examples(exe: str, script: str, if script.startswith("curl"): # For curl scripts, we expect a JSON response result_stdout = result.stdout.strip() - data = json.loads(result_stdout) - assert "code" not in data or data[ - "code"] == 200, f"Unexpected response: {data}" + try: + data = json.loads(result_stdout) + assert "code" not in data or data[ + "code"] == 200, f"Unexpected response: {data}" + except json.JSONDecodeError as e: + pytest.fail( + f"Failed to parse JSON response from {script}: {e}\nStdout: {result_stdout}\nStderr: {result.stderr}" + )