From 1150de57773db12b2bf1e16f38dca6715dc10024 Mon Sep 17 00:00:00 2001
From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
Date: Mon, 4 Aug 2025 09:28:37 +0000
Subject: [PATCH 1/3] Extra options for guided decoding example

Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
---
 .../llmapi/apps/_test_trtllm_serve_example.py | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index 262eafa820f..c9813fb11bd 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -1,8 +1,10 @@
 import os
 import subprocess
 import sys
+import tempfile
 
 import pytest
+import yaml
 
 from .openai_server import RemoteOpenAIServer
 
@@ -16,10 +18,26 @@ def model_name():
 
 
 @pytest.fixture(scope="module")
-def server(model_name: str):
+def temp_extra_llm_api_options_file():
+    """Yield a unique temp YAML file selecting the xgrammar backend."""
+    # mkstemp avoids a fixed shared name that concurrent runs could clobber.
+    fd, temp_file_path = tempfile.mkstemp(suffix=".yaml")
+    try:
+        with os.fdopen(fd, 'w') as f:
+            yaml.dump({"guided_decoding_backend": "xgrammar"}, f)
+
+        yield temp_file_path
+    finally:
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+
+
+@pytest.fixture(scope="module")
+def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
     # fix port to facilitate concise trtllm-serve examples
-    with RemoteOpenAIServer(model_path, port=8000) as remote_server:
+    args = ["--extra_llm_api_options", temp_extra_llm_api_options_file]
+    with RemoteOpenAIServer(model_path, args, port=8000) as remote_server:
         yield remote_server
 
 

From f3135163675ca88d4c70e9a8d10b04417a603d0a Mon Sep 17 00:00:00 2001
From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
Date: Mon, 4 Aug 2025 12:39:30 +0000
Subject: [PATCH 2/3] Enhance curl & json tests

Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
---
 .../openai_completion_client_json_schema.py     | 14 ++++++++++++--
 .../llmapi/apps/_test_trtllm_serve_example.py   | 17 ++++++++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/examples/serve/openai_completion_client_json_schema.py b/examples/serve/openai_completion_client_json_schema.py
index 2f110270f55..56e5a351a08 100644
--- a/examples/serve/openai_completion_client_json_schema.py
+++ b/examples/serve/openai_completion_client_json_schema.py
@@ -1,5 +1,9 @@
 ### :title OpenAI Completion Client with JSON Schema
 
+# This example requires `guided_decoding_backend` to be set to
+# `xgrammar` or `llguidance` in the extra_llm_api_options.yaml file.
+import json
+
 from openai import OpenAI
 
 client = OpenAI(
@@ -18,7 +22,6 @@
         "content":
         f"Give me the information of the biggest city of China in the JSON format.",
     }],
-    max_tokens=100,
     temperature=0,
     response_format={
         "type": "json",
@@ -39,4 +42,11 @@
         }
     },
 )
-print(response.choices[0].message.content)
+
+content = response.choices[0].message.content
+try:
+    response_json = json.loads(content)
+    assert "name" in response_json and "population" in response_json
+    print(content)
+except json.JSONDecodeError:
+    print("Failed to decode JSON response")
diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index c9813fb11bd..541feb1dc71 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -1,3 +1,4 @@
+import json
 import os
 import subprocess
 import sys
@@ -58,8 +59,14 @@ def test_trtllm_serve_examples(exe: str, script: str,
                                server: RemoteOpenAIServer, example_root: str):
     client_script = os.path.join(example_root, script)
     # CalledProcessError will be raised if any errors occur
-    subprocess.run([exe, client_script],
-                   stdout=subprocess.PIPE,
-                   stderr=subprocess.PIPE,
-                   text=True,
-                   check=True)
+    result = subprocess.run([exe, client_script],
+                            stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE,
+                            text=True,
+                            check=True)
+    if script.startswith("curl"):
+        # For curl scripts, we expect a JSON response
+        result_stdout = result.stdout.strip()
+        data = json.loads(result_stdout)
+        assert "code" not in data or data[
+            "code"] == 200, f"Unexpected response: {data}"

From 0472f566406c335c4e24d6139f2a4a9a4862ed50 Mon Sep 17 00:00:00 2001
From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
Date: Tue, 5 Aug 2025 07:56:49 +0000
Subject: [PATCH 3/3] Improve error handling

Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com>
---
 .../llmapi/apps/_test_trtllm_serve_example.py         | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
index 541feb1dc71..6921c024d54 100644
--- a/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
+++ b/tests/unittest/llmapi/apps/_test_trtllm_serve_example.py
@@ -67,6 +67,11 @@ def test_trtllm_serve_examples(exe: str, script: str,
     if script.startswith("curl"):
         # For curl scripts, we expect a JSON response
         result_stdout = result.stdout.strip()
-        data = json.loads(result_stdout)
-        assert "code" not in data or data[
-            "code"] == 200, f"Unexpected response: {data}"
+        try:
+            data = json.loads(result_stdout)
+            assert "code" not in data or data[
+                "code"] == 200, f"Unexpected response: {data}"
+        except json.JSONDecodeError as e:
+            pytest.fail(
+                f"Failed to parse JSON response from {script}: {e}\nStdout: {result_stdout}\nStderr: {result.stderr}"
+            )