Skip to content

Commit c597d60

Browse files
committed
fix oom
Signed-off-by: Enwei Zhu <[email protected]>
1 parent 671ac9b commit c597d60

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ def temp_extra_llm_api_options_file():
 def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
 
+    # Use small max_batch_size/max_seq_len/max_num_tokens to avoid OOM on A10/A30 GPUs.
     args = [
-        "--max_batch_size=32",
+        "--max_batch_size=8", "--max_seq_len=1024", "--max_num_tokens=1024",
         f"--extra_llm_api_options={temp_extra_llm_api_options_file}"
     ]
     with RemoteOpenAIServer(model_path, args) as remote_server:
@@ -159,7 +160,7 @@ def test_chat_structural_tag(client: openai.OpenAI, model_name: str,
     chat_completion = client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_completion_tokens=100,
+        max_completion_tokens=256,
         response_format={
             "type":
                 "structural_tag",

0 commit comments

Comments
 (0)