Skip to content

Commit c597d60

Browse files
committed
fix oom
Signed-off-by: Enwei Zhu <[email protected]>
1 parent 671ac9b commit c597d60

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

tests/unittest/llmapi/apps/_test_openai_chat_structural_tag.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ def temp_extra_llm_api_options_file():
 def server(model_name: str, temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
 
+    # Use small max_batch_size/max_seq_len/max_num_tokens to avoid OOM on A10/A30 GPUs.
     args = [
-        "--max_batch_size=32",
+        "--max_batch_size=8", "--max_seq_len=1024", "--max_num_tokens=1024",
         f"--extra_llm_api_options={temp_extra_llm_api_options_file}"
     ]
     with RemoteOpenAIServer(model_path, args) as remote_server:
@@ -159,7 +160,7 @@ def test_chat_structural_tag(client: openai.OpenAI, model_name: str,
     chat_completion = client.chat.completions.create(
         model=model_name,
         messages=messages,
-        max_completion_tokens=100,
+        max_completion_tokens=256,
         response_format={
             "type":
                 "structural_tag",

0 commit comments

Comments
 (0)