diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 11ee02bdf79..8af1f0eef10 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -434,8 +434,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backe
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5349343)
 full:B200/test_e2e.py::test_ptp_quickstart_advanced_deepseek_multi_nodes[DeepSeek-R1/DeepSeek-R1-0528-FP4] SKIP (https://nvbugs/5344688)
 accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
-test_e2e.py::test_openai_reasoning SKIP (https://nvbugs/5355091)
-test_e2e.py::test_openai_misc_example SKIP (https://nvbugs/5355091)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=vanilla-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5354946)
diff --git a/tests/unittest/llmapi/apps/_test_openai_misc.py b/tests/unittest/llmapi/apps/_test_openai_misc.py
index d981e9df011..cca64de799b 100644
--- a/tests/unittest/llmapi/apps/_test_openai_misc.py
+++ b/tests/unittest/llmapi/apps/_test_openai_misc.py
@@ -34,10 +34,13 @@ def max_seq_len(request):
 def server(model_name: str, backend: str, max_batch_size: str,
            max_seq_len: str):
     model_path = get_model_path(model_name)
-    args = ["--max_beam_width", "4"]
+    args = []
     if backend is not None:
         args.append("--backend")
         args.append(backend)
+    if backend != "pytorch":
+        args.append("--max_beam_width")
+        args.append("4")
     if max_batch_size is not None:
         args.append("--max_batch_size")
         args.append(max_batch_size)
diff --git a/tests/unittest/llmapi/apps/_test_openai_reasoning.py b/tests/unittest/llmapi/apps/_test_openai_reasoning.py
index e8c23d0355d..b20c365c3e0 100644
--- a/tests/unittest/llmapi/apps/_test_openai_reasoning.py
+++ b/tests/unittest/llmapi/apps/_test_openai_reasoning.py
@@ -27,7 +27,8 @@ def server(model_name: str, backend: str) -> RemoteOpenAIServer:
     args = []
     if backend == "pytorch":
         args.extend(["--backend", f"{backend}"])
-    args.extend(["--max_beam_width", "2"])
+    max_beam_width = 1 if backend == "pytorch" else 2
+    args.extend(["--max_beam_width", str(max_beam_width)])
     args.extend(["--max_batch_size", "2", "--max_seq_len", "1024"])
     args.extend(["--reasoning_parser", "deepseek-r1"])
     with RemoteOpenAIServer(model_path, args) as remote_server: