Skip to content
Merged
2 changes: 0 additions & 2 deletions tests/integration/test_lists/waives.txt
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backe
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5349343)
full:B200/test_e2e.py::test_ptp_quickstart_advanced_deepseek_multi_nodes[DeepSeek-R1/DeepSeek-R1-0528-FP4] SKIP (https://nvbugs/5344688)
accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
test_e2e.py::test_openai_reasoning SKIP (https://nvbugs/5355091)
test_e2e.py::test_openai_misc_example SKIP (https://nvbugs/5355091)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=vanilla-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5354946)
Expand Down
5 changes: 4 additions & 1 deletion tests/unittest/llmapi/apps/_test_openai_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@ def max_seq_len(request):
def server(model_name: str, backend: str, max_batch_size: str,
max_seq_len: str):
model_path = get_model_path(model_name)
args = ["--max_beam_width", "4"]
args = []
if backend is not None:
args.append("--backend")
args.append(backend)
if backend != "pytorch":
args.append("--max_beam_width")
args.append("4")
if max_batch_size is not None:
args.append("--max_batch_size")
args.append(max_batch_size)
Expand Down
3 changes: 2 additions & 1 deletion tests/unittest/llmapi/apps/_test_openai_reasoning.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def server(model_name: str, backend: str) -> RemoteOpenAIServer:
args = []
if backend == "pytorch":
args.extend(["--backend", f"{backend}"])
args.extend(["--max_beam_width", "2"])
max_beam_width = 1 if backend == "pytorch" else 2
args.extend(["--max_beam_width", str(max_beam_width)])
args.extend(["--max_batch_size", "2", "--max_seq_len", "1024"])
args.extend(["--reasoning_parser", "deepseek-r1"])
with RemoteOpenAIServer(model_path, args) as remote_server:
Expand Down