NVIDIA · netanel-haber · Jun 25, 2025 · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -434,8 +434,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backe
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5349343)
 full:B200/test_e2e.py::test_ptp_quickstart_advanced_deepseek_multi_nodes[DeepSeek-R1/DeepSeek-R1-0528-FP4] SKIP (https://nvbugs/5344688)
 accuracy/test_llm_api.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar] SKIP (https://nvbugs/5346443)
-test_e2e.py::test_openai_reasoning SKIP (https://nvbugs/5355091)
-test_e2e.py::test_openai_misc_example SKIP (https://nvbugs/5355091)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
 accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=2-overlap_scheduler=False] SKIP (https://nvbugs/5354956)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[mtp=vanilla-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5354946)

diff --git a/tests/unittest/llmapi/apps/_test_openai_misc.py b/tests/unittest/llmapi/apps/_test_openai_misc.py
@@ -34,10 +34,13 @@ def max_seq_len(request):
 def server(model_name: str, backend: str, max_batch_size: str,
            max_seq_len: str):
     model_path = get_model_path(model_name)
-    args = ["--max_beam_width", "4"]
+    args = []
     if backend is not None:
         args.append("--backend")
         args.append(backend)
+    if backend != "pytorch":
+        args.append("--max_beam_width")
+        args.append("4")
     if max_batch_size is not None:
         args.append("--max_batch_size")
         args.append(max_batch_size)

diff --git a/tests/unittest/llmapi/apps/_test_openai_reasoning.py b/tests/unittest/llmapi/apps/_test_openai_reasoning.py
@@ -27,7 +27,8 @@ def server(model_name: str, backend: str) -> RemoteOpenAIServer:
     args = []
     if backend == "pytorch":
         args.extend(["--backend", f"{backend}"])
-    args.extend(["--max_beam_width", "2"])
+    max_beam_width = 1 if backend == "pytorch" else 2
+    args.extend(["--max_beam_width", str(max_beam_width)])
     args.extend(["--max_batch_size", "2", "--max_seq_len", "1024"])
     args.extend(["--reasoning_parser", "deepseek-r1"])
     with RemoteOpenAIServer(model_path, args) as remote_server: