
Commit 6bae76d

[fix][ci] move torch tests to run under torch stage (#5473)
Signed-off-by: Omer Ullman Argov <[email protected]>
1 parent 1633bd2 commit 6bae76d

File tree

4 files changed: +38 -21 lines changed

tests/integration/defs/test_e2e.py

Lines changed: 24 additions & 12 deletions
@@ -1371,34 +1371,46 @@ def test_trtllm_serve_multimodal_example(llm_root, llm_venv):
     ])
 
 
-def test_openai_misc_example(llm_root, llm_venv):
+@pytest.mark.parametrize("backend", ["pytorch", "trt"])
+def test_openai_misc_example(llm_root, llm_venv, backend: str):
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd(["-m", "pytest", str(test_root / "_test_openai_misc.py")])
+    llm_venv.run_cmd([
+        "-m", "pytest",
+        str(test_root / "_test_openai_misc.py"), "-k", backend
+    ])
 
 
-def test_openai_completions_example(llm_root, llm_venv):
+@pytest.mark.parametrize("backend", ["pytorch", "trt"])
+def test_openai_completions_example(llm_root, llm_venv, backend: str):
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd(
-        ["-m", "pytest",
-         str(test_root / "_test_openai_completions.py")])
+    llm_venv.run_cmd([
+        "-m", "pytest",
+        str(test_root / "_test_openai_completions.py"), "-k", backend
+    ])
 
 
-def test_openai_chat_example(llm_root, llm_venv):
+@pytest.mark.parametrize("backend", ["pytorch", "trt"])
+def test_openai_chat_example(llm_root, llm_venv, backend: str):
     example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
     llm_venv.run_cmd([
         "-m", "pip", "install", "-r",
         os.path.join(example_root, "requirements.txt")
     ])
 
-    llm_venv.run_cmd(["-m", "pytest", str(test_root / "_test_openai_chat.py")])
+    llm_venv.run_cmd([
+        "-m", "pytest",
+        str(test_root / "_test_openai_chat.py"), "-k", backend
+    ])
 
 
-def test_openai_reasoning(llm_root, llm_venv):
+@pytest.mark.parametrize("backend", ["pytorch", "trt"])
+def test_openai_reasoning(llm_root, llm_venv, backend: str):
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd(
-        ["-m", "pytest",
-         str(test_root / "_test_openai_reasoning.py")])
+    llm_venv.run_cmd([
+        "-m", "pytest",
+        str(test_root / "_test_openai_reasoning.py"), "-k", backend
+    ])
 
 
 def test_openai_chat_multimodal_example(llm_root, llm_venv):
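A note on the mechanism (a minimal sketch, not part of the commit): `@pytest.mark.parametrize` bakes the backend name into the outer test id, and the inner pytest run's `-k backend` filter substring-matches against the ids produced by the `backend` fixture in files like `_test_openai_misc.py` (see the last diff below). The fixture and test names here are illustrative only:

```python
# Sketch of the id-based filtering the change relies on; names are
# illustrative, not taken from the repository.
import pytest


@pytest.fixture(params=["trt", "pytorch"])
def backend(request):
    return request.param


def test_example(backend):
    # Collected as test_example[trt] and test_example[pytorch]. Running
    #   pytest -k pytorch this_file.py
    # keeps only test_example[pytorch], since -k matches substrings of ids.
    assert backend in ("trt", "pytorch")
```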

tests/integration/test_lists/test-db/l0_a10.yml

Lines changed: 10 additions & 6 deletions
@@ -22,6 +22,12 @@ l0_a10:
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-pytorch-stress-test]
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-pytorch-stress-test]
   - test_e2e.py::test_openai_chat_structural_tag_example
+  - test_e2e.py::test_openai_chat_multimodal_example
+  - test_e2e.py::test_trtllm_serve_multimodal_example
+  - test_e2e.py::test_openai_misc_example[pytorch]
+  - test_e2e.py::test_openai_reasoning[pytorch]
+  - test_e2e.py::test_openai_completions_example[pytorch]
+  - test_e2e.py::test_openai_chat_example[pytorch]
 - condition:
     ranges:
       system_gpu_count:
@@ -72,12 +78,10 @@ l0_a10:
   - llmapi/test_llm_e2e.py::test_llmapi_exit
   - llmapi/test_llm_examples.py::test_llmapi_server_example
   - test_e2e.py::test_trtllm_serve_example
-  - test_e2e.py::test_trtllm_serve_multimodal_example
-  - test_e2e.py::test_openai_misc_example
-  - test_e2e.py::test_openai_completions_example
-  - test_e2e.py::test_openai_chat_example
-  - test_e2e.py::test_openai_reasoning
-  - test_e2e.py::test_openai_chat_multimodal_example
+  - test_e2e.py::test_openai_misc_example[trt]
+  - test_e2e.py::test_openai_completions_example[trt]
+  - test_e2e.py::test_openai_chat_example[trt]
+  - test_e2e.py::test_openai_reasoning[trt]
   - test_e2e.py::test_trtllm_bench_sanity[--non-streaming-FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
   - test_e2e.py::test_trtllm_bench_latency_sanity[FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
   - test_e2e.py::test_trtllm_bench_request_rate_and_concurrency[enable_concurrency-]
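The bracketed suffix in each entry is the pytest parametrization id, so the `[pytorch]` variants now land in the PyTorch stage block while the `[trt]` variants stay with the TensorRT stage. A hedged sketch of how a stage runner could hand such a list to pytest; the file path and the `db["l0_a10"][0]["tests"]` layout are assumptions, not verified against the repo:

```python
# Sketch only: feed one stage's test list to pytest by node id.
import sys

import pytest
import yaml  # assumes PyYAML is installed

with open("tests/integration/test_lists/test-db/l0_a10.yml") as f:
    db = yaml.safe_load(f)

# Assumed layout: first condition block's "tests" list holds the node ids.
tests = db["l0_a10"][0]["tests"]
sys.exit(pytest.main(tests))
```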

tests/integration/test_lists/waives.txt

Lines changed: 2 additions & 1 deletion
@@ -43,7 +43,8 @@ examples/test_multimodal.py::test_llm_multimodal_general[video-neva-pp:1-tp:1-bf
 examples/test_whisper.py::test_llm_whisper_general[large-v3-enable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_python_runtime] SKIP (https://nvbugs/4866931)
 examples/test_nemotron.py::test_llm_nemotron_3_8b_1gpu[bfloat16-fp8] SKIP (https://nvbugs/4961624)
 examples/test_mistral.py::test_llm_mistral_v1_1gpu[mistral-7b-v0.1-float16-max_attention_window_size_4096-chunked_summarization_long] SKIP (https://nvbugs/5321371)
-test_e2e.py::test_openai_completions_example SKIP (https://nvbugspro.nvidia.com/bug/5004744)
+test_e2e.py::test_openai_completions_example[pytorch] SKIP (https://nvbugspro.nvidia.com/bug/5004744)
+test_e2e.py::test_openai_completions_example[trt] SKIP (https://nvbugspro.nvidia.com/bug/5004744)
 cpp/test_e2e.py::test_model[fp8-chatglm-90] SKIP (https://nvbugs/5034830)
 full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin] SKIP (Disable for Blackwell)
 full:B200_PCIe/examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin] SKIP (Disable for Blackwell)
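Once a test is parametrized its bare node id no longer exists, so a waive entry must name each variant; that is why the single waiver is split in two above. A quick illustration (ids hard-coded for the example):

```python
# Sketch: after parametrization, only bracketed ids are collected, so a
# waiver written against the bare id would match nothing.
ids = [
    "test_e2e.py::test_openai_completions_example[pytorch]",
    "test_e2e.py::test_openai_completions_example[trt]",
]
bare = "test_e2e.py::test_openai_completions_example"
assert bare not in ids  # the old single waive entry is now a no-op
assert all(i.startswith(bare + "[") for i in ids)  # each variant waived explicitly
```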

tests/unittest/llmapi/apps/_test_openai_misc.py

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@ def model_name():
     return "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
 
 
-@pytest.fixture(scope="module", params=[None, 'pytorch'])
+@pytest.fixture(scope="module", params=["trt", 'pytorch'])
 def backend(request):
     return request.param
 
@@ -35,7 +35,7 @@ def server(model_name: str, backend: str, max_batch_size: str,
            max_seq_len: str):
     model_path = get_model_path(model_name)
     args = []
-    if backend is not None:
+    if backend == "pytorch":
         args.append("--backend")
         args.append(backend)
     if backend != "pytorch":
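With the fixture params changed from `[None, 'pytorch']` to `["trt", 'pytorch']`, the `--backend` flag is forwarded only for the PyTorch backend; `"trt"` now names the default TensorRT path explicitly instead of relying on `None` (and gives the test id a matchable `[trt]` suffix). A standalone sketch of that branch, using a hypothetical helper that mirrors the hunk above:

```python
# Hypothetical helper mirroring the fixture logic above, for illustration.
def build_server_args(backend: str) -> list:
    args = []
    if backend == "pytorch":
        args.append("--backend")
        args.append(backend)
    return args


assert build_server_args("trt") == []  # default backend, no flag needed
assert build_server_args("pytorch") == ["--backend", "pytorch"]
```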
