diff --git a/tensorrt_llm/commands/serve.py b/tensorrt_llm/commands/serve.py
index df96a1868ca..7de263ea89f 100644
--- a/tensorrt_llm/commands/serve.py
+++ b/tensorrt_llm/commands/serve.py
@@ -71,7 +71,7 @@ def _signal_handler_cleanup_child(signum, frame):
 
 def get_llm_args(model: str,
                  tokenizer: Optional[str] = None,
-                 backend: Optional[str] = None,
+                 backend: str = "pytorch",
                  max_beam_width: int = BuildConfig.max_beam_width,
                  max_batch_size: int = BuildConfig.max_batch_size,
                  max_num_tokens: int = BuildConfig.max_num_tokens,
@@ -165,8 +165,8 @@ def launch_server(host: str,
               help="Hostname of the server.")
 @click.option("--port", type=int, default=8000, help="Port of the server.")
 @click.option("--backend",
-              type=click.Choice(["pytorch"]),
-              default=None,
+              type=click.Choice(["pytorch", "trt"]),
+              default="pytorch",
               help="Set to 'pytorch' for pytorch path. Default is cpp path.")
 @click.option('--log_level',
               type=click.Choice(severity_map.keys()),
diff --git a/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1_trt_backend.yaml b/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1_trt_backend.yaml
index bde3132f8a1..388be9d4d66 100644
--- a/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1_trt_backend.yaml
+++ b/tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1_trt_backend.yaml
@@ -2,6 +2,7 @@ hostname: localhost
 port: 8000
 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 free_gpu_memory_fraction: 0.25
+backend: "trt"
 context_servers:
   num_instances: 1
   tensor_parallel_size: 2
diff --git a/tests/integration/defs/disaggregated/test_configs/disagg_config_gen_only_trt_backend.yaml b/tests/integration/defs/disaggregated/test_configs/disagg_config_gen_only_trt_backend.yaml
index 386a8fba01f..6d9fc7d07fd 100644
--- a/tests/integration/defs/disaggregated/test_configs/disagg_config_gen_only_trt_backend.yaml
+++ b/tests/integration/defs/disaggregated/test_configs/disagg_config_gen_only_trt_backend.yaml
@@ -1,6 +1,7 @@
 hostname: localhost
 port: 8000
 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+backend: "trt"
 context_servers:
   num_instances: 0
 generation_servers:
diff --git a/tests/integration/defs/disaggregated/test_configs/disagg_config_trt_backend.yaml b/tests/integration/defs/disaggregated/test_configs/disagg_config_trt_backend.yaml
index fa57d987de4..885991c886c 100644
--- a/tests/integration/defs/disaggregated/test_configs/disagg_config_trt_backend.yaml
+++ b/tests/integration/defs/disaggregated/test_configs/disagg_config_trt_backend.yaml
@@ -2,6 +2,7 @@ hostname: localhost
 port: 8000
 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 free_gpu_memory_fraction: 0.25
+backend: "trt"
 context_servers:
   num_instances: 1
   tensor_parallel_size: 1
diff --git a/tests/integration/defs/stress_test/stress_test.py b/tests/integration/defs/stress_test/stress_test.py
index f0f85fe51e3..03456d8d5c5 100644
--- a/tests/integration/defs/stress_test/stress_test.py
+++ b/tests/integration/defs/stress_test/stress_test.py
@@ -364,12 +364,11 @@ def test_run_stress_test(config, stress_time_timeout, backend,
     """
     # Create a new ModelConfig with the backend parameter
     # Convert 'trt' to None as expected by the ModelConfig
-    backend_param = None if backend == "trt" else backend
     new_config = ModelConfig(model_dir=config.model_dir,
                              tp_size=config.tp_size,
                              memory_requirement=config.memory_requirement,
-                             backend=backend_param)
+                             backend=backend)
 
     # Extract stress_time and stress_timeout from the tuple
     stress_time, stress_timeout = stress_time_timeout
@@ -542,6 +541,8 @@ def stress_test(config,
         str(config.tp_size),
         "--pp_size",
         str(test_server_config.pp_size),
+        "--backend",
+        config.backend,
     ]
 
     # Only add ep_size parameter if it's not None
@@ -560,12 +561,6 @@ def stress_test(config,
             extra_llm_options_path,
         ])
 
-    # Add backend option only if specified
-    # backend = None means trt backend
-    # backend = pytorch means pytorch backend
-    if config.backend:
-        server_cmd.extend(["--backend", config.backend])
-
     # Log the command we're about to run
     print_info(f"Running command: {' '.join(server_cmd)}")
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 1e8098330f4..d0674717e2e 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -1407,13 +1407,7 @@ def test_openai_completions_example(llm_root, llm_venv, backend: str):
 
 @pytest.mark.parametrize("backend", ["pytorch", "trt"])
 def test_openai_chat_example(llm_root, llm_venv, backend: str):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd([
         "-m", "pytest",
         str(test_root / "_test_openai_chat.py"), "-k", backend
@@ -1435,13 +1429,7 @@ def test_openai_lora(llm_root, llm_venv):
 
 
 def test_openai_chat_multimodal_example(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd(
         ["-m", "pytest", str(test_root / "_test_openai_chat_multimodal.py")])
@@ -1449,7 +1437,6 @@ def test_openai_chat_multimodal_example(llm_root, llm_venv):
 
 def test_openai_chat_structural_tag_example(llm_venv):
     test_root = unittest_path() / "llmapi" / "apps"
-
     llm_venv.run_cmd([
         "-m", "pytest",
         str(test_root / "_test_openai_chat_structural_tag.py")
@@ -1459,13 +1446,7 @@
 
 @pytest.mark.skip_less_device(2)
 @pytest.mark.skip_less_device_memory(40000)
 def test_openai_multi_chat_example(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd(
         ["-m", "pytest", str(test_root / "_test_openai_multi_chat.py")])
@@ -1475,13 +1456,7 @@
 
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 def test_openai_consistent_chat(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd(
         ["-m", "pytest", str(test_root / "_test_openai_consistent_chat.py")])
@@ -1491,13 +1466,7 @@
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 def test_openai_multinodes_chat_tp16pp1(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd([
         "-m", "pytest", "-k", "tp16pp1",
         str(test_root / "_test_openai_multi_nodes.py")
@@ -1508,13 +1477,7 @@
 @pytest.mark.skip_less_device(4)
 @pytest.mark.skip_less_device_memory(80000)
 def test_openai_multinodes_chat_tp8pp2(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd([
         "-m", "pytest", "-k", "tp8pp2",
         str(test_root / "_test_openai_multi_nodes.py")
@@ -1523,13 +1486,7 @@
 
 
 @pytest.mark.skip_less_device_memory(80000)
 def test_trtllm_benchmark_serving(llm_root, llm_venv):
-    example_root = Path(os.path.join(llm_root, "examples", "apps"))
     test_root = unittest_path() / "llmapi" / "apps"
-    llm_venv.run_cmd([
-        "-m", "pip", "install", "-r",
-        os.path.join(example_root, "requirements.txt")
-    ])
-
     llm_venv.run_cmd(
         ["-m", "pytest", str(test_root / "_test_trtllm_serve_benchmark.py")])
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
index aeea774e788..2306afe9456 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -20,9 +20,7 @@ def model_name():
     return "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
 
 
-@pytest.fixture(scope="module",
-                params=[None, 'pytorch'],
-                ids=["trt", "pytorch"])
+@pytest.fixture(scope="module", params=["trt", "pytorch"])
 def backend(request):
     return request.param
 
@@ -67,10 +65,9 @@ def temp_extra_llm_api_options_file(request):
 def server(model_name: str, backend: str, extra_llm_api_options: bool,
            temp_extra_llm_api_options_file: str, num_postprocess_workers: int):
     model_path = get_model_path(model_name)
-    if backend == "pytorch":
-        args = ["--backend", f"{backend}"]
-    else:
-        args = ["--max_beam_width", "4"]
+    args = ["--backend", f"{backend}"]
+    if backend == "trt":
+        args.extend(["--max_beam_width", "4"])
     if extra_llm_api_options:
         args.extend(
             ["--extra_llm_api_options", temp_extra_llm_api_options_file])
diff --git a/tests/unittest/llmapi/apps/_test_openai_completions.py b/tests/unittest/llmapi/apps/_test_openai_completions.py
index 79b9b49a1a7..7beeff0179b 100644
--- a/tests/unittest/llmapi/apps/_test_openai_completions.py
+++ b/tests/unittest/llmapi/apps/_test_openai_completions.py
@@ -14,7 +14,7 @@ def model_name():
     return "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
 
 
-@pytest.fixture(scope="module", params=["trt", 'pytorch'])
+@pytest.fixture(scope="module", params=["trt", "pytorch"])
 def backend(request):
     return request.param
 
@@ -29,10 +29,9 @@ def num_postprocess_workers(request):
 @pytest.fixture(scope="module")
 def server(model_name: str, backend: str, num_postprocess_workers: int):
     model_path = get_model_path(model_name)
-    if backend == "pytorch":
-        args = ["--backend", f"{backend}"]
-    else:
-        args = ["--max_beam_width", "4"]
+    args = ["--backend", f"{backend}"]
+    if backend == "trt":
+        args.extend(["--max_beam_width", "4"])
     args.extend(["--num_postprocess_workers", f"{num_postprocess_workers}"])
     with RemoteOpenAIServer(model_path, args) as remote_server:
         yield remote_server
diff --git a/tests/unittest/llmapi/apps/_test_openai_metrics.py b/tests/unittest/llmapi/apps/_test_openai_metrics.py
index 9d207ae4e9a..25047eea1ea 100755
--- a/tests/unittest/llmapi/apps/_test_openai_metrics.py
+++ b/tests/unittest/llmapi/apps/_test_openai_metrics.py
@@ -21,7 +21,6 @@ def client():
     llm = PyTorchLLM(model=llama_model_path,
                      build_config=build_config,
                      kv_cache_config=KvCacheConfig(),
-                     backend="pytorch",
                      enable_iter_perf_stats=True)
     hf_tokenizer = AutoTokenizer.from_pretrained(llama_model_path)
diff --git a/tests/unittest/llmapi/apps/_test_openai_misc.py b/tests/unittest/llmapi/apps/_test_openai_misc.py
index 52c8ff98535..51e3d4f840c 100644
--- a/tests/unittest/llmapi/apps/_test_openai_misc.py
+++ b/tests/unittest/llmapi/apps/_test_openai_misc.py
@@ -15,17 +15,17 @@ def model_name():
     return "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
 
 
-@pytest.fixture(scope="module", params=["trt", 'pytorch'])
+@pytest.fixture(scope="module", params=["trt", "pytorch"])
 def backend(request):
     return request.param
 
 
-@pytest.fixture(scope="module", params=['8'])
+@pytest.fixture(scope="module", params=["8"])
 def max_batch_size(request):
     return request.param
 
 
-@pytest.fixture(scope="module", params=['80000'])
+@pytest.fixture(scope="module", params=["80000"])
 def max_seq_len(request):
     return request.param
 
@@ -34,19 +34,13 @@ def max_seq_len(request):
 def server(model_name: str, backend: str, max_batch_size: str,
            max_seq_len: str):
     model_path = get_model_path(model_name)
-    args = []
-    if backend == "pytorch":
-        args.append("--backend")
-        args.append(backend)
+    args = ["--backend", f"{backend}"]
     if backend != "pytorch":
-        args.append("--max_beam_width")
-        args.append("4")
+        args.extend(["--max_beam_width", "4"])
     if max_batch_size is not None:
-        args.append("--max_batch_size")
-        args.append(max_batch_size)
+        args.extend(["--max_batch_size", max_batch_size])
     if max_seq_len is not None:
-        args.append("--max_seq_len")
-        args.append(max_seq_len)
+        args.extend(["--max_seq_len", max_seq_len])
 
     with RemoteOpenAIServer(model_path, args) as remote_server:
         yield remote_server
diff --git a/tests/unittest/llmapi/apps/_test_openai_multi_gpu.py b/tests/unittest/llmapi/apps/_test_openai_multi_gpu.py
index cff9962bfa6..6ac65c42b25 100644
--- a/tests/unittest/llmapi/apps/_test_openai_multi_gpu.py
+++ b/tests/unittest/llmapi/apps/_test_openai_multi_gpu.py
@@ -15,9 +15,7 @@ def model_name():
     return "llama-models-v3/llama-v3-8b-instruct-hf"
 
 
-@pytest.fixture(scope="module",
-                params=[None, 'pytorch'],
-                ids=["trt", "pytorch"])
+@pytest.fixture(scope="module", params=["trt", "pytorch"])
 def backend(request):
     return request.param
 
@@ -55,13 +53,10 @@ def temp_extra_llm_api_options_file(request):
 def server(model_name: str, backend: str, extra_llm_api_options: bool,
            temp_extra_llm_api_options_file: str):
     model_path = get_model_path(model_name)
-    args = ["--tp_size", "2", "--max_beam_width", "1"]
-    if backend is not None:
-        args.append("--backend")
-        args.append(backend)
+    args = ["--tp_size", "2", "--max_beam_width", "1", "--backend", backend]
     if extra_llm_api_options:
-        args.append("--extra_llm_api_options")
-        args.append(temp_extra_llm_api_options_file)
+        args.extend(
+            ["--extra_llm_api_options", temp_extra_llm_api_options_file])
     with RemoteOpenAIServer(model_path, args) as remote_server:
         yield remote_server
 
@@ -95,7 +90,7 @@ def test_chat_tp2(client: openai.OpenAI, model_name: str):
     assert len(chat_completion.choices) == 1
     assert chat_completion.usage.completion_tokens == 1
     message = chat_completion.choices[0].message
-    assert message.content == 'Two'
+    assert message.content == "Two"
 
 
 @skip_single_gpu
diff --git a/tests/unittest/llmapi/apps/_test_openai_multi_nodes.py b/tests/unittest/llmapi/apps/_test_openai_multi_nodes.py
index eaea27597a9..7413745e51a 100644
--- a/tests/unittest/llmapi/apps/_test_openai_multi_nodes.py
+++ b/tests/unittest/llmapi/apps/_test_openai_multi_nodes.py
@@ -48,12 +48,17 @@ def server(model_name: str, backend: str, tp_pp_size: tuple):
     tp_size, pp_size = tp_pp_size
     device_count = torch.cuda.device_count()
     args = [
-        "--tp_size", f"{tp_size}", "--pp_size", f"{pp_size}", "--gpus_per_node",
-        f"{device_count}", "--kv_cache_free_gpu_memory_fraction", "0.95"
+        "--tp_size",
+        f"{tp_size}",
+        "--pp_size",
+        f"{pp_size}",
+        "--gpus_per_node",
+        f"{device_count}",
+        "--kv_cache_free_gpu_memory_fraction",
+        "0.95",
+        "--backend",
+        backend,
     ]
-    if backend is not None:
-        args.append("--backend")
-        args.append(backend)
     with RemoteOpenAIServer(model_path, args, llmapi_launch=True,
                             port=8001) as remote_server:
         yield remote_server
diff --git a/tests/unittest/llmapi/apps/_test_openai_reasoning.py b/tests/unittest/llmapi/apps/_test_openai_reasoning.py
index b20c365c3e0..d5cd7eb9eec 100644
--- a/tests/unittest/llmapi/apps/_test_openai_reasoning.py
+++ b/tests/unittest/llmapi/apps/_test_openai_reasoning.py
@@ -14,19 +14,15 @@ def model_name() -> str:
     return "DeepSeek-R1-Distill-Qwen-1.5B"
 
 
-@pytest.fixture(scope="module",
-                params=[None, 'pytorch'],
-                ids=["trt", "pytorch"])
+@pytest.fixture(scope="module", params=["trt", "pytorch"])
 def backend(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def server(model_name: str, backend: str) -> RemoteOpenAIServer:
+def server(model_name: str, backend: str):
     model_path = get_model_path(model_name)
-    args = []
-    if backend == "pytorch":
-        args.extend(["--backend", f"{backend}"])
+    args = ["--backend", f"{backend}"]
     max_beam_width = 1 if backend == "pytorch" else 2
     args.extend(["--max_beam_width", str(max_beam_width)])
     args.extend(["--max_batch_size", "2", "--max_seq_len", "1024"])
@@ -68,7 +64,7 @@ def test_reasoning_parser(client: openai.OpenAI, model_name: str, backend: str):
 
 
 @pytest.fixture(scope="module")
-def oning_client(server: RemoteOpenAIServer) -> openai.OpenAI:
+def async_client(server: RemoteOpenAIServer) -> openai.AsyncOpenAI:
     return server.get_async_client()
 
 
@@ -90,10 +86,10 @@ async def process_stream(
 
 
 @pytest.mark.asyncio(loop_scope="module")
-async def test_reasoning_parser_streaming(oning_client: openai.OpenAI,
-                                          model_name: str, backend: str):
+async def test_reasoning_parser_streaming(async_client: openai.AsyncOpenAI,
+                                          model_name: str):
     messages = [{"role": "user", "content": "hi"}]
-    stream = await oning_client.chat.completions.create(
+    stream = await async_client.chat.completions.create(
         model=model_name,
         messages=messages,
         max_completion_tokens=1000,
@@ -106,7 +102,7 @@ async def test_reasoning_parser_streaming(oning_client: openai.OpenAI,
     assert len(content_chunks) > 0
     assert len(reasoning_content_chunks) > 0
 
-    stream = await oning_client.chat.completions.create(
+    stream = await async_client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=1,
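A minimal sketch of the server-fixture pattern the updated tests converge on, assuming the RemoteOpenAIServer wrapper and get_model_path helper already used by the tests under tests/unittest/llmapi/apps; the import paths and the fixture shown here are illustrative, not part of this patch:

    import pytest

    # Illustrative imports: the real test modules pull these helpers from the
    # llmapi test utilities; the exact module paths are assumptions here.
    from .openai_server import RemoteOpenAIServer
    from ..test_llm import get_model_path


    @pytest.fixture(scope="module", params=["trt", "pytorch"])
    def backend(request):
        # Both backends are now explicit fixture params; None no longer
        # stands in for the TensorRT path.
        return request.param


    @pytest.fixture(scope="module")
    def server(backend):
        model_path = get_model_path("llama-models-v2/TinyLlama-1.1B-Chat-v1.0")
        # --backend is always forwarded to the serve CLI; beam-width settings
        # only apply on the TensorRT ("trt") path.
        args = ["--backend", backend]
        if backend == "trt":
            args.extend(["--max_beam_width", "4"])
        with RemoteOpenAIServer(model_path, args) as remote_server:
            yield remote_server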