NVIDIA · HuiGao-NV · Aug 26, 2025 · Jul 14, 2025
@@ -1,17 +1,11 @@
-from tensorrt_llm import BuildConfig, SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change
+from tensorrt_llm import LLM, SamplingParams
 
 
 def main():
 
-    build_config = BuildConfig()
-    build_config.max_batch_size = 256
-    build_config.max_num_tokens = 1024
-
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-              build_config=build_config)
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 
     # Sample prompts.
     prompts = [

@@ -2095,7 +2095,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
                         trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 uninstall -y tensorrt")
                         if (values[5] != DLFW_IMAGE) {
                             def ubuntu_version = key.contains("UB2404") ? "ubuntu2404" : "ubuntu2204"
-                            def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
+                            def platform = values[2] == X86_64_TRIPLE ? "x86_64" : "sbsa"
                             trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
                             trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
                             trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update")