NVIDIA · chzblych · Jun 17, 2025 · Jun 16, 2025 · Jun 17, 2025 · Jun 17, 2025
@@ -43,8 +43,8 @@ Unit tests live under `tests/unittest/` and run during the merge-request pipelin
 `jenkins/L0_Test.groovy` maps stage names to these YAML files.  For A100 the mapping includes:
 
 ```groovy
-    "A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
-    "A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
+    "A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
+    "A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
 ```
 
 The array elements are: GPU type, YAML file (without extension), shard index, and total number of shards. Only tests with `stage: post_merge` from that YAML file are selected when a `Post-Merge` stage runs.
@@ -57,12 +57,12 @@ The array elements are: GPU type, YAML file (without extension), shard index, an
 
 ### Example
 
-`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`.  The corresponding Jenkins stages are `A100X-Triton-Python-[Post-Merge]-1` and `A100X-Triton-Python-[Post-Merge]-2` (two shards).
+`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`.  The corresponding Jenkins stages are `A100X-Triton-[Post-Merge]-1` and `A100X-Triton-[Post-Merge]-2` (two shards).
 
 To run the same tests on your pull request, comment:
 
 ```bash
-/bot run --stage-list "A100X-Triton-Python-[Post-Merge]-1,A100X-Triton-Python-[Post-Merge]-2"
+/bot run --stage-list "A100X-Triton-[Post-Merge]-1,A100X-Triton-[Post-Merge]-2"
 ```
 
 This executes the same tests that run post-merge for this hardware/backend.

@@ -19,6 +19,7 @@ LLM_DOCKER_IMAGE = env.dockerImage
 AGENT_IMAGE = env.dockerImage
 
 POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
+POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"
 
 // Literals for easier access.
 @Field
@@ -151,7 +152,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
         containerConfig = """
                   - name: trt-llm
                     image: ${image}
-                    command: ['sleep', ${POD_TIMEOUT_SECONDS}]
+                    command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}]
                     volumeMounts:
                     - name: sw-tensorrt-pvc
                       mountPath: "/mnt/sw-tensorrt-pvc"

@@ -193,15 +193,15 @@ def buildImage(config, imageKeyToTag)
     def postTag = config.postTag
     def dependent = config.dependent
     def arch = config.arch == 'arm64' ? 'sbsa' : 'x86_64'
-    def makefileStage = config.makefileStage
+    def dockerfileStage = config.dockerfileStage
 
     def tag = "${arch}-${target}-torch_${torchInstallType}${postTag}-${LLM_DEFAULT_TAG}"
 
     def dependentTag = tag.replace("${arch}-${target}-", "${arch}-${dependent.target}-")
 
-    def imageWithTag = "${IMAGE_NAME}/${makefileStage}:${tag}"
-    def dependentImageWithTag = "${IMAGE_NAME}/${dependent.makefileStage}:${dependentTag}"
-    def customImageWithTag = "${IMAGE_NAME}/${makefileStage}:${customTag}"
+    def imageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${tag}"
+    def dependentImageWithTag = "${IMAGE_NAME}/${dependent.dockerfileStage}:${dependentTag}"
+    def customImageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${customTag}"
 
     if (target == "ngc-release") {
         if (params.triggerType == "post-merge") {
@@ -261,7 +261,7 @@ def buildImage(config, imageKeyToTag)
                     cd ${LLM_ROOT} && make -C docker ${dependent.target}_${action} \
                     TORCH_INSTALL_TYPE=${torchInstallType} \
                     IMAGE_WITH_TAG=${dependentImageWithTag} \
-                    STAGE=${dependent.makefileStage} \
+                    STAGE=${dependent.dockerfileStage} \
                     BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
                     """
                 }
@@ -286,7 +286,7 @@ def buildImage(config, imageKeyToTag)
                 cd ${LLM_ROOT} && make -C docker ${target}_${action} \
                 TORCH_INSTALL_TYPE=${torchInstallType} \
                 IMAGE_WITH_TAG=${imageWithTag} \
-                STAGE=${makefileStage} \
+                STAGE=${dockerfileStage} \
                 BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
                 """
             }
@@ -298,7 +298,7 @@ def buildImage(config, imageKeyToTag)
                 cd ${LLM_ROOT} && make -C docker ${target}_${action} \
                 TORCH_INSTALL_TYPE=${torchInstallType} \
                 IMAGE_WITH_TAG=${customImageWithTag} \
-                STAGE=${makefileStage} \
+                STAGE=${dockerfileStage} \
                 BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
                 """
             }
@@ -330,7 +330,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
         arch: "amd64",
         build_wheel: false,
         dependent: [:],
-        makefileStage: "tritondevel",
+        dockerfileStage: "tritondevel",
     ]
 
     def release_action = params.action
@@ -340,15 +340,15 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
             action: release_action,
             customTag: LLM_BRANCH_TAG + "-x86_64",
             build_wheel: true,
-            makefileStage: "release",
+            dockerfileStage: "release",
         ],
         "Build trtllm release (SBSA)": [
             target: "trtllm",
             action: release_action,
             customTag: LLM_BRANCH_TAG + "-sbsa",
             build_wheel: true,
             arch: "arm64",
-            makefileStage: "release",
+            dockerfileStage: "release",
         ],
         "Build CI image (x86_64 tritondevel)": [:],
         "Build CI image (SBSA tritondevel)": [
@@ -359,7 +359,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
             args: "PYTHON_VERSION=3.10.12",
             postTag: "-py310",
         ],
-        "Build CI image(RockyLinux8 Python312)": [
+        "Build CI image (RockyLinux8 Python312)": [
             target: "rockylinux8",
             args: "PYTHON_VERSION=3.12.3",
             postTag: "-py312",
@@ -371,21 +371,21 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
             build_wheel: true,
             dependent: [
                 target: "ngc-devel",
-                makefileStage: "devel",
+                dockerfileStage: "devel",
             ],
-            makefileStage: "release",
+            dockerfileStage: "release",
         ],
-        "Build NGC devel and release(SBSA)": [
+        "Build NGC devel and release (SBSA)": [
             target: "ngc-release",
             action: release_action,
             args: "DOCKER_BUILD_OPTS='--load --platform linux/arm64'",
             arch: "arm64",
             build_wheel: true,
             dependent: [
                 target: "ngc-devel",
-                makefileStage: "devel",
+                dockerfileStage: "devel",
             ],
-            makefileStage: "release",
+            dockerfileStage: "release",
         ],
     ]
     // Override all fields in build config with default values
@@ -423,7 +423,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
     }
 
     echo "enableFailFast is: ${params.enableFailFast}, but we currently don't use it due to random ucxx issue"
-    //pipeline.failFast = params.enableFailFast
+    // pipeline.failFast = params.enableFailFast
     pipeline.parallel buildJobs
 
 }
@@ -459,7 +459,7 @@ pipeline {
         PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
     }
     stages {
-        stage("Setup environment") {
+        stage("Setup Environment") {
             steps {
                 script {
                     echo "branch is: ${LLM_BRANCH}"
@@ -490,9 +490,7 @@ pipeline {
                     echo "imageKeyToTag is: ${imageKeyToTagJson}"
                     writeFile file: "imageKeyToTag.json", text: imageKeyToTagJson
                     archiveArtifacts artifacts: 'imageKeyToTag.json', fingerprint: true
-                    retry(3) {
-                        trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/")
-                    }
+                    trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/")
                 }
             }
         }

@@ -46,6 +46,7 @@ UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
 UBUNTU_24_04_IMAGE = "urm.nvidia.com/docker/ubuntu:24.04"
 
 POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
+POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"
 
 // Literals for easier access.
 @Field
@@ -412,7 +413,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
         containerConfig = """
                   - name: trt-llm
                     image: ${image}
-                    command: ['sleep', ${POD_TIMEOUT_SECONDS}]
+                    command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}]
                     volumeMounts:
                     - name: sw-tensorrt-pvc
                       mountPath: "/mnt/sw-tensorrt-pvc"
@@ -1564,16 +1565,16 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "A30-TensorRT-[Post-Merge]-5": ["a30", "l0_a30", 5, 6],
         "A30-TensorRT-[Post-Merge]-6": ["a30", "l0_a30", 6, 6],
         "A30-CPP-[Post-Merge]-1": ["a30", "l0_a30", 1, 1],
-        "A30-Triton-Python-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
-        "A30-Triton-Python-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
+        "A30-Triton-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
+        "A30-Triton-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
         "A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 6],
         "A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 6],
         "A100X-TensorRT-[Post-Merge]-3": ["a100x", "l0_a100", 3, 6],
         "A100X-TensorRT-[Post-Merge]-4": ["a100x", "l0_a100", 4, 6],
         "A100X-TensorRT-[Post-Merge]-5": ["a100x", "l0_a100", 5, 6],
         "A100X-TensorRT-[Post-Merge]-6": ["a100x", "l0_a100", 6, 6],
-        "A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
-        "A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
+        "A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
+        "A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
         "L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 5],
         "L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 5],
         "L40S-TensorRT-[Post-Merge]-3": ["l40s", "l0_l40s", 3, 5],
@@ -1586,7 +1587,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "H100_PCIe-TensorRT-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 5],
         "H100_PCIe-TensorRT-[Post-Merge]-4": ["h100-cr", "l0_h100", 4, 5],
         "H100_PCIe-TensorRT-[Post-Merge]-5": ["h100-cr", "l0_h100", 5, 5],
-        "B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
+        "B200_PCIe-Triton-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
         "H100_PCIe-TensorRT-Perf-1": ["h100-cr", "l0_perf", 1, 1],
         "H100_PCIe-PyTorch-Perf-1": ["h100-cr", "l0_perf", 1, 1],
         "DGX_H200-8_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x8", "l0_dgx_h200", 1, 1, 8],
@@ -1630,9 +1631,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     // Try to match what are being tested on x86 H100_PCIe.
     // The total machine time is scaled proportionally according to the number of each GPU.
     SBSATestConfigs = [
-        "GH200-1": ["gh200", "l0_gh200", 1, 2],
-        "GH200-2": ["gh200", "l0_gh200", 2, 2],
-        "GH200-[Post-Merge]": ["gh200", "l0_gh200", 1, 1],
+        "GH200-TensorRT-[Post-Merge]-1": ["gh200", "l0_gh200", 1, 2],
+        "GH200-TensorRT-[Post-Merge]-2": ["gh200", "l0_gh200", 2, 2],
     ]
     fullSet += SBSATestConfigs.keySet()
 

diff --git a/tests/integration/test_lists/test-db/l0_gh200.yml b/tests/integration/test_lists/test-db/l0_gh200.yml
@@ -11,7 +11,7 @@ l0_gh200:
       linux_distribution_name: ubuntu*
       cpu: aarch64
     terms:
-      stage: pre_merge
+      stage: post_merge
       backend: tensorrt
   tests:
   - unittest/trt/attention/test_gpt_attention.py -k "partition0"
@@ -21,26 +21,12 @@ l0_gh200:
   - unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
   - unittest/trt/model/test_gpt_e2e.py
   - unittest/bindings
-  - test_cache.py::test_cache_sanity # 1 sec
+  - test_cache.py::test_cache_sanity
   - unittest/llmapi/test_llm_quant.py
   - llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit
-- condition:
-    ranges:
-      system_gpu_count:
-        gte: 1
-        lte: 1
-    wildcards:
-      gpu:
-      - '*h200*'
-      linux_distribution_name: ubuntu*
-      cpu: aarch64
-    terms:
-      stage: post_merge
-      backend: tensorrt
-  tests:
   - unittest/test_model_runner_cpp.py
-  - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
-  - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins
+  - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype
+  - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype
   - examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]
   - examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]
   - unittest/trt/model/eagle