diff --git a/docs/source/reference/ci-overview.md b/docs/source/reference/ci-overview.md index 63a1a969fa9..9002ae6ab33 100644 --- a/docs/source/reference/ci-overview.md +++ b/docs/source/reference/ci-overview.md @@ -43,8 +43,8 @@ Unit tests live under `tests/unittest/` and run during the merge-request pipelin `jenkins/L0_Test.groovy` maps stage names to these YAML files. For A100 the mapping includes: ```groovy - "A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2], - "A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2], + "A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2], + "A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2], ``` The array elements are: GPU type, YAML file (without extension), shard index, and total number of shards. Only tests with `stage: post_merge` from that YAML file are selected when a `Post-Merge` stage runs. @@ -57,12 +57,12 @@ The array elements are: GPU type, YAML file (without extension), shard index, an ### Example -`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`. The corresponding Jenkins stages are `A100X-Triton-Python-[Post-Merge]-1` and `A100X-Triton-Python-[Post-Merge]-2` (two shards). +`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`. The corresponding Jenkins stages are `A100X-Triton-[Post-Merge]-1` and `A100X-Triton-[Post-Merge]-2` (two shards). To run the same tests on your pull request, comment: ```bash -/bot run --stage-list "A100X-Triton-Python-[Post-Merge]-1,A100X-Triton-Python-[Post-Merge]-2" +/bot run --stage-list "A100X-Triton-[Post-Merge]-1,A100X-Triton-[Post-Merge]-2" ``` This executes the same tests that run post-merge for this hardware/backend. diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index 6b8bafd1fc6..1c63eb63327 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -19,6 +19,7 @@ LLM_DOCKER_IMAGE = env.dockerImage AGENT_IMAGE = env.dockerImage POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600" +POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200" // Literals for easier access. @Field @@ -151,7 +152,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64") containerConfig = """ - name: trt-llm image: ${image} - command: ['sleep', ${POD_TIMEOUT_SECONDS}] + command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}] volumeMounts: - name: sw-tensorrt-pvc mountPath: "/mnt/sw-tensorrt-pvc" diff --git a/jenkins/BuildDockerImage.groovy b/jenkins/BuildDockerImage.groovy index 09361249ca9..0e82f45c789 100644 --- a/jenkins/BuildDockerImage.groovy +++ b/jenkins/BuildDockerImage.groovy @@ -193,15 +193,15 @@ def buildImage(config, imageKeyToTag) def postTag = config.postTag def dependent = config.dependent def arch = config.arch == 'arm64' ? 'sbsa' : 'x86_64' - def makefileStage = config.makefileStage + def dockerfileStage = config.dockerfileStage def tag = "${arch}-${target}-torch_${torchInstallType}${postTag}-${LLM_DEFAULT_TAG}" def dependentTag = tag.replace("${arch}-${target}-", "${arch}-${dependent.target}-") - def imageWithTag = "${IMAGE_NAME}/${makefileStage}:${tag}" - def dependentImageWithTag = "${IMAGE_NAME}/${dependent.makefileStage}:${dependentTag}" - def customImageWithTag = "${IMAGE_NAME}/${makefileStage}:${customTag}" + def imageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${tag}" + def dependentImageWithTag = "${IMAGE_NAME}/${dependent.dockerfileStage}:${dependentTag}" + def customImageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${customTag}" if (target == "ngc-release") { if (params.triggerType == "post-merge") { @@ -261,7 +261,7 @@ def buildImage(config, imageKeyToTag) cd ${LLM_ROOT} && make -C docker ${dependent.target}_${action} \ TORCH_INSTALL_TYPE=${torchInstallType} \ IMAGE_WITH_TAG=${dependentImageWithTag} \ - STAGE=${dependent.makefileStage} \ + STAGE=${dependent.dockerfileStage} \ BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} """ } @@ -286,7 +286,7 @@ def buildImage(config, imageKeyToTag) cd ${LLM_ROOT} && make -C docker ${target}_${action} \ TORCH_INSTALL_TYPE=${torchInstallType} \ IMAGE_WITH_TAG=${imageWithTag} \ - STAGE=${makefileStage} \ + STAGE=${dockerfileStage} \ BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} """ } @@ -298,7 +298,7 @@ def buildImage(config, imageKeyToTag) cd ${LLM_ROOT} && make -C docker ${target}_${action} \ TORCH_INSTALL_TYPE=${torchInstallType} \ IMAGE_WITH_TAG=${customImageWithTag} \ - STAGE=${makefileStage} \ + STAGE=${dockerfileStage} \ BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args} """ } @@ -330,7 +330,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { arch: "amd64", build_wheel: false, dependent: [:], - makefileStage: "tritondevel", + dockerfileStage: "tritondevel", ] def release_action = params.action @@ -340,7 +340,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { action: release_action, customTag: LLM_BRANCH_TAG + "-x86_64", build_wheel: true, - makefileStage: "release", + dockerfileStage: "release", ], "Build trtllm release (SBSA)": [ target: "trtllm", @@ -348,7 +348,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { customTag: LLM_BRANCH_TAG + "-sbsa", build_wheel: true, arch: "arm64", - makefileStage: "release", + dockerfileStage: "release", ], "Build CI image (x86_64 tritondevel)": [:], "Build CI image (SBSA tritondevel)": [ @@ -359,7 +359,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { args: "PYTHON_VERSION=3.10.12", postTag: "-py310", ], - "Build CI image(RockyLinux8 Python312)": [ + "Build CI image (RockyLinux8 Python312)": [ target: "rockylinux8", args: "PYTHON_VERSION=3.12.3", postTag: "-py312", @@ -371,11 +371,11 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { build_wheel: true, dependent: [ target: "ngc-devel", - makefileStage: "devel", + dockerfileStage: "devel", ], - makefileStage: "release", + dockerfileStage: "release", ], - "Build NGC devel and release(SBSA)": [ + "Build NGC devel and release (SBSA)": [ target: "ngc-release", action: release_action, args: "DOCKER_BUILD_OPTS='--load --platform linux/arm64'", @@ -383,9 +383,9 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { build_wheel: true, dependent: [ target: "ngc-devel", - makefileStage: "devel", + dockerfileStage: "devel", ], - makefileStage: "release", + dockerfileStage: "release", ], ] // Override all fields in build config with default values @@ -423,7 +423,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) { } echo "enableFailFast is: ${params.enableFailFast}, but we currently don't use it due to random ucxx issue" - //pipeline.failFast = params.enableFailFast + // pipeline.failFast = params.enableFailFast pipeline.parallel buildJobs } @@ -459,7 +459,7 @@ pipeline { PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple" } stages { - stage("Setup environment") { + stage("Setup Environment") { steps { script { echo "branch is: ${LLM_BRANCH}" @@ -490,9 +490,7 @@ pipeline { echo "imageKeyToTag is: ${imageKeyToTagJson}" writeFile file: "imageKeyToTag.json", text: imageKeyToTagJson archiveArtifacts artifacts: 'imageKeyToTag.json', fingerprint: true - retry(3) { - trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/") - } + trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/") } } } diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index 05c27315bc2..88f69957e9d 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -46,6 +46,7 @@ UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04" UBUNTU_24_04_IMAGE = "urm.nvidia.com/docker/ubuntu:24.04" POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600" +POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200" // Literals for easier access. @Field @@ -412,7 +413,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod containerConfig = """ - name: trt-llm image: ${image} - command: ['sleep', ${POD_TIMEOUT_SECONDS}] + command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}] volumeMounts: - name: sw-tensorrt-pvc mountPath: "/mnt/sw-tensorrt-pvc" @@ -1564,16 +1565,16 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) "A30-TensorRT-[Post-Merge]-5": ["a30", "l0_a30", 5, 6], "A30-TensorRT-[Post-Merge]-6": ["a30", "l0_a30", 6, 6], "A30-CPP-[Post-Merge]-1": ["a30", "l0_a30", 1, 1], - "A30-Triton-Python-[Post-Merge]-1": ["a30", "l0_a30", 1, 2], - "A30-Triton-Python-[Post-Merge]-2": ["a30", "l0_a30", 2, 2], + "A30-Triton-[Post-Merge]-1": ["a30", "l0_a30", 1, 2], + "A30-Triton-[Post-Merge]-2": ["a30", "l0_a30", 2, 2], "A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 6], "A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 6], "A100X-TensorRT-[Post-Merge]-3": ["a100x", "l0_a100", 3, 6], "A100X-TensorRT-[Post-Merge]-4": ["a100x", "l0_a100", 4, 6], "A100X-TensorRT-[Post-Merge]-5": ["a100x", "l0_a100", 5, 6], "A100X-TensorRT-[Post-Merge]-6": ["a100x", "l0_a100", 6, 6], - "A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2], - "A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2], + "A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2], + "A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2], "L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 5], "L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 5], "L40S-TensorRT-[Post-Merge]-3": ["l40s", "l0_l40s", 3, 5], @@ -1586,7 +1587,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) "H100_PCIe-TensorRT-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 5], "H100_PCIe-TensorRT-[Post-Merge]-4": ["h100-cr", "l0_h100", 4, 5], "H100_PCIe-TensorRT-[Post-Merge]-5": ["h100-cr", "l0_h100", 5, 5], - "B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1], + "B200_PCIe-Triton-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1], "H100_PCIe-TensorRT-Perf-1": ["h100-cr", "l0_perf", 1, 1], "H100_PCIe-PyTorch-Perf-1": ["h100-cr", "l0_perf", 1, 1], "DGX_H200-8_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x8", "l0_dgx_h200", 1, 1, 8], @@ -1630,9 +1631,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null) // Try to match what are being tested on x86 H100_PCIe. // The total machine time is scaled proportionally according to the number of each GPU. SBSATestConfigs = [ - "GH200-1": ["gh200", "l0_gh200", 1, 2], - "GH200-2": ["gh200", "l0_gh200", 2, 2], - "GH200-[Post-Merge]": ["gh200", "l0_gh200", 1, 1], + "GH200-TensorRT-[Post-Merge]-1": ["gh200", "l0_gh200", 1, 2], + "GH200-TensorRT-[Post-Merge]-2": ["gh200", "l0_gh200", 2, 2], ] fullSet += SBSATestConfigs.keySet() diff --git a/tests/integration/test_lists/test-db/l0_gh200.yml b/tests/integration/test_lists/test-db/l0_gh200.yml index 88f8d3ef6af..9ac7d3129d4 100644 --- a/tests/integration/test_lists/test-db/l0_gh200.yml +++ b/tests/integration/test_lists/test-db/l0_gh200.yml @@ -11,7 +11,7 @@ l0_gh200: linux_distribution_name: ubuntu* cpu: aarch64 terms: - stage: pre_merge + stage: post_merge backend: tensorrt tests: - unittest/trt/attention/test_gpt_attention.py -k "partition0" @@ -21,26 +21,12 @@ l0_gh200: - unittest/trt/attention/test_gpt_attention.py -k "xqa_generic" - unittest/trt/model/test_gpt_e2e.py - unittest/bindings - - test_cache.py::test_cache_sanity # 1 sec + - test_cache.py::test_cache_sanity - unittest/llmapi/test_llm_quant.py - llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit -- condition: - ranges: - system_gpu_count: - gte: 1 - lte: 1 - wildcards: - gpu: - - '*h200*' - linux_distribution_name: ubuntu* - cpu: aarch64 - terms: - stage: post_merge - backend: tensorrt - tests: - unittest/test_model_runner_cpp.py - - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins - - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins + - accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype + - accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype - examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] - examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1] - unittest/trt/model/eagle