Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/reference/ci-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ Unit tests live under `tests/unittest/` and run during the merge-request pipelin
`jenkins/L0_Test.groovy` maps stage names to these YAML files. For A100 the mapping includes:

```groovy
"A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
```

The array elements are: GPU type, YAML file (without extension), shard index, and total number of shards. Only tests with `stage: post_merge` from that YAML file are selected when a `Post-Merge` stage runs.
Expand All @@ -57,12 +57,12 @@ The array elements are: GPU type, YAML file (without extension), shard index, an

### Example

`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`. The corresponding Jenkins stages are `A100X-Triton-Python-[Post-Merge]-1` and `A100X-Triton-Python-[Post-Merge]-2` (two shards).
`triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]` appears in `l0_a100.yml` under `stage: post_merge` and `backend: triton`. The corresponding Jenkins stages are `A100X-Triton-[Post-Merge]-1` and `A100X-Triton-[Post-Merge]-2` (two shards).

To run the same tests on your pull request, comment:

```bash
/bot run --stage-list "A100X-Triton-Python-[Post-Merge]-1,A100X-Triton-Python-[Post-Merge]-2"
/bot run --stage-list "A100X-Triton-[Post-Merge]-1,A100X-Triton-[Post-Merge]-2"
```

This executes the same tests that run post-merge for this hardware/backend.
Expand Down
3 changes: 2 additions & 1 deletion jenkins/Build.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ LLM_DOCKER_IMAGE = env.dockerImage
AGENT_IMAGE = env.dockerImage

POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"

// Literals for easier access.
@Field
Expand Down Expand Up @@ -151,7 +152,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64")
containerConfig = """
- name: trt-llm
image: ${image}
command: ['sleep', ${POD_TIMEOUT_SECONDS}]
command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}]
volumeMounts:
- name: sw-tensorrt-pvc
mountPath: "/mnt/sw-tensorrt-pvc"
Expand Down
40 changes: 19 additions & 21 deletions jenkins/BuildDockerImage.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ def buildImage(config, imageKeyToTag)
def postTag = config.postTag
def dependent = config.dependent
def arch = config.arch == 'arm64' ? 'sbsa' : 'x86_64'
def makefileStage = config.makefileStage
def dockerfileStage = config.dockerfileStage

def tag = "${arch}-${target}-torch_${torchInstallType}${postTag}-${LLM_DEFAULT_TAG}"

def dependentTag = tag.replace("${arch}-${target}-", "${arch}-${dependent.target}-")

def imageWithTag = "${IMAGE_NAME}/${makefileStage}:${tag}"
def dependentImageWithTag = "${IMAGE_NAME}/${dependent.makefileStage}:${dependentTag}"
def customImageWithTag = "${IMAGE_NAME}/${makefileStage}:${customTag}"
def imageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${tag}"
def dependentImageWithTag = "${IMAGE_NAME}/${dependent.dockerfileStage}:${dependentTag}"
def customImageWithTag = "${IMAGE_NAME}/${dockerfileStage}:${customTag}"

if (target == "ngc-release") {
if (params.triggerType == "post-merge") {
Expand Down Expand Up @@ -261,7 +261,7 @@ def buildImage(config, imageKeyToTag)
cd ${LLM_ROOT} && make -C docker ${dependent.target}_${action} \
TORCH_INSTALL_TYPE=${torchInstallType} \
IMAGE_WITH_TAG=${dependentImageWithTag} \
STAGE=${dependent.makefileStage} \
STAGE=${dependent.dockerfileStage} \
BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
"""
}
Expand All @@ -286,7 +286,7 @@ def buildImage(config, imageKeyToTag)
cd ${LLM_ROOT} && make -C docker ${target}_${action} \
TORCH_INSTALL_TYPE=${torchInstallType} \
IMAGE_WITH_TAG=${imageWithTag} \
STAGE=${makefileStage} \
STAGE=${dockerfileStage} \
BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
"""
}
Expand All @@ -298,7 +298,7 @@ def buildImage(config, imageKeyToTag)
cd ${LLM_ROOT} && make -C docker ${target}_${action} \
TORCH_INSTALL_TYPE=${torchInstallType} \
IMAGE_WITH_TAG=${customImageWithTag} \
STAGE=${makefileStage} \
STAGE=${dockerfileStage} \
BUILD_WHEEL_OPTS='-j ${build_jobs}' ${args}
"""
}
Expand Down Expand Up @@ -330,7 +330,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
arch: "amd64",
build_wheel: false,
dependent: [:],
makefileStage: "tritondevel",
dockerfileStage: "tritondevel",
]

def release_action = params.action
Expand All @@ -340,15 +340,15 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
action: release_action,
customTag: LLM_BRANCH_TAG + "-x86_64",
build_wheel: true,
makefileStage: "release",
dockerfileStage: "release",
],
"Build trtllm release (SBSA)": [
target: "trtllm",
action: release_action,
customTag: LLM_BRANCH_TAG + "-sbsa",
build_wheel: true,
arch: "arm64",
makefileStage: "release",
dockerfileStage: "release",
],
"Build CI image (x86_64 tritondevel)": [:],
"Build CI image (SBSA tritondevel)": [
Expand All @@ -359,7 +359,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
args: "PYTHON_VERSION=3.10.12",
postTag: "-py310",
],
"Build CI image(RockyLinux8 Python312)": [
"Build CI image (RockyLinux8 Python312)": [
target: "rockylinux8",
args: "PYTHON_VERSION=3.12.3",
postTag: "-py312",
Expand All @@ -371,21 +371,21 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
build_wheel: true,
dependent: [
target: "ngc-devel",
makefileStage: "devel",
dockerfileStage: "devel",
],
makefileStage: "release",
dockerfileStage: "release",
],
"Build NGC devel and release(SBSA)": [
"Build NGC devel and release (SBSA)": [
target: "ngc-release",
action: release_action,
args: "DOCKER_BUILD_OPTS='--load --platform linux/arm64'",
arch: "arm64",
build_wheel: true,
dependent: [
target: "ngc-devel",
makefileStage: "devel",
dockerfileStage: "devel",
],
makefileStage: "release",
dockerfileStage: "release",
],
]
// Override all fields in build config with default values
Expand Down Expand Up @@ -423,7 +423,7 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
}

echo "enableFailFast is: ${params.enableFailFast}, but we currently don't use it due to random ucxx issue"
//pipeline.failFast = params.enableFailFast
// pipeline.failFast = params.enableFailFast
pipeline.parallel buildJobs

}
Expand Down Expand Up @@ -459,7 +459,7 @@ pipeline {
PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
}
stages {
stage("Setup environment") {
stage("Setup Environment") {
steps {
script {
echo "branch is: ${LLM_BRANCH}"
Expand Down Expand Up @@ -490,9 +490,7 @@ pipeline {
echo "imageKeyToTag is: ${imageKeyToTagJson}"
writeFile file: "imageKeyToTag.json", text: imageKeyToTagJson
archiveArtifacts artifacts: 'imageKeyToTag.json', fingerprint: true
retry(3) {
trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/")
}
trtllm_utils.uploadArtifacts("imageKeyToTag.json", "${UPLOAD_PATH}/")
}
}
}
Expand Down
18 changes: 9 additions & 9 deletions jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
UBUNTU_24_04_IMAGE = "urm.nvidia.com/docker/ubuntu:24.04"

POD_TIMEOUT_SECONDS = env.podTimeoutSeconds ? env.podTimeoutSeconds : "21600"
POD_TIMEOUT_SECONDS_TMP = env.podTimeoutSeconds ? env.podTimeoutSeconds : "43200"

// Literals for easier access.
@Field
Expand Down Expand Up @@ -412,7 +413,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
containerConfig = """
- name: trt-llm
image: ${image}
command: ['sleep', ${POD_TIMEOUT_SECONDS}]
command: ['sleep', ${POD_TIMEOUT_SECONDS_TMP}]
volumeMounts:
- name: sw-tensorrt-pvc
mountPath: "/mnt/sw-tensorrt-pvc"
Expand Down Expand Up @@ -1564,16 +1565,16 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
"A30-TensorRT-[Post-Merge]-5": ["a30", "l0_a30", 5, 6],
"A30-TensorRT-[Post-Merge]-6": ["a30", "l0_a30", 6, 6],
"A30-CPP-[Post-Merge]-1": ["a30", "l0_a30", 1, 1],
"A30-Triton-Python-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-Triton-Python-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A30-Triton-[Post-Merge]-1": ["a30", "l0_a30", 1, 2],
"A30-Triton-[Post-Merge]-2": ["a30", "l0_a30", 2, 2],
"A100X-TensorRT-[Post-Merge]-1": ["a100x", "l0_a100", 1, 6],
"A100X-TensorRT-[Post-Merge]-2": ["a100x", "l0_a100", 2, 6],
"A100X-TensorRT-[Post-Merge]-3": ["a100x", "l0_a100", 3, 6],
"A100X-TensorRT-[Post-Merge]-4": ["a100x", "l0_a100", 4, 6],
"A100X-TensorRT-[Post-Merge]-5": ["a100x", "l0_a100", 5, 6],
"A100X-TensorRT-[Post-Merge]-6": ["a100x", "l0_a100", 6, 6],
"A100X-Triton-Python-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-Triton-Python-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"A100X-Triton-[Post-Merge]-1": ["a100x", "l0_a100", 1, 2],
"A100X-Triton-[Post-Merge]-2": ["a100x", "l0_a100", 2, 2],
"L40S-TensorRT-[Post-Merge]-1": ["l40s", "l0_l40s", 1, 5],
"L40S-TensorRT-[Post-Merge]-2": ["l40s", "l0_l40s", 2, 5],
"L40S-TensorRT-[Post-Merge]-3": ["l40s", "l0_l40s", 3, 5],
Expand All @@ -1586,7 +1587,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
"H100_PCIe-TensorRT-[Post-Merge]-3": ["h100-cr", "l0_h100", 3, 5],
"H100_PCIe-TensorRT-[Post-Merge]-4": ["h100-cr", "l0_h100", 4, 5],
"H100_PCIe-TensorRT-[Post-Merge]-5": ["h100-cr", "l0_h100", 5, 5],
"B200_PCIe-Triton-Python-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
"B200_PCIe-Triton-[Post-Merge]-1": ["b100-ts2", "l0_b200", 1, 1],
"H100_PCIe-TensorRT-Perf-1": ["h100-cr", "l0_perf", 1, 1],
"H100_PCIe-PyTorch-Perf-1": ["h100-cr", "l0_perf", 1, 1],
"DGX_H200-8_GPUs-PyTorch-[Post-Merge]-1": ["dgx-h200-x8", "l0_dgx_h200", 1, 1, 8],
Expand Down Expand Up @@ -1630,9 +1631,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
// Try to match what is being tested on x86 H100_PCIe.
// The total machine time is scaled proportionally to the number of GPUs of each type.
SBSATestConfigs = [
"GH200-1": ["gh200", "l0_gh200", 1, 2],
"GH200-2": ["gh200", "l0_gh200", 2, 2],
"GH200-[Post-Merge]": ["gh200", "l0_gh200", 1, 1],
"GH200-TensorRT-[Post-Merge]-1": ["gh200", "l0_gh200", 1, 2],
"GH200-TensorRT-[Post-Merge]-2": ["gh200", "l0_gh200", 2, 2],
]
fullSet += SBSATestConfigs.keySet()

Expand Down
22 changes: 4 additions & 18 deletions tests/integration/test_lists/test-db/l0_gh200.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ l0_gh200:
linux_distribution_name: ubuntu*
cpu: aarch64
terms:
stage: pre_merge
stage: post_merge
backend: tensorrt
tests:
- unittest/trt/attention/test_gpt_attention.py -k "partition0"
Expand All @@ -21,26 +21,12 @@ l0_gh200:
- unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
- unittest/trt/model/test_gpt_e2e.py
- unittest/bindings
- test_cache.py::test_cache_sanity # 1 sec
- test_cache.py::test_cache_sanity
- unittest/llmapi/test_llm_quant.py
- llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit
- condition:
ranges:
system_gpu_count:
gte: 1
lte: 1
wildcards:
gpu:
- '*h200*'
linux_distribution_name: ubuntu*
cpu: aarch64
terms:
stage: post_merge
backend: tensorrt
tests:
- unittest/test_model_runner_cpp.py
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype # 1.5 mins
- accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype # 1.5 mins
- accuracy/test_cli_flow.py::TestGptNext::test_auto_dtype
- accuracy/test_cli_flow.py::TestSantacoder::test_auto_dtype
- examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_py_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]
- examples/test_medusa.py::test_llm_medusa_with_qaunt_base_model_1gpu[fp8-use_cpp_session-medusa-vicuna-7b-v1.3-4-heads-float16-bs1]
- unittest/trt/model/eagle
Expand Down