@@ -12,6 +12,7 @@ withCredentials([string(credentialsId: 'default-llm-repo', variable: 'DEFAULT_LL
12
12
LLM_REPO = env. gitlabSourceRepoHttpUrl ? env. gitlabSourceRepoHttpUrl : " ${ DEFAULT_LLM_REPO} "
13
13
}
14
14
15
// Artifactory paths for this build. Each one can be overridden through the
// corresponding environment variable (artifactPath / uploadPath); otherwise both
// default to the same per-job, per-build location.
ARTIFACT_PATH = env.artifactPath ? env.artifactPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"
UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifacts/${JOB_NAME}/${BUILD_NUMBER}"

LLM_ROOT = "llm"
@@ -25,6 +26,8 @@ LLM_SHORT_COMMIT = env.gitlabCommit ? env.gitlabCommit.substring(0, 7) : "undefi
25
26
26
27
// Default image tag: explicit env.defaultTag if set, otherwise
// <short commit>-<branch tag>-<build number>.
LLM_DEFAULT_TAG = env.defaultTag ?: "${LLM_SHORT_COMMIT}-${LLM_BRANCH_TAG}-${BUILD_NUMBER}"

// Gates the "Wait for Build Jobs Complete" and "Sanity Check for NGC Images" stages.
// Falls back to false when the runSanityCheck parameter is absent.
RUN_SANITY_CHECK = params.runSanityCheck ?: false

BUILD_JOBS = "32"
BUILD_JOBS_RELEASE_X86_64 = "32"
BUILD_JOBS_RELEASE_SBSA = "32"
@@ -37,10 +40,13 @@ def GITHUB_PR_API_URL = "github_pr_api_url"
37
40
def CACHED_CHANGED_FILE_LIST = " cached_changed_file_list"
38
41
@Field
def ACTION_INFO = "action_info"
@Field
def IMAGE_KEY_TO_TAG = "image_key_to_tag"
// Shared state map keyed by the constants above. The image-key-to-tag entry starts
// as an empty map; the other entries start as null.
def globalVars = [
    (GITHUB_PR_API_URL): null,
    (CACHED_CHANGED_FILE_LIST): null,
    (ACTION_INFO): null,
    (IMAGE_KEY_TO_TAG): [:],
]
45
51
46
52
@Field
@@ -203,15 +209,11 @@ def buildImage(config, imageKeyToTag)
203
209
def dependentImageWithTag = " ${ IMAGE_NAME} /${ dependent.dockerfileStage} :${ dependentTag} "
204
210
def customImageWithTag = " ${ IMAGE_NAME} /${ dockerfileStage} :${ customTag} "
205
211
206
- if (target == " ngc-release" ) {
207
- if (params. triggerType == " post-merge" ) {
208
- echo " Use NGC artifacts for post merge build"
209
- dependentImageWithTag = " ${ NGC_IMAGE_NAME} :${ dependentTag} "
210
- imageWithTag = " ${ NGC_IMAGE_NAME} :${ tag} "
211
- customImageWithTag = " ${ NGC_IMAGE_NAME} :${ customTag} "
212
- }
213
- imageKeyToTag[" NGC Devel Image ${ config.arch} " ] = dependentImageWithTag
214
- imageKeyToTag[" NGC Release Image ${ config.arch} " ] = imageWithTag
212
+ if (target == " ngc-release" && params. triggerType == " post-merge" ) {
213
+ echo " Use NGC artifacts for post merge build"
214
+ dependentImageWithTag = " ${ NGC_IMAGE_NAME} :${ dependentTag} "
215
+ imageWithTag = " ${ NGC_IMAGE_NAME} :${ tag} "
216
+ customImageWithTag = " ${ NGC_IMAGE_NAME} :${ customTag} "
215
217
}
216
218
217
219
args + = " GITHUB_MIRROR=https://urm.nvidia.com/artifactory/github-go-remote"
@@ -266,6 +268,9 @@ def buildImage(config, imageKeyToTag)
266
268
"""
267
269
}
268
270
args + = " DEVEL_IMAGE=${ dependentImageWithTag} "
271
+ if (target == " ngc-release" ) {
272
+ imageKeyToTag[" NGC Devel Image ${ config.arch} " ] = dependentImageWithTag
273
+ }
269
274
}
270
275
}
271
276
@@ -290,6 +295,9 @@ def buildImage(config, imageKeyToTag)
290
295
BUILD_WHEEL_OPTS='-j ${ build_jobs} ' ${ args}
291
296
"""
292
297
}
298
+ if (target == " ngc-release" ) {
299
+ imageKeyToTag[" NGC Release Image ${ config.arch} " ] = imageWithTag
300
+ }
293
301
}
294
302
295
303
if (customTag) {
@@ -429,6 +437,17 @@ def launchBuildJobs(pipeline, globalVars, imageKeyToTag) {
429
437
}
430
438
431
439
440
/**
 * Builds the base parameter map used when triggering a downstream job
 * (for example the Docker image sanity-test job).
 *
 * @return a map with the keys gitlabSourceRepoHttpUrl, gitlabCommit,
 *         artifactPath and uploadPath, filled from LLM_REPO, env.gitlabCommit,
 *         ARTIFACT_PATH and UPLOAD_PATH respectively
 */
def getCommonParameters()
{
    return [
        'gitlabSourceRepoHttpUrl': LLM_REPO,
        'gitlabCommit': env.gitlabCommit,
        'artifactPath': ARTIFACT_PATH,
        'uploadPath': UPLOAD_PATH,
    ]
}
449
+
450
+
432
451
pipeline {
433
452
agent {
434
453
kubernetes createKubernetesPodConfig(" agent" )
@@ -494,7 +513,100 @@ pipeline {
494
513
}
495
514
}
496
515
}
497
- stage(" Register Images for Security Checks" ) {
516
// Blocks until every required build artifact is visible under UPLOAD_PATH in
// Artifactory, or fails the stage after maxWaitMinutes. Only runs when
// RUN_SANITY_CHECK is truthy.
stage("Wait for Build Jobs Complete") {
    when {
        expression {
            RUN_SANITY_CHECK
        }
    }
    steps {
        script {
            container("python3") {
                // Install wget; it is used below only as an existence probe (--spider).
                trtllm_utils.llmExecStepWithRetry(this, script: "apt-get update && apt-get -y install wget")

                // Poll for build artifacts
                def artifactBaseUrl = "https://urm.nvidia.com/artifactory/${UPLOAD_PATH}/"
                def requiredFiles = [
                    "TensorRT-LLM-GH200.tar.gz",
                    "TensorRT-LLM.tar.gz"
                ]
                def maxWaitMinutes = 60
                def pollIntervalSeconds = 60

                echo "Waiting for build artifacts..."
                echo "Required files: ${requiredFiles}"

                def startTime = System.currentTimeMillis()
                def maxWaitMs = maxWaitMinutes * 60 * 1000

                // Kept outside the loop so the timeout error can report what is still missing.
                def missingFiles = [] + requiredFiles
                def artifactsReady = false

                while ((System.currentTimeMillis() - startTime) < maxWaitMs) {
                    missingFiles = []

                    for (file in requiredFiles) {
                        def fileUrl = "${artifactBaseUrl}${file}"
                        // --spider checks for existence without downloading; a non-zero
                        // exit status is treated as "not there yet".
                        def exitCode = sh(
                            script: "wget --spider --quiet --timeout=30 --tries=1 '${fileUrl}'",
                            returnStatus: true
                        )

                        if (exitCode != 0) {
                            missingFiles.add(file)
                        }
                    }

                    if (missingFiles.isEmpty()) {
                        artifactsReady = true
                        break
                    }

                    def elapsedMinutes = (System.currentTimeMillis() - startTime) / (60 * 1000)
                    echo "Waiting... (${elapsedMinutes.intValue()} minutes elapsed)"
                    echo "Missing files: ${missingFiles}"
                    sleep(pollIntervalSeconds)
                }

                if (artifactsReady) {
                    echo "All build artifacts are ready!"
                } else {
                    def elapsedMinutes = (System.currentTimeMillis() - startTime) / (60 * 1000)
                    error "Timeout waiting for build artifacts (${elapsedMinutes.intValue()} minutes). Missing files: ${missingFiles}"
                }
            }
        }
    }
}
575
// Triggers the BuildDockerImageSanityTest job with the common parameters plus the
// serialized globalVars (including the image-key-to-tag map), and fails this stage
// when that job's result is anything other than SUCCESS. Only runs when
// RUN_SANITY_CHECK is truthy.
stage("Sanity Check for NGC Images") {
    when {
        expression {
            RUN_SANITY_CHECK
        }
    }
    steps {
        script {
            // Hand the collected image tags to the downstream job through globalVars.
            globalVars[IMAGE_KEY_TO_TAG] = imageKeyToTag
            String globalVarsJson = writeJSON returnText: true, json: globalVars
            def parameters = getCommonParameters()
            parameters += [
                'enableFailFast': false,
                'globalVars': globalVarsJson,
            ]

            echo "Trigger BuildDockerImageSanityTest job, params: ${parameters}"

            def jobName = "/LLM/helpers/BuildDockerImageSanityTest"
            // propagate: false keeps a downstream failure from aborting here, so the
            // job URL can be logged before the result is checked explicitly below.
            def handle = build(
                job: jobName,
                parameters: trtllm_utils.toBuildParameters(parameters),
                propagate: false,
            )
            echo "Triggered job: ${handle.absoluteUrl}"

            def status = handle.result
            if (status != "SUCCESS") {
                error "Downstream job did not succeed (job: ${jobName}, result: ${status}, url: ${handle.absoluteUrl})"
            }
        }
    }
}
609
+ stage(" Register NGC Images for Security Checks" ) {
498
610
when {
499
611
expression {
500
612
return params. nspect_id && params. action == " push"
0 commit comments