Summary of this patch:
  * .buildkite/release-pipeline.yaml
      - arm64 wheel build and arm64 release image: CUDA_VERSION 12.8.1 -> 12.9.1
        (step id build-wheel-arm64-cuda-12-8 -> build-wheel-arm64-cuda-12-9).
      - x86 CUDA 12.8 wheel build now depends on a new block step
        (key: block-build-cu128-wheel).
      - x86 CUDA 11.8 wheel step is replaced by a CUDA 12.9 wheel step
        (id build-wheel-cuda-12-9); annotate-release-workflow depends on it.
  * .buildkite/scripts/upload-wheels.sh
      - index.html upload is skipped for cu126 and cu128 wheels (previously
        cu118 and cu126); every other wheel takes the upload branch.
  * tools/install_deepgemm.sh
      - adds the missing newline at end of file.

NOTE(review): line breaks and leading indentation below were reconstructed
from the hunk line counts; confirm against the target tree before applying.

diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml
index 92a1bcada387..53b5b23db3c2 100644
--- a/.buildkite/release-pipeline.yaml
+++ b/.buildkite/release-pipeline.yaml
@@ -1,21 +1,24 @@
 steps:
-  # aarch64 + CUDA builds
-  - label: "Build arm64 wheel - CUDA 12.8"
-    id: build-wheel-arm64-cuda-12-8
+  # aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
+  - label: "Build arm64 wheel - CUDA 12.9"
+    id: build-wheel-arm64-cuda-12-9
     agents:
       queue: arm64_cpu_queue_postmerge
     commands:
       # #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
       # https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/scripts/upload-wheels.sh"
     env:
       DOCKER_BUILDKIT: "1"
 
-  # x86 + CUDA builds
+  - block: "Build CUDA 12.8 wheel"
+    key: block-build-cu128-wheel
+
   - label: "Build wheel - CUDA 12.8"
+    depends_on: block-build-cu128-wheel
     id: build-wheel-cuda-12-8
     agents:
       queue: cpu_queue_postmerge
@@ -44,18 +47,14 @@ steps:
     env:
       DOCKER_BUILDKIT: "1"
 
-  # Note(simon): We can always build CUDA 11.8 wheel to ensure the build is working.
-  # However, this block can be uncommented to save some compute hours.
-  # - block: "Build CUDA 11.8 wheel"
-  #   key: block-build-cu118-wheel
-
-  - label: "Build wheel - CUDA 11.8"
-    # depends_on: block-build-cu118-wheel
-    id: build-wheel-cuda-11-8
+  # x86 + CUDA builds
+  - label: "Build wheel - CUDA 12.9"
+    depends_on: ~
+    id: build-wheel-cuda-12-9
     agents:
       queue: cpu_queue_postmerge
     commands:
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
       - "mkdir artifacts"
       - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
       - "bash .buildkite/scripts/upload-wheels.sh"
@@ -75,6 +74,7 @@ steps:
       - "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
       - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
 
+  # PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
   - label: "Build release image (arm64)"
     depends_on: ~
     id: build-release-image-arm64
@@ -82,7 +82,7 @@ steps:
       queue: arm64_cpu_queue_postmerge
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
+      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
       - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
 
   # Add job to create multi-arch manifest
@@ -103,7 +103,7 @@ steps:
       - create-multi-arch-manifest
       - build-wheel-cuda-12-8
       - build-wheel-cuda-12-6
-      - build-wheel-cuda-11-8
+      - build-wheel-cuda-12-9
     id: annotate-release-workflow
     agents:
       queue: cpu_queue_postmerge
diff --git a/.buildkite/scripts/upload-wheels.sh b/.buildkite/scripts/upload-wheels.sh
index 745f285c008a..43aa8c47be29 100644
--- a/.buildkite/scripts/upload-wheels.sh
+++ b/.buildkite/scripts/upload-wheels.sh
@@ -58,14 +58,15 @@ python3 .buildkite/generate_index.py --wheel "$normal_wheel"
 aws s3 cp "$wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/"
 aws s3 cp "$normal_wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/"
 
-if [[ $normal_wheel == *"cu118"* ]]; then
-    # if $normal_wheel matches cu118, do not upload the index.html
-    echo "Skipping index files for cu118 wheels"
-elif [[ $normal_wheel == *"cu126"* ]]; then
+if [[ $normal_wheel == *"cu126"* ]]; then
     # if $normal_wheel matches cu126, do not upload the index.html
     echo "Skipping index files for cu126 wheels"
+elif [[ $normal_wheel == *"cu128"* ]]; then
+    # if $normal_wheel matches cu128, do not upload the index.html
+    echo "Skipping index files for cu128 wheels"
 else
-    # only upload index.html for cu128 wheels (default wheels)
+    # only upload index.html for cu129 wheels (default wheels) as it
+    # is available on both x86 and arm64
     aws s3 cp index.html "s3://vllm-wheels/$BUILDKITE_COMMIT/vllm/index.html"
     aws s3 cp "s3://vllm-wheels/nightly/index.html" "s3://vllm-wheels/$BUILDKITE_COMMIT/index.html"
 fi
@@ -74,14 +75,15 @@ fi
 aws s3 cp "$wheel" "s3://vllm-wheels/nightly/"
 aws s3 cp "$normal_wheel" "s3://vllm-wheels/nightly/"
 
-if [[ $normal_wheel == *"cu118"* ]]; then
-    # if $normal_wheel matches cu118, do not upload the index.html
-    echo "Skipping index files for cu118 wheels"
-elif [[ $normal_wheel == *"cu126"* ]]; then
+if [[ $normal_wheel == *"cu126"* ]]; then
     # if $normal_wheel matches cu126, do not upload the index.html
     echo "Skipping index files for cu126 wheels"
+elif [[ $normal_wheel == *"cu128"* ]]; then
+    # if $normal_wheel matches cu128, do not upload the index.html
+    echo "Skipping index files for cu128 wheels"
 else
-    # only upload index.html for cu128 wheels (default wheels)
+    # only upload index.html for cu129 wheels (default wheels) as it
+    # is available on both x86 and arm64
     aws s3 cp index.html "s3://vllm-wheels/nightly/vllm/index.html"
 fi
 
diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh
index b125cda96f17..98427f1835ec 100755
--- a/tools/install_deepgemm.sh
+++ b/tools/install_deepgemm.sh
@@ -105,4 +105,4 @@ fi
 
 popd
 
-echo "✅ DeepGEMM installation completed successfully"
\ No newline at end of file
+echo "✅ DeepGEMM installation completed successfully"