
Commit 793766c

huydhn authored and ProExpertProg committed
Update PyTorch to 2.9.0+cu129 (vllm-project#24994)
Co-authored-by: Luka Govedič <[email protected]>
1 parent 50de065 commit 793766c

16 files changed: +68 additions, −67 deletions

.buildkite/test-pipeline.yaml

Lines changed: 7 additions & 2 deletions
@@ -172,6 +172,8 @@ steps:
   - tests/v1/engine/test_engine_core_client.py
   - tests/distributed/test_symm_mem_allreduce.py
   commands:
+  # https://github.com/NVIDIA/nccl/issues/1838
+  - export NCCL_CUMEM_HOST_ENABLE=0
   # test with torchrun tp=2 and external_dp=2
   - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
   # test with torchrun tp=2 and pp=2
@@ -349,7 +351,8 @@ steps:
   - python3 offline_inference/basic/embed.py
   - python3 offline_inference/basic/score.py
   - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
-  - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048
+  # https://github.com/vllm-project/vllm/pull/26682 uses slightly more memory in PyTorch 2.9+ causing this test to OOM in 1xL4 GPU
+  - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 1536

 - label: Platform Tests (CUDA) # 4min
   timeout_in_minutes: 15
@@ -534,7 +537,7 @@ steps:
   # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
   # we can only upgrade after this is resolved
   # TODO(jerryzh168): resolve the above comment
-  - uv pip install --system torchao==0.13.0
+  - uv pip install --system torchao==0.13.0 --index-url https://download.pytorch.org/whl/cu129
   - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py

 - label: LM Eval Small Models # 53min
@@ -975,6 +978,8 @@ steps:
   - tests/v1/shutdown
   - tests/v1/worker/test_worker_memory_snapshot.py
   commands:
+  # https://github.com/NVIDIA/nccl/issues/1838
+  - export NCCL_CUMEM_HOST_ENABLE=0
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
   - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
   - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
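Both distributed test groups above gain the same `NCCL_CUMEM_HOST_ENABLE=0` export as a workaround for the NCCL issue linked in the added comments. A minimal sketch of reproducing that setup outside CI, assuming a checkout of the vLLM `tests/` directory and a 4-GPU machine:

```bash
# Sketch only: mirrors the CI workaround for https://github.com/NVIDIA/nccl/issues/1838
# by disabling NCCL's cuMem host allocations before launching the multi-GPU tests.
export NCCL_CUMEM_HOST_ENABLE=0

# Same invocation the pipeline uses for the torchrun smoke test (tp=2, external_dp=2).
torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
```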

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ repos:
   rev: 0.9.1
   hooks:
   - id: pip-compile
-    args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128, --python-platform, x86_64-manylinux_2_28]
+    args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu129, --python-platform, x86_64-manylinux_2_28]
     files: ^requirements/test\.(in|txt)$
 - repo: local
   hooks:
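The hook now compiles `requirements/test.txt` against the cu129 torch backend. A sketch of running the same compilation manually, assuming `uv` is installed locally (the flags mirror the hook's `args` above):

```bash
# Sketch: regenerate requirements/test.txt the way the pre-commit hook would,
# now targeting the cu129 PyTorch backend instead of cu128.
uv pip compile requirements/test.in -o requirements/test.txt \
    --index-strategy unsafe-best-match \
    --torch-backend cu129 \
    --python-platform x86_64-manylinux_2_28
```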

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -49,8 +49,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from docker/Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.9.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.9.0")

 #
 # Try to find python package with an executable that exactly matches

docker/Dockerfile

Lines changed: 12 additions & 2 deletions
@@ -5,7 +5,7 @@
 # docs/contributing/dockerfile/dockerfile.md and
 # docs/assets/contributing/dockerfile-stages-dependency.png

-ARG CUDA_VERSION=12.8.1
+ARG CUDA_VERSION=12.9.1
 ARG PYTHON_VERSION=3.12

 # By parameterizing the base images, we allow third-party to use their own
@@ -275,6 +275,7 @@ WORKDIR /vllm-workspace
 ENV DEBIAN_FRONTEND=noninteractive
 ARG TARGETPLATFORM

+# TODO (huydhn): There is no prebuilt gdrcopy package on 12.9 at the moment
 ARG GDRCOPY_CUDA_VERSION=12.8
 # Keep in line with FINAL_BASE_IMAGE
 ARG GDRCOPY_OS_VERSION=Ubuntu22_04
@@ -360,6 +361,13 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
     && uv pip install --system dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

+# TODO (huydhn): Remove this once xformers is released for 2.9.0
+RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
+    . /etc/environment
+    export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
+    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
+BASH
+
 # Install FlashInfer pre-compiled kernel cache and binaries
 # https://docs.flashinfer.ai/installation.html
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -426,6 +434,7 @@ ARG PYTHON_VERSION

 ARG PIP_INDEX_URL UV_INDEX_URL
 ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL

 # This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
 # Reference: https://github.com/astral-sh/uv/pull/1694
@@ -438,7 +447,8 @@ ENV UV_LINK_MODE=copy
 RUN --mount=type=cache,target=/root/.cache/uv \
     CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
     if [ "$CUDA_MAJOR" -ge 12 ]; then \
-        uv pip install --system -r requirements/dev.txt; \
+        uv pip install --system -r requirements/dev.txt \
+            --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
     fi

 # install development dependencies (for testing)
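Bumping `CUDA_VERSION` to 12.9.1 is what switches the PyTorch wheel index from cu128 to cu129, because the Dockerfile derives the index suffix from the version string. A small sketch of that shell expression in isolation (`https://download.pytorch.org/whl` is the assumed value of `PYTORCH_CUDA_INDEX_BASE_URL` here, matching the torchao index URL used in the test pipeline above):

```bash
# Sketch: how the Dockerfile turns CUDA_VERSION into the wheel-index suffix.
CUDA_VERSION=12.9.1
PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl  # assumed default

SUFFIX="cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.')"
echo "$SUFFIX"                                    # prints: cu129
echo "${PYTORCH_CUDA_INDEX_BASE_URL}/${SUFFIX}"   # prints: https://download.pytorch.org/whl/cu129
```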

docker/Dockerfile.cpu

Lines changed: 4 additions & 0 deletions
@@ -199,9 +199,13 @@ FROM base AS vllm-test-deps

 WORKDIR /workspace/vllm

+# TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version
 RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
     cp requirements/test.in requirements/cpu-test.in && \
     sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
+    sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \
+    sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
+    sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
     uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

 RUN --mount=type=cache,target=/root/.cache/uv \
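The CPU image stays on torch 2.8.0 until intel_extension_for_pytorch ships a 2.9-compatible build, so the added `sed` calls rewrite the pins before `uv pip compile` runs. A standalone sketch of what those substitutions do, using hypothetical sample pins rather than the real `requirements/test.in`:

```bash
# Sketch with made-up input lines; the real file has many more entries.
cat > /tmp/cpu-test.in <<'EOF'
torch==2.9.0
torchaudio==2.9.0
torchvision==0.24.0
EOF

sed -i 's/^torch==.*/torch==2.8.0/g'   /tmp/cpu-test.in  # pin torch back to 2.8.0 for CPU
sed -i 's/torchaudio.*/torchaudio/g'   /tmp/cpu-test.in  # unpin, let the resolver match torch 2.8.0
sed -i 's/torchvision.*/torchvision/g' /tmp/cpu-test.in

cat /tmp/cpu-test.in
# torch==2.8.0
# torchaudio
# torchvision
```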

docs/contributing/ci/update_pytorch_version.md

Lines changed: 4 additions & 22 deletions
@@ -87,7 +87,7 @@ is ineffective.

 While ongoing efforts like <https://github.com/vllm-project/vllm/issues/17419>
 address the long build time at its source, the current workaround is to set `VLLM_CI_BRANCH`
-to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/use_postmerge_q`)
+to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/long_build`)
 when manually triggering a build on Buildkite. This branch accomplishes two things:

 1. Increase the timeout limit to 10 hours so that the build doesn't time out.
@@ -100,35 +100,17 @@ to warm it up so that future builds are faster.

 ## Update dependencies

-Several vLLM dependencies, such as FlashInfer, also depend on PyTorch and need
+Several vLLM dependencies like xFormers depend on PyTorch and need
 to be updated accordingly. Rather than waiting for all of them to publish new
 releases (which would take too much time), they can be built from
 source to unblock the update process.

-### FlashInfer
-
-Here is how to build and install it from source with `torch2.7.0+cu128` in vLLM [Dockerfile](https://github.com/vllm-project/vllm/blob/27bebcd89792d5c4b08af7a65095759526f2f9e1/docker/Dockerfile#L259-L271):
-
-```bash
-export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX'
-export FLASHINFER_ENABLE_SM90=1
-uv pip install --system \
-    --no-build-isolation "git+https://github.com/flashinfer-ai/[email protected]"
-```
-
-One caveat is that building FlashInfer from source adds approximately 30
-minutes to the vLLM build time. Therefore, it's preferable to cache the wheel in a
-public location for immediate installation, such as [this FlashInfer wheel link](https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl). For future releases, contact the PyTorch release
-team if you want to get the package published there.
-
 ### xFormers

-Similar to FlashInfer, here is how to build and install xFormers from source:
-
 ```bash
-export TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
+export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
 MAX_JOBS=16 uv pip install --system \
-    --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
+    --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
 ```

 ## Update all the different vLLM platforms
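After the xFormers source build shown in the updated docs, a quick sanity check that the wheel built and imports against the new torch might look like this sketch (the exact version strings will differ):

```bash
# Sketch: confirm the source-built xFormers imports cleanly against torch 2.9.0+cu129.
python -c "import torch, xformers; print(torch.__version__, xformers.__version__)"
# expected output is along the lines of: 2.9.0+cu129 0.0.32.post2
```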

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ requires = [
     "packaging>=24.2",
     "setuptools>=77.0.3,<80.0.0",
     "setuptools-scm>=8.0",
-    "torch == 2.8.0",
+    "torch == 2.9.0",
     "wheel",
     "jinja2",
 ]

requirements/build.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ ninja
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
-torch==2.8.0
+torch==2.9.0
 wheel
 jinja2>=3.1.6
 regex

requirements/cuda.txt

Lines changed: 5 additions & 5 deletions
@@ -5,11 +5,11 @@ numba == 0.61.2 # Required for N-gram speculative decoding

 # Dependencies for NVIDIA GPUs
 ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.8.0
-torchaudio==2.8.0
+torch==2.9.0
+torchaudio==2.9.0
 # These must be updated alongside torch
-torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
-xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
+# xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
 # FlashInfer should be updated together with the Dockerfile
-flashinfer-python==0.4.1
+flashinfer-python==0.4.1
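With the xFormers pin commented out here (no wheel published for torch 2.9 yet; the Dockerfile builds it from source instead), installing the remaining pins and checking what resolved might look like the following sketch, assuming the standard PyTorch cu129 wheel index:

```bash
# Sketch: install the updated pins against the cu129 index and verify the resolved torch build.
uv pip install --system torch==2.9.0 torchaudio==2.9.0 torchvision==0.24.0 \
    --extra-index-url https://download.pytorch.org/whl/cu129
python -c "import torch; print(torch.__version__, torch.version.cuda)"
# expected: 2.9.0+cu129 12.9
```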
