Commits (41)
e738cba Update PyTorch to 2.9.0 (huydhn, Sep 16, 2025)
497cffd Add a comment (huydhn, Sep 16, 2025)
89ba43a Not setting --extra-index-url in test.in (huydhn, Sep 16, 2025)
157aae3 Use https://download.pytorch.org/whl/test (huydhn, Sep 17, 2025)
ea1ef7a Merge branch 'main' into pytorch-2.9.0 (huydhn, Sep 17, 2025)
0a81fb2 Put torchao back to the same state (huydhn, Sep 17, 2025)
0325966 Install the latest torchao nightly for quantization test (huydhn, Sep 17, 2025)
39b9cbf Debug distributed failures (huydhn, Sep 17, 2025)
0272040 Wrong torchao package (huydhn, Sep 18, 2025)
c2e0eaf Attempt the fix in https://github.com/NVIDIA/nccl/issues/1838 (huydhn, Sep 18, 2025)
c16db74 Merge branch 'main' into pytorch-2.9.0 (huydhn, Sep 23, 2025)
d3436a8 Set inductor_graph_partition to True by default (huydhn, Sep 23, 2025)
0e581a3 Rerun with RC3 (huydhn, Sep 23, 2025)
84c6cc3 Rerun with RC4 (huydhn, Sep 24, 2025)
3637adb Merge branch 'main' into pytorch-2.9.0 (huydhn, Sep 30, 2025)
23c6427 Build CPU docker image (huydhn, Sep 30, 2025)
ba8a85f Leave CPU for later (huydhn, Sep 30, 2025)
ec7b5c4 CPU build should work now (huydhn, Sep 30, 2025)
869d13e Rebuild flashinfer-python for 2.9.0 (huydhn, Oct 1, 2025)
a670c2e Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 1, 2025)
145e225 Fix precommit (huydhn, Oct 1, 2025)
9cd7683 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 2, 2025)
47ae5d8 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 2, 2025)
e7064b4 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 3, 2025)
106bd40 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 5, 2025)
76e438d Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 8, 2025)
ebaa419 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 9, 2025)
b4ed78c Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 11, 2025)
5d50c59 Skip some test unless it's B200 (huydhn, Oct 11, 2025)
210aa68 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 14, 2025)
39cd1b2 Merge branch 'main' into pytorch-2.9.0 (huydhn, Oct 15, 2025)
2b337fc Merge remote-tracking branch 'upstream/main' into pytorch-2.9.0 (ProExpertProg, Oct 15, 2025)
737ea15 Fix test.txt (ProExpertProg, Oct 14, 2025)
7108bd6 Enable `use_inductor_graph_partition` by default in >=2.9 (ProExpertProg, Oct 2, 2025)
4b39e6f Turn standalone compile back on (ProExpertProg, Oct 4, 2025)
9327eb5 PR #26735: Squashed commit of the following: (angelayi, Oct 13, 2025)
725a571 PR #26878: Squashed commit of the following: (ProExpertProg, Oct 15, 2025)
125c888 [Graph Partition] pass tests for decorator (#26831) (ProExpertProg, Oct 15, 2025)
e811cb5 TEMP: disable nested torch compilation (ProExpertProg, Oct 14, 2025)
f1dcb6d TEMP force spawn for tests (ProExpertProg, Oct 14, 2025)
4e2976b TEMP: use spawn to circumvent CUDA init issue (ProExpertProg, Oct 14, 2025)
7 changes: 5 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -172,6 +172,8 @@ steps:
- tests/v1/engine/test_engine_core_client.py
- tests/distributed/test_symm_mem_allreduce.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
# test with torchrun tp=2 and external_dp=2
- torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
# test with torchrun tp=2 and pp=2
@@ -527,8 +529,7 @@ steps:
# since torchao nightly is only compatible with torch nightly currently
# https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now
# we can only upgrade after this is resolved
-# TODO(jerryzh168): resolve the above comment
-- uv pip install --system torchao==0.13.0
+- pip install --pre torchao==0.15.0.dev20251014 --index-url https://download.pytorch.org/whl/nightly/cu128
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/

- label: LM Eval Small Models # 53min
@@ -944,6 +945,8 @@ steps:
- tests/v1/shutdown
- tests/v1/worker/test_worker_memory_snapshot.py
commands:
# https://github.com/NVIDIA/nccl/issues/1838
- export NCCL_CUMEM_HOST_ENABLE=0
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
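The same workaround can be exported when reproducing the distributed failures locally. A minimal sketch, reusing the exact test invocation from the pipeline above:

```bash
# Workaround from https://github.com/NVIDIA/nccl/issues/1838, as added
# to the pipeline above: disable NCCL's cuMem host allocations.
export NCCL_CUMEM_HOST_ENABLE=0
# torchrun smoke test with tp=2 and external_dp=2, per the pipeline.
torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
```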
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -38,7 +38,7 @@ repos:
rev: 0.9.1
hooks:
- id: pip-compile
-args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128, --python-platform, x86_64-manylinux_2_28]
+args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --extra-index-url, https://download.pytorch.org/whl/test/cu128, --python-platform, x86_64-manylinux_2_28]
files: ^requirements/test\.(in|txt)$
- repo: local
hooks:
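The updated hook can also be run by hand to regenerate the lock file. A minimal sketch mirroring the hook arguments above (assumes `uv` is installed locally):

```bash
uv pip compile requirements/test.in -o requirements/test.txt \
    --index-strategy unsafe-best-match \
    --extra-index-url https://download.pytorch.org/whl/test/cu128 \
    --python-platform x86_64-manylinux_2_28
```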
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -49,8 +49,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from docker/Dockerfile.rocm
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
set(TORCH_SUPPORTED_VERSION_CUDA "2.9.0")
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.0")

#
# Try to find python package with an executable that exactly matches
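A quick sanity check that the installed torch matches the new `TORCH_SUPPORTED_VERSION_CUDA` pin before configuring the build (a sketch, not part of the build itself):

```bash
# Fails loudly if the environment still has a torch 2.8.x wheel installed.
python -c "import torch; assert torch.__version__.startswith('2.9'), torch.__version__"
```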
13 changes: 11 additions & 2 deletions docker/Dockerfile
@@ -55,7 +55,7 @@ ARG UV_INDEX_URL=${PIP_INDEX_URL}
ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}

# PyTorch provides its own indexes for standard and nightly builds
-ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl/test
ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly

# PIP supports multiple authentication schemes, including keyring
@@ -356,6 +356,13 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
uv pip install --system dist/*.whl --verbose \
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')

+# TODO (huydhn): Remove this once xformers is released for 2.9.0
+RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
+    . /etc/environment
+    export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
+    uv pip install --system --no-build-isolation "git+https://github.com/facebookresearch/[email protected]"
+BASH
+
# Install FlashInfer pre-compiled kernel cache and binaries
# https://docs.flashinfer.ai/installation.html
RUN --mount=type=cache,target=/root/.cache/uv \
@@ -422,6 +429,7 @@ ARG PYTHON_VERSION

ARG PIP_INDEX_URL UV_INDEX_URL
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -434,7 +442,8 @@ ENV UV_LINK_MODE=copy
RUN --mount=type=cache,target=/root/.cache/uv \
CUDA_MAJOR="${CUDA_VERSION%%.*}"; \
if [ "$CUDA_MAJOR" -ge 12 ]; then \
-        uv pip install --system -r requirements/dev.txt; \
+        uv pip install --system -r requirements/dev.txt \
+          --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.'); \
fi

# install development dependencies (for testing)
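Because the index base URL is a build `ARG`, the image can be switched back to the stable index without editing the Dockerfile. A hedged example (the image tag is illustrative):

```bash
# Default build uses the test index baked in above; override the ARG
# to point back at the stable index once 2.9.0 is fully released.
docker build -f docker/Dockerfile \
    --build-arg PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl \
    -t vllm:pytorch-2.9 .
```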
4 changes: 4 additions & 0 deletions docker/Dockerfile.cpu
@@ -111,9 +111,13 @@ FROM base AS vllm-test-deps

WORKDIR /workspace/vllm

+# TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version
RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
cp requirements/test.in requirements/cpu-test.in && \
sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
+    sed -i 's/^torch==.*/torch==2.8.0/g' requirements/cpu-test.in && \
+    sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \
+    sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu

RUN --mount=type=cache,target=/root/.cache/uv \
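The sed pipeline above can be previewed outside Docker to see what ends up in `cpu-test.in`. A read-only sketch that prints the result instead of editing in place:

```bash
sed -e '/mamba_ssm/d' \
    -e 's/^torch==.*/torch==2.8.0/g' \
    -e 's/torchaudio.*/torchaudio/g' \
    -e 's/torchvision.*/torchvision/g' \
    requirements/test.in
```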
22 changes: 9 additions & 13 deletions docs/contributing/ci/update_pytorch_version.md
@@ -87,7 +87,7 @@ is ineffective.

While ongoing efforts like [#17419](gh-issue:17419)
address the long build time at its source, the current workaround is to set `VLLM_CI_BRANCH`
-to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/use_postmerge_q`)
+to a custom branch provided by @khluu (`VLLM_CI_BRANCH=khluu/long_build`)
when manually triggering a build on Buildkite. This branch accomplishes two things:

1. Increase the timeout limit to 10 hours so that the build doesn't time out.
@@ -107,28 +107,24 @@ source to unblock the update process.

### FlashInfer

-Here is how to build and install it from source with `torch2.7.0+cu128` in vLLM [Dockerfile](https://github.com/vllm-project/vllm/blob/27bebcd89792d5c4b08af7a65095759526f2f9e1/docker/Dockerfile#L259-L271):
+After #25782, the pre-compiled FlashInfer wheel can be built using the tools/flashinfer-build.sh
+script. The new wheel can then be uploaded to the [PyTorch test index](https://download.pytorch.org/whl/test/cu128/flashinfer_python-0.3.1-cp39-abi3-linux_x86_64.whl) and used during the update.
+
+During the PyTorch 2.9 update, using the old FlashInfer wheel built for
+2.8 led to a crash with the following error:

```bash
-export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0 10.0+PTX'
-export FLASHINFER_ENABLE_SM90=1
-uv pip install --system \
-    --no-build-isolation "git+https://github.com/flashinfer-ai/[email protected]"
+terminate called after throwing an instance of 'std::bad_array_new_length'
```

-One caveat is that building FlashInfer from source adds approximately 30
-minutes to the vLLM build time. Therefore, it's preferable to cache the wheel in a
-public location for immediate installation, such as [this FlashInfer wheel link](https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl). For future releases, contact the PyTorch release
-team if you want to get the package published there.

### xFormers

Similar to FlashInfer, here is how to build and install xFormers from source:

```bash
-export TORCH_CUDA_ARCH_LIST='7.0 7.5 8.0 8.9 9.0 10.0+PTX'
+export TORCH_CUDA_ARCH_LIST='7.5 8.0+PTX 9.0a'
MAX_JOBS=16 uv pip install --system \
--no-build-isolation "git+https://github.com/facebookresearch/[email protected].30"
--no-build-isolation "git+https://github.com/facebookresearch/[email protected].32.post2"
```

## Update all the different vLLM platforms
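For reference, installing the rebuilt FlashInfer wheel from the test index named in the doc above is a one-liner (URL copied from the doc; adjust for the CUDA variant in use):

```bash
uv pip install --system \
    https://download.pytorch.org/whl/test/cu128/flashinfer_python-0.3.1-cp39-abi3-linux_x86_64.whl
```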
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ requires = [
"packaging>=24.2",
"setuptools>=77.0.3,<80.0.0",
"setuptools-scm>=8.0",
"torch == 2.8.0",
"torch == 2.9.0",
"wheel",
"jinja2",
]
2 changes: 1 addition & 1 deletion requirements/build.txt
@@ -4,7 +4,7 @@ ninja
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
-torch==2.8.0
+torch==2.9.0
wheel
jinja2>=3.1.6
regex
10 changes: 5 additions & 5 deletions requirements/cuda.txt
@@ -5,11 +5,11 @@ numba == 0.61.2 # Required for N-gram speculative decoding

# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.8.0
-torchaudio==2.8.0
+torch==2.9.0
+torchaudio==2.9.0
# These must be updated alongside torch
-torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+torchvision==0.24.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
-xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
+# xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
# FlashInfer should be updated together with the Dockerfile
-flashinfer-python==0.4.0
+flashinfer-python==0.4.0
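Since the comment above notes that torchvision and torchaudio must be updated alongside torch, a dry-run resolution against the test index is a cheap lockstep check (a sketch; assumes a recent `uv` with these flags):

```bash
uv pip install --dry-run \
    --index-url https://download.pytorch.org/whl/test/cu128 \
    torch==2.9.0 torchvision==0.24.0 torchaudio==2.9.0
```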
8 changes: 4 additions & 4 deletions requirements/rocm-build.txt
@@ -1,10 +1,10 @@
# Common dependencies
-r common.txt

---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.8.0
-torchvision==0.23.0
-torchaudio==2.8.0
+--extra-index-url https://download.pytorch.org/whl/test/rocm6.3
+torch==2.9.0
+torchvision==0.24.0
+torchaudio==2.9.0

triton==3.3.0
cmake>=3.26.1,<4
6 changes: 3 additions & 3 deletions requirements/test.in
@@ -24,9 +24,9 @@ soundfile # required for audio tests
jiwer # required for audio tests
tblib # for pickling test exceptions
timm >=1.0.17 # required for internvl and gemma3n-mm test
-torch==2.8.0
-torchaudio==2.8.0
-torchvision==0.23.0
+torch==2.9.0
+torchaudio==2.9.0
+torchvision==0.24.0
transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.8.5 # required for voxtral test