3 changes: 2 additions & 1 deletion .github/workflows/release_whl.yml
@@ -90,7 +90,8 @@ jobs:
--exclude libc10.so \
--exclude libc_sec.so \
--exclude "libascend*.so" \
--exclude "libtorch*.so"
--exclude "libtorch*.so" \
--exclude "liberror_manager.so"
done
rm -f dist/*.whl
mv dist/repaired/*.whl dist/
2 changes: 1 addition & 1 deletion README.md
@@ -42,7 +42,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
- Software:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1
* PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1.dev20250619
* PyTorch >= 2.7.1, torch-npu >= 2.7.1.dev20250724
* vLLM (the same version as vllm-ascend)

## Getting Started
2 changes: 1 addition & 1 deletion README.zh.md
@@ -42,7 +42,7 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
- 软件:
* Python >= 3.9, < 3.12
* CANN >= 8.2.rc1
* PyTorch >= 2.5.1, torch-npu >= 2.5.1.post1.dev20250619
* PyTorch >= 2.7.1, torch-npu >= 2.7.1.dev20250724
* vLLM (与vllm-ascend版本一致)

## 开始使用
4 changes: 2 additions & 2 deletions docs/source/installation.md
@@ -12,8 +12,8 @@ This document describes how to install vllm-ascend manually.
| Software | Supported version | Note |
|---------------|----------------------------------|-------------------------------------------|
| CANN | >= 8.2.RC1 | Required for vllm-ascend and torch-npu |
| torch-npu | >= 2.5.1.post1.dev20250619 | Required for vllm-ascend. No need to install manually; it is installed automatically in the steps below |
| torch | >= 2.5.1 | Required for torch-npu and vllm |
| torch-npu | >= 2.7.1.dev20250724 | Required for vllm-ascend. No need to install manually; it is installed automatically in the steps below |
| torch | >= 2.7.1 | Required for torch-npu and vllm |

You have 2 ways to install:
- **Using pip**: first prepare env manually or via CANN image, then install `vllm-ascend` using pip.
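Not part of the diff itself, but as a quick way to confirm an existing environment already meets the updated table above, a check along these lines can be run before installing `vllm-ascend` (it assumes the `packaging` module is available, which pip-based environments typically provide):

```python
# Sanity-check the torch / torch-npu versions against the table above.
from packaging.version import Version

import torch

base_version = Version(torch.__version__.split("+")[0])
assert base_version >= Version("2.7.1"), (
    f"torch {torch.__version__} is older than the required 2.7.1")

try:
    import torch_npu  # normally pulled in automatically by vllm-ascend
    print("torch_npu:", torch_npu.__version__)
except ImportError:
    print("torch_npu not found; it is installed automatically with vllm-ascend")
```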
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -12,9 +12,9 @@ requires = [
"scipy",
"setuptools>=64",
"setuptools-scm>=8",
"torch-npu==2.5.1.post1.dev20250619",
"torch>=2.5.1",
"torchvision<0.21.0",
"torch-npu==2.7.1.dev20250724",
"torch>=2.7.1",
"torchvision",
"wheel",
"msgpack",
"quart",
6 changes: 3 additions & 3 deletions requirements.txt
@@ -10,8 +10,8 @@ pyyaml
scipy
setuptools>=64
setuptools-scm>=8
torch>=2.5.1
torchvision<0.21.0
torch>=2.7.1
torchvision
wheel
# Remove after https://github.com/vllm-project/vllm-ascend/issues/2034
transformers<4.54.0
@@ -26,4 +26,4 @@ numba
# Install torch_npu
--pre
--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
torch-npu==2.5.1.post1.dev20250619
torch-npu==2.7.1.dev20250724
3 changes: 0 additions & 3 deletions tests/e2e/singlecard/ops/test_fused_moe.py
@@ -19,9 +19,6 @@

Run `pytest tests/ops/test_fused_moe.py`.
"""
# fused moe ops test will hit the infer_schema error, we need add the patch
# here to make the test pass.
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa

from unittest.mock import MagicMock, patch

104 changes: 0 additions & 104 deletions tests/ut/patch/worker/patch_common/test_patch_utils.py

This file was deleted.

9 changes: 2 additions & 7 deletions tests/ut/test_platform.py
@@ -543,13 +543,9 @@ def test_get_piecewise_backend_cls_returns_correct_value(self):

@patch("torch.distributed.is_hccl_available", return_value=True)
@patch("torch_npu._C._distributed_c10d.ProcessGroupHCCL")
@patch("torch_npu._C._distributed_c10d.ProcessGroupHCCL.Options")
@patch("torch.distributed.ProcessGroup")
def test_successful_initialization(self, mock_pg, mock_options_cls,
mock_pg_hccl, _):
def test_successful_initialization(self, mock_pg, mock_pg_hccl, _):
mock_prefix = MagicMock(spec=PrefixStore)
mock_options = MagicMock(spec=ProcessGroup.Options)
mock_options_cls.return_value = mock_options
mock_backend = MagicMock()
mock_pg_hccl.return_value = mock_backend
group_rank = 0
@@ -574,8 +570,7 @@ def test_successful_initialization(self, mock_pg, mock_options_cls,
timeout=timedelta(seconds=30),
)

mock_pg.assert_called_once_with(mock_prefix, group_rank, group_size,
unittest.mock.ANY)
mock_pg.assert_called_once_with(mock_prefix, group_rank, group_size)
mock_pg_hccl.assert_called_once_with(mock_prefix, group_rank,
group_size, unittest.mock.ANY)
mock_backend._set_sequence_number_for_group.assert_called_once()
4 changes: 0 additions & 4 deletions vllm_ascend/__init__.py
@@ -23,9 +23,5 @@ def register():


def register_model():
# fix pytorch schema check error, remove this line after pytorch
# is upgraded to 2.7.0
import vllm_ascend.patch.worker.patch_common.patch_utils # noqa: F401

from .models import register_model
register_model()
14 changes: 0 additions & 14 deletions vllm_ascend/patch/__init__.py
@@ -75,20 +75,6 @@
# Future Plan:
# Remove this patch when vllm merged them.
#
# ** File: worker/patch_common/patch_utils.py **
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 1. `vllm.utils.direct_register_custom_op`
# Why:
# pytorch 2.7.0 is not compatible with pytorch 2.5.1. vllm is based on pytorch 2.7.0, while vllm-ascend
# is based on pytorch 2.5.1, so we need this patch to make vllm compatible with pytorch 2.5.1.
# How:
# patch the __annotations__ check to make it compatible with pytorch 2.5.1.
# Related PR (if no, explain why):
# This is the problem in vllm-ascend
# Future Plan:
# Remove this patch once pytorch 2.7.0 is supported for vllm ascend.
#
# ** File: worker/patch_0_10_0/patch_sampler_gather_logprobs.py **
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 1. `vllm.v1.sample.sampler.Sampler.gather_logprobs`
# Why:
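For context on the removed doc entry above: the deleted patch wrapped `vllm.utils.direct_register_custom_op` so that PyTorch 2.5.1's schema inference would not trip over string (lazy) type annotations. A rough, illustrative sketch of that kind of shim is below; the wrapper name and exact mechanics are assumptions, not the contents of the deleted `patch_utils.py`:

```python
# Illustrative monkey patch: materialize string annotations before torch's
# schema inference runs; the removed patch_utils.py did something similar.
from typing import get_type_hints

import vllm.utils

_original_register = vllm.utils.direct_register_custom_op


def _register_with_resolved_annotations(op_name, op_func, *args, **kwargs):
    # PyTorch 2.5.1's torch.library schema inference cannot handle
    # `from __future__ import annotations`-style string hints.
    op_func.__annotations__ = get_type_hints(op_func)
    return _original_register(op_name, op_func, *args, **kwargs)


vllm.utils.direct_register_custom_op = _register_with_resolved_annotations
```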
3 changes: 0 additions & 3 deletions vllm_ascend/patch/worker/patch_common/__init__.py
@@ -15,9 +15,6 @@
# limitations under the License.
#

# patch_utils should be the first import, because it will be used by other
# patch files.
import vllm_ascend.patch.worker.patch_common.patch_utils # noqa isort:skip
import vllm_ascend.patch.worker.patch_common.patch_distributed # noqa
import vllm_ascend.patch.worker.patch_common.patch_linear # noqa
import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
38 changes: 0 additions & 38 deletions vllm_ascend/patch/worker/patch_common/patch_utils.py

This file was deleted.

6 changes: 0 additions & 6 deletions vllm_ascend/platform.py
@@ -259,16 +259,10 @@ def stateless_init_device_torch_dist_pg(

assert is_hccl_available()

# TODO(Yizhou): The reason we need to set options while vllm does not
# seems to be related to the version of PyTorch. In the latest version,
# there is no need to set options. While in the older version, 2.5.1
# specifically, we need to set options.

Review comment (collaborator): @yiz-liu FYI, we can finally clean up this, thanks for your note.
options = ProcessGroup.Options(backend=backend)
pg: ProcessGroup = ProcessGroup(
prefix_store,
group_rank,
group_size,
options,
)

backend_options = ProcessGroupHCCL.Options()
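The deleted `options` plumbing above reflects an API difference between the two PyTorch releases this PR moves between: on 2.7.x the `ProcessGroup` constructor no longer accepts an `Options` argument, while 2.5.x required it. Below is a minimal sketch of handling both, assuming the arguments already prepared by the surrounding function; the helper name and version check are illustrative, not code from this PR:

```python
from packaging.version import Version

import torch
from torch.distributed import ProcessGroup


def build_process_group(prefix_store, group_rank, group_size, backend):
    """Construct a ProcessGroup across the torch 2.5 / 2.7 constructor change."""
    if Version(torch.__version__.split("+")[0]) >= Version("2.7"):
        # torch >= 2.7: the constructor no longer takes an Options argument.
        return ProcessGroup(prefix_store, group_rank, group_size)
    # torch 2.5.x: backend options must be passed at construction time.
    options = ProcessGroup.Options(backend=backend)
    return ProcessGroup(prefix_store, group_rank, group_size, options)
```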
4 changes: 0 additions & 4 deletions vllm_ascend/quantization/quant_config.py
@@ -15,10 +15,6 @@
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# By using quantization case, this file is called before worker patch achieve,
# we need to import patch_utils here first to make sure the patch is applied.
import vllm_ascend.patch.worker.patch_common.patch_utils # type: ignore[import] # isort: skip # noqa

from types import MappingProxyType
from typing import Any, Callable, Dict, List, Mapping, Optional
