Commit 7edb2fd

Update Dockerfile to 6.2, update ROCm components, remove Cython (#166)
* Miscellaneous changes, Dockerfile components update, remove Cython
* Restore Dockerfile and Cython for now
1 parent 7fd46eb commit 7edb2fd

File tree

4 files changed: +10 -10 lines changed


csrc/custom/custom.cu

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 #include <torch/all.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <cuda_runtime.h>
-#include "core/registration.h"
 
 // declare templates for front (cpp) and back (cuda) sides of function:
 // template <typename T>

vllm/_custom_ops.py

Lines changed: 1 addition & 1 deletion
@@ -646,7 +646,7 @@ def register_buffer(fa: int, t: torch.Tensor, handles: List[str],
     return torch.ops._C_custom_ar.register_buffer(fa, t, handles, offsets)
 
 
-def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[str], List[int]]:
+def get_graph_buffer_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]:
     return torch.ops._C_custom_ar.get_graph_buffer_ipc_meta(fa)
 
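The only contract change here is the annotation of the first tuple element. A minimal caller sketch under the new annotation; the wrapper function name is an assumption for illustration, not part of the commit:

    # Hypothetical caller, for illustration only.
    from typing import List, Tuple

    import torch

    import vllm._custom_ops as ops


    def collect_ipc_meta(fa: int) -> Tuple[torch.Tensor, List[int]]:
        # Under the updated annotation, the first element is typed as a
        # torch.Tensor rather than List[str]; the offsets list is unchanged.
        handles, offsets = ops.get_graph_buffer_ipc_meta(fa)
        return handles, offsets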

vllm/entrypoints/sync_openai/api_server.py

Lines changed: 5 additions & 3 deletions
@@ -174,9 +174,11 @@ async def _check_model(request: Union[CompletionRequest,
 
 async def _guided_decode_logits_processor(request, tokenizer):
     decoding_config = runner.engine_config.decoding_config
-    assert decoding_config is not None
-    guided_decoding_backend = (request.guided_decoding_backend
-                               or decoding_config.guided_decoding_backend)
+    if request.guided_decoding_backend:
+        guided_decoding_backend = request.guided_decoding_backend
+    else:
+        assert decoding_config is not None
+        guided_decoding_backend = decoding_config.guided_decoding_backend
     return await get_guided_decoding_logits_processor(guided_decoding_backend,
                                                       request, tokenizer)
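The reorder matters when the engine has no decoding config: the old code asserted on decoding_config before consulting the request, so a per-request guided_decoding_backend could never be honored in that case. A standalone sketch of the new selection logic; the function and argument names are assumptions for illustration:

    # Minimal sketch of the backend selection above; names are illustrative.
    from typing import Optional


    def pick_backend(request_backend: Optional[str],
                     config_backend: Optional[str]) -> str:
        # Prefer the per-request backend; only require an engine-level
        # config when the request leaves the backend unset.
        if request_backend:
            return request_backend
        assert config_backend is not None
        return config_backend


    assert pick_backend("outlines", None) == "outlines"
    assert pick_backend(None, "outlines") == "outlines"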

vllm/platforms/rocm.py

Lines changed: 4 additions & 5 deletions
@@ -34,7 +34,7 @@
 # the major benefit of using AMDSMI is that it will not initialize CUDA
 
 
-def with_nvml_context(fn):
+def with_amdsmi_context(fn):
 
     @wraps(fn)
     def wrapper(*args, **kwargs):
@@ -65,12 +65,11 @@ def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
         return torch.cuda.get_device_capability(device_id)
 
     @staticmethod
-    @with_nvml_context
+    @with_amdsmi_context
     def is_full_nvlink(physical_device_ids: List[int]) -> bool:
         """
-        query if the set of gpus are fully connected by xgmi (1 hop)
+        Query if the set of gpus are fully connected by xgmi (1 hop)
         """
-        # On ROCm, we instead query if GPUs are connected by 1 hop XGMI
         handles = [
             amdsmi_get_processor_handles()[i] for i in physical_device_ids
         ]
@@ -90,7 +89,7 @@ def is_full_nvlink(physical_device_ids: List[int]) -> bool:
         return True
 
     @staticmethod
-    @with_nvml_context
+    @with_amdsmi_context
     @lru_cache(maxsize=8)
     def get_device_name(device_id: int = 0) -> str:
         physical_device_id = device_id_to_physical_device_id(device_id)
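For context on the rename: a decorator of this shape typically brackets the wrapped call with AMD SMI setup and teardown. A minimal sketch, assuming the amdsmi package's amdsmi_init/amdsmi_shut_down entry points; this is not necessarily the file's actual body:

    # Sketch of an amdsmi context decorator; body is an assumption.
    from functools import wraps

    from amdsmi import amdsmi_init, amdsmi_shut_down


    def with_amdsmi_context(fn):

        @wraps(fn)
        def wrapper(*args, **kwargs):
            amdsmi_init()  # set up the AMD SMI library (does not touch CUDA)
            try:
                return fn(*args, **kwargs)
            finally:
                amdsmi_shut_down()  # always release the library handle

        return wrapper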
