-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Labels
bug: Something isn't working
Description
Your current environment
The output of python collect_env.py
Your output of `python collect_env.py` here
🐛 Describe the bug
vllm serve --model="deepseek-ai/DeepSeek-V2-lite" --max-num-seqs 512 --data-parallel-size 2 --enable-expert-parallel --gpu-memory-utilization 0.9 --port 9256
Running the command above causes the following error:
(EngineCore_DP0 pid=397122) Process EngineCore_DP0:
(EngineCore_DP0 pid=397122) Traceback (most recent call last):
(EngineCore_DP0 pid=397122) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=397122) self.run()
(EngineCore_DP0 pid=397122) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=397122) self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 722, in run_engine_core
(EngineCore_DP0 pid=397122) raise e
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 705, in run_engine_core
(EngineCore_DP0 pid=397122) engine_core = DPEngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 975, in __init__
(EngineCore_DP0 pid=397122) super().__init__(vllm_config, local_client, handshake_address,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 505, in __init__
(EngineCore_DP0 pid=397122) super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 82, in __init__
(EngineCore_DP0 pid=397122) self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_DP0 pid=397122) self._init_executor()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 48, in _init_executor
(EngineCore_DP0 pid=397122) self.collective_rpc("init_device")
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
(EngineCore_DP0 pid=397122) answer = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/utils/__init__.py", line 3060, in run_method
(EngineCore_DP0 pid=397122) return func(*args, **kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/worker/worker_base.py", line 611, in init_device
(EngineCore_DP0 pid=397122) self.worker.init_device() # type: ignore
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 193, in init_device
(EngineCore_DP0 pid=397122) init_worker_distributed_environment(self.vllm_config, self.rank,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 692, in init_worker_distributed_environment
(EngineCore_DP0 pid=397122) ensure_model_parallel_initialized(
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 1185, in ensure_model_parallel_initialized
(EngineCore_DP0 pid=397122) initialize_model_parallel(tensor_model_parallel_size,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 1150, in initialize_model_parallel
(EngineCore_DP0 pid=397122) _DP = init_model_parallel_group(group_ranks,
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 883, in init_model_parallel_group
(EngineCore_DP0 pid=397122) return GroupCoordinator(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 262, in __init__
(EngineCore_DP0 pid=397122) self.device_communicator = device_comm_cls(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/device_communicators/cuda_communicator.py", line 61, in __init__
(EngineCore_DP0 pid=397122) self.symm_mem_comm = SymmMemCommunicator(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/device_communicators/symm_mem.py", line 88, in __init__
(EngineCore_DP0 pid=397122) handle = torch_symm_mem.rendezvous(self.buffer, self.group.group_name)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/.venv/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__init__.py", line 1609, in rendezvous
(EngineCore_DP0 pid=397122) return _SymmetricMemory.rendezvous(tensor, group_name)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) RuntimeError: CUDASymmetricMemoryAllocator::rendezvous: detected allocations from overlapping devices from different ranks.
(EngineCore_DP0 pid=397122) Exception ignored in: <function ExecutorBase.__del__ at 0x742b826a68e0>
(EngineCore_DP0 pid=397122) Traceback (most recent call last):
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/executor_base.py", line 237, in __del__
(EngineCore_DP0 pid=397122) self.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 76, in shutdown
(EngineCore_DP0 pid=397122) worker.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/worker/worker_base.py", line 528, in shutdown
(EngineCore_DP0 pid=397122) self.worker.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 675, in shutdown
(EngineCore_DP0 pid=397122) self.model_runner.ensure_kv_transfer_shutdown()
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) AttributeError: 'NoneType' object has no attribute 'ensure_kv_transfer_shutdown'
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.
Metadata
Metadata
Assignees
Labels
bug: Something isn't working