diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index d5bc3446edb8..30980316ecfc 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -198,9 +198,8 @@ def forward(self, x: torch.Tensor): def all_gather_interleave(local_tensor, hidden_size: int, tp_size: int): """All-gather the input tensor interleavely across model parallel group.""" - import torch.distributed as dist gathered_tensors = [torch.zeros_like(local_tensor) for _ in range(tp_size)] - dist.all_gather(gathered_tensors, local_tensor) + parallel_state.get_tp_group().all_gather(gathered_tensors, local_tensor) gathered_tensors_split = [ torch.split(tensor, hidden_size // tp_size, -1)