diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index 0ebd7a15eab9..3767b6ebfc84 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -115,6 +115,12 @@ def init_distributed_environment(
             local_rank = rank
     global _LOCAL_RANK
     _LOCAL_RANK = local_rank
+
+    if world_size == 1:
+        # Skip the warmup when the world size is 1, for compatibility with vGPU.
+        # See https://github.com/vllm-project/vllm/issues/4587 for details.
+        return
+
     # A small all_reduce for warmup.
     data = torch.zeros(1)
     if torch.cuda.is_available():
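
For context, below is a minimal, self-contained sketch of the warmup logic after this patch. The helper name `maybe_warmup_all_reduce` is hypothetical (in vLLM this logic is inlined in `init_distributed_environment`), and the sketch assumes a `torch.distributed` process group has already been initialized by the caller.

```python
import torch
import torch.distributed


def maybe_warmup_all_reduce(world_size: int, local_rank: int) -> None:
    """Hypothetical standalone version of the patched warmup step."""
    if world_size == 1:
        # Mirrors the patch: with a single process there is nothing to
        # synchronize, and issuing the collective caused problems on vGPU
        # setups (see vllm-project/vllm issue #4587), so return early.
        return
    # A small all_reduce for warmup, as in the original code path.
    data = torch.zeros(1)
    if torch.cuda.is_available():
        data = data.to(device=f"cuda:{local_rank}")
    torch.distributed.all_reduce(data)
```

With this guard, a single-process setup never issues the warmup collective at all; multi-process setups still perform the small all_reduce so that the communication backend is exercised once during initialization.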