From 595c086e82559e62aabf10dc8761788bdf0c4bed Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Tue, 7 Jan 2025 14:12:37 +0800 Subject: [PATCH] [Bugfix] Significant performance drop on CPUs with --num-scheduler-steps > 1 Signed-off-by: Jie Fu --- vllm/engine/arg_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index e94664308cf8..0850bab6bb7e 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1157,6 +1157,12 @@ def create_engine_config(self, if self.enable_chunked_prefill and self.pipeline_parallel_size > 1: raise ValueError("Multi-Step Chunked-Prefill is not supported " "for pipeline-parallel-size > 1") + from vllm.platforms import current_platform + if current_platform.is_cpu(): + logger.warning("Multi-Step (--num-scheduler-steps > 1) is " + "currently not supported for CPUs and has been " + "disabled.") + self.num_scheduler_steps = 1 # make sure num_lookahead_slots is set the higher value depending on # if we are using speculative decoding or multi-step