From a2d5ef088a55428a853c3204b2cc757def8ae9d0 Mon Sep 17 00:00:00 2001 From: FENP <32334296+FENP@users.noreply.github.com> Date: Thu, 16 Oct 2025 21:55:26 +0800 Subject: [PATCH] bugfix: set reorder_batch_threshold back to 1 when using FlashMLA with DCP enabled Signed-off-by: FENP <32334296+FENP@users.noreply.github.com> --- vllm/v1/attention/backends/mla/common.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 1d4e3e4cfe22..38a4d5df1b03 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -558,6 +558,19 @@ def __init__( self.dcp_world_size = 1 self.dcp_rank = 0 + if ( + self.dcp_world_size > 1 + and self.__class__.reorder_batch_threshold > 1 + and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder" + ): + logger.warning_once( + "DCP is enabled but not FlashAttnMLA is used. " + "Set query_len_support back to SINGLE_ONLY " + "and reorder_batch_threshold back to 1." + ) + self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY + self.__class__.reorder_batch_threshold = 1 + # Don't try to access the runner on AMD if self.aot_schedule: self.page_size = self.kv_cache_spec.block_size