File tree Expand file tree Collapse file tree 2 files changed +15
-1
lines changed Expand file tree Collapse file tree 2 files changed +15
-1
lines changed Original file line number Diff line number Diff line change 4949from vllm .model_executor .layers .quantization .utils .mxfp4_utils import dequant_mxfp4
5050from vllm .model_executor .layers .quantization .utils .mxfp6_utils import dequant_mxfp6
5151from vllm .model_executor .layers .quantization .utils .ocp_mx_utils import OCP_MX_Scheme
52+ from vllm .model_executor .utils import maybe_disable_graph_partition
5253from vllm .platforms import current_platform
5354from vllm .triton_utils import tl , triton
5455from vllm .utils import direct_register_custom_op , is_torch_equal_or_newer
@@ -1145,7 +1146,11 @@ def fused_topk_bias(
11451146
11461147
11471148# This is used by the Deepseek-V2 and Deepseek-V3 models
1148- @torch .compile (dynamic = True , backend = current_platform .simple_compile_backend )
1149+ @torch .compile (
1150+ dynamic = True ,
1151+ backend = current_platform .simple_compile_backend ,
1152+ options = maybe_disable_graph_partition (current_platform .simple_compile_backend ),
1153+ )
11491154def grouped_topk (
11501155 hidden_states : torch .Tensor ,
11511156 gating_output : torch .Tensor ,
Original file line number Diff line number Diff line change 77
88import torch
99
10+ from vllm .utils import is_torch_equal_or_newer
11+
1012
1113def set_random_seed (seed : int ) -> None :
1214 from vllm .platforms import current_platform
@@ -83,3 +85,10 @@ def get_moe_expert_mapping(
8385 if child_map is not None :
8486 return child_map ()
8587 return []
88+
89+
def maybe_disable_graph_partition(current_backend: str) -> dict[str, bool]:
    """Build the compile options mapping for the given backend name.

    Args:
        current_backend: Name of the compile backend in use.

    Returns:
        ``{"graph_partition": False}`` when the backend is exactly
        ``"inductor"`` and ``is_torch_equal_or_newer("2.9.0.dev")`` holds;
        an empty dict in every other case.
    """
    # Only the "inductor" backend is ever given the option; the version
    # probe is deliberately skipped for any other backend.
    if current_backend != "inductor":
        return {}
    if not is_torch_equal_or_newer("2.9.0.dev"):
        return {}
    return {"graph_partition": False}
You can’t perform that action at this time.
0 commit comments