Skip to content

Commit 66ab1b0

Browse files
committed
Enable use_inductor_graph_partition by default on PyTorch >= 2.9. Also update tests that rely on it being False to set it explicitly
Signed-off-by: ProExpertProg <[email protected]>
1 parent 47ae5d8 commit 66ab1b0

File tree

4 files changed

+10
-3
lines changed

4 files changed

+10
-3
lines changed

tests/compile/piecewise/test_multiple_graphs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ def run_model(vllm_config: VllmConfig, model: nn.Module, inputs: torch.Tensor,
174174
return output.cpu()
175175

176176

177+
# TODO: test with use_inductor_graph_partition both True and False
177178
def test_multi_graph_piecewise_compile_outputs_equal():
178179
outputs = []
179180

@@ -212,7 +213,9 @@ def test_multi_graph_piecewise_compile_outputs_equal():
212213

213214
# no compile or cudagraph
214215
vllm_config = VllmConfig(compilation_config=CompilationConfig(
215-
level=CompilationLevel.NO_COMPILATION, ))
216+
level=CompilationLevel.NO_COMPILATION,
217+
use_inductor_graph_partition=False,
218+
))
216219
cudagraph_runtime_mode = CUDAGraphMode.NONE
217220

218221
with set_current_vllm_config(vllm_config):

tests/compile/piecewise/test_toy_llama.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,9 @@ def run_model(llama_config,
256256
cudagraph_runtime_mode = CUDAGraphMode.PIECEWISE
257257
else:
258258
compilation_config = CompilationConfig(
259-
level=CompilationLevel.NO_COMPILATION, )
259+
level=CompilationLevel.NO_COMPILATION,
260+
use_inductor_graph_partition=False, # TODO try both?
261+
)
260262
cudagraph_runtime_mode = CUDAGraphMode.NONE
261263

262264
vllm_config = VllmConfig(compilation_config=compilation_config,

tests/compile/test_decorator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def test_ignore_torch_compile_decorator():
5757
use_cudagraph=True,
5858
splitting_ops=["silly.attention"],
5959
cudagraph_capture_sizes=[1, 2],
60+
use_inductor_graph_partition=False, # TODO test both
6061
))
6162
cudagraph_runtime_mode = CUDAGraphMode.PIECEWISE
6263

@@ -182,6 +183,7 @@ def test_conditional_compile_enable_if():
182183
use_cudagraph=True,
183184
splitting_ops=["silly.attention"],
184185
cudagraph_capture_sizes=[1, 2],
186+
use_inductor_graph_partition=False, # TODO test both
185187
))
186188
cudagraph_runtime_mode = CUDAGraphMode.PIECEWISE
187189

vllm/config/compilation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ class CompilationConfig:
311311
FULL_AND_PIECEWISE instead.
312312
"""
313313

314-
use_inductor_graph_partition: bool = False
314+
use_inductor_graph_partition: bool = is_torch_equal_or_newer("2.9.0")
315315
"""Use inductor graph partition to split the graph at cudagraph_unsafe ops.
316316
This partition happens at inductor codegen time after all passes and fusions
317317
are finished. It generates a single `call` function which wraps

0 commit comments

Comments
 (0)