Commit b172747

Functionalize attn+quant patterns
Signed-off-by: Luka Govedič <[email protected]>
1 parent d96913a · commit b172747

File tree

5 files changed: +281 -214 lines

tests/compile/backend.py

Lines changed: 4 additions & 2 deletions
@@ -56,8 +56,10 @@ def __init__(self, *passes: Union[InductorPass, Callable[[fx.Graph], None]]):
         self.inductor_config["post_grad_custom_post_pass"] = self.post_pass

         if compile_config.debug_dump_path:
-            self.debug_dump_path = (Path(compile_config.debug_dump_path) /
-                                    f"rank_{vllm_config.parallel_config.rank}")
+            self.debug_dump_path = (
+                Path(compile_config.debug_dump_path)
+                / f"rank_{vllm_config.parallel_config.rank}"
+            )
             self.ctx = depyf.prepare_debug(str(self.debug_dump_path))
             self.ctx.__enter__()
         else:
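
For context, the reformatted block above builds one debug dump directory per rank and hands it to depyf. A minimal standalone sketch of the same idea, with a hypothetical dump root and rank standing in for compile_config.debug_dump_path and vllm_config.parallel_config.rank (requires the depyf package):

import depyf
from pathlib import Path

# Hypothetical stand-ins for the config values used in backend.py above.
debug_dump_root = "/tmp/vllm_compile_debug"
rank = 0

# One subdirectory per rank, so parallel workers do not overwrite each
# other's dumps.
debug_dump_path = Path(debug_dump_root) / f"rank_{rank}"

# backend.py stores the context object and calls __enter__ manually because
# the debug scope must outlive __init__; a plain `with` block is enough for a
# one-off script.
with depyf.prepare_debug(str(debug_dump_path)):
    pass  # run the torch.compile workload whose artifacts should be dumped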

tests/compile/test_fusion.py

Lines changed: 50 additions & 27 deletions
@@ -8,13 +8,24 @@
 from vllm.compilation.fusion import RMSNormQuantFusionPass
 from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.compilation.post_cleanup import PostCleanupPass
-from vllm.config import (CompilationConfig, CompilationLevel, ModelConfig,
-                         PassConfig, VllmConfig)
+from vllm.config import (
+    CompilationConfig,
+    CompilationLevel,
+    ModelConfig,
+    PassConfig,
+    VllmConfig,
+)
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    GroupShape, QuantKey, ScaleDesc)
+    GroupShape,
+    QuantKey,
+    ScaleDesc,
+)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
-    Fp8LinearOp, cutlass_fp8_supported, maybe_create_device_identity)
+    Fp8LinearOp,
+    cutlass_fp8_supported,
+    maybe_create_device_identity,
+)
 from vllm.platforms import current_platform

 from ..utils import override_cutlass_fp8_supported
@@ -24,9 +35,15 @@


 class TestModel(torch.nn.Module):
-
-    def __init__(self, hidden_size: int, eps: float, static: bool,
-                 cuda_force_torch: bool, *args, **kwargs):
+    def __init__(
+        self,
+        hidden_size: int,
+        eps: float,
+        static: bool,
+        cuda_force_torch: bool,
+        *args,
+        **kwargs,
+    ):
         super().__init__(*args, **kwargs)
         self.cuda_force_torch = cuda_force_torch
         self.norm = [RMSNorm(hidden_size, eps) for _ in range(4)]
@@ -57,30 +74,27 @@ def forward(self, x):
         x = resid = torch.relu(x)
         y = self.norm[0](x)

-        x2 = self.fp8_linear.apply(y,
-                                   self.w[0],
-                                   self.wscale[0],
-                                   input_scale=self.scale[0])
+        x2 = self.fp8_linear.apply(
+            y, self.w[0], self.wscale[0], input_scale=self.scale[0]
+        )
         # make sure resid is used for replacement to work
         y2, resid = self.norm[1](x2, resid)

-        x3 = self.fp8_linear.apply(y2,
-                                   self.w[1],
-                                   self.wscale[1],
-                                   input_scale=self.scale[1])
+        x3 = self.fp8_linear.apply(
+            y2, self.w[1], self.wscale[1], input_scale=self.scale[1]
+        )

         y3, resid = self.norm[2](x3, resid)  # use resid here

-        x4 = self.fp8_linear.apply(y3,
-                                   self.w[2],
-                                   self.wscale[2],
-                                   input_scale=self.scale[2])
+        x4 = self.fp8_linear.apply(
+            y3, self.w[2], self.wscale[2], input_scale=self.scale[2]
+        )

         y4, resid = self.norm[3](x4, resid)  # use resid here
         return y4


-@pytest.mark.parametrize("dtype", [torch.float16]) #, torch.bfloat16])
+@pytest.mark.parametrize("dtype", [torch.float16])  # , torch.bfloat16])
 @pytest.mark.parametrize("hidden_size", [64])
 @pytest.mark.parametrize("num_tokens", [257])
 @pytest.mark.parametrize("eps", [1e-5, 1e-6])
@@ -89,13 +103,22 @@ def forward(self, x):
 @pytest.mark.parametrize("enable_quant_fp8", [True, False])
 # cuda_force_torch used to test torch code path on platforms that
 # cutlass_fp8_supported() == True.
-@pytest.mark.parametrize("cuda_force_torch",
-                         [True, False] if cutlass_fp8_supported() else [True])
-@pytest.mark.skipif(not current_platform.is_cuda_alike(),
-                    reason="Only test on CUDA and ROCm")
-def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps, static,
-                              enable_rms_norm, enable_quant_fp8,
-                              cuda_force_torch):
+@pytest.mark.parametrize(
+    "cuda_force_torch", [True, False] if cutlass_fp8_supported() else [True]
+)
+@pytest.mark.skipif(
+    not current_platform.is_cuda_alike(), reason="Only test on CUDA and ROCm"
+)
+def test_fusion_rmsnorm_quant(
+    dtype,
+    hidden_size,
+    num_tokens,
+    eps,
+    static,
+    enable_rms_norm,
+    enable_quant_fp8,
+    cuda_force_torch,
+):
     torch.set_default_device("cuda")
     torch.set_default_dtype(dtype)
     torch.manual_seed(1)
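
The decorator stack above is only reformatted, not changed, so the test still expands into the same parametrization grid and can be selected by node id. A minimal driver sketch, assuming it is run from the vLLM repo root on a CUDA or ROCm machine with vLLM and pytest installed:

import pytest

# Select only the reformatted test; pytest expands the decorators above
# (dtype, hidden_size, num_tokens, eps, static, enable_rms_norm,
# enable_quant_fp8, cuda_force_torch) into individual parametrized cases.
raise SystemExit(
    pytest.main(["-q", "tests/compile/test_fusion.py::test_fusion_rmsnorm_quant"])
)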
