Disable swizzled weight/scale layouts on H20 (fall back to StridedLayout)

dongfengy · dongfengy · commit 7794e137645b · 2025-08-07T19:54:58.000Z
Signed-off-by: Dongfeng Yu <dongfengy@nvidia.com>
diff --git a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_triton.py b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_triton.py
@@ -643,6 +643,15 @@ def swizzle_weight_and_scale(w: torch.Tensor, w_scale: torch.Tensor):
         mx_axis=1)
     scale_layout, scale_layout_opts = layout.make_default_matmul_mxfp4_w_scale_layout(
         mx_axis=1, num_warps=num_warps)
+    # Swizzling path is broken on H20; fall back to plain strided layouts.
+    if torch.cuda.get_device_name() == "NVIDIA H20":
+        from triton_kernels.tensor_details.layout_details.strided import \
+            StridedLayout
+        value_layout = StridedLayout
+        value_layout_opts = dict()
+        scale_layout = StridedLayout
+        scale_layout_opts = dict()
+
     opt = {"value_layout": value_layout, "value_layout_opts": value_layout_opts, \
             "scale_layout": scale_layout, "scale_layout_opts": scale_layout_opts}