Skip to content

Commit d3aba66

Browse files
committed
Update
[ghstack-poisoned]
2 parents b91f59b + 53d2486 commit d3aba66

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

test/integration/test_integration.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,6 @@ def _test_lin_weight_subclass_api_impl(
885885

886886

887887
@parameterized.expand(COMMON_DEVICE_DTYPE)
888-
@unittest.skipIf(TORCH_VERSION_AT_LEAST_2_4, "skip because there is some bug in inductor codegen")
889888
def test_int8_dynamic_quant_subclass_api(self, device, dtype):
890889
self._test_lin_weight_subclass_api_impl(
891890
_int8da_int8w_api, device, 35, test_dtype=dtype

torchao/dtypes/uintx/semi_sparse_layout.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,15 @@ def _linear_int8_act_int8_weight_semi_structured_sparse_impl(
4444
# must pad
4545
row, col = tmp.shape
4646
from torch.sparse import SparseSemiStructuredTensorCUSPARSELT
47+
4748
tmp_padded = SparseSemiStructuredTensorCUSPARSELT._pad_dense_input(tmp)
4849
# we fuse one of the scalar matrix multiplications (w_scales) into the sparse mm
4950
y_dot_bf16_w_scales_fused = torch._cslt_sparse_mm(
5051
w_vals_int8,
5152
tmp_padded.t(),
5253
alpha=w_scales.to(torch.float32),
5354
out_dtype=torch.bfloat16,
54-
).t()[:row, :]
55+
).t()[:row, :]
5556
y = (y_dot_bf16_w_scales_fused * x_scales.reshape(-1, 1)).reshape(
5657
*x_vals_int8.shape[:-1], y_dot_bf16_w_scales_fused.shape[-1]
5758
)

0 commit comments

Comments (0)