|
23 | 23 | sync_float8_amax_and_scale_history, |
24 | 24 | ) |
25 | 25 | from float8_experimental.float8_python_api import addmm_float8_unwrapped |
26 | | -from float8_experimental.float8_tensor import Float8Tensor |
| 26 | +from float8_experimental.float8_tensor import ( |
| 27 | + Float8Tensor, |
| 28 | + merge_mm_configs, |
| 29 | + ScaledMMConfig, |
| 30 | +) |
27 | 31 | from float8_experimental.float8_utils import ( |
28 | 32 | amax_to_scale, |
29 | 33 | compute_error, |
@@ -326,6 +330,43 @@ def test_scaled_mm_vs_emulated(self, base_dtype, use_fast_accum): |
326 | 330 | atol, rtol = 2e-3, 2e-3 |
327 | 331 | torch.testing.assert_close(out_scaled_mm, out_emulated, atol=atol, rtol=rtol) |
328 | 332 |
|
| 333 | + @unittest.skipIf(not is_H100, "H100 not available") |
| 334 | + def test_different_configs_error(self): |
| 335 | + x_fp32 = torch.randn(16, 16, device="cuda") |
| 336 | + x_scale = torch.tensor(1.0, device="cuda") |
| 337 | + fp8_dtype = torch.float8_e4m3fn |
| 338 | + a = Float8Tensor.to_float8(x_fp32, x_scale, fp8_dtype) |
| 339 | + b = Float8Tensor.to_float8( |
| 340 | + x_fp32, x_scale, fp8_dtype, mm_config=ScaledMMConfig(True) |
| 341 | + ) |
| 342 | + with pytest.raises( |
| 343 | + AssertionError, |
| 344 | + match="Both mm_configs must have the same emulate value, but got False and True", |
| 345 | + ): |
| 346 | + a @ b |
| 347 | + |
| 348 | + def test_merge_configs(self): |
| 349 | + a = ScaledMMConfig(False, True, True) |
| 350 | + b = ScaledMMConfig(True, False, False) |
| 351 | + with pytest.raises( |
| 352 | + AssertionError, |
| 353 | + match="Both mm_configs must have the same emulate value, but got False and True", |
| 354 | + ): |
| 355 | + merge_mm_configs(a, b) |
| 356 | + a = ScaledMMConfig(False, True, True) |
| 357 | + b = ScaledMMConfig(False, False, False) |
| 358 | + c = merge_mm_configs(a, b) |
| 359 | + assert c.emulate is False |
| 360 | + assert c.use_fast_accum is False |
| 361 | + assert c.fp8_output is False |
| 362 | + |
| 363 | + a = ScaledMMConfig(False, True, False) |
| 364 | + b = ScaledMMConfig(False, True, False) |
| 365 | + c = merge_mm_configs(a, b) |
| 366 | + assert c.emulate is False |
| 367 | + assert c.use_fast_accum is True |
| 368 | + assert c.fp8_output is False |
| 369 | + |
329 | 370 |
|
330 | 371 | class TestNumerics: |
331 | 372 | @pytest.mark.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) |
|
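For context, the new test_merge_configs encodes the intended merge semantics: the emulate flags of the two operands must match, while use_fast_accum and fp8_output are only kept when both operands enable them. A minimal sketch of those semantics (hypothetical, inferred from the assertions in the diff; not the library's actual merge_mm_configs implementation) could look like:

```python
# Hypothetical sketch of the merge semantics exercised by test_merge_configs,
# assuming ScaledMMConfig is a namedtuple(emulate, use_fast_accum, fp8_output).
# Inferred from the test assertions above, not copied from the library.
from collections import namedtuple

ScaledMMConfig = namedtuple(
    "ScaledMMConfig", ["emulate", "use_fast_accum", "fp8_output"]
)


def merge_mm_configs(a: ScaledMMConfig, b: ScaledMMConfig) -> ScaledMMConfig:
    # Emulation mode must agree between the two operands.
    assert a.emulate == b.emulate, (
        f"Both mm_configs must have the same emulate value, "
        f"but got {a.emulate} and {b.emulate}"
    )
    # The remaining flags are only kept if both operands request them.
    return ScaledMMConfig(
        emulate=a.emulate,
        use_fast_accum=a.use_fast_accum and b.use_fast_accum,
        fp8_output=a.fp8_output and b.fp8_output,
    )


# Matches the expectations checked in test_merge_configs:
assert merge_mm_configs(
    ScaledMMConfig(False, True, True), ScaledMMConfig(False, False, False)
) == ScaledMMConfig(False, False, False)
assert merge_mm_configs(
    ScaledMMConfig(False, True, False), ScaledMMConfig(False, True, False)
) == ScaledMMConfig(False, True, False)
```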