From b0084e6d920c524faa299127aa9c16071c0da9f4 Mon Sep 17 00:00:00 2001 From: yucai-intel <108388355+yucai-intel@users.noreply.github.com> Date: Fri, 10 Oct 2025 15:38:14 +0800 Subject: [PATCH 1/5] Update CompareKernels.cpp --- src/ATen/native/xpu/sycl/CompareKernels.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/ATen/native/xpu/sycl/CompareKernels.cpp b/src/ATen/native/xpu/sycl/CompareKernels.cpp index 91c3ac6141..3126742b38 100644 --- a/src/ATen/native/xpu/sycl/CompareKernels.cpp +++ b/src/ATen/native/xpu/sycl/CompareKernels.cpp @@ -124,10 +124,15 @@ void compare_kernel_impl(TensorIteratorBase& iter, OpType op) { } inline void compare_kernel_with_scalars(TensorIteratorBase& iter, OpType op) { - AT_DISPATCH_ALL_TYPES_AND3( - kHalf, kBFloat16, kBool, iter.common_dtype(), "compare_xpu", [&]() { - compare_kernel_impl(iter, op); - }); + AT_DISPATCH_V2( + iter.common_dtype(), + "compare_xpu", + [&]() { compare_kernel_impl(iter, op); }, + AT_EXPAND(AT_ALL_TYPES), + kHalf, + kBFloat16, + kBool, + AT_EXPAND(AT_FLOAT8_TYPES)); } void ge_kernel(TensorIteratorBase& iter) { From 8aafd316529f4ef9b47e53b246c0374afd51d36c Mon Sep 17 00:00:00 2001 From: yucai-intel <108388355+yucai-intel@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:54:08 +0800 Subject: [PATCH 2/5] Update test_compare.py --- test/regressions/test_compare.py | 126 +++++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 23 deletions(-) diff --git a/test/regressions/test_compare.py b/test/regressions/test_compare.py index 72d3a14f3b..d2ff466880 100644 --- a/test/regressions/test_compare.py +++ b/test/regressions/test_compare.py @@ -2,46 +2,126 @@ import torch from torch.testing._internal.common_utils import TestCase - class TestTorchMethod(TestCase): + # Define float8 dtypes + FLOAT8_DTYPES = ( + torch.float8_e5m2, + torch.float8_e4m3fn, + torch.float8_e4m3fnuz, + torch.float8_e5m2fnuz, + torch.float8_e8m0fnu + ) + + # Define the set of all dtypes to be tested + TEST_DTYPES = ( + torch.float32, + torch.float64, + torch.half, + torch.bfloat16, + torch.bool, + ) + FLOAT8_DTYPES + def _test_compare_fn(self, fn, dtype): - # test tensor + # --- Tensor Test --- x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x2 = torch.tensor([[1.0, 1.0], [4.0, 4.0]], dtype=dtype) + + # Handle boolean input + if dtype == torch.bool: + x1 = x1.bool() + x2 = x2.bool() + + # Determine the golden reference tensor on CPU + if dtype in self.FLOAT8_DTYPES: + # For float8, use float32 as the CPU reference type + x1_ref = x1.cpu().to(torch.float32) + x2_ref = x2.cpu().to(torch.float32) + else: + # For other types, use the original dtype + x1_ref = x1.cpu() + x2_ref = x2.cpu() + + y_ref = fn(x1_ref, x2_ref) + + # XPU operation x1_xpu = x1.xpu() x2_xpu = x2.xpu() - y = fn(x1, x2) y_xpu = fn(x1_xpu, x2_xpu) - self.assertEqual(y_xpu.cpu(), y) - y_xpu.zero_() + + # Compare XPU result and CPU golden reference (comparison ops yield exact boolean values) + self.assertEqual(y_xpu.cpu(), y_ref) + + # Test the version with out= argument + # For comparison ops, the output is bool, which doesn't support zero_(). + # We must create a new out tensor. + if y_xpu.dtype != torch.bool: + y_xpu.zero_() + else: + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + fn(x1_xpu, x2_xpu, out=y_xpu) - self.assertEqual(y_xpu.cpu(), y) - # test scalar + self.assertEqual(y_xpu.cpu(), y_ref) + + # --- 2. Scalar Test --- x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x2 = 2.0 + + if dtype == torch.bool: + x1 = x1.bool() + + # Determine the golden reference tensor on CPU + if dtype in self.FLOAT8_DTYPES: + x1_ref = x1.cpu().to(torch.float32) + else: + x1_ref = x1.cpu() + + x2_ref = x2 # Scalar remains the same + y_ref = fn(x1_ref, x2_ref) + + # XPU operation x1_xpu = x1.xpu() x2_xpu = x2 - y = fn(x1, x2) y_xpu = fn(x1_xpu, x2_xpu) - self.assertEqual(y_xpu.cpu(), y) - y_xpu.zero_() + + self.assertEqual(y_xpu.cpu(), y_ref) + + # Test the version with out= argument + if y_xpu.dtype != torch.bool: + y_xpu.zero_() + else: + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + fn(x1_xpu, x2_xpu, out=y_xpu) - self.assertEqual(y_xpu.cpu(), y) + self.assertEqual(y_xpu.cpu(), y_ref) + + # --- Test methods iterating over DTypes --- - def test_eq(self, dtype=torch.float): - self._test_compare_fn(torch.eq, dtype) + def test_eq(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.eq, dtype) - def test_ne(self, dtype=torch.float): - self._test_compare_fn(torch.ne, dtype) + def test_ne(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.ne, dtype) - def test_lt(self, dtype=torch.float): - self._test_compare_fn(torch.lt, dtype) + def test_lt(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.lt, dtype) - def test_le(self, dtype=torch.float): - self._test_compare_fn(torch.le, dtype) + def test_le(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.le, dtype) - def test_gt(self, dtype=torch.float): - self._test_compare_fn(torch.gt, dtype) + def test_gt(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.gt, dtype) - def test_ge(self, dtype=torch.float): - self._test_compare_fn(torch.ge, dtype) + def test_ge(self): + for dtype in self.TEST_DTYPES: + with self.subTest(dtype=dtype): + self._test_compare_fn(torch.ge, dtype) From 4e9725026aa9757cce667fdf5fd294c5aa808e1b Mon Sep 17 00:00:00 2001 From: yucai-intel <108388355+yucai-intel@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:58:02 +0800 Subject: [PATCH 3/5] Update test_compare.py --- test/regressions/test_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/regressions/test_compare.py b/test/regressions/test_compare.py index d2ff466880..e0f2efa35b 100644 --- a/test/regressions/test_compare.py +++ b/test/regressions/test_compare.py @@ -9,7 +9,7 @@ class TestTorchMethod(TestCase): torch.float8_e4m3fn, torch.float8_e4m3fnuz, torch.float8_e5m2fnuz, - torch.float8_e8m0fnu + torch.float8_e8m0fnu, ) # Define the set of all dtypes to be tested From edbeb7958ec96ed9a606a52ff8cfe85c9045ac70 Mon Sep 17 00:00:00 2001 From: yucai-intel <108388355+yucai-intel@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:20:43 +0800 Subject: [PATCH 4/5] format --- test/regressions/test_compare.py | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test/regressions/test_compare.py b/test/regressions/test_compare.py index e0f2efa35b..b45c503855 100644 --- a/test/regressions/test_compare.py +++ b/test/regressions/test_compare.py @@ -5,27 +5,27 @@ class TestTorchMethod(TestCase): # Define float8 dtypes FLOAT8_DTYPES = ( - torch.float8_e5m2, + torch.float8_e5m2, torch.float8_e4m3fn, torch.float8_e4m3fnuz, torch.float8_e5m2fnuz, torch.float8_e8m0fnu, ) - + # Define the set of all dtypes to be tested TEST_DTYPES = ( - torch.float32, - torch.float64, - torch.half, + torch.float32, + torch.float64, + torch.half, torch.bfloat16, - torch.bool, + torch.bool, ) + FLOAT8_DTYPES def _test_compare_fn(self, fn, dtype): - # --- Tensor Test --- + # --- 1. Tensor Test --- x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x2 = torch.tensor([[1.0, 1.0], [4.0, 4.0]], dtype=dtype) - + # Handle boolean input if dtype == torch.bool: x1 = x1.bool() @@ -40,32 +40,32 @@ def _test_compare_fn(self, fn, dtype): # For other types, use the original dtype x1_ref = x1.cpu() x2_ref = x2.cpu() - + y_ref = fn(x1_ref, x2_ref) # XPU operation x1_xpu = x1.xpu() x2_xpu = x2.xpu() y_xpu = fn(x1_xpu, x2_xpu) - + # Compare XPU result and CPU golden reference (comparison ops yield exact boolean values) - self.assertEqual(y_xpu.cpu(), y_ref) - + self.assertEqual(y_xpu.cpu(), y_ref) + # Test the version with out= argument # For comparison ops, the output is bool, which doesn't support zero_(). # We must create a new out tensor. if y_xpu.dtype != torch.bool: y_xpu.zero_() else: - y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) - + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + fn(x1_xpu, x2_xpu, out=y_xpu) self.assertEqual(y_xpu.cpu(), y_ref) # --- 2. Scalar Test --- x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=dtype) x2 = 2.0 - + if dtype == torch.bool: x1 = x1.bool() @@ -74,7 +74,7 @@ def _test_compare_fn(self, fn, dtype): x1_ref = x1.cpu().to(torch.float32) else: x1_ref = x1.cpu() - + x2_ref = x2 # Scalar remains the same y_ref = fn(x1_ref, x2_ref) @@ -82,14 +82,14 @@ def _test_compare_fn(self, fn, dtype): x1_xpu = x1.xpu() x2_xpu = x2 y_xpu = fn(x1_xpu, x2_xpu) - + self.assertEqual(y_xpu.cpu(), y_ref) - + # Test the version with out= argument if y_xpu.dtype != torch.bool: y_xpu.zero_() else: - y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) fn(x1_xpu, x2_xpu, out=y_xpu) self.assertEqual(y_xpu.cpu(), y_ref) From fbcf36a1612aad31ee2bc58a1998e4a397249f63 Mon Sep 17 00:00:00 2001 From: yucai-intel <108388355+yucai-intel@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:23:45 +0800 Subject: [PATCH 5/5] format --- test/regressions/test_compare.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/regressions/test_compare.py b/test/regressions/test_compare.py index b45c503855..fae6563e3a 100644 --- a/test/regressions/test_compare.py +++ b/test/regressions/test_compare.py @@ -2,6 +2,7 @@ import torch from torch.testing._internal.common_utils import TestCase + class TestTorchMethod(TestCase): # Define float8 dtypes FLOAT8_DTYPES = ( @@ -28,8 +29,8 @@ def _test_compare_fn(self, fn, dtype): # Handle boolean input if dtype == torch.bool: - x1 = x1.bool() - x2 = x2.bool() + x1 = x1.bool() + x2 = x2.bool() # Determine the golden reference tensor on CPU if dtype in self.FLOAT8_DTYPES: @@ -55,9 +56,9 @@ def _test_compare_fn(self, fn, dtype): # For comparison ops, the output is bool, which doesn't support zero_(). # We must create a new out tensor. if y_xpu.dtype != torch.bool: - y_xpu.zero_() + y_xpu.zero_() else: - y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) fn(x1_xpu, x2_xpu, out=y_xpu) self.assertEqual(y_xpu.cpu(), y_ref) @@ -67,7 +68,7 @@ def _test_compare_fn(self, fn, dtype): x2 = 2.0 if dtype == torch.bool: - x1 = x1.bool() + x1 = x1.bool() # Determine the golden reference tensor on CPU if dtype in self.FLOAT8_DTYPES: @@ -75,7 +76,7 @@ def _test_compare_fn(self, fn, dtype): else: x1_ref = x1.cpu() - x2_ref = x2 # Scalar remains the same + x2_ref = x2 # Scalar remains the same y_ref = fn(x1_ref, x2_ref) # XPU operation @@ -87,9 +88,9 @@ def _test_compare_fn(self, fn, dtype): # Test the version with out= argument if y_xpu.dtype != torch.bool: - y_xpu.zero_() + y_xpu.zero_() else: - y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) + y_xpu = torch.empty_like(y_xpu, dtype=torch.bool) fn(x1_xpu, x2_xpu, out=y_xpu) self.assertEqual(y_xpu.cpu(), y_ref)