[Bugfix][ONNX] Improve broadcast and batch_matmul conversion (#16961)

xhmelon · web-flow · commit 59ef0ee9d87a · 2024-05-05T17:17:18.000+08:00
* [Bugfix][VTA] Fix FSIM compile error on macOS. VTA FSIM could not be built on macOS because it relies on malloc.h and memalign, both of which are deprecated and not provided by macOS. This issue was reported in #13173. This commit stops including malloc.h in the VTA Runtime, since stdlib.h already provides the functions we need. It also replaces memalign with posix_memalign, which is a portable, standard function. * Fix format. * [Bugfix][ONNX] Improve broadcast and batch_matmul conversion. This commit adds batch_matmul conversion between a tensor of rank 3 or higher and a 1-D tensor, with proper broadcasting, which improves the robustness of the ONNX frontend. This issue was reported in #16891. * Fix format.
diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
@@ -307,6 +307,21 @@ def matmul_out_dtype(inputs, out_dtype):
             a = flatten_to_nd(inputs[0], a_shape, 2)
             b = _op.transpose(inputs[1])
             output = _op.nn.dense(a, b, out_dtype=out_dtype)
+        elif a_rank == 1 or b_rank == 1:
+            a, b = inputs
+            _a_shape = tuple(a_shape.data.numpy())
+            _b_shape = tuple(b_shape.data.numpy())
+            if a_rank == 1:
+                axis = -2
+                a = _op.expand_dims(a, axis=0)
+                batches = _b_shape[:-2]
+                a = _op.broadcast_to(a, (*batches, 1, _a_shape[0]))
+            else:
+                axis = -1
+                b = _op.expand_dims(b, axis=-1)
+                batches = _a_shape[:-2]
+                b = _op.broadcast_to(b, (*batches, _b_shape[0], 1))
+            return _op.squeeze(_op.nn.batch_matmul(a, b, transpose_b=False), axis=axis)
         else:
             a = inputs[0]
             b = inputs[1]
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
@@ -1493,6 +1493,8 @@ def verify_batch_matmul(a_shape, b_shape, out_shape, convert_config=None):
     verify_batch_matmul((2, 4, 3), (3, 4), (2, 4, 4))
     verify_batch_matmul((2, 3, 4, 3), (3, 4), (2, 3, 4, 4))
     # Test implicit broadcasting.
+    verify_batch_matmul((5,), (5, 5, 4), (5, 4))
+    verify_batch_matmul((5, 4, 5), (5,), (5, 4))
     verify_batch_matmul((4, 3), (2, 3, 4), (2, 4, 4))
     verify_batch_matmul((2, 4, 3), (1, 3, 4), (2, 4, 4))
     verify_batch_matmul((1, 4, 3), (2, 3, 4), (2, 4, 4))