Skip to content

Commit eb0cae2

Browse files
committed
dp4a works
1 parent 4915c6a commit eb0cae2

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

python/tvm/tir/tensor_intrin/dot_product_common.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,10 +45,9 @@ def dp4a_impl(
45 45
T.reads(C[0], A[0:4], B[0:4])
46 46
T.writes(C[0])
47 47

48-
A_i8x4 = B.vload([0], "int8x4")
49-
B_i8x4 = B.vload([0], "int8x4")
50-
51-
C[0] = T.call_pure_extern("__dp4a", A_i8x4, B_i8x4, C[0], dtype="int32")
48+
C[0] += T.call_pure_extern(
49+
"__dp4a", A.vload([0], "int8x4"), B.vload([0], "int8x4"), T.int32(0), dtype="int32"
50+
)
52 51

53 52

54 53
DP4A_INTRIN = "dp4a"

0 commit comments

Comments
 (0)