From 68c0ea90973ee50f187dd501a0df1306c0eaff06 Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Fri, 3 Feb 2023 17:16:17 +0000
Subject: [PATCH 1/5] [Arith] ConstIntBound was incorrectly assuming bounds
 were over int64_t range

---
 src/tir/transforms/lower_intrin.cc            | 22 +++++++++++++++----
 .../python/topi/python/test_topi_transform.py |  1 -
 2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc
index 8c850f0dea41..6faa5aba5872 100644
--- a/src/tir/transforms/lower_intrin.cc
+++ b/src/tir/transforms/lower_intrin.cc
@@ -118,8 +118,15 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer {
       // If the numerator's lower bound is known, express the floordiv
       // in terms of truncdiv using only positive operands.
       arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a);
-      if (const_int_bound->min_value != arith::ConstIntBound::kNegInf &&
-          const_int_bound->min_value < 0 &&
+      // this will depend on the size of op->a
+      int64_t kNegInf;
+      if (op->a.dtype() == DataType::Int(32)) {
+        kNegInf = -std::numeric_limits<int32_t>::max();
+      } else {
+        kNegInf = arith::ConstIntBound::kNegInf;
+      }
+
+      if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 &&
           const_int_bound->min_value > Downcast<IntImm>(tvm::min_value(op->a->dtype))->value) {
         // The goal is to write floordiv(a,b) in terms of truncdiv, without using
         // negative operands.
@@ -214,8 +221,15 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer {
       // If the numerator's lower bound is known, express the floormod
       // in terms of truncmod using only positive operands.
       arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a);
-      if (const_int_bound->min_value != arith::ConstIntBound::kNegInf &&
-          const_int_bound->min_value < 0 &&
+      // this will depend on the size of op->a
+      int64_t kNegInf;
+      if (op->a.dtype() == DataType::Int(32)) {
+        kNegInf = -std::numeric_limits<int32_t>::max();
+      } else {
+        kNegInf = arith::ConstIntBound::kNegInf;
+      }
+
+      if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 &&
           const_int_bound->min_value > Downcast<IntImm>(tvm::min_value(op->a->dtype))->value) {
         // The goal is to write floormod(a,b) in terms of truncdiv and truncmod,
         // without using negative operands.
diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py
index e34905f15379..0f64b486f375 100644
--- a/tests/python/topi/python/test_topi_transform.py
+++ b/tests/python/topi/python/test_topi_transform.py
@@ -859,7 +859,6 @@ def test_dynamic_strided_slice():
     verify_dynamic_strided_slice((3, 4, 3), [0, 2, 0], [1, 2, 3])
 
 
-@tvm.testing.requires_gpu
 @tvm.testing.uses_gpu
 def test_strided_set():
     verify_strided_set((3, 4, 3), (3, 2, 2), [0, 3, 0], [4, 1, 4], [1, -1, 2])

From 7817c8bb898b98f41d87671086ea6d1eeeff5e30 Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Mon, 20 Feb 2023 12:39:45 +0000
Subject: [PATCH 2/5] [Arith] Improved the floormod and floordiv conversion
 check to be simpler.

Added test to cover all integer data types
---
 src/tir/transforms/lower_intrin.cc     |  24 +-----
 tests/python/relay/test_op_floordiv.py | 112 +++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 20 deletions(-)
 create mode 100644 tests/python/relay/test_op_floordiv.py

diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc
index 6faa5aba5872..4cffe2a19d60 100644
--- a/src/tir/transforms/lower_intrin.cc
+++ b/src/tir/transforms/lower_intrin.cc
@@ -118,16 +118,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer {
       // If the numerator's lower bound is known, express the floordiv
       // in terms of truncdiv using only positive operands.
       arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a);
-      // this will depend on the size of op->a
-      int64_t kNegInf;
-      if (op->a.dtype() == DataType::Int(32)) {
-        kNegInf = -std::numeric_limits<int32_t>::max();
-      } else {
-        kNegInf = arith::ConstIntBound::kNegInf;
-      }
-
-      if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 &&
-          const_int_bound->min_value > Downcast<IntImm>(tvm::min_value(op->a->dtype))->value) {
+      if (const_int_bound->min_value < 0 &&
+          const_int_bound->min_value > -(Downcast<IntImm>(tvm::max_value(op->a->dtype))->value)) {
         // The goal is to write floordiv(a,b) in terms of truncdiv, without using
         // negative operands.
         //
@@ -221,16 +213,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer {
       // If the numerator's lower bound is known, express the floormod
       // in terms of truncmod using only positive operands.
       arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a);
-      // this will depend on the size of op->a
-      int64_t kNegInf;
-      if (op->a.dtype() == DataType::Int(32)) {
-        kNegInf = -std::numeric_limits<int32_t>::max();
-      } else {
-        kNegInf = arith::ConstIntBound::kNegInf;
-      }
-
-      if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 &&
-          const_int_bound->min_value > Downcast<IntImm>(tvm::min_value(op->a->dtype))->value) {
+      if (const_int_bound->min_value < 0 &&
+          const_int_bound->min_value > -(Downcast<IntImm>(tvm::max_value(op->a->dtype))->value)) {
         // The goal is to write floormod(a,b) in terms of truncdiv and truncmod,
         // without using negative operands.
         //
diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py
new file mode 100644
index 000000000000..57b4c77bb447
--- /dev/null
+++ b/tests/python/relay/test_op_floordiv.py
@@ -0,0 +1,112 @@
+import numpy as np
+import pytest
+import tvm
+from tvm import te
+import scipy
+from tvm import relay
+import pytest
+from tvm.relay.testing import run_infer_type
+import tvm.topi.testing
+from tvm.contrib.nvcc import have_fp16
+import tvm.testing
+from tvm.topi.utils import get_const_tuple
+from tvm.script import tir
+
+executor_kind = tvm.testing.parameter("graph", "vm")
+
+
+@tvm.testing.uses_gpu
+def test_floor_div_op(target, dev):
+    N = 100
+    divisor = 5
+
+    @tir.prim_func
+    def func_64(
+        A: tir.Buffer[(N + 100, 2), "int64"],
+        B: tir.Buffer[(N), "int64"],
+        C: tir.Buffer[(N), "int64"],
+    ):
+        for i in tir.serial(N):
+            with tir.block("A"):
+                v_i = tir.axis.spatial(N, i)
+                A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int64"), divisor)
+                A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int64"), divisor)
+                A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor)
+                A[v_i + 100, 1] = tir.floormod(B[v_i], divisor)
+
+    @tir.prim_func
+    def func_32(
+        A: tir.Buffer[(N + 100, 2), "int32"],
+        B: tir.Buffer[(N), "int32"],
+        C: tir.Buffer[(N), "int32"],
+    ):
+        for i in tir.serial(N):
+            with tir.block("A"):
+                v_i = tir.axis.spatial(N, i)
+                A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int32"), divisor)
+                A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int32"), divisor)
+                A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor)
+                A[v_i + 100, 1] = tir.floormod(B[v_i], divisor)
+
+    @tir.prim_func
+    def func_16(
+        A: tir.Buffer[(N + 100, 2), "int16"],
+        B: tir.Buffer[(N), "int16"],
+        C: tir.Buffer[(N), "int16"],
+    ):
+        for i in tir.serial(N):
+            with tir.block("A"):
+                v_i = tir.axis.spatial(N, i)
+                A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int16"), divisor)
+                A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int16"), divisor)
+                A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor)
+                A[v_i + 100, 1] = tir.floormod(B[v_i], divisor)
+
+    @tir.prim_func
+    def func_8(
+        A: tir.Buffer[(N + 100, 2), "int8"], B: tir.Buffer[(N), "int8"], C: tir.Buffer[(N), "int8"]
+    ):
+        for i in tir.serial(N):
+            with tir.block("A"):
+                v_i = tir.axis.spatial(N, i)
+                A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int8"), divisor)
+                A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int8"), divisor)
+                A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor)
+                A[v_i + 100, 1] = tir.floormod(B[v_i], divisor)
+
+    for opfunc, type in [
+        (func_8, "int8"),
+        (func_16, "int16"),
+        (func_32, "int32"),
+        (func_64, "int64"),
+    ]:
+        built = tvm.build(opfunc, target=target)
+        x_data = np.random.randint(te.min_value(type), te.max_value(type), size=(100), dtype=type)
+        data = []
+        for i in range(N):
+            data.append(i)
+
+        y_data = np.asarray(data, dtype=type)
+
+        a_dev = tvm.nd.empty([N + 100, 2], type, dev)
+        b_dev = tvm.nd.array(x_data, dev)
+        c_dev = tvm.nd.array(y_data, dev)
+
+        built(a_dev, b_dev, c_dev)
+
+        a = a_dev.numpy()
+        b = b_dev.numpy()
+        c = c_dev.numpy()
+
+        #python modulo behaves a bit different to tvm floormod for negative numbers
+        for i in range(N+100):
+            if a[i, 1] < 0:
+                a[i, 1] = divisor+a[i, 1]
+
+        np.testing.assert_array_equal(a[:100, 0], (c-te.max_value(type)) // divisor)
+        np.testing.assert_array_equal(a[:100, 1], (c-te.max_value(type)) % divisor)
+        np.testing.assert_array_equal(a[100:N+100, 0], b // divisor)
+        np.testing.assert_array_equal(a[100:N+100, 1], b % divisor)
+    
+if __name__ == "__main__":
+    tvm.testing.main()

From f8578cf5cf23b2cbc52679d859523884fdf250a5 Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Mon, 20 Feb 2023 16:28:35 +0000
Subject: [PATCH 3/5] Fixing lint issues and adding copyright.

Adds a copyright message to new file and cleans up unnecessary imports
---
 tests/python/relay/test_op_floordiv.py | 40 ++++++++++++++++----------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py
index 57b4c77bb447..96f52c9fb77c 100644
--- a/tests/python/relay/test_op_floordiv.py
+++ b/tests/python/relay/test_op_floordiv.py
@@ -1,15 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 import numpy as np
 import pytest
 import tvm
 from tvm import te
-import scipy
-from tvm import relay
-import pytest
-from tvm.relay.testing import run_infer_type
-import tvm.topi.testing
-from tvm.contrib.nvcc import have_fp16
 import tvm.testing
-from tvm.topi.utils import get_const_tuple
 from tvm.script import tir
 
 executor_kind = tvm.testing.parameter("graph", "vm")
@@ -98,15 +107,16 @@ def func_8(
         b = b_dev.numpy()
         c = c_dev.numpy()
 
-        #python modulo behaves a bit different to tvm floormod for negative numbers
-        for i in range(N+100):
+        # python modulo behaves a bit different to tvm floormod for negative numbers
+        for i in range(N + 100):
             if a[i, 1] < 0:
-                a[i, 1] = divisor+a[i, 1]
+                a[i, 1] = divisor + a[i, 1]
+
+        np.testing.assert_array_equal(a[:100, 0], (c - te.max_value(type)) // divisor)
+        np.testing.assert_array_equal(a[:100, 1], (c - te.max_value(type)) % divisor)
+        np.testing.assert_array_equal(a[100 : N + 100, 0], b // divisor)
+        np.testing.assert_array_equal(a[100 : N + 100, 1], b % divisor)
+
 
-        np.testing.assert_array_equal(a[:100, 0], (c-te.max_value(type)) // divisor)
-        np.testing.assert_array_equal(a[:100, 1], (c-te.max_value(type)) % divisor)
-        np.testing.assert_array_equal(a[100:N+100, 0], b // divisor)
-        np.testing.assert_array_equal(a[100:N+100, 1], b % divisor)
-    
 if __name__ == "__main__":
     tvm.testing.main()

From c4084ff9ba809e0bf6d52c87b0f25103e555adf0 Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Fri, 24 Feb 2023 10:45:33 +0000
Subject: [PATCH 4/5] [Arith] Fix review comments

---
 tests/python/relay/test_op_floordiv.py | 29 ++++++++++----------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py
index 96f52c9fb77c..7c59cf3a88bf 100644
--- a/tests/python/relay/test_op_floordiv.py
+++ b/tests/python/relay/test_op_floordiv.py
@@ -21,19 +21,16 @@
 import tvm.testing
 from tvm.script import tir
 
-executor_kind = tvm.testing.parameter("graph", "vm")
 
-
-@tvm.testing.uses_gpu
 def test_floor_div_op(target, dev):
     N = 100
     divisor = 5
 
     @tir.prim_func
     def func_64(
-        A: tir.Buffer[(N + 100, 2), "int64"],
-        B: tir.Buffer[(N), "int64"],
-        C: tir.Buffer[(N), "int64"],
+        A: tir.Buffer((N + 100, 2), "int64"),
+        B: tir.Buffer((N), "int64"),
+        C: tir.Buffer((N), "int64"),
     ):
         for i in tir.serial(N):
             with tir.block("A"):
@@ -45,9 +42,9 @@ def func_64(
 
     @tir.prim_func
     def func_32(
-        A: tir.Buffer[(N + 100, 2), "int32"],
-        B: tir.Buffer[(N), "int32"],
-        C: tir.Buffer[(N), "int32"],
+        A: tir.Buffer((N + 100, 2), "int32"),
+        B: tir.Buffer((N), "int32"),
+        C: tir.Buffer((N), "int32"),
     ):
         for i in tir.serial(N):
             with tir.block("A"):
@@ -59,9 +56,9 @@ def func_32(
 
     @tir.prim_func
     def func_16(
-        A: tir.Buffer[(N + 100, 2), "int16"],
-        B: tir.Buffer[(N), "int16"],
-        C: tir.Buffer[(N), "int16"],
+        A: tir.Buffer((N + 100, 2), "int16"),
+        B: tir.Buffer((N), "int16"),
+        C: tir.Buffer((N), "int16"),
     ):
         for i in tir.serial(N):
             with tir.block("A"):
@@ -73,7 +70,7 @@ def func_16(
 
     @tir.prim_func
     def func_8(
-        A: tir.Buffer[(N + 100, 2), "int8"], B: tir.Buffer[(N), "int8"], C: tir.Buffer[(N), "int8"]
+        A: tir.Buffer((N + 100, 2), "int8"), B: tir.Buffer((N), "int8"), C: tir.Buffer((N), "int8")
     ):
         for i in tir.serial(N):
             with tir.block("A"):
@@ -91,11 +88,7 @@ def func_8(
     ]:
         built = tvm.build(opfunc, target=target)
         x_data = np.random.randint(te.min_value(type), te.max_value(type), size=(100), dtype=type)
-        data = []
-        for i in range(N):
-            data.append(i)
-
-        y_data = np.asarray(data, dtype=type)
+        y_data = np.asarray([i for i in range(N)], dtype=type)
 
         a_dev = tvm.nd.empty([N + 100, 2], type, dev)
         b_dev = tvm.nd.array(x_data, dev)

From e54d48b44b7fde8da70cf41f5f4d59dd012ce530 Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Fri, 24 Feb 2023 14:07:53 +0000
Subject: [PATCH 5/5] Fixing target definition to llvm

---
 tests/python/relay/test_op_floordiv.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py
index 7c59cf3a88bf..8828a0155c89 100644
--- a/tests/python/relay/test_op_floordiv.py
+++ b/tests/python/relay/test_op_floordiv.py
@@ -22,7 +22,9 @@
 from tvm.script import tir
 
 
-def test_floor_div_op(target, dev):
+def test_floor_div_op():
+    target = "llvm"
+    dev = tvm.device(target)
     N = 100
     divisor = 5