From 68c0ea90973ee50f187dd501a0df1306c0eaff06 Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Fri, 3 Feb 2023 17:16:17 +0000 Subject: [PATCH 1/5] [Arith] ConstIntBound was incorrectly assuming bounds were over int64_t range --- src/tir/transforms/lower_intrin.cc | 22 +++++++++++++++---- .../python/topi/python/test_topi_transform.py | 1 - 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 8c850f0dea41..6faa5aba5872 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -118,8 +118,15 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floordiv // in terms of truncdiv using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - if (const_int_bound->min_value != arith::ConstIntBound::kNegInf && - const_int_bound->min_value < 0 && + // this will depend on the size of op->a + int64_t kNegInf; + if (op->a.dtype() == DataType::Int(32)) { + kNegInf = -std::numeric_limits::max(); + } else { + kNegInf = arith::ConstIntBound::kNegInf; + } + + if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 && const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { // The goal is to write floordiv(a,b) in terms of truncdiv, without using // negative operands. @@ -214,8 +221,15 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floormod // in terms of truncmod using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - if (const_int_bound->min_value != arith::ConstIntBound::kNegInf && - const_int_bound->min_value < 0 && + // this will depend on the size of op->a + int64_t kNegInf; + if (op->a.dtype() == DataType::Int(32)) { + kNegInf = -std::numeric_limits::max(); + } else { + kNegInf = arith::ConstIntBound::kNegInf; + } + + if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 && const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { // The goal is to write floormod(a,b) in terms of truncdiv and truncmod, // without using negative operands. diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index e34905f15379..0f64b486f375 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -859,7 +859,6 @@ def test_dynamic_strided_slice(): verify_dynamic_strided_slice((3, 4, 3), [0, 2, 0], [1, 2, 3]) -@tvm.testing.requires_gpu @tvm.testing.uses_gpu def test_strided_set(): verify_strided_set((3, 4, 3), (3, 2, 2), [0, 3, 0], [4, 1, 4], [1, -1, 2]) From 7817c8bb898b98f41d87671086ea6d1eeeff5e30 Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Mon, 20 Feb 2023 12:39:45 +0000 Subject: [PATCH 2/5] [Arith] Improved the floormod and floordiv conversion check to be simpler. Added test to cover all integer data types --- src/tir/transforms/lower_intrin.cc | 24 +----- tests/python/relay/test_op_floordiv.py | 112 +++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 20 deletions(-) create mode 100644 tests/python/relay/test_op_floordiv.py diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 6faa5aba5872..4cffe2a19d60 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -118,16 +118,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floordiv // in terms of truncdiv using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - // this will depend on the size of op->a - int64_t kNegInf; - if (op->a.dtype() == DataType::Int(32)) { - kNegInf = -std::numeric_limits::max(); - } else { - kNegInf = arith::ConstIntBound::kNegInf; - } - - if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 && - const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { + if (const_int_bound->min_value < 0 && + const_int_bound->min_value > -(Downcast(tvm::max_value(op->a->dtype))->value)) { // The goal is to write floordiv(a,b) in terms of truncdiv, without using // negative operands. // @@ -221,16 +213,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floormod // in terms of truncmod using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - // this will depend on the size of op->a - int64_t kNegInf; - if (op->a.dtype() == DataType::Int(32)) { - kNegInf = -std::numeric_limits::max(); - } else { - kNegInf = arith::ConstIntBound::kNegInf; - } - - if (const_int_bound->min_value > kNegInf && const_int_bound->min_value < 0 && - const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { + if (const_int_bound->min_value < 0 && + const_int_bound->min_value > -(Downcast(tvm::max_value(op->a->dtype))->value)) { // The goal is to write floormod(a,b) in terms of truncdiv and truncmod, // without using negative operands. // diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py new file mode 100644 index 000000000000..57b4c77bb447 --- /dev/null +++ b/tests/python/relay/test_op_floordiv.py @@ -0,0 +1,112 @@ +import numpy as np +import pytest +import tvm +from tvm import te +import scipy +from tvm import relay +import pytest +from tvm.relay.testing import run_infer_type +import tvm.topi.testing +from tvm.contrib.nvcc import have_fp16 +import tvm.testing +from tvm.topi.utils import get_const_tuple +from tvm.script import tir + +executor_kind = tvm.testing.parameter("graph", "vm") + + +@tvm.testing.uses_gpu +def test_floor_div_op(target, dev): + N = 100 + divisor = 5 + + @tir.prim_func + def func_64( + A: tir.Buffer[(N + 100, 2), "int64"], + B: tir.Buffer[(N), "int64"], + C: tir.Buffer[(N), "int64"], + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int64"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int64"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_32( + A: tir.Buffer[(N + 100, 2), "int32"], + B: tir.Buffer[(N), "int32"], + C: tir.Buffer[(N), "int32"], + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int32"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int32"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_16( + A: tir.Buffer[(N + 100, 2), "int16"], + B: tir.Buffer[(N), "int16"], + C: tir.Buffer[(N), "int16"], + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int16"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int16"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_8( + A: tir.Buffer[(N + 100, 2), "int8"], B: tir.Buffer[(N), "int8"], C: tir.Buffer[(N), "int8"] + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int8"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int8"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + for opfunc, type in [ + (func_8, "int8"), + (func_16, "int16"), + (func_32, "int32"), + (func_64, "int64"), + ]: + built = tvm.build(opfunc, target=target) + x_data = np.random.randint(te.min_value(type), te.max_value(type), size=(100), dtype=type) + data = [] + for i in range(N): + data.append(i) + + y_data = np.asarray(data, dtype=type) + + a_dev = tvm.nd.empty([N + 100, 2], type, dev) + b_dev = tvm.nd.array(x_data, dev) + c_dev = tvm.nd.array(y_data, dev) + + built(a_dev, b_dev, c_dev) + + a = a_dev.numpy() + b = b_dev.numpy() + c = c_dev.numpy() + + #python modulo behaves a bit different to tvm floormod for negative numbers + for i in range(N+100): + if a[i, 1] < 0: + a[i, 1] = divisor+a[i, 1] + + np.testing.assert_array_equal(a[:100, 0], (c-te.max_value(type)) // divisor) + np.testing.assert_array_equal(a[:100, 1], (c-te.max_value(type)) % divisor) + np.testing.assert_array_equal(a[100:N+100, 0], b // divisor) + np.testing.assert_array_equal(a[100:N+100, 1], b % divisor) + +if __name__ == "__main__": + tvm.testing.main() From f8578cf5cf23b2cbc52679d859523884fdf250a5 Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Mon, 20 Feb 2023 16:28:35 +0000 Subject: [PATCH 3/5] Fixing lint issues and adding copyright. Adds a copyright message to new file and cleans up unnecessary imports --- tests/python/relay/test_op_floordiv.py | 40 ++++++++++++++++---------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py index 57b4c77bb447..96f52c9fb77c 100644 --- a/tests/python/relay/test_op_floordiv.py +++ b/tests/python/relay/test_op_floordiv.py @@ -1,15 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import numpy as np import pytest import tvm from tvm import te -import scipy -from tvm import relay -import pytest -from tvm.relay.testing import run_infer_type -import tvm.topi.testing -from tvm.contrib.nvcc import have_fp16 import tvm.testing -from tvm.topi.utils import get_const_tuple from tvm.script import tir executor_kind = tvm.testing.parameter("graph", "vm") @@ -98,15 +107,16 @@ def func_8( b = b_dev.numpy() c = c_dev.numpy() - #python modulo behaves a bit different to tvm floormod for negative numbers - for i in range(N+100): + # python modulo behaves a bit different to tvm floormod for negative numbers + for i in range(N + 100): if a[i, 1] < 0: - a[i, 1] = divisor+a[i, 1] + a[i, 1] = divisor + a[i, 1] + + np.testing.assert_array_equal(a[:100, 0], (c - te.max_value(type)) // divisor) + np.testing.assert_array_equal(a[:100, 1], (c - te.max_value(type)) % divisor) + np.testing.assert_array_equal(a[100 : N + 100, 0], b // divisor) + np.testing.assert_array_equal(a[100 : N + 100, 1], b % divisor) + - np.testing.assert_array_equal(a[:100, 0], (c-te.max_value(type)) // divisor) - np.testing.assert_array_equal(a[:100, 1], (c-te.max_value(type)) % divisor) - np.testing.assert_array_equal(a[100:N+100, 0], b // divisor) - np.testing.assert_array_equal(a[100:N+100, 1], b % divisor) - if __name__ == "__main__": tvm.testing.main() From c4084ff9ba809e0bf6d52c87b0f25103e555adf0 Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Fri, 24 Feb 2023 10:45:33 +0000 Subject: [PATCH 4/5] [Arith] Fix review comments --- tests/python/relay/test_op_floordiv.py | 29 ++++++++++---------------- 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py index 96f52c9fb77c..7c59cf3a88bf 100644 --- a/tests/python/relay/test_op_floordiv.py +++ b/tests/python/relay/test_op_floordiv.py @@ -21,19 +21,16 @@ import tvm.testing from tvm.script import tir -executor_kind = tvm.testing.parameter("graph", "vm") - -@tvm.testing.uses_gpu def test_floor_div_op(target, dev): N = 100 divisor = 5 @tir.prim_func def func_64( - A: tir.Buffer[(N + 100, 2), "int64"], - B: tir.Buffer[(N), "int64"], - C: tir.Buffer[(N), "int64"], + A: tir.Buffer((N + 100, 2), "int64"), + B: tir.Buffer((N), "int64"), + C: tir.Buffer((N), "int64"), ): for i in tir.serial(N): with tir.block("A"): @@ -45,9 +42,9 @@ def func_64( @tir.prim_func def func_32( - A: tir.Buffer[(N + 100, 2), "int32"], - B: tir.Buffer[(N), "int32"], - C: tir.Buffer[(N), "int32"], + A: tir.Buffer((N + 100, 2), "int32"), + B: tir.Buffer((N), "int32"), + C: tir.Buffer((N), "int32"), ): for i in tir.serial(N): with tir.block("A"): @@ -59,9 +56,9 @@ def func_32( @tir.prim_func def func_16( - A: tir.Buffer[(N + 100, 2), "int16"], - B: tir.Buffer[(N), "int16"], - C: tir.Buffer[(N), "int16"], + A: tir.Buffer((N + 100, 2), "int16"), + B: tir.Buffer((N), "int16"), + C: tir.Buffer((N), "int16"), ): for i in tir.serial(N): with tir.block("A"): @@ -73,7 +70,7 @@ def func_16( @tir.prim_func def func_8( - A: tir.Buffer[(N + 100, 2), "int8"], B: tir.Buffer[(N), "int8"], C: tir.Buffer[(N), "int8"] + A: tir.Buffer((N + 100, 2), "int8"), B: tir.Buffer((N), "int8"), C: tir.Buffer((N), "int8") ): for i in tir.serial(N): with tir.block("A"): @@ -91,11 +88,7 @@ def func_8( ]: built = tvm.build(opfunc, target=target) x_data = np.random.randint(te.min_value(type), te.max_value(type), size=(100), dtype=type) - data = [] - for i in range(N): - data.append(i) - - y_data = np.asarray(data, dtype=type) + y_data = np.asarray([i for i in range(N)], dtype=type) a_dev = tvm.nd.empty([N + 100, 2], type, dev) b_dev = tvm.nd.array(x_data, dev) From e54d48b44b7fde8da70cf41f5f4d59dd012ce530 Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Fri, 24 Feb 2023 14:07:53 +0000 Subject: [PATCH 5/5] Fixing target definition to llvm --- tests/python/relay/test_op_floordiv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py index 7c59cf3a88bf..8828a0155c89 100644 --- a/tests/python/relay/test_op_floordiv.py +++ b/tests/python/relay/test_op_floordiv.py @@ -22,7 +22,9 @@ from tvm.script import tir -def test_floor_div_op(target, dev): +def test_floor_div_op(): + target = "llvm" + dev = tvm.device(target) N = 100 divisor = 5