From 74603eeac3b30ca8ca807e56e6be355e7f9ef829 Mon Sep 17 00:00:00 2001 From: neildhickey Date: Mon, 27 Feb 2023 09:57:42 +0000 Subject: [PATCH] =?UTF-8?q?[Arith]=20ConstIntBound=20was=20incorrectly=20a?= =?UTF-8?q?ssuming=20bounds=20were=20over=20int64=E2=80=A6=20(#13918)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Arith] ConstIntBound was incorrectly assuming bounds were over int64_t range This commit improved the floormod and floordiv conversion check to be simpler for the negative range and adds a test to cover all integer data types. --- src/tir/transforms/lower_intrin.cc | 10 +- tests/python/relay/test_op_floordiv.py | 117 ++++++++++++++++++ .../python/topi/python/test_topi_transform.py | 1 - 3 files changed, 121 insertions(+), 7 deletions(-) create mode 100644 tests/python/relay/test_op_floordiv.py diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 8c850f0dea41..4cffe2a19d60 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -118,9 +118,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floordiv // in terms of truncdiv using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - if (const_int_bound->min_value != arith::ConstIntBound::kNegInf && - const_int_bound->min_value < 0 && - const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { + if (const_int_bound->min_value < 0 && + const_int_bound->min_value > -(Downcast(tvm::max_value(op->a->dtype))->value)) { // The goal is to write floordiv(a,b) in terms of truncdiv, without using // negative operands. // @@ -214,9 +213,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { // If the numerator's lower bound is known, express the floormod // in terms of truncmod using only positive operands. arith::ConstIntBound const_int_bound = analyzer_->const_int_bound(op->a); - if (const_int_bound->min_value != arith::ConstIntBound::kNegInf && - const_int_bound->min_value < 0 && - const_int_bound->min_value > Downcast(tvm::min_value(op->a->dtype))->value) { + if (const_int_bound->min_value < 0 && + const_int_bound->min_value > -(Downcast(tvm::max_value(op->a->dtype))->value)) { // The goal is to write floormod(a,b) in terms of truncdiv and truncmod, // without using negative operands. // diff --git a/tests/python/relay/test_op_floordiv.py b/tests/python/relay/test_op_floordiv.py new file mode 100644 index 000000000000..8828a0155c89 --- /dev/null +++ b/tests/python/relay/test_op_floordiv.py @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import pytest +import tvm +from tvm import te +import tvm.testing +from tvm.script import tir + + +def test_floor_div_op(): + target = "llvm" + dev = tvm.device(target) + N = 100 + divisor = 5 + + @tir.prim_func + def func_64( + A: tir.Buffer((N + 100, 2), "int64"), + B: tir.Buffer((N), "int64"), + C: tir.Buffer((N), "int64"), + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int64"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int64"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_32( + A: tir.Buffer((N + 100, 2), "int32"), + B: tir.Buffer((N), "int32"), + C: tir.Buffer((N), "int32"), + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int32"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int32"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_16( + A: tir.Buffer((N + 100, 2), "int16"), + B: tir.Buffer((N), "int16"), + C: tir.Buffer((N), "int16"), + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int16"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int16"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + @tir.prim_func + def func_8( + A: tir.Buffer((N + 100, 2), "int8"), B: tir.Buffer((N), "int8"), C: tir.Buffer((N), "int8") + ): + for i in tir.serial(N): + with tir.block("A"): + v_i = tir.axis.spatial(N, i) + A[v_i, 0] = tir.floordiv(C[v_i] - tir.max_value("int8"), divisor) + A[v_i, 1] = tir.floormod(C[v_i] - tir.max_value("int8"), divisor) + A[v_i + 100, 0] = tir.floordiv(B[v_i], divisor) + A[v_i + 100, 1] = tir.floormod(B[v_i], divisor) + + for opfunc, type in [ + (func_8, "int8"), + (func_16, "int16"), + (func_32, "int32"), + (func_64, "int64"), + ]: + built = tvm.build(opfunc, target=target) + x_data = np.random.randint(te.min_value(type), te.max_value(type), size=(100), dtype=type) + y_data = np.asarray([i for i in range(N)], dtype=type) + + a_dev = tvm.nd.empty([N + 100, 2], type, dev) + b_dev = tvm.nd.array(x_data, dev) + c_dev = tvm.nd.array(y_data, dev) + + built(a_dev, b_dev, c_dev) + + a = a_dev.numpy() + b = b_dev.numpy() + c = c_dev.numpy() + + # python modulo behaves a bit different to tvm floormod for negative numbers + for i in range(N + 100): + if a[i, 1] < 0: + a[i, 1] = divisor + a[i, 1] + + np.testing.assert_array_equal(a[:100, 0], (c - te.max_value(type)) // divisor) + np.testing.assert_array_equal(a[:100, 1], (c - te.max_value(type)) % divisor) + np.testing.assert_array_equal(a[100 : N + 100, 0], b // divisor) + np.testing.assert_array_equal(a[100 : N + 100, 1], b % divisor) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index e34905f15379..0f64b486f375 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -859,7 +859,6 @@ def test_dynamic_strided_slice(): verify_dynamic_strided_slice((3, 4, 3), [0, 2, 0], [1, 2, 3]) -@tvm.testing.requires_gpu @tvm.testing.uses_gpu def test_strided_set(): verify_strided_set((3, 4, 3), (3, 2, 2), [0, 3, 0], [4, 1, 4], [1, -1, 2])