From c2b63d8ce308e0a88ecfcfd605c6eb8a4da734a6 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 14 May 2020 19:39:58 -0700 Subject: [PATCH 1/7] numeric tests --- .../java/ai/rapids/cudf/ColumnVector.java | 23 +++++++++++ java/src/main/native/src/ColumnVectorJni.cpp | 25 ++++++++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 40 +++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 48de6b35c03..b0ca859e9c5 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -384,6 +384,24 @@ public ColumnVector isNull() { return new ColumnVector(isNullNative(getNativeView())); } + /** + * Returns a Boolean vector with the same number of rows as this instance, that has + * TRUE for any entry that is an integer, and FALSE if its not an integer + * @return - Boolean vector + */ + public ColumnVector isInteger() { + return new ColumnVector(isInteger(getNativeView())); + } + + /** + * Returns a Boolean vector with the same number of rows as this instance, that has + * TRUE for any entry that is a float, and FALSE if its not a float + * @return - Boolean vector + */ + public ColumnVector isFloat() { + return new ColumnVector(isFloat(getNativeView())); + } + /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is NaN, and FALSE if null or a valid floating point value @@ -2359,6 +2377,10 @@ private static native long rollingWindow(long viewHandle, int min_periods, int a private static native long isNanNative(long viewHandle); + private static native long isFloat(long viewHandle); + + private static native long isInteger(long viewHandle); + private static native long isNotNanNative(long viewHandle); private static native long isNotNullNative(long viewHandle); @@ -2912,4 +2934,5 @@ public static ColumnVector 
timestampMicroSecondsFromBoxedLongs(Long... values) { public static ColumnVector timestampNanoSecondsFromBoxedLongs(Long... values) { return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendBoxed(values)); } + } diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 615a51f9738..a3a8b63898a 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include "jni_utils.hpp" @@ -1335,4 +1336,28 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_nansToNulls(JNIEnv *env CATCH_STD(env, 0) } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isFloat(JNIEnv *env, jobject j_object, jlong handle) { + + JNI_NULL_CHECK(env, handle, "native view handle is null", 0) + + try { + cudf::column_view * view = reinterpret_cast(handle); + std::unique_ptr result = cudf::strings::is_float(*view); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0) +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isInteger(JNIEnv *env, jobject j_object, jlong handle) { + + JNI_NULL_CHECK(env, handle, "native view handle is null", 0) + + try { + cudf::column_view * view = reinterpret_cast(handle); + std::unique_ptr result = cudf::strings::is_integer(*view); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0) +} + } // extern "C" diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 78067e413e9..59f3f6cd180 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -2118,4 +2118,44 @@ void testNansToNulls() { assertColumnsAreEqual(expectedDouble, resultDouble); } } + + @Test + void testIsInteger() { + String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", "2147483648", "-2147483648", 
"-2147483649", "NULL"}; + String[] longStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "9223372036854775807", "9223372036854775808", "-9223372036854775808", "-9223372036854775809", "NULL"}; + try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); + ColumnVector longStringCV = ColumnVector.fromStrings(longStrings); + ColumnVector isInt = intStringCV.isInteger(); + ColumnVector isLong = longStringCV.isInteger(); + ColumnVector ints = intStringCV.asInts(); + ColumnVector longs = longStringCV.asLongs(); + ColumnVector expectedInts = ColumnVector.fromInts(0, 0, 0, 0, 0, 0, Integer.MAX_VALUE, Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0); + ColumnVector expectedLongs = ColumnVector.fromLongs(0, 0, 0, 0, 0, 0, Long.MAX_VALUE, Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false, false, false, true, true, true, true, false)) { + assertColumnsAreEqual(expected, isInt); + assertColumnsAreEqual(expected, isLong); + assertColumnsAreEqual(expectedInts, ints); + assertColumnsAreEqual(expectedLongs, longs); + } + } + + @Test + void testIsFloat() { + String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL"}; + String[] doubleStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", "-1.7976931348623160E308", "1.2e-234", "NULL"}; + try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); + ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); + ColumnVector isFloat = floatStringCV.isFloat(); + ColumnVector isDouble = doubleStringCV.isFloat(); + ColumnVector doubles = doubleStringCV.asDoubles(); + ColumnVector floats = floatStringCV.asFloats(); + ColumnVector expectedFloats = ColumnVector.fromFloats(0f, 0f, 
Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f); + ColumnVector expectedDoubles = ColumnVector.fromDoubles(0f, 0f, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0f); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, false, true, true, true, true, true, true, true, false)) { + assertColumnsAreEqual(expected, isDouble); + assertColumnsAreEqual(expected, isFloat); + assertColumnsAreEqual(expectedFloats, floats); + assertColumnsAreEqual(expectedDoubles, doubles); + } + } } From ccc25e27eda47ca84e26e67345d35fcdedf5888c Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 14 May 2020 20:04:47 -0700 Subject: [PATCH 2/7] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5305c9603eb..38e947ba358 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ - PR #5149 Add Java bindings for PMOD - PR #5153 Add Java bindings for extract - PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN +- PR #5203 Add Java bindings for is_integer and is_float ## Improvements From dfbc9eff0778ea7aef2abd47b2ece8dbee35146f Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Fri, 15 May 2020 15:55:21 -0700 Subject: [PATCH 3/7] addressed review comments --- java/src/main/java/ai/rapids/cudf/ColumnVector.java | 2 ++ .../test/java/ai/rapids/cudf/ColumnVectorTest.java | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index b0ca859e9c5..8114067f863 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -390,6 +390,7 @@ public ColumnVector 
isNull() { * @return - Boolean vector */ public ColumnVector isInteger() { + assert type == DType.STRING; return new ColumnVector(isInteger(getNativeView())); } @@ -399,6 +400,7 @@ public ColumnVector isInteger() { * @return - Boolean vector */ public ColumnVector isFloat() { + assert type == DType.STRING; return new ColumnVector(isFloat(getNativeView())); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 59f3f6cd180..dd421c4f229 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -2141,19 +2141,19 @@ void testIsInteger() { @Test void testIsFloat() { - String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL"}; - String[] doubleStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", "-1.7976931348623160E308", "1.2e-234", "NULL"}; + String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL", "null", null}; + String[] doubleStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null}; try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromFloats(0f, 0f, Float.POSITIVE_INFINITY, 
Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f); - ColumnVector expectedDoubles = ColumnVector.fromDoubles(0f, 0f, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0f); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, false, true, true, true, true, true, true, true, false)) { - assertColumnsAreEqual(expected, isDouble); + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null); + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, 0d, null); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, false, true, true, true, true, true, true, true, false, false, null)) { assertColumnsAreEqual(expected, isFloat); + assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedFloats, floats); assertColumnsAreEqual(expectedDoubles, doubles); } From eb4805c691a4eeafe2773496ba55013c54c8dc49 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Fri, 15 May 2020 16:01:37 -0700 Subject: [PATCH 4/7] updated doc --- java/src/main/java/ai/rapids/cudf/ColumnVector.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 8114067f863..60f71462d42 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -387,6 +387,10 @@ public ColumnVector 
isNull() { /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is an integer, and FALSE if its not an integer + * + * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. + * i.e. If this method returns true for a value could still result in an overflow or underflow + * * @return - Boolean vector */ public ColumnVector isInteger() { @@ -397,6 +401,10 @@ public ColumnVector isInteger() { /** * Returns a Boolean vector with the same number of rows as this instance, that has * TRUE for any entry that is a float, and FALSE if its not a float + * + * NOTE: Float doesn't mean a 32-bit float. It means a number that is a fraction. + * i.e. If this method returns true for a value could still result in an overflow or underflow + * * @return - Boolean vector */ public ColumnVector isFloat() { From f87ae845095bd0db0ecd47aa3ae149d8ae7583b7 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Sat, 16 May 2020 20:44:24 -0700 Subject: [PATCH 5/7] review changes --- .../java/ai/rapids/cudf/ColumnVectorTest.java | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index dd421c4f229..bc343bc4041 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -2121,17 +2121,23 @@ void testNansToNulls() { @Test void testIsInteger() { - String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", "2147483648", "-2147483648", "-2147483649", "NULL"}; - String[] longStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "9223372036854775807", "9223372036854775808", "-9223372036854775808", "-9223372036854775809", "NULL"}; + String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647", + "2147483648", "-2147483648", 
"-2147483649", "NULL", "null", null, "1.2", "1.2e-4"}; + String[] longStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", + "9223372036854775807", "9223372036854775808", "-9223372036854775808", + "-9223372036854775809", "NULL", "null", null, "1.2", "1.2e-4"}; try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); ColumnVector longStringCV = ColumnVector.fromStrings(longStrings); ColumnVector isInt = intStringCV.isInteger(); ColumnVector isLong = longStringCV.isInteger(); ColumnVector ints = intStringCV.asInts(); ColumnVector longs = longStringCV.asLongs(); - ColumnVector expectedInts = ColumnVector.fromInts(0, 0, 0, 0, 0, 0, Integer.MAX_VALUE, Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0); - ColumnVector expectedLongs = ColumnVector.fromLongs(0, 0, 0, 0, 0, 0, Long.MAX_VALUE, Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false, false, false, true, true, true, true, false)) { + ColumnVector expectedInts = ColumnVector.fromBoxedInts(0, 0, 0, 0, 0, 0, Integer.MAX_VALUE, + Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0, 0, null, 1, 1); + ColumnVector expectedLongs = ColumnVector.fromBoxedLongs(0l, 0l, 0l, 0l, 0l, 0l, Long.MAX_VALUE, + Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0l, 0l, null, 1l, 1l); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false, false, + false, true, true, true, true, false, false, null, false, false)) { assertColumnsAreEqual(expected, isInt); assertColumnsAreEqual(expected, isLong); assertColumnsAreEqual(expectedInts, ints); @@ -2141,17 +2147,27 @@ void testIsInteger() { @Test void testIsFloat() { - String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL", "null", null}; - String[] doubleStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", 
"0.0", "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null}; + String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", + "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL", "null", + null}; + String[] doubleStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", + "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", + "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null}; try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null); - ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, 0d, null); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, false, true, true, true, true, true, true, true, false, false, null)) { + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, + -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null); + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, 
+ Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, + 0d, null); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, + false, true, true, true, true, true, true, true, false, false, null)) { assertColumnsAreEqual(expected, isFloat); assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedFloats, floats); From a450c2ef2531de19e62eb8d6d5e8eb5aaa81d337 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Sat, 16 May 2020 20:53:06 -0700 Subject: [PATCH 6/7] updated comments --- java/src/main/java/ai/rapids/cudf/ColumnVector.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 60f71462d42..134c1660a6f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -389,7 +389,11 @@ public ColumnVector isNull() { * TRUE for any entry that is an integer, and FALSE if its not an integer * * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. - * i.e. If this method returns true for a value could still result in an overflow or underflow + * i.e. If this method returns true for a value could still result in an overflow or underflow. + * + * Also note that exponents are handled but might not be what the user expects. This method will + * convert the mantissa without regarding the exponent before ending the conversion when it hits + * the decimal. e.g. 
1.2e-4 => 1 but 0.00012 => 0 * * @return - Boolean vector */ From 92866517c84cd0dfa521eb230906bc6c7ea98f07 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 18 May 2020 17:12:58 -0700 Subject: [PATCH 7/7] review changes --- .../java/ai/rapids/cudf/ColumnVector.java | 19 +++++++++--------- java/src/main/native/src/ColumnVectorJni.cpp | 2 ++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 20 +++++++++---------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 134c1660a6f..33a20499994 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -386,14 +386,12 @@ public ColumnVector isNull() { /** * Returns a Boolean vector with the same number of rows as this instance, that has - * TRUE for any entry that is an integer, and FALSE if its not an integer + * TRUE for any entry that is an integer, and FALSE if its not an integer. A null will be returned + * for null entries * * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction. - * i.e. If this method returns true for a value could still result in an overflow or underflow. - * - * Also note that exponents are handled but might not be what the user expects. This method will - * convert the mantissa without regarding the exponent before ending the conversion when it hits - * the decimal. e.g. 1.2e-4 => 1 but 0.00012 => 0 + * i.e. If this method returns true for a value it could still result in an overflow or underflow + * if you convert it to a Java integral type * * @return - Boolean vector */ @@ -404,10 +402,13 @@ public ColumnVector isInteger() { /** * Returns a Boolean vector with the same number of rows as this instance, that has - * TRUE for any entry that is a float, and FALSE if its not a float + * TRUE for any entry that is a float, and FALSE if its not a float. 
A null will be returned + * for null entries * - * NOTE: Float doesn't mean a 32-bit float. It means a number that is a fraction. - * i.e. If this method returns true for a value could still result in an overflow or underflow + * NOTE: Float doesn't mean a 32-bit float. It means a number that is a fraction or can be written + * as a fraction. i.e. This method will return true for integers as well as floats. Also note if + * this method returns true for a value it could still result in an overflow or underflow if you + * convert it to a Java float or double * * @return - Boolean vector */ diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index a3a8b63898a..beb2b15242e 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -1341,6 +1341,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isFloat(JNIEnv *env, jo JNI_NULL_CHECK(env, handle, "native view handle is null", 0) try { + cudf::jni::auto_set_device(env); cudf::column_view * view = reinterpret_cast(handle); std::unique_ptr result = cudf::strings::is_float(*view); return reinterpret_cast(result.release()); @@ -1353,6 +1354,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isInteger(JNIEnv *env, JNI_NULL_CHECK(env, handle, "native view handle is null", 0) try { + cudf::jni::auto_set_device(env); cudf::column_view * view = reinterpret_cast(handle); std::unique_ptr result = cudf::strings::is_integer(*view); return reinterpret_cast(result.release()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index bc343bc4041..85246330771 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -2122,10 +2122,10 @@ void testNansToNulls() { @Test void testIsInteger() { String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", 
"2147483647", - "2147483648", "-2147483648", "-2147483649", "NULL", "null", null, "1.2", "1.2e-4"}; + "2147483648", "-2147483648", "-2147483649", "NULL", "null", null, "1.2", "1.2e-4", "0.00012"}; String[] longStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "9223372036854775807", "9223372036854775808", "-9223372036854775808", - "-9223372036854775809", "NULL", "null", null, "1.2", "1.2e-4"}; + "-9223372036854775809", "NULL", "null", null, "1.2", "1.2e-4", "0.00012"}; try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings); ColumnVector longStringCV = ColumnVector.fromStrings(longStrings); ColumnVector isInt = intStringCV.isInteger(); @@ -2133,11 +2133,11 @@ void testIsInteger() { ColumnVector ints = intStringCV.asInts(); ColumnVector longs = longStringCV.asLongs(); ColumnVector expectedInts = ColumnVector.fromBoxedInts(0, 0, 0, 0, 0, 0, Integer.MAX_VALUE, - Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0, 0, null, 1, 1); + Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0, 0, null, 1, 1, 0); ColumnVector expectedLongs = ColumnVector.fromBoxedLongs(0l, 0l, 0l, 0l, 0l, 0l, Long.MAX_VALUE, - Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0l, 0l, null, 1l, 1l); + Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0l, 0l, null, 1l, 1l, 0l); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false, false, - false, true, true, true, true, false, false, null, false, false)) { + false, true, true, true, true, false, false, null, false, false, false)) { assertColumnsAreEqual(expected, isInt); assertColumnsAreEqual(expected, isLong); assertColumnsAreEqual(expectedInts, ints); @@ -2149,10 +2149,10 @@ void testIsInteger() { void testIsFloat() { String[] floatStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0", "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL", "null", - null}; + null, "423"}; String[] doubleStrings = {"A", "nan", "Inf", "-Inf", 
"Infinity", "infinity", "-0.0", "0.0", "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308", - "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null}; + "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null, "423"}; try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isFloat = floatStringCV.isFloat(); @@ -2161,13 +2161,13 @@ void testIsFloat() { ColumnVector floats = floatStringCV.asFloats(); ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, - -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null); + -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 423f); ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, - 0d, null); + 0d, null, 423d); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null)) { + false, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isFloat); assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedFloats, floats);