Merge pull request #5203 from razajafri/numeric_tests

[REVIEW] Java bindings for numeric tests [skip ci]
rapidsai · May 19, 2020 · 855648c · 855648c
2 parents 725b71d + b3c029a
commit 855648c
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@
 - PR #5149 Add Java bindings for PMOD
 - PR #5153 Add Java bindings for extract
 - PR #5196 Add Java bindings for NULL_EQUALS, NULL_MAX and NULL_MIN
+- PR #5203 Add Java bindings for is_integer and is_float
 - PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml
 
 ## Improvements

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
@@ -384,6 +384,39 @@ public ColumnVector isNull() {
     return new ColumnVector(isNullNative(getNativeView()));
   }
 
+  /**
+   * Returns a Boolean vector with the same number of rows as this instance, that has
+   * TRUE for any entry that is an integer, and FALSE if it's not an integer. A null will be
+   * returned for null entries.
+   *
+   * NOTE: Integer doesn't mean a 32-bit integer. It means a number that is not a fraction.
+   * i.e. If this method returns true for a value it could still result in an overflow or underflow
+   * if you convert it to a Java integral type.
+   *
+   * This column is expected to be of type {@code DType.STRING}; that precondition is only
+   * enforced through a Java assertion.
+   *
+   * @return - Boolean vector
+   */
+  public ColumnVector isInteger() {
+    // Include the actual type so an assertion failure is self-explanatory.
+    assert type == DType.STRING : "isInteger requires a STRING column, found " + type;
+    return new ColumnVector(isInteger(getNativeView()));
+  }
+
+  /**
+   * Returns a Boolean vector with the same number of rows as this instance, that has
+   * TRUE for any entry that is a float, and FALSE if it's not a float. A null will be returned
+   * for null entries.
+   *
+   * NOTE: Float doesn't mean a 32-bit float. It means a number that is a fraction or can be written
+   * as a fraction. i.e. This method will return true for integers as well as floats. Also note if
+   * this method returns true for a value it could still result in an overflow or underflow if you
+   * convert it to a Java float or double.
+   *
+   * This column is expected to be of type {@code DType.STRING}; that precondition is only
+   * enforced through a Java assertion.
+   *
+   * @return - Boolean vector
+   */
+  public ColumnVector isFloat() {
+    // Include the actual type so an assertion failure is self-explanatory.
+    assert type == DType.STRING : "isFloat requires a STRING column, found " + type;
+    return new ColumnVector(isFloat(getNativeView()));
+  }
+
   /**
    * Returns a Boolean vector with the same number of rows as this instance, that has
    * TRUE for any entry that is NaN, and FALSE if null or a valid floating point value
@@ -2359,6 +2392,10 @@ private static native long rollingWindow(long viewHandle, int min_periods, int a
 
   private static native long isNanNative(long viewHandle);
 
+  private static native long isFloat(long viewHandle);
+
+  private static native long isInteger(long viewHandle);
+
   private static native long isNotNanNative(long viewHandle);
 
   private static native long isNotNullNative(long viewHandle);
@@ -2912,4 +2949,5 @@ public static ColumnVector timestampMicroSecondsFromBoxedLongs(Long... values) {
   public static ColumnVector timestampNanoSecondsFromBoxedLongs(Long... values) {
     return build(DType.TIMESTAMP_NANOSECONDS, values.length, (b) -> b.appendBoxed(values));
   }
+
 }
diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp
@@ -44,6 +44,7 @@
 #include <cudf/strings/convert/convert_booleans.hpp>
 #include <cudf/strings/convert/convert_floats.hpp>
 #include <cudf/strings/convert/convert_integers.hpp>
+#include <cudf/strings/char_types/char_types.hpp>
 
 #include "jni_utils.hpp"
 
@@ -1335,4 +1336,30 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_nansToNulls(JNIEnv *env
   CATCH_STD(env, 0)
 }
 
+// JNI entry point for the static native ColumnVector.isFloat(long viewHandle).
+// Runs cudf::strings::is_float on the column view behind `handle` and returns the
+// resulting column as a raw handle, or 0 if the handle is null or an exception was
+// translated by CATCH_STD.
+// The Java method is declared static, so the second JNI argument is the class (jclass).
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isFloat(JNIEnv *env, jclass, jlong handle) {
+  JNI_NULL_CHECK(env, handle, "native view handle is null", 0)
+
+  try {
+    cudf::jni::auto_set_device(env);
+    // The view is only read here, so treat it as const.
+    cudf::column_view const *view = reinterpret_cast<cudf::column_view const *>(handle);
+    std::unique_ptr<cudf::column> result = cudf::strings::is_float(*view);
+    // Give up ownership; the Java caller wraps the returned handle in a ColumnVector.
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0)
+}
+
+// JNI entry point for the static native ColumnVector.isInteger(long viewHandle).
+// Runs cudf::strings::is_integer on the column view behind `handle` and returns the
+// resulting column as a raw handle, or 0 if the handle is null or an exception was
+// translated by CATCH_STD.
+// The Java method is declared static, so the second JNI argument is the class (jclass).
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_isInteger(JNIEnv *env, jclass, jlong handle) {
+  JNI_NULL_CHECK(env, handle, "native view handle is null", 0)
+
+  try {
+    cudf::jni::auto_set_device(env);
+    // The view is only read here, so treat it as const.
+    cudf::column_view const *view = reinterpret_cast<cudf::column_view const *>(handle);
+    std::unique_ptr<cudf::column> result = cudf::strings::is_integer(*view);
+    // Give up ownership; the Java caller wraps the returned handle in a ColumnVector.
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0)
+}
+
 } // extern "C"
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -2118,4 +2118,60 @@ void testNansToNulls() {
       assertColumnsAreEqual(expectedDouble, resultDouble);
     }
   }
+
+  /**
+   * Verifies isInteger() on string columns holding values at and just beyond the int and long
+   * boundaries, plus non-numeric, fractional and null entries. Also pins what asInts()/asLongs()
+   * produce for the same strings, since a TRUE from isInteger() does not guarantee the value
+   * fits in the target type.
+   */
+  @Test
+  void testIsInteger() {
+    String[] intStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "2147483647",
+        "2147483648", "-2147483648", "-2147483649", "NULL", "null", null, "1.2", "1.2e-4", "0.00012"};
+    String[] longStrings = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity",
+        "9223372036854775807", "9223372036854775808", "-9223372036854775808",
+        "-9223372036854775809", "NULL", "null", null, "1.2", "1.2e-4", "0.00012"};
+    try (ColumnVector intStringCV = ColumnVector.fromStrings(intStrings);
+         ColumnVector longStringCV = ColumnVector.fromStrings(longStrings);
+         ColumnVector isInt = intStringCV.isInteger();
+         ColumnVector isLong = longStringCV.isInteger();
+         ColumnVector ints = intStringCV.asInts();
+         ColumnVector longs = longStringCV.asLongs();
+         // Out-of-range strings are expected to wrap around to the opposite extreme.
+         ColumnVector expectedInts = ColumnVector.fromBoxedInts(0, 0, 0, 0, 0, 0, Integer.MAX_VALUE,
+             Integer.MIN_VALUE, Integer.MIN_VALUE, Integer.MAX_VALUE, 0, 0, null, 1, 1, 0);
+         ColumnVector expectedLongs = ColumnVector.fromBoxedLongs(0L, 0L, 0L, 0L, 0L, 0L, Long.MAX_VALUE,
+             Long.MIN_VALUE, Long.MIN_VALUE, Long.MAX_VALUE, 0L, 0L, null, 1L, 1L, 0L);
+         // The same mask is expected for both inputs: only the four boundary values are
+         // integers, and the null entry stays null.
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, false, false,
+             false, true, true, true, true, false, false, null, false, false, false)) {
+      assertColumnsAreEqual(expected, isInt);
+      assertColumnsAreEqual(expected, isLong);
+      assertColumnsAreEqual(expectedInts, ints);
+      assertColumnsAreEqual(expectedLongs, longs);
+    }
+  }
+
+  /**
+   * Verifies isFloat() on string columns holding values at and just beyond the float and double
+   * maximums, along with non-numeric, integer and null entries, and pins the values that
+   * asFloats()/asDoubles() produce for the same strings.
+   */
+  @Test
+  void testIsFloat() {
+    String[] floatInputs = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0",
+        "3.4028235E38", "3.4028236E38", "-3.4028235E38", "-3.4028236E38", "1.2e-24", "NULL", "null",
+        null, "423"};
+    String[] doubleInputs = {"A", "nan", "Inf", "-Inf", "Infinity", "infinity", "-0.0", "0.0",
+        "1.7976931348623159E308", "1.7976931348623160E308", "-1.7976931348623159E308",
+        "-1.7976931348623160E308", "1.2e-234", "NULL", "null", null, "423"};
+    try (ColumnVector floatColumn = ColumnVector.fromStrings(floatInputs);
+         ColumnVector doubleColumn = ColumnVector.fromStrings(doubleInputs);
+         ColumnVector floatMask = floatColumn.isFloat();
+         ColumnVector doubleMask = doubleColumn.isFloat();
+         ColumnVector parsedDoubles = doubleColumn.asDoubles();
+         ColumnVector parsedFloats = floatColumn.asFloats();
+         // Strings just past the maximum magnitude are expected to convert to +/- infinity.
+         ColumnVector wantFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY,
+             Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY,
+             -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 423f);
+         ColumnVector wantDoubles = ColumnVector.fromBoxedDoubles(0d, 0d,
+             Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE,
+             Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d,
+             0d, null, 423d);
+         // One mask serves both inputs; the null entry stays null.
+         ColumnVector wantMask = ColumnVector.fromBoxedBooleans(false, false, true, true, false,
+             false, true, true, true, true, true, true, true, false, false, null, true)) {
+      assertColumnsAreEqual(wantMask, floatMask);
+      assertColumnsAreEqual(wantMask, doubleMask);
+      assertColumnsAreEqual(wantFloats, parsedFloats);
+      assertColumnsAreEqual(wantDoubles, parsedDoubles);
+    }
+  }
 }