diff --git a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
index e452d27287a8..a6c3c65e9830 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java
@@ -33,6 +33,7 @@
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.util.BinaryUtil;
+import org.apache.iceberg.util.NaNUtil;
import static org.apache.iceberg.expressions.Expressions.rewriteNot;
@@ -44,6 +45,11 @@
* Files are passed to {@link #eval(ContentFile)}, which returns true if the file may contain matching
* rows and false if the file cannot contain matching rows. Files may be skipped if and only if the
* return value of {@code eval} is false.
+ *
+ * Due to the comparison implementation of ORC stats, for float/double columns in ORC files, if the first
+ * value in a file is NaN, metrics of this file will report NaN for both upper and lower bound despite
+ * that the column could contain non-NaN data. Thus in some scenarios explicitly checks for NaN is necessary
+ * in order to not skip files that may contain matching data.
*/
public class InclusiveMetricsEvaluator {
private static final int IN_PREDICATE_LIMIT = 200;
@@ -184,13 +190,18 @@ public Boolean notNaN(BoundReference ref) {
public Boolean lt(BoundReference ref, Literal lit) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
+ return ROWS_MIGHT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp >= 0) {
return ROWS_CANNOT_MATCH;
@@ -204,13 +215,18 @@ public Boolean lt(BoundReference ref, Literal lit) {
public Boolean ltEq(BoundReference ref, Literal lit) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
+ return ROWS_MIGHT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp > 0) {
return ROWS_CANNOT_MATCH;
@@ -224,7 +240,7 @@ public Boolean ltEq(BoundReference ref, Literal lit) {
public Boolean gt(BoundReference ref, Literal lit) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
@@ -244,7 +260,7 @@ public Boolean gt(BoundReference ref, Literal lit) {
public Boolean gtEq(BoundReference ref, Literal lit) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
@@ -264,13 +280,18 @@ public Boolean gtEq(BoundReference ref, Literal lit) {
public Boolean eq(BoundReference ref, Literal lit) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
+ return ROWS_MIGHT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp > 0) {
return ROWS_CANNOT_MATCH;
@@ -300,7 +321,7 @@ public Boolean notEq(BoundReference ref, Literal lit) {
public Boolean in(BoundReference ref, Set literalSet) {
Integer id = ref.fieldId();
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_CANNOT_MATCH;
}
@@ -313,6 +334,12 @@ public Boolean in(BoundReference ref, Set literalSet) {
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(id));
+
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
+ return ROWS_MIGHT_MATCH;
+ }
+
literals = literals.stream().filter(v -> ref.comparator().compare(lower, v) <= 0).collect(Collectors.toList());
if (literals.isEmpty()) { // if all values are less than lower bound, rows cannot match.
return ROWS_CANNOT_MATCH;
diff --git a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
index d46a8216b1ec..6ea293a0b562 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
@@ -32,6 +32,7 @@
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.StructType;
+import org.apache.iceberg.util.NaNUtil;
import static org.apache.iceberg.expressions.Expressions.rewriteNot;
@@ -44,6 +45,11 @@
*
* Files are passed to {@link #eval(ContentFile)}, which returns true if all rows in the file must
* contain matching rows and false if the file may contain rows that do not match.
+ *
+ * Due to the comparison implementation of ORC stats, for float/double columns in ORC files, if the first
+ * value in a file is NaN, metrics of this file will report NaN for both upper and lower bound despite
+ * that the column could contain non-NaN data. Thus in some scenarios explicitly checks for NaN is necessary
+ * in order to not include files that may contain rows that don't match.
*/
public class StrictMetricsEvaluator {
private final Schema schema;
@@ -179,7 +185,7 @@ public Boolean lt(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
@@ -202,7 +208,7 @@ public Boolean ltEq(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
@@ -225,13 +231,18 @@ public Boolean gt(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the StrictMetricsEvaluator docs for more.
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp > 0) {
return ROWS_MUST_MATCH;
@@ -248,13 +259,18 @@ public Boolean gtEq(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the StrictMetricsEvaluator docs for more.
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp >= 0) {
return ROWS_MUST_MATCH;
@@ -271,7 +287,7 @@ public Boolean eq(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
@@ -304,13 +320,18 @@ public Boolean notEq(BoundReference ref, Literal lit) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_MUST_MATCH;
}
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the StrictMetricsEvaluator docs for more.
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
int cmp = lit.comparator().compare(lower, lit.value());
if (cmp > 0) {
return ROWS_MUST_MATCH;
@@ -335,7 +356,7 @@ public Boolean in(BoundReference ref, Set literalSet) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (canContainNulls(id)) {
+ if (canContainNulls(id) || canContainNaNs(id)) {
return ROWS_MIGHT_NOT_MATCH;
}
@@ -371,7 +392,7 @@ public Boolean notIn(BoundReference ref, Set literalSet) {
Types.NestedField field = struct.field(id);
Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));
- if (containsNullsOnly(id)) {
+ if (containsNullsOnly(id) || containsNaNsOnly(id)) {
return ROWS_MUST_MATCH;
}
@@ -380,6 +401,11 @@ public Boolean notIn(BoundReference ref, Set literalSet) {
if (lowerBounds != null && lowerBounds.containsKey(id)) {
T lower = Conversions.fromByteBuffer(struct.field(id).type(), lowerBounds.get(id));
+ if (NaNUtil.isNaN(lower)) {
+ // NaN indicates unreliable bounds. See the StrictMetricsEvaluator docs for more.
+ return ROWS_MIGHT_NOT_MATCH;
+ }
+
literals = literals.stream().filter(v -> ref.comparator().compare(lower, v) <= 0).collect(Collectors.toList());
if (literals.isEmpty()) { // if all values are less than lower bound, rows must match (notIn).
return ROWS_MUST_MATCH;
@@ -406,6 +432,11 @@ private boolean canContainNulls(Integer id) {
return nullCounts == null || (nullCounts.containsKey(id) && nullCounts.get(id) > 0);
}
+ private boolean canContainNaNs(Integer id) {
+ // nan counts might be null for early version writers when nan counters are not populated.
+ return nanCounts != null && nanCounts.containsKey(id) && nanCounts.get(id) > 0;
+ }
+
private boolean containsNullsOnly(Integer id) {
return valueCounts != null && valueCounts.containsKey(id) &&
nullCounts != null && nullCounts.containsKey(id) &&
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMetricsEvaluatorsNaNHandling.java b/api/src/test/java/org/apache/iceberg/expressions/TestMetricsEvaluatorsNaNHandling.java
new file mode 100644
index 000000000000..415f36126a3b
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestMetricsEvaluatorsNaNHandling.java
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.expressions;
+
+import java.nio.ByteBuffer;
+import java.util.Set;
+import java.util.function.BiFunction;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.types.Conversions.toByteBuffer;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+
+/**
+ * This test class ensures that metrics evaluators could handle NaN as upper/lower bounds correctly.
+ */
+public class TestMetricsEvaluatorsNaNHandling {
+ private static final Schema SCHEMA = new Schema(
+ required(1, "all_nan", Types.DoubleType.get()),
+ required(2, "max_nan", Types.DoubleType.get()),
+ optional(3, "min_max_nan", Types.FloatType.get()),
+ required(4, "all_nan_null_bounds", Types.DoubleType.get()),
+ optional(5, "some_nan_correct_bounds", Types.FloatType.get())
+ );
+
+ private static final DataFile FILE = new TestHelpers.TestDataFile("file.avro", TestHelpers.Row.of(), 50,
+ // any value counts, including nulls
+ ImmutableMap.builder()
+ .put(1, 10L)
+ .put(2, 10L)
+ .put(3, 10L)
+ .put(4, 10L)
+ .put(5, 10L)
+ .build(),
+ // null value counts
+ ImmutableMap.builder()
+ .put(1, 0L)
+ .put(2, 0L)
+ .put(3, 0L)
+ .put(4, 0L)
+ .put(5, 0L)
+ .build(),
+ // nan value counts
+ ImmutableMap.builder()
+ .put(1, 10L)
+ .put(4, 10L)
+ .put(5, 5L)
+ .build(),
+ // lower bounds
+ ImmutableMap.builder()
+ .put(1, toByteBuffer(Types.DoubleType.get(), Double.NaN))
+ .put(2, toByteBuffer(Types.DoubleType.get(), 7D))
+ .put(3, toByteBuffer(Types.FloatType.get(), Float.NaN))
+ .put(5, toByteBuffer(Types.FloatType.get(), 7F))
+ .build(),
+ // upper bounds
+ ImmutableMap.builder()
+ .put(1, toByteBuffer(Types.DoubleType.get(), Double.NaN))
+ .put(2, toByteBuffer(Types.DoubleType.get(), Double.NaN))
+ .put(3, toByteBuffer(Types.FloatType.get(), Float.NaN))
+ .put(5, toByteBuffer(Types.FloatType.get(), 22F))
+ .build());
+
+ private static final Set> LESS_THAN_EXPRESSIONS =
+ ImmutableSet.of(Expressions::lessThan, Expressions::lessThanOrEqual);
+
+ private static final Set> GREATER_THAN_EXPRESSIONS =
+ ImmutableSet.of(Expressions::greaterThan, Expressions::greaterThanOrEqual);
+
+ @Test
+ public void testInclusiveMetricsEvaluatorLessThanAndLessThanOrEqual() {
+ for (BiFunction func : LESS_THAN_EXPRESSIONS) {
+ boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("all_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("max_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("max_nan", 10D)).eval(FILE);
+ Assert.assertTrue("Should match: 10 is larger than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("min_max_nan", 1F)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 1F)).eval(FILE);
+ Assert.assertFalse("Should not match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 10F)).eval(FILE);
+ Assert.assertTrue("Should match: 10 larger than lower bound", shouldRead);
+ }
+ }
+
+ @Test
+ public void testInclusiveMetricsEvaluatorGreaterThanAndGreaterThanOrEqual() {
+ for (BiFunction func : GREATER_THAN_EXPRESSIONS) {
+ boolean shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("all_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("max_nan", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: upper bound is larger than 1", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("max_nan", 10D)).eval(FILE);
+ Assert.assertTrue("Should match: upper bound is larger than 10", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("min_max_nan", 1F)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 1F)).eval(FILE);
+ Assert.assertTrue("Should match: 1 is smaller than upper bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 10F)).eval(FILE);
+ Assert.assertTrue("Should match: 10 is smaller than upper bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertFalse("Should not match: 30 is greater than upper bound", shouldRead);
+ }
+ }
+
+ @Test
+ public void testInclusiveMetricsEvaluatorEquals() {
+ boolean shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.equal("all_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, Expressions.equal("max_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, Expressions.equal("max_nan", 10D)).eval(FILE);
+ Assert.assertTrue("Should match: 10 is within bounds", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(SCHEMA, Expressions.equal("min_max_nan", 1F)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.equal("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 1F)).eval(FILE);
+ Assert.assertFalse("Should not match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 10F)).eval(FILE);
+ Assert.assertTrue("Should match: 10 is within bounds", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertFalse("Should not match: 30 is greater than upper bound", shouldRead);
+ }
+
+ @Test
+ public void testInclusiveMetricsEvaluatorNotEquals() {
+ boolean shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("all_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("max_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("max_nan", 10D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("min_max_nan", 1F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("all_nan_null_bounds", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 1F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 10F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+ }
+
+ @Test
+ public void testInclusiveMetricsEvaluatorIn() {
+ boolean shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("all_nan", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("max_nan", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertTrue("Should match: 10 and 30 are greater than lower bound", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("min_max_nan", 1F, 10F, 30F)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("all_nan_null_bounds", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("some_nan_correct_bounds", 1F, 10F, 30F)).eval(FILE);
+ Assert.assertTrue("Should match: 10 within bounds", shouldRead);
+
+ shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.in("some_nan_correct_bounds", 1F, 30F)).eval(FILE);
+ Assert.assertFalse("Should not match: 1 not within bounds", shouldRead);
+ }
+
+ @Test
+ public void testInclusiveMetricsEvaluatorNotIn() {
+ boolean shouldRead = new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("all_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("max_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("max_nan", 10D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("min_max_nan", 1F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("all_nan_null_bounds", 1D)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 1F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 10F)).eval(FILE);
+ shouldRead = shouldRead & new InclusiveMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertTrue("Should match: no visibility", shouldRead);
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorLessThanAndLessThanOrEqual() {
+ for (BiFunction func : LESS_THAN_EXPRESSIONS) {
+ boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("all_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("max_nan", 10D)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 is less than upper bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("min_max_nan", 1F)).eval(FILE);
+ Assert.assertFalse("Should not match: no visibility", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 30F)).eval(FILE);
+ Assert.assertFalse("Should not match: nan value exists", shouldRead);
+ }
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorGreaterThanAndGreaterThanOrEqual() {
+ for (BiFunction func : GREATER_THAN_EXPRESSIONS) {
+ boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("all_nan", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("max_nan", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("max_nan", 10D)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 is larger than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("min_max_nan", 1F)).eval(FILE);
+ Assert.assertFalse("Should not match: no visibility", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertFalse("Should not match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, func.apply("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertFalse("Should not match: nan value exists", shouldRead);
+ }
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorNotEquals() {
+ boolean shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("all_nan", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, Expressions.notEqual("max_nan", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, Expressions.notEqual("max_nan", 10D)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 is within bounds", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(SCHEMA, Expressions.notEqual("min_max_nan", 1F)).eval(FILE);
+ Assert.assertFalse("Should not match: no visibility", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("all_nan_null_bounds", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 1F)).eval(FILE);
+ Assert.assertTrue("Should match: 1 is smaller than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 10F)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 is within bounds", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notEqual("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertTrue("Should match: 30 is greater than upper bound", shouldRead);
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorEquals() {
+ boolean shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("all_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("max_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("max_nan", 10D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("min_max_nan", 1F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("all_nan_null_bounds", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 1F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 10F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertFalse("Should not match: bounds not equal to given value", shouldRead);
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorNotIn() {
+ boolean shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("all_nan", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertTrue("Should match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("max_nan", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 and 30 are greater than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("max_nan", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: 1 is less than lower bound", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("min_max_nan", 1F, 10F, 30F)).eval(FILE);
+ Assert.assertFalse("Should not match: no visibility", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("all_nan_null_bounds", 1D, 10D, 30D)).eval(FILE);
+ Assert.assertTrue("Should match: all nan column doesn't contain number", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 1F, 10F, 30F)).eval(FILE);
+ Assert.assertFalse("Should not match: 10 within bounds", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 1D)).eval(FILE);
+ Assert.assertTrue("Should match: 1 not within bounds", shouldRead);
+
+ shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.notIn("some_nan_correct_bounds", 30D)).eval(FILE);
+ Assert.assertTrue("Should match: 30 not within bounds", shouldRead);
+ }
+
+ @Test
+ public void testStrictMetricsEvaluatorIn() {
+ boolean shouldRead = new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("all_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("max_nan", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("max_nan", 10D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("min_max_nan", 1F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("all_nan_null_bounds", 1D)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("some_nan_correct_bounds", 1F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.in("some_nan_correct_bounds", 10F)).eval(FILE);
+ shouldRead = shouldRead | new StrictMetricsEvaluator(
+ SCHEMA, Expressions.equal("some_nan_correct_bounds", 30)).eval(FILE);
+ Assert.assertFalse("Should not match: bounds not equal to given value", shouldRead);
+ }
+}