diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 4ab22606ec..dcd19e4c2c 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -546,7 +546,8 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto, " include ORC-517. Writer version: {}", predicate.getColumnName(), writerVersion); return TruthValue.YES_NO_NULL; - } else if (category == TypeDescription.Category.DOUBLE) { + } else if (category == TypeDescription.Category.DOUBLE + || category == TypeDescription.Category.FLOAT) { DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs; if (!Double.isFinite(dstas.getSum())) { LOG.debug("Not using predication pushdown on {} because stats contain NaN values", diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index cd92264dcb..57584e171b 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -4070,7 +4070,8 @@ public void testPredicatePushdownForComplex() throws Exception { @Test public void testPredicatePushdownWithNan() throws Exception { TypeDescription schema = TypeDescription.createStruct() - .addField("double1", TypeDescription.createDouble()); + .addField("double1", TypeDescription.createDouble()) + .addField("float1", TypeDescription.createFloat()); Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf) @@ -4084,14 +4085,18 @@ public void testPredicatePushdownWithNan() throws Exception { batch.ensureSize(3500); batch.size = 3500; batch.cols[0].noNulls = true; + batch.cols[1].noNulls = true; DoubleColumnVector dbcol = ((DoubleColumnVector) batch.cols[0]); + DoubleColumnVector fcol = ((DoubleColumnVector) batch.cols[1]); // first row NaN (resulting to min/max and sum columnStats of stride to be NaN) // NaN in the middle of a stride causes Sum of last stride to be NaN dbcol.vector[0] = Double.NaN; + fcol.vector[0] = Double.NaN; for (int i=1; i < 3500; ++i) { dbcol.vector[i] = i == 3200 ? Double.NaN : i; + fcol.vector[i] = i == 3200 ? Double.NaN : i; } writer.addRowBatch(batch); writer.close(); @@ -4101,6 +4106,7 @@ public void testPredicatePushdownWithNan() throws Exception { assertEquals(3500, reader.getNumberOfRows()); // Only the first stride matches the predicate, just need to make sure NaN stats are ignored + // Test double category push down SearchArgument sarg = SearchArgumentFactory.newBuilder() .startAnd() .lessThan("double1", PredicateLeaf.Type.FLOAT, 100d) @@ -4122,6 +4128,29 @@ public void testPredicatePushdownWithNan() throws Exception { rows.nextBatch(batch); assertEquals(0, batch.size); + + // Test float category push down + sarg = SearchArgumentFactory.newBuilder() + .startAnd() + .lessThan("float1", PredicateLeaf.Type.FLOAT, 100d) + .end() + .build(); + + rows = reader.rows(reader.options() + .range(0L, Long.MAX_VALUE) + .searchArgument(sarg, new String[]{"float1"})); + batch = reader.getSchema().createRowBatch(3500); + + rows.nextBatch(batch); + // First stride should be read as NaN sum is ignored + assertEquals(1000, batch.size); + + rows.nextBatch(batch); + // Last stride should be read as NaN sum is ignored + assertEquals(500, batch.size); + + rows.nextBatch(batch); + assertEquals(0, batch.size); } /**