Skip to content

Commit

Permalink
ORC-1146: Float category missing check if the statistic sum is a fini…
Browse files Browse the repository at this point in the history
…te value

### What changes were proposed in this pull request?

This pr is aimed at checking whether the float category statistic sum has a finite value.

### Why are the changes needed?

When the orc float category is written with NaN, pushing down is not supported.

### How was this patch tested?

Added unit test.

Closes #1077

Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 93a3505)
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 9a56fa5)
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
guiyanakuang authored and dongjoon-hyun committed Apr 5, 2022
1 parent 3afe72c commit 7d1a378
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
3 changes: 2 additions & 1 deletion java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,8 @@ static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
" include ORC-517. Writer version: {}",
predicate.getColumnName(), writerVersion);
return TruthValue.YES_NO_NULL;
} else if (category == TypeDescription.Category.DOUBLE) {
} else if (category == TypeDescription.Category.DOUBLE
|| category == TypeDescription.Category.FLOAT) {
DoubleColumnStatistics dstas = (DoubleColumnStatistics) cs;
if (!Double.isFinite(dstas.getSum())) {
LOG.debug("Not using predication pushdown on {} because stats contain NaN values",
Expand Down
31 changes: 30 additions & 1 deletion java/core/src/test/org/apache/orc/TestVectorOrcFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -4070,7 +4070,8 @@ public void testPredicatePushdownForComplex() throws Exception {
@Test
public void testPredicatePushdownWithNan() throws Exception {
TypeDescription schema = TypeDescription.createStruct()
.addField("double1", TypeDescription.createDouble());
.addField("double1", TypeDescription.createDouble())
.addField("float1", TypeDescription.createFloat());

Writer writer = OrcFile.createWriter(testFilePath,
OrcFile.writerOptions(conf)
Expand All @@ -4084,14 +4085,18 @@ public void testPredicatePushdownWithNan() throws Exception {
batch.ensureSize(3500);
batch.size = 3500;
batch.cols[0].noNulls = true;
batch.cols[1].noNulls = true;

DoubleColumnVector dbcol = ((DoubleColumnVector) batch.cols[0]);
DoubleColumnVector fcol = ((DoubleColumnVector) batch.cols[1]);

// first row NaN (resulting to min/max and sum columnStats of stride to be NaN)
// NaN in the middle of a stride causes Sum of last stride to be NaN
dbcol.vector[0] = Double.NaN;
fcol.vector[0] = Double.NaN;
for (int i=1; i < 3500; ++i) {
dbcol.vector[i] = i == 3200 ? Double.NaN : i;
fcol.vector[i] = i == 3200 ? Double.NaN : i;
}
writer.addRowBatch(batch);
writer.close();
Expand All @@ -4101,6 +4106,7 @@ public void testPredicatePushdownWithNan() throws Exception {
assertEquals(3500, reader.getNumberOfRows());

// Only the first stride matches the predicate, just need to make sure NaN stats are ignored
// Test double category push down
SearchArgument sarg = SearchArgumentFactory.newBuilder()
.startAnd()
.lessThan("double1", PredicateLeaf.Type.FLOAT, 100d)
Expand All @@ -4122,6 +4128,29 @@ public void testPredicatePushdownWithNan() throws Exception {

rows.nextBatch(batch);
assertEquals(0, batch.size);

// Test float category push down
sarg = SearchArgumentFactory.newBuilder()
.startAnd()
.lessThan("float1", PredicateLeaf.Type.FLOAT, 100d)
.end()
.build();

rows = reader.rows(reader.options()
.range(0L, Long.MAX_VALUE)
.searchArgument(sarg, new String[]{"float1"}));
batch = reader.getSchema().createRowBatch(3500);

rows.nextBatch(batch);
// First stride should be read as NaN sum is ignored
assertEquals(1000, batch.size);

rows.nextBatch(batch);
// Last stride should be read as NaN sum is ignored
assertEquals(500, batch.size);

rows.nextBatch(batch);
assertEquals(0, batch.size);
}

/**
Expand Down

0 comments on commit 7d1a378

Please sign in to comment.