diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilter.java b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilter.java index e3edfe747397..2f41888e63ac 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilter.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilter.java @@ -1098,7 +1098,7 @@ private BytesValues(byte[][] values, boolean nullAllowed) super(true, nullAllowed); requireNonNull(values, "values is null"); - checkArgument(values.length > 1, "values must contain at least 2 entries"); + checkArgument(values.length > 0, "values must not be empty"); this.values = values; lengthExists = new boolean[Arrays.stream(values).mapToInt(value -> value.length).max().getAsInt() + 1]; @@ -1209,6 +1209,69 @@ public String toString() } } + class BytesValuesExclusive + extends AbstractTupleDomainFilter + { + private final BytesValues delegate; + + private BytesValuesExclusive(byte[][] values, boolean nullAllowed) + { + super(true, nullAllowed); + delegate = BytesValues.of(values, nullAllowed); + } + + public static BytesValuesExclusive of(byte[][] values, boolean nullAllowed) + { + return new BytesValuesExclusive(values, nullAllowed); + } + + @Override + public boolean testBytes(byte[] value, int offset, int length) + { + if (!delegate.testLength(length)) { + return true; + } + return !delegate.testBytes(value, offset, length); + } + + @Override + public boolean testLength(int length) + { + return true; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + + if (o == null || getClass() != o.getClass()) { + return false; + } + + BytesValuesExclusive that = (BytesValuesExclusive) o; + return nullAllowed == that.nullAllowed && + delegate.equals(that.delegate); + } + + @Override + public int hashCode() + { + return Objects.hash(delegate, nullAllowed); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("delegate", 
delegate) + .add("nullAllowed", nullAllowed) + .toString(); + } + } + class BigintMultiRange extends AbstractTupleDomainFilter { diff --git a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilterUtils.java b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilterUtils.java index 9a3be757e6d1..a8534b38856c 100644 --- a/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilterUtils.java +++ b/presto-orc/src/main/java/com/facebook/presto/orc/TupleDomainFilterUtils.java @@ -20,6 +20,7 @@ import com.facebook.presto.orc.TupleDomainFilter.BooleanValue; import com.facebook.presto.orc.TupleDomainFilter.BytesRange; import com.facebook.presto.orc.TupleDomainFilter.BytesValues; +import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive; import com.facebook.presto.orc.TupleDomainFilter.DoubleRange; import com.facebook.presto.orc.TupleDomainFilter.FloatRange; import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange; @@ -35,6 +36,7 @@ import io.airlift.slice.Slice; import java.util.List; +import java.util.Objects; import static com.facebook.presto.orc.TupleDomainFilter.ALWAYS_FALSE; import static com.facebook.presto.orc.TupleDomainFilter.IS_NOT_NULL; @@ -129,6 +131,15 @@ public static TupleDomainFilter toFilter(Domain domain) .toArray(byte[][]::new), nullAllowed); } + + if (isNotIn(ranges)) { + return BytesValuesExclusive.of( + bytesRanges.stream() + .map(BytesRange::getLower) + .filter(Objects::nonNull) + .toArray(byte[][]::new), + nullAllowed); + } } if (firstRangeFilter instanceof DoubleRange || firstRangeFilter instanceof FloatRange) { @@ -142,7 +153,7 @@ public static TupleDomainFilter toFilter(Domain domain) } /** - * Returns true is ranges represent != or NOT IN filter for double or float. + * Returns true if ranges represent != or NOT IN filter for double, float or string (varchar/char) column. * * The logic is to return true if ranges are next to each other, but don't include the touch value. 
*/ @@ -156,7 +167,7 @@ private static boolean isNotIn(List ranges) Marker previousHigh = firstRange.getHigh(); Type type = previousHigh.getType(); - if (type != DOUBLE && type != REAL) { + if (type != DOUBLE && type != REAL && !isVarcharType(type) && !(type instanceof CharType)) { return false; } diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilter.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilter.java index c6c76f3573e0..08f70d6fa530 100644 --- a/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilter.java +++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilter.java @@ -20,6 +20,7 @@ import com.facebook.presto.orc.TupleDomainFilter.BooleanValue; import com.facebook.presto.orc.TupleDomainFilter.BytesRange; import com.facebook.presto.orc.TupleDomainFilter.BytesValues; +import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive; import com.facebook.presto.orc.TupleDomainFilter.DoubleRange; import com.facebook.presto.orc.TupleDomainFilter.FloatRange; import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange; @@ -329,6 +330,21 @@ public void testBytesValues() } } + @Test + public void testBytesValuesExclusive() + { + // The excluded values have lengths on either side of 8 bytes (4, 6, 8 and 9). 
+ TupleDomainFilter filter = BytesValuesExclusive.of(new byte[][] {toBytes("Igne"), toBytes("natura"), toBytes("renovitur"), toBytes("integra.")}, false); + assertFalse(filter.testBytes(toBytes("Igne"), 0, 4)); + assertFalse(filter.testBytes(toBytes("natura"), 0, 6)); + assertFalse(filter.testBytes(toBytes("renovitur"), 0, 9)); + assertFalse(filter.testBytes(toBytes("integra."), 0, 8)); + assertFalse(filter.testNull()); + + assertTrue(filter.testBytes(toBytes("natura"), 0, 5)); + assertTrue(filter.testBytes(toBytes("apple"), 0, 5)); + } + private static byte[] sequentialBytes(byte base, int length) { byte[] bytes = new byte[length]; diff --git a/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilterUtils.java b/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilterUtils.java index 9795d4eb52cd..6fb6a83e61a7 100644 --- a/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilterUtils.java +++ b/presto-orc/src/test/java/com/facebook/presto/orc/TestTupleDomainFilterUtils.java @@ -24,6 +24,7 @@ import com.facebook.presto.orc.TupleDomainFilter.BooleanValue; import com.facebook.presto.orc.TupleDomainFilter.BytesRange; import com.facebook.presto.orc.TupleDomainFilter.BytesValues; +import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive; import com.facebook.presto.orc.TupleDomainFilter.DoubleRange; import com.facebook.presto.orc.TupleDomainFilter.FloatRange; import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange; @@ -361,9 +362,7 @@ public void testDecimal() public void testVarchar() { assertEquals(toFilter(equal(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(toBytes("abc"), false, toBytes("abc"), false, false)); - assertEquals(toFilter(not(equal(C_VARCHAR, stringLiteral("abc", VARCHAR)))), MultiRange.of(ImmutableList.of( - BytesRange.of(null, true, toBytes("abc"), true, false), - BytesRange.of(toBytes("abc"), true, null, true, false)), false, false)); + 
assertEquals(toFilter(not(equal(C_VARCHAR, stringLiteral("abc", VARCHAR)))), BytesValuesExclusive.of(new byte[][] {toBytes("abc")}, false)); assertEquals(toFilter(lessThan(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(null, true, toBytes("abc"), true, false)); assertEquals(toFilter(lessThanOrEqual(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(null, true, toBytes("abc"), false, false)); @@ -373,6 +372,8 @@ public void testVarchar() assertEquals(toFilter(in(C_VARCHAR, ImmutableList.of(stringLiteral("Ex", createVarcharType(7)), stringLiteral("oriente")))), BytesValues.of(new byte[][] {toBytes("Ex"), toBytes("oriente")}, false)); + assertEquals(toFilter(not(in(C_VARCHAR, ImmutableList.of(stringLiteral("Ex", createVarcharType(7)), stringLiteral("oriente"))))), + BytesValuesExclusive.of(new byte[][] {toBytes("Ex"), toBytes("oriente")}, false)); } @Test