Skip to content

Commit

Permalink
Optimize NOT IN filter for strings
Browse files Browse the repository at this point in the history
  • Loading branch information
kewang1024 authored and mbasmanova committed Jan 14, 2020
1 parent 422d7a9 commit fc695e0
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1098,7 +1098,7 @@ private BytesValues(byte[][] values, boolean nullAllowed)
super(true, nullAllowed);

requireNonNull(values, "values is null");
checkArgument(values.length > 1, "values must contain at least 2 entries");
checkArgument(values.length > 0, "values must not be empty");

this.values = values;
lengthExists = new boolean[Arrays.stream(values).mapToInt(value -> value.length).max().getAsInt() + 1];
Expand Down Expand Up @@ -1209,6 +1209,69 @@ public String toString()
}
}

/**
 * Filter that passes every byte sequence EXCEPT the ones in a fixed list
 * (used for string {@code !=} and {@code NOT IN} predicates).
 *
 * The list membership check is handed off to a {@link BytesValues} instance
 * built from the same values; this class simply negates its answer.
 */
class BytesValuesExclusive
        extends AbstractTupleDomainFilter
{
    // Filter over the excluded values; a hit in the delegate means a miss here.
    private final BytesValues delegate;

    private BytesValuesExclusive(byte[][] values, boolean nullAllowed)
    {
        // NOTE(review): the meaning of the first super argument is defined by
        // AbstractTupleDomainFilter, which is not visible here - confirm.
        super(true, nullAllowed);
        this.delegate = BytesValues.of(values, nullAllowed);
    }

    /**
     * Creates a filter that rejects the given values.
     *
     * @param values byte sequences to exclude; validated by {@link BytesValues#of}
     * @param nullAllowed null-handling flag passed to the base filter and to the delegate
     * @return a new exclusive filter over {@code values}
     */
    public static BytesValuesExclusive of(byte[][] values, boolean nullAllowed)
    {
        return new BytesValuesExclusive(values, nullAllowed);
    }

    /**
     * Returns true unless the delegate accepts both the length and the bytes.
     */
    @Override
    public boolean testBytes(byte[] value, int offset, int length)
    {
        // The length check runs first so the byte comparison is skipped
        // whenever the delegate already rejects this length.
        boolean excluded = delegate.testLength(length) && delegate.testBytes(value, offset, length);
        return !excluded;
    }

    /**
     * Always true: this filter never rejects a value based on its length alone.
     */
    @Override
    public boolean testLength(int length)
    {
        return true;
    }

    @Override
    public boolean equals(Object o)
    {
        if (o == this) {
            return true;
        }

        boolean sameClass = o != null && o.getClass() == getClass();
        if (!sameClass) {
            return false;
        }

        BytesValuesExclusive other = (BytesValuesExclusive) o;
        if (this.nullAllowed != other.nullAllowed) {
            return false;
        }
        return delegate.equals(other.delegate);
    }

    @Override
    public int hashCode()
    {
        return Objects.hash(delegate, nullAllowed);
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("delegate", delegate)
                .add("nullAllowed", nullAllowed)
                .toString();
    }
}

class BigintMultiRange
extends AbstractTupleDomainFilter
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.facebook.presto.orc.TupleDomainFilter.BooleanValue;
import com.facebook.presto.orc.TupleDomainFilter.BytesRange;
import com.facebook.presto.orc.TupleDomainFilter.BytesValues;
import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive;
import com.facebook.presto.orc.TupleDomainFilter.DoubleRange;
import com.facebook.presto.orc.TupleDomainFilter.FloatRange;
import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange;
Expand All @@ -35,6 +36,7 @@
import io.airlift.slice.Slice;

import java.util.List;
import java.util.Objects;

import static com.facebook.presto.orc.TupleDomainFilter.ALWAYS_FALSE;
import static com.facebook.presto.orc.TupleDomainFilter.IS_NOT_NULL;
Expand Down Expand Up @@ -129,6 +131,15 @@ public static TupleDomainFilter toFilter(Domain domain)
.toArray(byte[][]::new),
nullAllowed);
}

if (isNotIn(ranges)) {
return BytesValuesExclusive.of(
bytesRanges.stream()
.map(BytesRange::getLower)
.filter(Objects::nonNull)
.toArray(byte[][]::new),
nullAllowed);
}
}

if (firstRangeFilter instanceof DoubleRange || firstRangeFilter instanceof FloatRange) {
Expand All @@ -142,7 +153,7 @@ public static TupleDomainFilter toFilter(Domain domain)
}

/**
* Returns true is ranges represent != or NOT IN filter for double or float.
* Returns true if the ranges represent a != or NOT IN filter for a double, float or string column.
*
* The logic is to return true if the ranges are adjacent to each other but exclude the value at which they touch.
*/
Expand All @@ -156,7 +167,7 @@ private static boolean isNotIn(List<Range> ranges)
Marker previousHigh = firstRange.getHigh();

Type type = previousHigh.getType();
if (type != DOUBLE && type != REAL) {
if (type != DOUBLE && type != REAL && !isVarcharType(type) && !(type instanceof CharType)) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.facebook.presto.orc.TupleDomainFilter.BooleanValue;
import com.facebook.presto.orc.TupleDomainFilter.BytesRange;
import com.facebook.presto.orc.TupleDomainFilter.BytesValues;
import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive;
import com.facebook.presto.orc.TupleDomainFilter.DoubleRange;
import com.facebook.presto.orc.TupleDomainFilter.FloatRange;
import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange;
Expand Down Expand Up @@ -329,6 +330,21 @@ public void testBytesValues()
}
}

@Test
public void testBytesValuesExclusive()
{
    // Excluded values are chosen with sizes on either side of 8 bytes (4, 6, 8 and 9).
    byte[][] excludedValues = new byte[][] {toBytes("Igne"), toBytes("natura"), toBytes("renovitur"), toBytes("integra.")};
    TupleDomainFilter notInFilter = BytesValuesExclusive.of(excludedValues, false);

    // Each excluded value, presented at its full length, must fail the filter.
    assertFalse(notInFilter.testBytes(toBytes("Igne"), 0, 4));
    assertFalse(notInFilter.testBytes(toBytes("natura"), 0, 6));
    assertFalse(notInFilter.testBytes(toBytes("renovitur"), 0, 9));
    assertFalse(notInFilter.testBytes(toBytes("integra."), 0, 8));

    // The filter was built with nullAllowed = false, so nulls are expected to fail.
    assertFalse(notInFilter.testNull());

    // A 5-byte prefix of an excluded value and an unrelated value both pass.
    assertTrue(notInFilter.testBytes(toBytes("natura"), 0, 5));
    assertTrue(notInFilter.testBytes(toBytes("apple"), 0, 5));
}

private static byte[] sequentialBytes(byte base, int length)
{
byte[] bytes = new byte[length];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.facebook.presto.orc.TupleDomainFilter.BooleanValue;
import com.facebook.presto.orc.TupleDomainFilter.BytesRange;
import com.facebook.presto.orc.TupleDomainFilter.BytesValues;
import com.facebook.presto.orc.TupleDomainFilter.BytesValuesExclusive;
import com.facebook.presto.orc.TupleDomainFilter.DoubleRange;
import com.facebook.presto.orc.TupleDomainFilter.FloatRange;
import com.facebook.presto.orc.TupleDomainFilter.LongDecimalRange;
Expand Down Expand Up @@ -361,9 +362,7 @@ public void testDecimal()
public void testVarchar()
{
assertEquals(toFilter(equal(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(toBytes("abc"), false, toBytes("abc"), false, false));
assertEquals(toFilter(not(equal(C_VARCHAR, stringLiteral("abc", VARCHAR)))), MultiRange.of(ImmutableList.of(
BytesRange.of(null, true, toBytes("abc"), true, false),
BytesRange.of(toBytes("abc"), true, null, true, false)), false, false));
assertEquals(toFilter(not(equal(C_VARCHAR, stringLiteral("abc", VARCHAR)))), TupleDomainFilter.BytesValuesExclusive.of(new byte[][]{toBytes("abc")}, false));

assertEquals(toFilter(lessThan(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(null, true, toBytes("abc"), true, false));
assertEquals(toFilter(lessThanOrEqual(C_VARCHAR, stringLiteral("abc", VARCHAR))), BytesRange.of(null, true, toBytes("abc"), false, false));
Expand All @@ -373,6 +372,8 @@ public void testVarchar()

assertEquals(toFilter(in(C_VARCHAR, ImmutableList.of(stringLiteral("Ex", createVarcharType(7)), stringLiteral("oriente")))),
BytesValues.of(new byte[][] {toBytes("Ex"), toBytes("oriente")}, false));
assertEquals(toFilter(not(in(C_VARCHAR, ImmutableList.of(stringLiteral("Ex", createVarcharType(7)), stringLiteral("oriente"))))),
BytesValuesExclusive.of(new byte[][]{toBytes("Ex"), toBytes("oriente")}, false));
}

@Test
Expand Down

0 comments on commit fc695e0

Please sign in to comment.