Skip to content

Commit

Permalink
Pushdown dereferences into table scan for parquet tables
Browse files Browse the repository at this point in the history
Co-authored-by: Zhenxiao Luo <[email protected]>
Co-authored-by: qqibrow <[email protected]>
  • Loading branch information
3 people committed Aug 15, 2020
1 parent 871dbf3 commit 009eb3f
Show file tree
Hide file tree
Showing 9 changed files with 686 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
import static com.facebook.presto.common.type.VarcharType.createVarcharType;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
Expand Down Expand Up @@ -303,4 +304,15 @@ public int getEstimatedRetainedSizeInBytes()
// Size of TypeInfo is not accounted as TypeInfo's are cached and retained by the TypeInfoFactory
return INSTANCE_SIZE + hiveTypeName.getEstimatedSizeInBytes();
}

/**
 * Resolves the Hive type of a nested field reached by walking {@code childPath}
 * through successive struct levels of this type.
 *
 * @param childPath ordered struct field names, outermost first; an empty path yields this type itself
 * @return the Hive type of the addressed child field
 * @throws IllegalArgumentException if a path element is applied to a non-struct type
 */
public Optional<HiveType> findChildType(List<String> childPath)
{
    TypeInfo typeInfo = getTypeInfo();
    for (String part : childPath) {
        // Each path element must descend into a struct; anything else is a caller error.
        // The message template needs a %s placeholder so the offending typeInfo is
        // actually formatted into the message (the original template had none, so
        // Guava only appended it as a bracketed leftover argument).
        checkArgument(typeInfo instanceof StructTypeInfo, "typeinfo %s is not struct type", typeInfo);
        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
        typeInfo = structTypeInfo.getStructFieldTypeInfo(part);
    }
    return Optional.of(toHiveType(typeInfo));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ public enum ColumnType
private final ColumnType columnType;
private final Optional<String> comment;
private final List<Subfield> requiredSubfields;
private final Optional<Subfield> pushdownSubfield;

@JsonCreator
public HiveColumnHandle(
Expand All @@ -76,16 +77,18 @@ public HiveColumnHandle(
@JsonProperty("hiveColumnIndex") int hiveColumnIndex,
@JsonProperty("columnType") ColumnType columnType,
@JsonProperty("comment") Optional<String> comment,
@JsonProperty("requiredSubfields") List<Subfield> requiredSubfields)
@JsonProperty("requiredSubfields") List<Subfield> requiredSubfields,
@JsonProperty("pushdownSubfield") Optional<Subfield> pushdownSubfield)
{
this.name = requireNonNull(name, "name is null");
checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED, "hiveColumnIndex is negative");
checkArgument(hiveColumnIndex >= 0 || columnType == PARTITION_KEY || columnType == SYNTHESIZED || pushdownSubfield.isPresent(), "hiveColumnIndex is negative");
this.hiveColumnIndex = hiveColumnIndex;
this.hiveType = requireNonNull(hiveType, "hiveType is null");
this.typeName = requireNonNull(typeSignature, "type is null");
this.columnType = requireNonNull(columnType, "columnType is null");
this.comment = requireNonNull(comment, "comment is null");
this.requiredSubfields = requireNonNull(requiredSubfields, "requiredSubfields is null");
this.pushdownSubfield = requireNonNull(pushdownSubfield, "pushdownSubfield is null");
}

public HiveColumnHandle(
Expand All @@ -96,7 +99,7 @@ public HiveColumnHandle(
ColumnType columnType,
Optional<String> comment)
{
this(name, hiveType, typeSignature, hiveColumnIndex, columnType, comment, ImmutableList.of());
this(name, hiveType, typeSignature, hiveColumnIndex, columnType, comment, ImmutableList.of(), Optional.empty());
}

@JsonProperty
Expand Down Expand Up @@ -156,16 +159,22 @@ public List<Subfield> getRequiredSubfields()
return requiredSubfields;
}

/**
 * Returns the single subfield to be pushed down into the scan for this column,
 * if one was assigned; empty for ordinary columns. NOTE(review): presumably set
 * when a dereference expression is pushed into the table scan — confirm against
 * the planner code that constructs these handles.
 */
@JsonProperty
public Optional<Subfield> getPushdownSubfield()
{
    return pushdownSubfield;
}

@Override
public ColumnHandle withRequiredSubfields(List<Subfield> subfields)
{
return new HiveColumnHandle(name, hiveType, typeName, hiveColumnIndex, columnType, comment, subfields);
return new HiveColumnHandle(name, hiveType, typeName, hiveColumnIndex, columnType, comment, subfields, pushdownSubfield);
}

@Override
public int hashCode()
{
return Objects.hash(name, hiveColumnIndex, hiveType, columnType, comment);
return Objects.hash(name, hiveColumnIndex, hiveType, columnType, comment, requiredSubfields, pushdownSubfield);
}

@Override
Expand All @@ -183,7 +192,8 @@ public boolean equals(Object obj)
Objects.equals(this.hiveType, other.hiveType) &&
Objects.equals(this.columnType, other.columnType) &&
Objects.equals(this.comment, other.comment) &&
Objects.equals(this.requiredSubfields, other.requiredSubfields);
Objects.equals(this.requiredSubfields, other.requiredSubfields) &&
Objects.equals(this.pushdownSubfield, other.pushdownSubfield);
}

@Override
Expand All @@ -204,12 +214,12 @@ public static HiveColumnHandle updateRowIdHandle()
// plan-time support for row-by-row delete so that planning doesn't fail. This is why we need
// rowid handle. Note that in Hive connector, rowid handle is not implemented beyond plan-time.

return new HiveColumnHandle(UPDATE_ROW_ID_COLUMN_NAME, HIVE_LONG, BIGINT.getTypeSignature(), -1, SYNTHESIZED, Optional.empty(), ImmutableList.of());
return new HiveColumnHandle(UPDATE_ROW_ID_COLUMN_NAME, HIVE_LONG, BIGINT.getTypeSignature(), -1, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
}

public static HiveColumnHandle pathColumnHandle()
{
return new HiveColumnHandle(PATH_COLUMN_NAME, PATH_HIVE_TYPE, PATH_TYPE_SIGNATURE, PATH_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of());
return new HiveColumnHandle(PATH_COLUMN_NAME, PATH_HIVE_TYPE, PATH_TYPE_SIGNATURE, PATH_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
}

/**
Expand All @@ -219,7 +229,7 @@ public static HiveColumnHandle pathColumnHandle()
*/
public static HiveColumnHandle bucketColumnHandle()
{
return new HiveColumnHandle(BUCKET_COLUMN_NAME, BUCKET_HIVE_TYPE, BUCKET_TYPE_SIGNATURE, BUCKET_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of());
return new HiveColumnHandle(BUCKET_COLUMN_NAME, BUCKET_HIVE_TYPE, BUCKET_TYPE_SIGNATURE, BUCKET_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
}

public static boolean isPathColumnHandle(HiveColumnHandle column)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,8 @@ public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regular
columnHandle.getHiveColumnIndex(),
columnHandle.getColumnType(),
Optional.empty(),
columnHandle.getRequiredSubfields());
columnHandle.getRequiredSubfields(),
columnHandle.getPushdownSubfield());
})
.collect(toList());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ public static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColum
nextMissingColumnIndex++;
}
}
physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment(), column.getRequiredSubfields()));
physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment(), column.getRequiredSubfields(), column.getPushdownSubfield()));
}
return physicalColumns.build();
}
Expand Down
Loading

0 comments on commit 009eb3f

Please sign in to comment.