Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
allisonport-db committed Sep 28, 2023
1 parent 40db527 commit b615cfe
Show file tree
Hide file tree
Showing 20 changed files with 258 additions and 170 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import io.delta.kernel.internal.types.TableSchemaSerDe;

public class Metadata {

public static Metadata fromRow(Row row, TableClient tableClient) {
if (row == null) {
return null;
Expand Down Expand Up @@ -68,7 +67,7 @@ public static Metadata fromRow(Row row, TableClient tableClient) {
.add("createdTime", LongType.INSTANCE, true /* contains null */)
.add("configuration",
new MapType(StringType.INSTANCE, StringType.INSTANCE, false),
false /* contains null */);
false /* nullable */);

private final String id;
private final Optional<String> name;
Expand Down Expand Up @@ -97,9 +96,9 @@ public Metadata(
this.format = requireNonNull(format, "format is null");
this.schemaString = requireNonNull(schemaString, "schemaString is null");
this.schema = schema;
this.partitionColumns = partitionColumns;
this.partitionColumns = requireNonNull(partitionColumns, "partitionColumns is null");
this.createdTime = createdTime;
this.configuration = configuration;
this.configuration = requireNonNull(configuration, "configuration is null");
this.columnMappingMode = new Lazy<>(() ->
VectorUtils.<String, String>toJavaMap(configuration)
.getOrDefault("delta.columnMapping.mode", "none")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
import io.delta.kernel.types.StructType;
import io.delta.kernel.utils.VectorUtils;

// TODO: will update based on rebase

public class Protocol {
public static Protocol fromRow(Row row) {
if (row == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ public class ColumnarBatchRow
private final ColumnarBatch columnarBatch;
private final int rowId;

// TODO: document the accessor must return *ArrayValue* for ArrayType
public ColumnarBatchRow(ColumnarBatch columnarBatch, int rowId) {
this.columnarBatch = Objects.requireNonNull(columnarBatch, "columnarBatch is null");
this.rowId = rowId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,13 @@ public class GenericRow implements Row {
private final StructType schema;
private final Map<Integer, Object> ordinalToValue;

// TODO document accessors must return the right complex type??

/**
* @param schema the schema of the row
* @param ordinalToValue a mapping of column ordinal to objects; for each column the object
* must be of the return type corresponding to the data type's getter
* method in the Row interface
*/
public GenericRow(StructType schema, Map<Integer, Object> ordinalToValue) {
this.schema = requireNonNull(schema, "schema is null");
this.ordinalToValue = requireNonNull(ordinalToValue, "ordinalToValue is null");
Expand Down Expand Up @@ -116,13 +121,13 @@ public Row getStruct(int ordinal) {
}

@Override
// TODO document
public ArrayValue getArray(int ordinal) {
// TODO: not sufficient check, also need to check the element type
throwIfUnsafeAccess(ordinal, ArrayType.class, "array");
return (ArrayValue) getValue(ordinal);
}

@Override
// TODO document
public MapValue getMap(int ordinal) {
// TODO: not sufficient check, also need to check the element types
throwIfUnsafeAccess(ordinal, MapType.class, "map");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ public static StructType getPhysicalSchema(TableClient tableClient, Row scanStat
* @return List of partition column names according to the scan state.
*/
public static List<String> getPartitionColumns(Row scanState) {
// TODO can this be null?
return VectorUtils.toJavaList(
scanState.getArray(COL_NAME_TO_ORDINAL.get("partitionColumns")));
}
Expand All @@ -115,6 +116,7 @@ public static List<String> getPartitionColumns(Row scanState) {
* {@link Scan#getScanState(TableClient)}.
*/
public static String getColumnMappingMode(Row scanState) {
// TODO can this be null?
Map<String, String> configuration = VectorUtils.toJavaMap(
scanState.getMap(COL_NAME_TO_ORDINAL.get("configuration")));
return configuration.getOrDefault("delta.columnMapping.mode", "none");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package io.delta.kernel.internal.types;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -78,7 +79,6 @@ public static StructType fromJson(JsonHandler jsonHandler, String serializedStru
private static StructType parseStructType(JsonHandler jsonHandler,
String serializedStructType) {
Function<Row, StructType> evalMethod = (row) -> {
// todo how will this be done with the struct changes?
final List<Row> fields = VectorUtils.toJavaList(row.getArray(0));
return new StructType(
fields.stream()
Expand All @@ -97,8 +97,8 @@ private static StructField parseStructField(JsonHandler jsonHandler, Row row) {
String serializedDataType = row.getString(1);
DataType type = parseDataType(jsonHandler, serializedDataType);
boolean nullable = row.getBoolean(2);
Map<String, String> metadata = VectorUtils.toJavaMap(row.getMap(3));

Map<String, String> metadata = row.isNullAt(3) ? Collections.emptyMap() :
VectorUtils.toJavaMap(row.getMap(3));
return new StructField(name, type, nullable, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public final class VectorUtils {
private VectorUtils() {}

/**
* Which types are supported??
* Converts an {@link ArrayValue} to a Java list. Any nested complex types are also converted
* to their Java type.
*/
public static <T> List<T> toJavaList(ArrayValue arrayValue) {
ColumnVector elementVector = arrayValue.getElements();
Expand All @@ -43,6 +44,14 @@ public static <T> List<T> toJavaList(ArrayValue arrayValue) {
return elements;
}

/**
* Converts a {@link MapValue} to a Java map. Any nested complex types are also converted
* to their Java type.
*
* Please note not all key types override hashCode/equals. Be careful when using with keys of:
* - Struct type at any nesting level (i.e. ArrayType(StructType) does not)
* - Binary type
*/
public static <K, V> Map<K, V> toJavaMap(MapValue mapValue) {
ColumnVector keyVector = mapValue.getKeys();
DataType keyDataType = keyVector.getDataType();
Expand All @@ -59,16 +68,42 @@ public static <K, V> Map<K, V> toJavaMap(MapValue mapValue) {
return values;
}

/**
* Gets the value at {@code rowId} from the column vector. The type of the Object returned
* depends on the data type of the column vector. For complex types array and map, returns
* the value as Java list or Java map.
*/
private static Object getValueAsObject(
ColumnVector columnVector, DataType dataType, int rowId) {
// TODO support more types
// TODO combine with other utils?
if (columnVector.isNullAt(rowId)) {
return null;
}
if (dataType instanceof BooleanType) {
return columnVector.getBoolean(rowId);
} else if (dataType instanceof ByteType) {
return columnVector.getByte(rowId);
} else if (dataType instanceof ShortType) {
return columnVector.getShort(rowId);
} else if (dataType instanceof IntegerType || dataType instanceof DateType) {
return columnVector.getInt(rowId);
} else if (dataType instanceof LongType || dataType instanceof TimestampType) {
return columnVector.getLong(rowId);
} else if (dataType instanceof FloatType) {
return columnVector.getFloat(rowId);
} else if (dataType instanceof DoubleType) {
return columnVector.getDouble(rowId);
} else if (dataType instanceof StringType) {
return columnVector.getString(rowId);
} else if (dataType instanceof BinaryType) {
return columnVector.getBinary(rowId);
} else if (dataType instanceof StructType) {
return columnVector.getStruct(rowId);
} else if (dataType instanceof DecimalType) {
return columnVector.getDecimal(rowId);
} else if (dataType instanceof ArrayType) {
return toJavaList(columnVector.getArray(rowId));
} else if (dataType instanceof MapType) {
return toJavaMap(columnVector.getMap(rowId));
} else {
throw new UnsupportedOperationException("unsupported data type");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,6 @@ public ColumnVector getElements() {
}

if (dataType instanceof MapType) {
// TODO standardize the map type and array type here and in defaults package

throwIfTypeMismatch("map", jsonValue.isObject(), jsonValue);
final MapType mapType = (MapType) dataType;
final List<Object> keys = new ArrayList<>();
Expand All @@ -195,7 +193,6 @@ public int getSize() {

@Override
public ColumnVector getKeys() {
// todo check is string?
return new TestColumnVector(mapType.getKeyType(), keys);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ public Row getStruct(int ordinal) {
}

@Override
// TODO?
public ArrayValue getArray(int ordinal) {
return (ArrayValue) parsedValues[ordinal];
}
Expand Down Expand Up @@ -171,8 +170,6 @@ private static Object decodeElement(JsonNode jsonValue, DataType dataType) {
}

if (dataType instanceof ArrayType) {
// todo make sure this works for nested arrays

throwIfTypeMismatch("array", jsonValue.isArray(), jsonValue);
final ArrayType arrayType = ((ArrayType) dataType);
final ArrayNode jsonArray = (ArrayNode) jsonValue;
Expand Down Expand Up @@ -200,8 +197,6 @@ public ColumnVector getElements() {
}

if (dataType instanceof MapType) {
// todo make sure this works for nested maps

throwIfTypeMismatch("map", jsonValue.isObject(), jsonValue);
final MapType mapType = (MapType) dataType;
if (!(mapType.getKeyType() instanceof StringType)) {
Expand Down Expand Up @@ -261,7 +256,7 @@ private static Object decodeField(ObjectNode rootNode, StructField field) {
return decodeElement(rootNode.get(field.getName()), field.getDataType());
}

// TODO: throw on unsafe access
// TODO should this do type checks?
private static class DefaultJsonVector implements ColumnVector {

private final DataType dataType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,21 @@
import io.delta.kernel.types.DataType;
import static io.delta.kernel.defaults.internal.DefaultKernelUtils.checkArgument;

// TODO test!!
/**
* Provides a restricted view on an underlying column vector.
*/
public class DefaultViewVector implements ColumnVector {

private final ColumnVector underlyingVector;
private final int offset;
private final int size;

// TODO docs
/**
* @param underlyingVector the underlying column vector to read
* @param start the row index of the underlyingVector where we want this vector to start
* @param end the row index of the underlyingVector where we want this vector to end
* (non-inclusive)
*/
public DefaultViewVector(ColumnVector underlyingVector, int start, int end) {
this.underlyingVector = underlyingVector;
this.offset = start;
Expand Down

This file was deleted.

Loading

0 comments on commit b615cfe

Please sign in to comment.