From cb89436e5421e08c6194d837934aaf1f7620faad Mon Sep 17 00:00:00 2001 From: Venki Korukanti Date: Thu, 15 Jun 2023 23:02:31 -0700 Subject: [PATCH] [Kernel] Add missing data types and support for data type (de)serialization This PR is part of delta-io/delta#1783. It adds additional data types supported by Delta Lake protocol that were missing from the interfaces PR delta-io/delta#1808. It also adds serialization and deserailization of table schema represented as `StructType`. UTs Closes delta-io/delta#1842 --- kernel/build.sbt | 8 +- .../main/java/io/delta/kernel/data/Row.java | 4 +- .../java/io/delta/kernel/expressions/And.java | 4 +- .../kernel/expressions/BinaryComparison.java | 2 + .../kernel/expressions/BinaryOperator.java | 8 +- .../io/delta/kernel/expressions/Column.java | 3 +- .../io/delta/kernel/expressions/Literal.java | 1 - .../expressions/CastingComparator.java | 43 ++- .../internal/types/TableSchemaSerDe.java | 271 ++++++++++++++++++ .../java/io/delta/kernel/types/ArrayType.java | 50 +++- .../delta/kernel/types/BasePrimitiveType.java | 108 +++++++ .../io/delta/kernel/types/BinaryType.java | 29 ++ .../io/delta/kernel/types/BooleanType.java | 11 +- .../java/io/delta/kernel/types/ByteType.java | 29 ++ .../java/io/delta/kernel/types/DataType.java | 45 +-- .../java/io/delta/kernel/types/DateType.java | 30 ++ .../io/delta/kernel/types/DecimalType.java | 93 ++++++ .../io/delta/kernel/types/DoubleType.java | 29 ++ .../java/io/delta/kernel/types/FloatType.java | 29 ++ .../io/delta/kernel/types/IntegerType.java | 12 +- .../java/io/delta/kernel/types/LongType.java | 12 +- .../java/io/delta/kernel/types/MapType.java | 58 +++- .../io/delta/kernel/types/MixedDataType.java | 90 ++++++ .../java/io/delta/kernel/types/ShortType.java | 27 ++ .../io/delta/kernel/types/StringType.java | 10 +- .../io/delta/kernel/types/StructField.java | 95 +++--- .../io/delta/kernel/types/StructType.java | 125 ++++---- .../io/delta/kernel/types/TimestampType.java | 30 ++ .../kernel/types/UnresolvedDataType.java | 40 --- .../java/io/delta/kernel/utils/Utils.java | 52 +++- .../internal/types/JsonHandlerTestImpl.java | 269 +++++++++++++++++ .../internal/types/TestTableSchemaSerDe.java | 129 +++++++++ 32 files changed, 1525 insertions(+), 221 deletions(-) rename kernel/kernel-api/src/main/java/io/delta/kernel/{ => internal}/expressions/CastingComparator.java (53%) create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java delete mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java create mode 100644 kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java create mode 100644 kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java diff --git a/kernel/build.sbt b/kernel/build.sbt index c58f1423699..257e4ea4f69 100644 --- a/kernel/build.sbt +++ b/kernel/build.sbt @@ -46,7 +46,13 @@ lazy val kernelApi = (project in file("kernel-api")) commonSettings, scalaStyleSettings, releaseSettings, - libraryDependencies ++= Seq(), + libraryDependencies ++= Seq( + + "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.5" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "junit" % "junit" % "4.11" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test" + ), Compile / doc / javacOptions := Seq( "-public", "-windowtitle", "Delta Kernel API " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java index 28854a18c3b..06c16a0b312 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java @@ -65,13 +65,13 @@ public interface Row { * Return struct value of the column located at the given ordinal. * Throws error if the column at given ordinal is not of struct type, */ - Row getRecord(int ordinal); + Row getStruct(int ordinal); /** * Return array value of the column located at the given ordinal. * Throws error if the column at given ordinal is not of array type, */ - List getList(int ordinal); + List getArray(int ordinal); /** * Return map value of the column located at the given ordinal. diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java index 34bda9e26a7..4ad3328c933 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java @@ -48,8 +48,8 @@ public And(Expression left, Expression right) { throw new IllegalArgumentException( String.format( "'And' requires expressions of type boolean. Got %s and %s.", - left.dataType().typeName(), - right.dataType().typeName() + left.dataType(), + right.dataType() ) ); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java index 415f4cc64bd..54e9648d85b 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java @@ -18,6 +18,8 @@ import java.util.Comparator; +import io.delta.kernel.internal.expressions.CastingComparator; + /** * A {@link BinaryOperator} that compares the left and right {@link Expression}s and evaluates to a * boolean value. diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java index ccec858e612..6d1b9222b65 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java @@ -29,13 +29,13 @@ protected BinaryOperator(Expression left, Expression right, String symbol) { super(left, right); this.symbol = symbol; - if (!left.dataType().equivalent(right.dataType())) { + if (!left.dataType().equals(right.dataType())) { throw new IllegalArgumentException( String.format( "BinaryOperator left and right DataTypes must be the same. Found %s and %s.", - left.dataType().typeName(), - right.dataType().typeName()) - ); + left.dataType(), + right.dataType() + )); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java index 0af879bd573..5d7fc02188e 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java @@ -21,7 +21,6 @@ import java.util.Set; import io.delta.kernel.data.Row; -import io.delta.kernel.types.*; import io.delta.kernel.types.BooleanType; import io.delta.kernel.types.DataType; import io.delta.kernel.types.IntegerType; @@ -61,7 +60,7 @@ public Column(int ordinal, String name, DataType dataType) { throw new UnsupportedOperationException( String.format( "The data type %s of column %s at ordinal %s is not supported", - dataType.typeName(), + dataType, name, ordinal) ); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java index 7710752e4f6..4c678847b6a 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java @@ -19,7 +19,6 @@ import java.util.Objects; import io.delta.kernel.data.Row; -import io.delta.kernel.types.*; import io.delta.kernel.types.BooleanType; import io.delta.kernel.types.DataType; import io.delta.kernel.types.IntegerType; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java similarity index 53% rename from kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java rename to kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java index 8d13647f696..a3f979dbf54 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java @@ -14,40 +14,53 @@ * limitations under the License. */ -package io.delta.kernel.expressions; +package io.delta.kernel.internal.expressions; import java.util.Comparator; -import io.delta.kernel.types.*; +import io.delta.kernel.types.BinaryType; import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.ByteType; import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.FloatType; import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; import io.delta.kernel.types.StringType; +import io.delta.kernel.types.TimestampType; -// TODO: exclude from public interfaces (move to "internal" somewhere?) public class CastingComparator> implements Comparator { public static Comparator forDataType(DataType dataType) { - if (dataType instanceof IntegerType) { - return new CastingComparator(); - } - if (dataType instanceof BooleanType) { return new CastingComparator(); - } - - if (dataType instanceof LongType) { + } else if (dataType instanceof ByteType) { + return new CastingComparator(); + } else if (dataType instanceof ShortType) { + return new CastingComparator(); + } else if (dataType instanceof IntegerType) { + return new CastingComparator(); + } else if (dataType instanceof LongType) { return new CastingComparator(); - } - - if (dataType instanceof StringType) { + } else if (dataType instanceof FloatType) { + return new CastingComparator(); + } else if (dataType instanceof DoubleType) { + return new CastingComparator(); + } else if (dataType instanceof StringType) { return new CastingComparator(); + } else if (dataType instanceof DateType) { + // Date value is accessed as integer (number of days since epoch). + // This may change in the future. + return new CastingComparator(); + } else if (dataType instanceof TimestampType) { + // Timestamp value is accessed as long (epoch seconds). This may change in the future. + return new CastingComparator(); } throw new IllegalArgumentException( - String.format("Unsupported DataType: %s", dataType.typeName()) - ); + String.format("Unsupported DataType: %s", dataType)); } private final Comparator comparator; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java new file mode 100644 index 00000000000..7dc5c382426 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java @@ -0,0 +1,271 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.internal.types; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import io.delta.kernel.client.JsonHandler; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.Row; +import io.delta.kernel.types.ArrayType; +import io.delta.kernel.types.BasePrimitiveType; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DecimalType; +import io.delta.kernel.types.MapType; +import io.delta.kernel.types.MixedDataType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; +import io.delta.kernel.utils.Utils; + +/** + * Utility class to serialize and deserialize the table schema which is of type {@link StructType}. + */ +public class TableSchemaSerDe +{ + private TableSchemaSerDe() + { + } + + /** + * Serialize the given table schema {@code structType} as JSON. This should produce Delta + * Protocol complaint format. + * + * @param structType Data type to serialize + * @return Table schema serialized as JSON string. + */ + public static String toJson(StructType structType) + { + return structType.toJson(); + } + + /** + * Deserialize the table schema from {@code serializedStructType} using the given + * {@code tableClient} + * + * @param jsonHandler An instance of {@link JsonHandler} to use for parsing + * JSON operations. + * @param serializedStructType Table schema in JSON format compliant with the Delta Protocol. + * @return Table schema + */ + public static StructType fromJson(JsonHandler jsonHandler, String serializedStructType) + { + return parseStructType(jsonHandler, serializedStructType); + } + + /** + * Utility method to parse a given struct as struct type. + */ + private static StructType parseStructType(JsonHandler jsonHandler, String serializedStructType) + { + Function evalMethod = (row) -> { + final List fields = row.getArray(0); + return new StructType( + fields.stream() + .map(field -> parseStructField(jsonHandler, field)) + .collect(Collectors.toList())); + }; + return parseAndEvalSingleRow( + jsonHandler, serializedStructType, STRUCT_TYPE_SCHEMA, evalMethod); + } + + /** + * Utility method to parse a {@link StructField} from the {@link Row} + */ + private static StructField parseStructField(JsonHandler jsonHandler, Row row) + { + String name = row.getString(0); + String serializedDataType = row.getString(1); + DataType type = parseDataType(jsonHandler, serializedDataType); + boolean nullable = row.getBoolean(2); + Map metadata = row.getMap(3); + + return new StructField(name, type, nullable, metadata); + } + + /** + * Utility method to parse the data type from the {@link Row}. + */ + private static DataType parseDataType(JsonHandler jsonHandler, String serializedDataType) + { + if (BasePrimitiveType.isPrimitiveType(serializedDataType)) { + return BasePrimitiveType.createPrimitive(serializedDataType); + } + + // Check if it is decimal type + if (serializedDataType.startsWith("decimal")) { + if (serializedDataType.equalsIgnoreCase("decimal")) { + return DecimalType.USER_DEFAULT; + } + + // parse the precision and scale + Matcher matcher = DECIMAL_TYPE_PATTERN.matcher(serializedDataType); + if (!matcher.matches()) { + throw new IllegalArgumentException( + "Invalid decimal type format: " + serializedDataType); + } + return new DecimalType( + Integer.valueOf(matcher.group("precision")), + Integer.valueOf(matcher.group("scale"))); + } + // This must be a complex type which is described as an JSON object. + + Optional arrayType = parseAsArrayType(jsonHandler, serializedDataType); + if (arrayType.isPresent()) { + return arrayType.get(); + } + + Optional mapType = parseAsMapType(jsonHandler, serializedDataType); + if (mapType.isPresent()) { + return mapType.get(); + } + + return parseStructType(jsonHandler, serializedDataType); + } + + private static Optional parseAsArrayType(JsonHandler jsonHandler, String json) + { + Function> evalMethod = (row) -> { + if (!"array".equalsIgnoreCase(row.getString(0))) { + return Optional.empty(); + } + + if (row.isNullAt(1) || row.isNullAt(2)) { + throw new IllegalArgumentException("invalid array serialized format: " + json); + } + + // Now parse the element type and create an array data type object + DataType elementType = parseDataType(jsonHandler, row.getString(1)); + boolean containsNull = row.getBoolean(2); + + return Optional.of(new ArrayType(elementType, containsNull)); + }; + + return parseAndEvalSingleRow(jsonHandler, json, ARRAY_TYPE_SCHEMA, evalMethod); + } + + private static Optional parseAsMapType(JsonHandler jsonHandler, String json) + { + Function> evalMethod = (row -> { + if (!"map".equalsIgnoreCase(row.getString(0))) { + return Optional.empty(); + } + + if (row.isNullAt(1) || row.isNullAt(2) || row.isNullAt(3)) { + throw new IllegalArgumentException("invalid map serialized format: " + json); + } + + // Now parse the key and value types and create a map data type object + DataType keyType = parseDataType(jsonHandler, row.getString(1)); + DataType valueType = parseDataType(jsonHandler, row.getString(2)); + boolean valueContainsNull = row.getBoolean(3); + + return Optional.of(new MapType(keyType, valueType, valueContainsNull)); + }); + + return parseAndEvalSingleRow(jsonHandler, json, MAP_TYPE_SCHEMA, evalMethod); + } + + /** + * Helper method to parse a single json string + */ + private static R parseAndEvalSingleRow( + JsonHandler jsonHandler, + String jsonString, + StructType outputSchema, + Function evalFunction) + { + ColumnVector columnVector = Utils.singletonColumnVector(jsonString); + ColumnarBatch result = jsonHandler.parseJson(columnVector, outputSchema); + + assert result.getSize() == 1; + + CloseableIterator rows = result.getRows(); + try { + return evalFunction.apply(rows.next()); + } + finally { + Utils.safeClose(rows); + } + } + + /** + * Schema of the one member ({@link StructField}) in {@link StructType}. + */ + private static final StructType STRUCT_FIELD_SCHEMA = new StructType() + .add("name", StringType.INSTANCE) + .add("type", MixedDataType.INSTANCE) // Data type can be a string or a object. + .add("nullable", BooleanType.INSTANCE) + .add("metadata", + new MapType(StringType.INSTANCE, StringType.INSTANCE, false /* valueContainsNull */)); + + /** + * Schema of the serialized {@link StructType}. + */ + private static StructType STRUCT_TYPE_SCHEMA = + new StructType() + .add("fields", new ArrayType(STRUCT_FIELD_SCHEMA, false /* containsNull */)); + + /** + * Example Array Type in serialized format + * { + * "type" : "array", + * "elementType" : { + * "type" : "struct", + * "fields" : [ { + * "name" : "d", + * "type" : "integer", + * "nullable" : false, + * "metadata" : { } + * } ] + * }, + * "containsNull" : true + * } + */ + private static StructType ARRAY_TYPE_SCHEMA = + new StructType() + .add("type", StringType.INSTANCE) + .add("elementType", MixedDataType.INSTANCE) + .add("containsNull", BooleanType.INSTANCE); + + /** + * Example Map Type in serialized format + * { + * "type" : "map", + * "keyType" : "string", + * "valueType" : "string", + * "valueContainsNull" : true + * } + */ + private static StructType MAP_TYPE_SCHEMA = + new StructType() + .add("type", StringType.INSTANCE) + .add("keyType", MixedDataType.INSTANCE) + .add("valueType", MixedDataType.INSTANCE) + .add("valueContainsNull", BooleanType.INSTANCE); + + private static Pattern DECIMAL_TYPE_PATTERN = + Pattern.compile("decimal\\(\\s*(?[0-9]+),\\s*(?[0-9]+)\\s*\\)"); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java index a4ee9d28196..cbc158057a5 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java @@ -16,22 +16,62 @@ package io.delta.kernel.types; -public class ArrayType extends DataType { - public static ArrayType EMPTY_INSTANCE = new ArrayType(null, false); +import java.util.Objects; +public class ArrayType extends DataType +{ private final DataType elementType; private final boolean containsNull; - public ArrayType(DataType elementType, boolean containsNull) { + public ArrayType(DataType elementType, boolean containsNull) + { this.elementType = elementType; this.containsNull = containsNull; } - public DataType getElementType() { + public DataType getElementType() + { return elementType; } - public boolean containsNull() { + public boolean containsNull() + { return containsNull; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ArrayType arrayType = (ArrayType) o; + return containsNull == arrayType.containsNull + && Objects.equals(elementType, arrayType.elementType); + } + + @Override + public int hashCode() + { + return Objects.hash(elementType, containsNull); + } + + @Override + public String toJson() + { + return String.format("{" + + "\"type\": \"array\"," + + "\"elementType\": %s," + + "\"containsNull\": %s" + + "}", elementType.toJson(), containsNull); + } + + @Override + public String toString() + { + return "array[" + elementType + "]"; + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java new file mode 100644 index 00000000000..ddc3dbfe38a --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java @@ -0,0 +1,108 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +import java.util.HashMap; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Base class for all primitive types {@link DataType}. + */ +public abstract class BasePrimitiveType extends DataType +{ + /** + * Create a primitive type {@link DataType} + * + * @param primitiveTypeName Primitive type name. + * @return + */ + public static DataType createPrimitive(String primitiveTypeName) + { + return Optional.ofNullable(nameToPrimitiveTypeMap.get(primitiveTypeName)) + .orElseThrow( + () -> new IllegalArgumentException("Unknown primitive type " + primitiveTypeName)); + } + + /** + * Is the given type name a primitive type? + */ + public static boolean isPrimitiveType(String typeName) + { + return nameToPrimitiveTypeMap.containsKey(typeName); + } + + /** For testing only */ + public static List getAllPrimitiveTypes() { + return nameToPrimitiveTypeMap.values().stream().collect(Collectors.toList()); + } + + private static HashMap nameToPrimitiveTypeMap = new HashMap<>(); + + static { + nameToPrimitiveTypeMap.put("boolean", BooleanType.INSTANCE); + nameToPrimitiveTypeMap.put("byte", ByteType.INSTANCE); + nameToPrimitiveTypeMap.put("short", ShortType.INSTANCE); + nameToPrimitiveTypeMap.put("integer", IntegerType.INSTANCE); + nameToPrimitiveTypeMap.put("long", LongType.INSTANCE); + nameToPrimitiveTypeMap.put("float", FloatType.INSTANCE); + nameToPrimitiveTypeMap.put("double", DoubleType.INSTANCE); + nameToPrimitiveTypeMap.put("date", DateType.INSTANCE); + nameToPrimitiveTypeMap.put("timestamp", TimestampType.INSTANCE); + nameToPrimitiveTypeMap.put("binary", BinaryType.INSTANCE); + nameToPrimitiveTypeMap.put("string", StringType.INSTANCE); + } + + private final String primitiveTypeName; + + protected BasePrimitiveType(String primitiveTypeName) + { + this.primitiveTypeName = primitiveTypeName; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + BasePrimitiveType that = (BasePrimitiveType) o; + return primitiveTypeName.equals(that.primitiveTypeName); + } + + @Override + public int hashCode() + { + return Objects.hash(primitiveTypeName); + } + + @Override + public String toString() + { + return primitiveTypeName; + } + + @Override + public String toJson() + { + return String.format("\"%s\"", primitiveTypeName); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java new file mode 100644 index 00000000000..69f09cdbbf6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java @@ -0,0 +1,29 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * The data type representing {@code byte[]} values. + */ +public class BinaryType extends BasePrimitiveType +{ + public static final BinaryType INSTANCE = new BinaryType(); + + private BinaryType() + { + super("binary"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java index 745ad0e97a9..f64570c1899 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java @@ -13,11 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -public class BooleanType extends DataType { +/** + * Data type representing {@code boolean} type values. + */ +public class BooleanType extends BasePrimitiveType +{ public static final BooleanType INSTANCE = new BooleanType(); - private BooleanType() { } + private BooleanType() { + super("boolean"); + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java new file mode 100644 index 00000000000..d8af6568c3e --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java @@ -0,0 +1,29 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * The data type representing {@code byte} type values. + */ +public class ByteType extends BasePrimitiveType +{ + public static final ByteType INSTANCE = new ByteType(); + + private ByteType() + { + super("byte"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java index 7f1223e00bf..eb19f173651 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java @@ -16,43 +16,22 @@ package io.delta.kernel.types; -import java.util.Locale; +public abstract class DataType +{ + /** + * Convert the data type to Delta protocol specified serialization format. + * + * @return Data type serialized in JSON format. + */ + public abstract String toJson(); -public abstract class DataType { - - public static DataType createPrimitive(String typeName) { - if (typeName.equals(IntegerType.INSTANCE.typeName())) return IntegerType.INSTANCE; - if (typeName.equals(LongType.INSTANCE.typeName())) return LongType.INSTANCE; - if (typeName.equals(StringType.INSTANCE.typeName())) return StringType.INSTANCE; - if (typeName.equals(BooleanType.INSTANCE.typeName())) return BooleanType.INSTANCE; - - throw new IllegalArgumentException( - String.format("Can't create primitive for type type %s", typeName) - ); - } - - public String typeName() { - String name = this.getClass().getSimpleName(); - if (name.endsWith("Type")) { - name = name.substring(0, name.length() - 4); - } - return name.toLowerCase(Locale.ROOT); - } - public boolean equivalent(DataType dt) { - return this.equals(dt); - } + @Override + public abstract int hashCode(); @Override - public String toString() { - return typeName(); - } + public abstract boolean equals(Object obj); @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DataType that = (DataType) o; - return typeName().equals(that.typeName()); - } + public abstract String toString(); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java new file mode 100644 index 00000000000..89ebdee24e0 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java @@ -0,0 +1,30 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * A date type, supporting "0001-01-01" through "9999-12-31". + * Internally, this is represented as the number of days from 1970-01-01. + */ +public class DateType extends BasePrimitiveType +{ + public static final DateType INSTANCE = new DateType(); + + private DateType() + { + super("date"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java new file mode 100644 index 00000000000..a01cb803548 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java @@ -0,0 +1,93 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +import java.util.Objects; + +/** + * The data type representing {@code java.math.BigDecimal} values. + * A Decimal that must have fixed precision (the maximum number of digits) and scale (the number + * of digits on right side of dot). + *

+ * The precision can be up to 38, scale can also be up to 38 (less or equal to precision). + *

+ * The default precision and scale is (10, 0). + */ +public final class DecimalType extends DataType +{ + public static final DecimalType USER_DEFAULT = new DecimalType(10, 0); + + private final int precision; + private final int scale; + + public DecimalType(int precision, int scale) + { + if (precision < 0 || precision > 38 || scale < 0 || scale > 38 || scale > precision) { + throw new IllegalArgumentException(String.format( + "Invalid precision and scale combo (%d, %d). They should be in the range [0, 38] " + + "and scale can not be more than the precision.", precision, scale)); + } + this.precision = precision; + this.scale = scale; + } + + /** + * @return the maximum number of digits of the decimal + */ + public int getPrecision() + { + return precision; + } + + /** + * @return the number of digits on the right side of the decimal point (dot) + */ + public int getScale() + { + return scale; + } + + @Override + public String toJson() + { + return String.format("\"decimal(%d, %d)\"", precision, scale); + } + + @Override + public String toString() + { + return String.format("Decimal(%d, %d)", precision, scale); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DecimalType that = (DecimalType) o; + return precision == that.precision && scale == that.scale; + } + + @Override + public int hashCode() + { + return Objects.hash(precision, scale); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java new file mode 100644 index 00000000000..2c153175109 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java @@ -0,0 +1,29 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * The data type representing {@code double} type values. + */ +public class DoubleType extends BasePrimitiveType +{ + public static final DoubleType INSTANCE = new DoubleType(); + + private DoubleType() + { + super("double"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java new file mode 100644 index 00000000000..7d7a179aaf0 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java @@ -0,0 +1,29 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * The data type representing {@code float} type values. + */ +public class FloatType extends BasePrimitiveType +{ + public static final FloatType INSTANCE = new FloatType(); + + private FloatType() + { + super("float"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java index f1802fd71b8..b681d4206ca 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java @@ -13,11 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -public class IntegerType extends DataType { +/** + * The data type representing {@code integer} type values. + */ +public class IntegerType extends BasePrimitiveType +{ public static final IntegerType INSTANCE = new IntegerType(); - private IntegerType() { } + private IntegerType() + { + super("integer"); + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java index 3bb545a313e..0d9387043dc 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java @@ -13,11 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -public class LongType extends DataType { +/** + * The data type representing {@code long} type values. + */ +public class LongType extends BasePrimitiveType +{ public static final LongType INSTANCE = new LongType(); - private LongType() { } + private LongType() + { + super("long"); + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java index c6c9bd0bf8c..7bbfe7025db 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java @@ -13,32 +13,76 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -public class MapType extends DataType { +import java.util.Objects; - public static final MapType EMPTY_INSTANCE = new MapType(null, null, false); +/** + * Data type representing a map type. + */ +public class MapType extends DataType +{ private final DataType keyType; private final DataType valueType; private final boolean valueContainsNull; - public MapType(DataType keyType, DataType valueType, boolean valueContainsNull) { + public MapType(DataType keyType, DataType valueType, boolean valueContainsNull) + { this.keyType = keyType; this.valueType = valueType; this.valueContainsNull = valueContainsNull; } - public DataType getKeyType() { + public DataType getKeyType() + { return keyType; } - public DataType getValueType() { + public DataType getValueType() + { return valueType; } - public boolean isValueContainsNull() { + public boolean isValueContainsNull() + { return valueContainsNull; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + MapType mapType = (MapType) o; + return valueContainsNull == mapType.valueContainsNull && keyType.equals(mapType.keyType) && + valueType.equals(mapType.valueType); + } + + @Override + public int hashCode() + { + return Objects.hash(keyType, valueType, valueContainsNull); + } + + @Override + public String toJson() + { + return String.format("{" + + "\"type\": \"map\"," + + "\"keyType\": %s," + + "\"valueType\": %s," + + "\"valueContainsNull\": %s" + + "}", keyType.toJson(), valueType.toJson(), valueContainsNull); + } + + @Override + public String toString() + { + return String.format("Map[%s, %s]", keyType, valueType); + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java new file mode 100644 index 00000000000..e96698b2bb5 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java @@ -0,0 +1,90 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +import io.delta.kernel.data.ColumnVector; + +/** + * TODO: + * This is a controversial data type to have, but we have no way to specify the schema + * of JSON serialized table schema. In order to use the + * {@link io.delta.kernel.client.JsonHandler#parseJson(ColumnVector, StructType)}, the Kernel + * needs to specify the exact schema of the data, but depending upon the type serialized (`int` vs + * `map`), the serialized JSON could have different schema. + *

+ * `int` type column schema is serialized as: + * + *

+ *   {
+ *     "name" : "a",
+ *     "type" : "integer",
+ *     "nullable" : false,
+ *     "metadata" : { }
+ *   }
+ *  
+ *

+ * `struct` type column schema is serialized as: + *

+ *   {
+ *     "name" : "b",
+ *     "type" : {
+ *       "type" : "struct",
+ *       "fields" : [ {
+ *         "name" : "d",
+ *         "type" : "integer",
+ *         "nullable" : false,
+ *         "metadata" : { }
+ *       } ]
+ *     },
+ *     "nullable" : true,
+ *     "metadata" : { }
+ *   }
+ * 
+ * Whenever this type is specified, reader should expect either a `string` value or `struct` value. + * The implementation of reader should convert the `string` or `struct` value to `string` type. + * Reader implementations can expect this type only for JSON format data reading cases only. + */ +public class MixedDataType extends DataType +{ + public static final MixedDataType INSTANCE = new MixedDataType(); + + private MixedDataType() {} + + @Override + public String toJson() + { + throw new UnsupportedOperationException( + "this should never called as this type is not persisted to storage"); + } + + @Override + public int hashCode() + { + return 0; + } + + @Override + public boolean equals(Object obj) + { + return obj == this; + } + + @Override + public String toString() + { + return "mixed"; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java new file mode 100644 index 00000000000..0f8f408b3ee --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java @@ -0,0 +1,27 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * The data type representing {@code short} type values. + */ +public class ShortType extends BasePrimitiveType { + public static final ShortType INSTANCE = new ShortType(); + + private ShortType() { + super("short"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java index 9f8f6895745..1cb2e0fbbf6 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java @@ -13,11 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -public class StringType extends DataType { +/** + * The data type representing {@code string} type values. + */ +public class StringType extends BasePrimitiveType { public static final StringType INSTANCE = new StringType(); - private StringType() { } + private StringType() { + super("string"); + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java index d9d43f6c1a6..04e18fd7c40 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java @@ -17,46 +17,22 @@ package io.delta.kernel.types; import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; -import io.delta.kernel.data.Row; - -public class StructField { - - //////////////////////////////////////////////////////////////////////////////// - // Static Fields / Methods - //////////////////////////////////////////////////////////////////////////////// - - // TODO: docs - public static StructField fromRow(Row row) { - final String name = row.getString(0); - final DataType type = UnresolvedDataType.fromRow(row, 1); - final boolean nullable = row.getBoolean(2); - final Map metadata = row.getMap(3); - return new StructField(name, type, nullable, metadata); - } - - // TODO: docs - public static final StructType READ_SCHEMA = new StructType() - .add("name", StringType.INSTANCE) - .add("type", UnresolvedDataType.INSTANCE) - .add("nullable", BooleanType.INSTANCE) - .add("metadata", new MapType(StringType.INSTANCE, StringType.INSTANCE, false)); - - //////////////////////////////////////////////////////////////////////////////// - // Instance Fields / Methods - //////////////////////////////////////////////////////////////////////////////// - +public class StructField +{ private final String name; private final DataType dataType; private final boolean nullable; private final Map metadata; - // private final FieldMetadata metadata; public StructField( - String name, - DataType dataType, - boolean nullable, - Map metadata) { + String name, + DataType dataType, + boolean nullable, + Map metadata) + { this.name = name; this.dataType = dataType; this.nullable = nullable; @@ -66,34 +42,75 @@ public StructField( /** * @return the name of this field */ - public String getName() { + public String getName() + { return name; } /** * @return the data type of this field */ - public DataType getDataType() { + public DataType getDataType() + { return dataType; } /** * @return the metadata for this field */ - public Map getMetadata() { + public Map getMetadata() + { return metadata; } /** * @return whether this field allows to have a {@code null} value. */ - public boolean isNullable() { + public boolean isNullable() + { return nullable; } @Override - public String toString() { + public String toString() + { return String.format("StructField(name=%s,type=%s,nullable=%s,metadata=%s)", - name, dataType, nullable, "empty(fix - this)"); + name, dataType, nullable, "empty(fix - this)"); + } + + public String toJson() + { + String metadataAsJson = metadata.entrySet().stream() + .map(e -> String.format("\"%s\" : \"%s\"", e.getKey(), e.getValue())) + .collect(Collectors.joining(",\n")); + + return String.format( + "{\n" + + " \"name\" : \"%s\",\n" + + " \"type\" : %s,\n" + + " \"nullable\" : %s, \n" + + " \"metadata\" : { %s }\n" + + "}", name, dataType.toJson(), nullable, metadataAsJson); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + StructField that = (StructField) o; + return nullable == that.nullable && name.equals(that.name) && + dataType.equals(that.dataType) && + metadata.equals(that.metadata); + } + + @Override + public int hashCode() + { + return Objects.hash(name, dataType, nullable, metadata); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java index aca1f7b5dc0..c3af8146323 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java @@ -13,52 +13,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package io.delta.kernel.types; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.stream.Collectors; -import io.delta.kernel.data.Row; import io.delta.kernel.expressions.Column; import io.delta.kernel.utils.Tuple2; -public final class StructType extends DataType { - - //////////////////////////////////////////////////////////////////////////////// - // Static Fields / Methods - //////////////////////////////////////////////////////////////////////////////// - - public static StructType EMPTY_INSTANCE = new StructType(); - - // TODO: docs - public static StructType fromRow(Row row) { - final List fields = row.getList(0); - return new StructType( - fields - .stream() - .map(StructField::fromRow) - .collect(Collectors.toList()) - ); - } - - // TODO: docs - public static StructType READ_SCHEMA = new StructType() - .add("fields", new ArrayType(StructField.READ_SCHEMA, false /* contains null */ )); - - //////////////////////////////////////////////////////////////////////////////// - // Instance Fields / Methods - //////////////////////////////////////////////////////////////////////////////// +/** + * Struct type which contains one or more columns. + */ +public final class StructType extends DataType +{ private final Map> nameToFieldAndOrdinal; private final List fields; private final List fieldNames; - public StructType() { + public StructType() + { this(new ArrayList<>()); } - public StructType(List fields) { + public StructType(List fields) + { this.fields = fields; this.fieldNames = fields.stream().map(f -> f.getName()).collect(Collectors.toList()); @@ -68,52 +52,60 @@ public StructType(List fields) { } } - public StructType add(StructField field) { + public StructType add(StructField field) + { final List fieldsCopy = new ArrayList<>(fields); fieldsCopy.add(field); return new StructType(fieldsCopy); } - public StructType add(String name, DataType dataType) { - return add(new StructField(name, dataType, true /* nullable */, - new HashMap())); + public StructType add(String name, DataType dataType) + { + return add(new StructField(name, dataType, true /* nullable */, new HashMap<>())); } - public StructType add(String name, DataType dataType, Map metadata) { + public StructType add(String name, DataType dataType, Map metadata) + { return add(new StructField(name, dataType, true /* nullable */, metadata)); } /** * @return array of fields */ - public List fields() { + public List fields() + { return Collections.unmodifiableList(fields); } /** * @return array of field names */ - public List fieldNames() { + public List fieldNames() + { return fieldNames; } /** * @return the number of fields */ - public int length() { + public int length() + { return fields.size(); } - public int indexOf(String fieldName) { + public int indexOf(String fieldName) + { return fieldNames.indexOf(fieldName); } - public StructField get(String fieldName) { + public StructField get(String fieldName) + { return nameToFieldAndOrdinal.get(fieldName)._1; } - public StructField at(int index) { + public StructField at(int index) + { return fields.get(index); } @@ -123,7 +115,8 @@ public StructField at(int index) { * @param ordinal the ordinal of the {@link StructField} to create a column for * @return a {@link Column} expression for the {@link StructField} with ordinal {@code ordinal} */ - public Column column(int ordinal) { + public Column column(int ordinal) + { final StructField field = at(ordinal); return new Column(ordinal, field.getName(), field.getDataType()); } @@ -134,14 +127,16 @@ public Column column(int ordinal) { * @param fieldName the name of the {@link StructField} to create a column for * @return a {@link Column} expression for the {@link StructField} with name {@code fieldName} */ - public Column column(String fieldName) { + public Column column(String fieldName) + { Tuple2 fieldAndOrdinal = nameToFieldAndOrdinal.get(fieldName); System.out.println("Created column " + fieldName + " with ordinal " + fieldAndOrdinal._2); return new Column(fieldAndOrdinal._2, fieldName, fieldAndOrdinal._1.getDataType()); } @Override - public String toString() { + public String toString() + { return String.format( "%s(%s)", getClass().getSimpleName(), @@ -149,11 +144,39 @@ public String toString() { ); } - /** - * @return a readable indented tree representation of this {@code StructType} - * and all of its nested elements - */ - public String treeString() { - return "TODO"; + @Override + public String toJson() + { + String fieldsAsJson = fields.stream() + .map(e -> e.toJson()) + .collect(Collectors.joining(",\n")); + + return String.format( + "{\n" + + " \"type\" : \"struct\",\n" + + " \"fields\" : [ %s ]\n" + + "}", + fieldsAsJson); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + StructType that = (StructType) o; + return nameToFieldAndOrdinal.equals(that.nameToFieldAndOrdinal) && + fields.equals(that.fields) && + fieldNames.equals(that.fieldNames); + } + + @Override + public int hashCode() + { + return Objects.hash(nameToFieldAndOrdinal, fields, fieldNames); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java new file mode 100644 index 00000000000..972bbd0be5c --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java @@ -0,0 +1,30 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.types; + +/** + * A date type, supporting "0001-01-01" through "9999-12-31". + * Internally, this is represented as the number of days from 1970-01-01. + */ +public class TimestampType extends BasePrimitiveType +{ + public static final TimestampType INSTANCE = new TimestampType(); + + private TimestampType() + { + super("timestamp"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java deleted file mode 100644 index 10ad336c2ca..00000000000 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (2023) The Delta Lake Project Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.delta.kernel.types; - -import io.delta.kernel.data.Row; - -/** - * TODO: this needs to be removed, for now ignore this. - */ -public class UnresolvedDataType extends DataType { - - public static final UnresolvedDataType INSTANCE = new UnresolvedDataType(); - - public static DataType fromRow(Row row, int ordinal) { - try { - // e.g. IntegerType -> {"name":"as_int","type":"integer","nullable":true,"metadata":{} - // e.g. LongType -> {"name":"as_long","type":"long","nullable":true,"metadata":{}} - final String typeName = row.getString(ordinal); - return DataType.createPrimitive(typeName); - } catch (RuntimeException ex) { - throw new RuntimeException("Failed to parse UnresolvedDataType"); - } - } - - private UnresolvedDataType() { } -} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java index a7162528078..16b57a3becc 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java @@ -17,8 +17,10 @@ package io.delta.kernel.utils; import java.io.IOException; +import java.util.Iterator; import io.delta.kernel.Scan; +import io.delta.kernel.client.TableClient; import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.Row; import io.delta.kernel.fs.FileStatus; @@ -56,6 +58,32 @@ public T next() { }; } + /** + * Convert a {@link Iterator} to {@link CloseableIterator}. Useful when passing normal iterators + * for arguments that require {@link CloseableIterator} type. + * @param iter {@link Iterator} instance + * @param Element type + * @return A {@link CloseableIterator} wrapping the given {@link Iterator} + */ + public static CloseableIterator toCloseableIterator(Iterator iter) { + return new CloseableIterator() { + @Override + public void close() { } + + @Override + public boolean hasNext() + { + return iter.hasNext(); + } + + @Override + public T next() + { + return iter.next(); + } + }; + } + /** * Utility method to create a singleton string {@link ColumnVector} * @@ -72,23 +100,21 @@ public DataType getDataType() } @Override - public int getSize() - { + public int getSize() { return 1; } @Override - public void close() {} + public void close() { + } @Override - public boolean isNullAt(int rowId) - { + public boolean isNullAt(int rowId) { return value == null; } @Override - public String getString(int rowId) - { + public String getString(int rowId) { if (rowId != 0) { throw new IllegalArgumentException("Invalid row id: " + rowId); } @@ -122,4 +148,16 @@ public static FileStatus getFileStatus(Row scanFileInfo) { return FileStatus.of(path, size, 0); } + + /** + * Close the iterator. + * @param i1 + */ + public static void safeClose(CloseableIterator i1) { + try { + i1.close(); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } } diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java new file mode 100644 index 00000000000..25e43141080 --- /dev/null +++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java @@ -0,0 +1,269 @@ +package io.delta.kernel.internal.types; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import io.delta.kernel.client.FileReadContext; +import io.delta.kernel.client.JsonHandler; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.FileDataReadResult; +import io.delta.kernel.data.Row; +import io.delta.kernel.expressions.Expression; +import io.delta.kernel.types.ArrayType; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.MapType; +import io.delta.kernel.types.MixedDataType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; +import io.delta.kernel.utils.Utils; + +/** + * Implementation of {@link JsonHandler} for testing Delta Kernel APIs + */ +public class JsonHandlerTestImpl + implements JsonHandler +{ + private final ObjectMapper objectMapper = new ObjectMapper(); + + @Override + public CloseableIterator contextualizeFileReads( + CloseableIterator fileIter, Expression predicate) + { + throw new UnsupportedOperationException("not yet implemented"); + } + + @Override + public ColumnarBatch parseJson(ColumnVector jsonStringVector, StructType outputSchema) + { + List rows = new ArrayList<>(); + for (int i = 0; i < jsonStringVector.getSize(); i++) { + final String json = jsonStringVector.getString(i); + try { + final JsonNode jsonNode = objectMapper.readTree(json); + rows.add(new TestJsonRow((ObjectNode) jsonNode, outputSchema)); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + return new ColumnarBatch() + { + @Override + public StructType getSchema() + { + return outputSchema; + } + + @Override + public ColumnVector getColumnVector(int ordinal) + { + throw new UnsupportedOperationException("not yet implemented"); + } + + @Override + public int getSize() + { + return rows.size(); + } + + @Override + public CloseableIterator getRows() + { + return Utils.toCloseableIterator(rows.iterator()); + } + }; + } + + @Override + public CloseableIterator readJsonFiles( + CloseableIterator fileIter, StructType physicalSchema) + { + throw new UnsupportedOperationException("not yet implemented"); + } + + private static class TestJsonRow implements Row + { + static void throwIfTypeMismatch(String expType, boolean hasExpType, JsonNode jsonNode) + { + if (!hasExpType) { + throw new RuntimeException( + String.format("Couldn't decode %s, expected a %s", jsonNode, expType)); + } + } + + private static Object decodeElement(JsonNode jsonValue, DataType dataType) + { + if (jsonValue.isNull()) { + return null; + } + + if (dataType.equals(MixedDataType.INSTANCE)) { + if (jsonValue.isTextual()) { + return jsonValue.textValue(); + } + else if (jsonValue instanceof ObjectNode) { + return jsonValue.toString(); + } + throwIfTypeMismatch("object or string", false, jsonValue); + } + + if (dataType instanceof BooleanType) { + throwIfTypeMismatch("boolean", jsonValue.isBoolean(), jsonValue); + return jsonValue.booleanValue(); + } + + if (dataType instanceof IntegerType) { + throwIfTypeMismatch("integer", jsonValue.isInt(), jsonValue); + return jsonValue.intValue(); + } + + if (dataType instanceof LongType) { + throwIfTypeMismatch("long", jsonValue.isLong(), jsonValue); + return jsonValue.numberValue().longValue(); + } + + if (dataType instanceof StringType) { + throwIfTypeMismatch("string", jsonValue.isTextual(), jsonValue); + return jsonValue.asText(); + } + + if (dataType instanceof StructType) { + throwIfTypeMismatch("object", jsonValue.isObject(), jsonValue); + return new TestJsonRow((ObjectNode) jsonValue, (StructType) dataType); + } + + if (dataType instanceof ArrayType) { + throwIfTypeMismatch("array", jsonValue.isArray(), jsonValue); + final ArrayType arrayType = ((ArrayType) dataType); + final ArrayNode jsonArray = (ArrayNode) jsonValue; + final List output = new ArrayList<>(); + + for (Iterator it = jsonArray.elements(); it.hasNext(); ) { + final JsonNode element = it.next(); + final Object parsedElement = decodeElement(element, arrayType.getElementType()); + output.add(parsedElement); + } + return output; + } + + if (dataType instanceof MapType) { + throwIfTypeMismatch("map", jsonValue.isObject(), jsonValue); + final MapType mapType = (MapType) dataType; + final Iterator> iter = jsonValue.fields(); + final Map output = new HashMap<>(); + + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + String keyParsed = entry.getKey(); + Object valueParsed = decodeElement(entry.getValue(), mapType.getValueType()); + output.put(keyParsed, valueParsed); + } + + return output; + } + + throw new UnsupportedOperationException( + String.format("Unsupported DataType %s for RootNode %s", dataType, jsonValue) + ); + } + + private static Object decodeField(ObjectNode rootNode, StructField field) + { + if (rootNode.get(field.getName()) == null) { + if (field.isNullable()) { + return null; + } + + throw new RuntimeException(String.format( + "Root node at key %s is null but field isn't nullable. Root node: %s", + field.getName(), + rootNode)); + } + + return decodeElement(rootNode.get(field.getName()), field.getDataType()); + } + + private final Object[] parsedValues; + private final StructType readSchema; + + public TestJsonRow(ObjectNode rootNode, StructType readSchema) + { + this.readSchema = readSchema; + this.parsedValues = new Object[readSchema.length()]; + + for (int i = 0; i < readSchema.length(); i++) { + final StructField field = readSchema.at(i); + final Object parsedValue = decodeField(rootNode, field); + parsedValues[i] = parsedValue; + } + } + + @Override + public StructType getSchema() + { + return readSchema; + } + + @Override + public boolean isNullAt(int ordinal) + { + return parsedValues[ordinal] == null; + } + + @Override + public boolean getBoolean(int ordinal) + { + return (boolean) parsedValues[ordinal]; + } + + @Override + public int getInt(int ordinal) + { + return (int) parsedValues[ordinal]; + } + + @Override + public long getLong(int ordinal) + { + return (long) parsedValues[ordinal]; + } + + @Override + public String getString(int ordinal) + { + return (String) parsedValues[ordinal]; + } + + @Override + public Row getStruct(int ordinal) + { + return (TestJsonRow) parsedValues[ordinal]; + } + + @Override + public List getArray(int ordinal) + { + return (List) parsedValues[ordinal]; + } + + @Override + public Map getMap(int ordinal) + { + return (Map) parsedValues[ordinal]; + } + } +} diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java new file mode 100644 index 00000000000..ffe82a30dae --- /dev/null +++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java @@ -0,0 +1,129 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.internal.types; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import static org.junit.Assert.assertEquals; +import org.junit.Test; + +import io.delta.kernel.types.ArrayType; +import io.delta.kernel.types.BasePrimitiveType; +import io.delta.kernel.types.BinaryType; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DecimalType; +import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.MapType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; + +import io.delta.kernel.internal.types.JsonHandlerTestImpl; +import io.delta.kernel.internal.types.TableSchemaSerDe; + +public class TestTableSchemaSerDe +{ + @Test + public void primitiveTypeRoundTrip() + { + List fieldList = new ArrayList<>(); + for (DataType dataType : BasePrimitiveType.getAllPrimitiveTypes()) { + fieldList.add(structField("col1" + dataType, dataType, true)); + fieldList.add(structField("col2" + dataType, dataType, false)); + fieldList.add(structField("col3" + dataType, dataType, false, sampleMetadata())); + } + + fieldList.add(structField("col1decimal", new DecimalType(30, 10), true)); + fieldList.add(structField("col2decimal", new DecimalType(38, 22), false)); + fieldList.add(structField("col3decimal", new DecimalType(5, 2), false, sampleMetadata())); + + StructType expSchem = new StructType(fieldList); + String serializedFormat = TableSchemaSerDe.toJson(expSchem); + StructType actSchema = + TableSchemaSerDe.fromJson(new JsonHandlerTestImpl(), serializedFormat); + + assertEquals(expSchem, actSchema); + } + + @Test + public void complexTypesRoundTrip() + { + List fieldList = new ArrayList<>(); + + ArrayType arrayType = array(IntegerType.INSTANCE, true); + ArrayType arrayArrayType = array(arrayType, false); + MapType mapType = map(FloatType.INSTANCE, BinaryType.INSTANCE, false); + MapType mapMapType = map(mapType, BinaryType.INSTANCE, true); + StructType structType = new StructType() + .add("simple", DateType.INSTANCE); + StructType structAllType = new StructType() + .add("prim", BooleanType.INSTANCE) + .add("arr", arrayType) + .add("map", mapType) + .add("struct", structType); + + fieldList.add(structField("col1", arrayType, true)); + fieldList.add(structField("col2", arrayArrayType, false)); + fieldList.add(structField("col3", mapType, false)); + fieldList.add(structField("col4", mapMapType, false)); + fieldList.add(structField("col5", structType, false)); + fieldList.add(structField("col6", structAllType, false)); + + StructType expSchem = new StructType(fieldList); + String serializedFormat = TableSchemaSerDe.toJson(expSchem); + StructType actSchema = + TableSchemaSerDe.fromJson(new JsonHandlerTestImpl(), serializedFormat); + + assertEquals(expSchem, actSchema); + } + + private StructField structField(String name, DataType type, boolean nullable) + { + return structField(name, type, nullable, Collections.emptyMap()); + } + + private StructField structField( + String name, + DataType type, + boolean nullable, + Map metadata) + { + return new StructField(name, type, nullable, metadata); + } + + private ArrayType array(DataType elemType, boolean containsNull) + { + return new ArrayType(elemType, containsNull); + } + + private MapType map(DataType keyType, DataType valueType, boolean valueContainsNull) + { + return new MapType(keyType, valueType, valueContainsNull); + } + + private Map sampleMetadata() + { + Map metadata = new HashMap<>(); + metadata.put("key1", "value1"); + metadata.put("key2", "value2"); + return metadata; + } +}