From a03ed32969198f20f79d5670fe7111b04c20f93c Mon Sep 17 00:00:00 2001 From: Allison Portis Date: Tue, 6 Jun 2023 14:30:38 -0700 Subject: [PATCH] [Delta Kernel] Add Delta Kernel java interfaces Adds the initial java interfaces for Delta Kernel #1783. Also adds javastyle checks and some javadoc settings. N/A. Only adds interfaces. Closes delta-io/delta#1808 --- kernel/build.sbt | 23 +- kernel/dev/checkstyle-suppressions.xml | 31 + kernel/dev/checkstyle.xml | 221 +++++++ .../kernel/InvalidExpressionException.java | 29 + .../src/main/java/io/delta/kernel/Scan.java | 107 ++++ .../java/io/delta/kernel/ScanBuilder.java | 55 ++ .../main/java/io/delta/kernel/Snapshot.java | 50 ++ .../src/main/java/io/delta/kernel/Table.java | 47 ++ .../delta/kernel/TableNotFoundException.java | 25 + .../kernel/client/ExpressionHandler.java | 40 ++ .../io/delta/kernel/client/FileHandler.java | 50 ++ .../delta/kernel/client/FileReadContext.java | 33 ++ .../delta/kernel/client/FileSystemClient.java | 43 ++ .../io/delta/kernel/client/JsonHandler.java | 67 +++ .../delta/kernel/client/ParquetHandler.java | 49 ++ .../io/delta/kernel/client/TableClient.java | 50 ++ .../io/delta/kernel/client/package-info.java | 22 + .../io/delta/kernel/data/ColumnVector.java | 131 +++++ .../io/delta/kernel/data/ColumnarBatch.java | 62 ++ .../io/delta/kernel/data/DataReadResult.java | 72 +++ .../delta/kernel/data/FileDataReadResult.java | 38 ++ .../main/java/io/delta/kernel/data/Row.java | 81 +++ .../io/delta/kernel/data/package-info.java | 20 + .../java/io/delta/kernel/expressions/And.java | 62 ++ .../kernel/expressions/BinaryComparison.java | 39 ++ .../kernel/expressions/BinaryExpression.java | 77 +++ .../kernel/expressions/BinaryOperator.java | 46 ++ .../kernel/expressions/CastingComparator.java | 64 ++ .../io/delta/kernel/expressions/Column.java | 114 ++++ .../io/delta/kernel/expressions/EqualTo.java | 33 ++ .../delta/kernel/expressions/Expression.java | 60 ++ .../expressions/ExpressionEvaluator.java | 39 ++ 
.../kernel/expressions/LeafExpression.java | 43 ++ .../io/delta/kernel/expressions/Literal.java | 124 ++++ .../delta/kernel/expressions/Predicate.java | 30 + .../kernel/expressions/package-info.java | 21 + .../java/io/delta/kernel/fs/FileStatus.java | 74 +++ .../main/java/io/delta/kernel/fs/Path.java | 555 ++++++++++++++++++ .../java/io/delta/kernel/fs/package-info.java | 20 + .../java/io/delta/kernel/package-info.java | 21 + .../java/io/delta/kernel/types/ArrayType.java | 37 ++ .../io/delta/kernel/types/BooleanType.java | 23 + .../java/io/delta/kernel/types/DataType.java | 58 ++ .../io/delta/kernel/types/IntegerType.java | 23 + .../java/io/delta/kernel/types/LongType.java | 23 + .../java/io/delta/kernel/types/MapType.java | 44 ++ .../io/delta/kernel/types/StringType.java | 23 + .../io/delta/kernel/types/StructField.java | 99 ++++ .../io/delta/kernel/types/StructType.java | 159 +++++ .../kernel/types/UnresolvedDataType.java | 40 ++ .../io/delta/kernel/types/package-info.java | 21 + .../delta/kernel/utils/CloseableIterator.java | 61 ++ .../java/io/delta/kernel/utils/Tuple2.java | 43 ++ .../java/io/delta/kernel/utils/Utils.java | 125 ++++ .../io/delta/kernel/utils/package-info.java | 20 + kernel/project/plugins.sbt | 5 +- 56 files changed, 3566 insertions(+), 6 deletions(-) create mode 100644 kernel/dev/checkstyle-suppressions.xml create mode 100644 kernel/dev/checkstyle.xml create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/InvalidExpressionException.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/ScanBuilder.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/Snapshot.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/Table.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/TableNotFoundException.java create mode 100644 
kernel/kernel-api/src/main/java/io/delta/kernel/client/ExpressionHandler.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/FileHandler.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/FileReadContext.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/FileSystemClient.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/JsonHandler.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/ParquetHandler.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/TableClient.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/client/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnarBatch.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/DataReadResult.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/FileDataReadResult.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/data/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryExpression.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/EqualTo.java create mode 100644 
kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Expression.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/ExpressionEvaluator.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/LeafExpression.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/expressions/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/fs/FileStatus.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/fs/Path.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/fs/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/types/package-info.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/utils/CloseableIterator.java create mode 100644 
kernel/kernel-api/src/main/java/io/delta/kernel/utils/Tuple2.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/utils/package-info.java diff --git a/kernel/build.sbt b/kernel/build.sbt index 67d7956195c..c58f1423699 100644 --- a/kernel/build.sbt +++ b/kernel/build.sbt @@ -27,10 +27,18 @@ lazy val commonSettings = Seq( Compile / compile / javacOptions ++= Seq("-target", "1.8", "-Xlint:unchecked"), // Configurations to speed up tests and reduce memory footprint Test / javaOptions += "-Xmx1024m", + + // Can be run explicitly via: build/sbt $module/checkstyle + // Will automatically be run during compilation (e.g. build/sbt compile) + // and during tests (e.g. build/sbt test) + checkstyleConfigLocation := CheckstyleConfigLocation.File("dev/checkstyle.xml"), + checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), + (checkstyle in Compile) := (checkstyle in Compile).triggeredBy(compile in Compile).value, + (checkstyle in Test) := (checkstyle in Test).triggeredBy(compile in Test).value ) -// TODO javastyle checkstyle tests -// TODO unidoc/javadoc settings +// TODO: after adding scala source files SBT will no longer automatically run javadoc instead of +// scaladoc lazy val kernelApi = (project in file("kernel-api")) .settings( @@ -38,7 +46,16 @@ lazy val kernelApi = (project in file("kernel-api")) commonSettings, scalaStyleSettings, releaseSettings, - libraryDependencies ++= Seq() + libraryDependencies ++= Seq(), + Compile / doc / javacOptions := Seq( + "-public", + "-windowtitle", "Delta Kernel API " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-noqualifier", "java.lang", + "-Xdoclint:all" + // TODO: exclude internal packages + ), + // Ensure doc is run with tests. 
Must be cleaned before test for docs to be generated + (Test / test) := ((Test / test) dependsOn (Compile / doc)).value ) val hadoopVersion = "3.3.1" diff --git a/kernel/dev/checkstyle-suppressions.xml b/kernel/dev/checkstyle-suppressions.xml new file mode 100644 index 00000000000..9445ced96e5 --- /dev/null +++ b/kernel/dev/checkstyle-suppressions.xml @@ -0,0 +1,31 @@ + + + + + + + + diff --git a/kernel/dev/checkstyle.xml b/kernel/dev/checkstyle.xml new file mode 100644 index 00000000000..88e7a7d7c52 --- /dev/null +++ b/kernel/dev/checkstyle.xml @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/InvalidExpressionException.java b/kernel/kernel-api/src/main/java/io/delta/kernel/InvalidExpressionException.java new file mode 100644 index 00000000000..a2188e4b90a --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/InvalidExpressionException.java @@ -0,0 +1,29 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.delta.kernel; + +import io.delta.kernel.expressions.Expression; + +/** + * Thrown when the given {@link Expression} is not valid. 
+ * TODO: we may need to divide this further into multiple exceptions. + */ +public class InvalidExpressionException + extends Exception +{ +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java b/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java new file mode 100644 index 00000000000..a430e5b1c57 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java @@ -0,0 +1,107 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel; + +import java.io.IOException; +import java.util.Optional; + +import io.delta.kernel.client.FileReadContext; +import io.delta.kernel.client.ParquetHandler; +import io.delta.kernel.client.TableClient; +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.DataReadResult; +import io.delta.kernel.data.FileDataReadResult; +import io.delta.kernel.data.Row; +import io.delta.kernel.expressions.Expression; +import io.delta.kernel.expressions.Literal; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; +import io.delta.kernel.utils.Utils; + +/** + * Represents a scan of a Delta table. + */ +public interface Scan { + /** + * Get an iterator of data files to scan. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. 
+ * @return iterator of {@link ColumnarBatch}s where each row in each batch corresponds to one + * scan file + */ + CloseableIterator getScanFiles(TableClient tableClient); + + /** + * Get the remaining filter that is not guaranteed to be satisfied for the data Delta Kernel + * returns. This filter is used by Delta Kernel to do data skipping when possible. + * + * @return the remaining filter as an {@link Expression}. + */ + Expression getRemainingFilter(); + + /** + * Get the scan state associated with the current scan. This state is common across all + * files in the scan to be read. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @return Scan state in {@link Row} format. + */ + Row getScanState(TableClient tableClient); + + /** + * Get the data from the given scan files using the connector provided {@link TableClient}. + * + * @param tableClient Connector provided {@link TableClient} implementation. + * @param scanState Scan state returned by {@link Scan#getScanState(TableClient)} + * @param scanFileRowIter an iterator of {@link Row}s. Each {@link Row} represents one scan file + * from the {@link ColumnarBatch} returned by + * {@link Scan#getScanFiles(TableClient)} + * @param filter An optional filter that can be used for data skipping while reading the + * scan files. + * @return Data read from the input scan files as an iterator of {@link DataReadResult}s. Each + * {@link DataReadResult} instance contains the data read and an optional selection + * vector that indicates data rows as valid or invalid. It is the responsibility of the + * caller to close this iterator. + * @throws IOException when error occurs while reading the data. 
+ */ + static CloseableIterator readData( + TableClient tableClient, + Row scanState, + CloseableIterator scanFileRowIter, + Optional filter) throws IOException { + + StructType readSchema = Utils.getPhysicalSchema(scanState); + + ParquetHandler parquetHandler = tableClient.getParquetHandler(); + + CloseableIterator filesReadContextsIter = + parquetHandler.contextualizeFileReads( + scanFileRowIter, + filter.orElse(Literal.TRUE)); + + CloseableIterator data = + parquetHandler.readParquetFiles(filesReadContextsIter, readSchema); + + // TODO: Attach the selection vector associated with the file + return data.map(fileDataReadResult -> + new DataReadResult( + fileDataReadResult.getData(), + Optional.empty() + ) + ); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/ScanBuilder.java b/kernel/kernel-api/src/main/java/io/delta/kernel/ScanBuilder.java new file mode 100644 index 00000000000..931c0c1f241 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/ScanBuilder.java @@ -0,0 +1,55 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel; + +import io.delta.kernel.client.TableClient; +import io.delta.kernel.expressions.Expression; +import io.delta.kernel.types.StructType; + +/** + * Builder to construct {@link Scan} object. 
+ */ +public interface ScanBuilder { + + /** + * Apply the given filter expression to prune any files that do not contain data satisfying + * the given filter. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @param filter an {@link Expression} which evaluates to boolean. + * @return A {@link ScanBuilder} with filter applied. + * + * @throws InvalidExpressionException if the filter is not valid. + */ + ScanBuilder withFilter(TableClient tableClient, Expression filter) + throws InvalidExpressionException; + + /** + * Apply the given readSchema. If the builder already has a projection applied, calling + * this again replaces the existing projection. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @param readSchema Subset of columns to read from the Delta table. + * @return A {@link ScanBuilder} with projection pruning. + */ + ScanBuilder withReadSchema(TableClient tableClient, StructType readSchema); + + /** + * @return Build the {@link Scan} instance + */ + Scan build(); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/Snapshot.java b/kernel/kernel-api/src/main/java/io/delta/kernel/Snapshot.java new file mode 100644 index 00000000000..418458941b6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/Snapshot.java @@ -0,0 +1,50 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel; + +import io.delta.kernel.client.TableClient; +import io.delta.kernel.types.StructType; + +/** + * Represents the snapshot of a Delta table. + */ +public interface Snapshot { + + /** + * Get the version of this snapshot in the table. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @return version of this snapshot in the Delta table + */ + long getVersion(TableClient tableClient); + + /** + * Get the schema of the table at this snapshot. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @return Schema of the Delta table at this snapshot. + */ + StructType getSchema(TableClient tableClient); + + /** + * Create a scan builder to construct a {@link Scan} to read data from this snapshot. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @return an instance of {@link ScanBuilder} + */ + ScanBuilder getScanBuilder(TableClient tableClient); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/Table.java b/kernel/kernel-api/src/main/java/io/delta/kernel/Table.java new file mode 100644 index 00000000000..6d5b7c6aa04 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/Table.java @@ -0,0 +1,47 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel; + +import io.delta.kernel.client.TableClient; + +/** + * Represents the Delta Lake table for a given path. + */ +public interface Table { + + /** + * Instantiate a table object for the Delta Lake table at the given path. + * + * @param path location where the Delta table is present. Path needs to be fully qualified. + * @return an instance of {@link Table} representing the Delta table at given path + * @throws TableNotFoundException when there is no Delta table at the given path. + */ + static Table forPath(String path) + throws TableNotFoundException + { + // TODO requires io.delta.kernel.internal.TableImpl + throw new UnsupportedOperationException("not implemented yet"); + } + + /** + * Get the latest snapshot of the table. + * + * @param tableClient {@link TableClient} instance to use in Delta Kernel. + * @return an instance of {@link Snapshot} + */ + Snapshot getLatestSnapshot(TableClient tableClient); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/TableNotFoundException.java b/kernel/kernel-api/src/main/java/io/delta/kernel/TableNotFoundException.java new file mode 100644 index 00000000000..3c009d40a9b --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/TableNotFoundException.java @@ -0,0 +1,25 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel; + +/** + * Thrown when there is no Delta table at the given location. + */ +public class TableNotFoundException + extends Exception +{ +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/ExpressionHandler.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/ExpressionHandler.java new file mode 100644 index 00000000000..6bbb54f07c4 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/ExpressionHandler.java @@ -0,0 +1,40 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import io.delta.kernel.expressions.Expression; +import io.delta.kernel.expressions.ExpressionEvaluator; +import io.delta.kernel.types.StructType; + +/** + * Provides expression evaluation capability to Delta Kernel. Delta Kernel can use this client + * to evaluate predicate on partition filters, fill up partition column values and any computation + * on data using {@link Expression}s. + */ +public interface ExpressionHandler +{ + /** + * Create an {@link ExpressionEvaluator} that can evaluate the given expression on + * {@link io.delta.kernel.data.ColumnarBatch}s with the given batchSchema. + * + * @param batchSchema Schema of the input data. + * @param expression Expression to evaluate. + * @return An {@link ExpressionEvaluator} instance bound to the given expression and + * batchSchema. 
+ */ + ExpressionEvaluator getEvaluator(StructType batchSchema, Expression expression); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileHandler.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileHandler.java new file mode 100644 index 00000000000..b50e9cd6155 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileHandler.java @@ -0,0 +1,50 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import io.delta.kernel.data.Row; +import io.delta.kernel.expressions.Expression; +import io.delta.kernel.fs.FileStatus; +import io.delta.kernel.utils.CloseableIterator; + +/** + * Provides file handling functionality to Delta Kernel. Connectors can implement this client to + * provide Delta Kernel their own custom implementation of file splitting, additional predicate + * pushdown or any other connector-specific capabilities. + */ +public interface FileHandler +{ + /** + * Associates a connector specific {@link FileReadContext} for each scan file represented by a + * {@link Row} in {@code fileIter}. Delta Kernel will supply the returned + * {@link FileReadContext}s back to the connector when reading the file (for example, in + * {@link ParquetHandler#readParquetFiles}). Delta Kernel does not interpret + * {@link FileReadContext}. 
+ * + * For example, a connector can attach split information in its own implementation + * of {@link FileReadContext} or attach any predicates. + * + * @param fileIter iterator of scan files where each {@link Row} contains {@link FileStatus} + * information + * @param predicate Predicate to prune data. This is optional for the connector to use for + * further optimization. Filtering by this predicate is not required. + * @return Iterator of {@link FileReadContext} to read data from. + */ + CloseableIterator contextualizeFileReads( + CloseableIterator fileIter, + Expression predicate); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileReadContext.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileReadContext.java new file mode 100644 index 00000000000..326a26351be --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileReadContext.java @@ -0,0 +1,33 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import io.delta.kernel.data.Row; + +/** + * Placeholder interface allowing connectors to attach their own custom implementation. Connectors + * can use this to pass additional context about a scan file through Delta Kernel and back to the + * connector for interpretation. + */ +public interface FileReadContext +{ + /** + * Get the scan file info associated with the read context. 
+ * @return scan file {@link Row} + */ + Row getScanFileRow(); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileSystemClient.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileSystemClient.java new file mode 100644 index 00000000000..cb6d16c34b6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/FileSystemClient.java @@ -0,0 +1,43 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import java.io.FileNotFoundException; + +import io.delta.kernel.fs.FileStatus; +import io.delta.kernel.utils.CloseableIterator; + +/** + * Provides file system related functionalities to Delta Kernel. Delta Kernel uses this client + * whenever it needs to access the underlying file system where the Delta table is present. + * Connector implementation of this interface can hide filesystem specific details from Delta + * Kernel. + */ +public interface FileSystemClient +{ + /** + * List the paths in the same directory that are lexicographically greater than or equal to + * (UTF-8 sorting) the given {@code filePath}. The result should also be sorted by the file name. + * + * @param filePath Fully qualified path to a file + * @return Closeable iterator of files. It is the responsibility of the caller to close the + * iterator. 
+ * @throws FileNotFoundException if the file at the given path is not found + */ + CloseableIterator listFrom(String filePath) + throws FileNotFoundException; +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/JsonHandler.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/JsonHandler.java new file mode 100644 index 00000000000..f5dda338b7e --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/JsonHandler.java @@ -0,0 +1,67 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import java.io.IOException; + +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.FileDataReadResult; +import io.delta.kernel.data.Row; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; + +/** + * Provides JSON handling functionality to Delta Kernel. Delta Kernel can use this client to + * parse JSON strings into {@link io.delta.kernel.data.Row} or read content from JSON files. + * Connectors can leverage this interface to provide their best implementation of the JSON parsing + * capability to + * Delta Kernel. + */ +public interface JsonHandler + extends FileHandler +{ + /** + * Parse the given json strings and return the fields requested by {@code outputSchema} + * as columns in a {@link ColumnarBatch}. 
+ * + * @param jsonStringVector String {@link ColumnVector} of valid JSON strings. + * @param outputSchema Schema of the data to return from the parsed JSON. If any requested + * fields are missing in the JSON string, a null is returned for that + * particular field in the returned {@link Row}. The type for each given + * field is expected to match the type in the JSON. + * @return a {@link ColumnarBatch} of schema {@code outputSchema} with one row for each entry + * in {@code jsonStringVector} + */ + ColumnarBatch parseJson(ColumnVector jsonStringVector, StructType outputSchema); + + /** + * Read and parse the JSON format files at the given locations and return the data as a + * {@link ColumnarBatch} with the columns requested by {@code physicalSchema}. + * + * @param fileIter Iterator of {@link FileReadContext} objects to read data from. + * @param physicalSchema Select list of columns to read from the JSON file. + * @return an iterator of {@link FileDataReadResult}s containing the data in columnar format + * and the corresponding scan file information. It is the responsibility of the caller + * to close the iterator. The data is returned in the same order as the files given + * in fileIter. + * @throws IOException if an error occurs during the read. + */ + CloseableIterator readJsonFiles( + CloseableIterator fileIter, + StructType physicalSchema) throws IOException; +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/ParquetHandler.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/ParquetHandler.java new file mode 100644 index 00000000000..1dd88f4dc33 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/ParquetHandler.java @@ -0,0 +1,49 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +import java.io.IOException; + +import io.delta.kernel.data.ColumnarBatch; +import io.delta.kernel.data.FileDataReadResult; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; + +/** + * Provides Parquet file related functionalities to Delta Kernel. Connectors can leverage this + * interface to provide their own custom implementation of Parquet data file functionalities to + * Delta Kernel. + */ +public interface ParquetHandler + extends FileHandler +{ + /** + * Read the Parquet format files at the given locations and return the data as a + * {@link ColumnarBatch} with the columns requested by {@code physicalSchema}. + * + * @param fileIter Iterator of {@link FileReadContext} objects to read data from. + * @param physicalSchema Select list of columns to read from the Parquet file. + * @return an iterator of {@link FileDataReadResult}s containing the data in columnar format + * and the corresponding scan file information. It is the responsibility of the caller + * to close the iterator. The data is returned in the same order as the files given + * in fileIter. + * @throws IOException if an error occurs during the read.
+ */ + CloseableIterator readParquetFiles( + CloseableIterator fileIter, + StructType physicalSchema) throws IOException; +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/TableClient.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/TableClient.java new file mode 100644 index 00000000000..93828cadc82 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/TableClient.java @@ -0,0 +1,50 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.client; + +/** + * Interface encapsulating all clients needed by the Delta Kernel in order to read the + * Delta table. Connectors are expected to pass an implementation of this interface when reading + * a Delta table. + */ +public interface TableClient +{ + + /** + * Get the connector provided {@link ExpressionHandler}. + * @return An implementation of {@link ExpressionHandler}. + */ + ExpressionHandler getExpressionHandler(); + + /** + * Get the connector provided {@link JsonHandler}. + * @return An implementation of {@link JsonHandler}. + */ + JsonHandler getJsonHandler(); + + /** + * Get the connector provided {@link FileSystemClient}. + * @return An implementation of {@link FileSystemClient}. + */ + FileSystemClient getFileSystemClient(); + + /** + * Get the connector provided {@link ParquetHandler}. + * @return An implementation of {@link ParquetHandler}. 
+ */ + ParquetHandler getParquetHandler(); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/client/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/client/package-info.java new file mode 100644 index 00000000000..98226959d87 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/client/package-info.java @@ -0,0 +1,22 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Interfaces to allow the connector to bring their own implementation of functions such + * as reading parquet files, listing files in a file system, parsing a JSON string etc. to Delta + * Kernel. + */ +package io.delta.kernel.client; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java new file mode 100644 index 00000000000..2cf07c19b9e --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java @@ -0,0 +1,131 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.data; + +import java.util.List; +import java.util.Map; + +import io.delta.kernel.types.DataType; + +/** + * Represents zero or more values of a single column. + */ +public interface ColumnVector extends AutoCloseable { + /** + * @return the data type of this column vector. + */ + DataType getDataType(); + + /** + * @return number of elements in the vector + */ + int getSize(); + + /** + * Cleans up memory for this column vector. The column vector is not usable after this. + */ + @Override + void close(); + + /** + * @return whether the value at {@code rowId} is NULL. + */ + boolean isNullAt(int rowId); + + /** + * Returns the boolean type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default boolean getBoolean(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + /** + * Returns the byte type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default byte getByte(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the short type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default short getShort(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the int type value for {@code rowId}. 
The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default int getInt(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the long type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default long getLong(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the float type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default float getFloat(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the double type value for {@code rowId}. The return value is undefined and can be + * anything, if the slot for {@code rowId} is null. + */ + default double getDouble(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the binary type value for {@code rowId}. If the slot for {@code rowId} is null, it + * should return null. + */ + default byte[] getBinary(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + /** + * Returns the string type value for {@code rowId}. If the slot for {@code rowId} is null, it + * should return null. 
+ */ + default String getString(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + default Map getMap(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + default Row getStruct(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } + + default List getArray(int rowId) { + throw new UnsupportedOperationException("Invalid value request for data type"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnarBatch.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnarBatch.java new file mode 100644 index 00000000000..6157cd796d9 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnarBatch.java @@ -0,0 +1,62 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.data; + +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; + +/** + * Represents zero or more rows of records with same schema type. + */ +public interface ColumnarBatch { + /** + * @return the schema of the data in this batch. + */ + StructType getSchema(); + + /** + * Return the {@link ColumnVector} for the given ordinal in the columnar batch. If the ordinal + * is not valid throws error. 
+ * @param ordinal the ordinal of the column to retrieve + * @return the {@link ColumnVector} for the given ordinal in the columnar batch + */ + ColumnVector getColumnVector(int ordinal); + + /** + * @return the number of rows/records in the columnar batch + */ + int getSize(); + + /** + * Return a slice of the current batch. + * + * @param start Starting record index to include in the returned columnar batch + * @param end Ending record index (exclusive) to include in the returned columnar batch + * @return a columnar batch containing the records between [start, end) + */ + default ColumnarBatch slice(int start, int end) { + throw new UnsupportedOperationException("Not yet implemented!"); + } + + /** + * @return iterator of {@link Row}s in this batch + */ + default CloseableIterator getRows() { + // TODO needs io.delta.kernel.internal.ColumnarBatchRow + throw new UnsupportedOperationException("Not yet implemented!"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/DataReadResult.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/DataReadResult.java new file mode 100644 index 00000000000..ecd2a7cadf8 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/DataReadResult.java @@ -0,0 +1,72 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.data; + +import java.util.Optional; + +/** + * Data read from Delta table file. 
Data is in {@link ColumnarBatch} format with an optional + * selection vector to select only a subset of rows for this columnar batch. + * + * The selection vector is of type boolean and has the same size as the data in the corresponding + * {@link ColumnarBatch}. For each row index, a value of true in the selection vector indicates + * the row at the same index in the data {@link ColumnarBatch} is valid; a value of false + * indicates the row should be ignored. If there is no selection vector then all the rows are valid. + */ +public class DataReadResult +{ + private final ColumnarBatch data; + private final Optional selectionVector; + + public DataReadResult(ColumnarBatch data, Optional selectionVector) + { + this.data = data; + this.selectionVector = selectionVector; + } + + /** + * Return the data as {@link ColumnarBatch}. Not all rows in the data are valid for this result. + * An optional selectionVector determines which rows are selected. If there is no + * selection vector that means all rows in this columnar batch are valid for this result. + * @return all the data read from the file + */ + public ColumnarBatch getData() { + return data; + } + + /** + * Optional selection vector containing one entry for each row in data indicating whether + * a row is selected or not selected. If there is no selection vector then all the rows are + * valid. + * @return an optional {@link ColumnVector} indicating which rows are valid + */ + public Optional getSelectionVector() + { + return selectionVector; + } + + /** + * Helper method to rewrite the data in this result by removing the rows that are not + * selected. + * @return A {@link ColumnarBatch} with only the selected rows according to the + * {@link #getSelectionVector()} + */ + public ColumnarBatch rewriteWithoutSelectionVector() { + return data; + // TODO: implement removing deleted rows. 
+ } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/FileDataReadResult.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/FileDataReadResult.java new file mode 100644 index 00000000000..a49dcca6cef --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/FileDataReadResult.java @@ -0,0 +1,38 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.data; + + +/** + * Data read from a Delta table file and the corresponding scan file information. + */ +public interface FileDataReadResult +{ + /** + * Get the data read from the file. + * @return Data in {@link ColumnarBatch} format. + */ + ColumnarBatch getData(); + + /** + * Get the scan file information of the file from which the data is read as a {@link Row}. This + * should be the same {@link Row} that Delta Kernel provided when reading/contextualizing + * the file. + * @return a scan file {@link Row} + */ + Row getScanFileRow(); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java new file mode 100644 index 00000000000..28854a18c3b --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/Row.java @@ -0,0 +1,81 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.data; + +import java.util.List; +import java.util.Map; + +import io.delta.kernel.types.StructType; + +/** + * Represents a single record. + */ +public interface Row { + + /** + * @return Schema of the record. + */ + StructType getSchema(); + + /** + * @param ordinal the ordinal of the column to check + * @return whether the column at {@code ordinal} is null + */ + boolean isNullAt(int ordinal); + + /** + * Return boolean value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of boolean type. + */ + boolean getBoolean(int ordinal); + + /** + * Return integer value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of integer type. + */ + int getInt(int ordinal); + + /** + * Return long value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of long type. + */ + long getLong(int ordinal); + + /** + * Return string value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of string type. + */ + String getString(int ordinal); + + /** + * Return struct value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of struct type. + */ + Row getRecord(int ordinal); + + /** + * Return array value of the column located at the given ordinal.
+ * Throws error if the column at given ordinal is not of array type. + */ + List getList(int ordinal); + + /** + * Return map value of the column located at the given ordinal. + * Throws error if the column at given ordinal is not of map type. + */ + Map getMap(int ordinal); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/package-info.java new file mode 100644 index 00000000000..813f0688ec0 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Delta Kernel interfaces for representing data in columnar and row format. + */ +package io.delta.kernel.data; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java new file mode 100644 index 00000000000..34bda9e26a7 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/And.java @@ -0,0 +1,62 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Collection; + +import io.delta.kernel.types.BooleanType; + +/** + * Evaluates logical {@code expr1} AND {@code expr2} for {@code new And(expr1, expr2)}. + *

+ * Requires both left and right input expressions to evaluate to booleans. + */ +public final class And extends BinaryOperator implements Predicate { + + public static And apply(Collection conjunctions) { + if (conjunctions.size() == 0) { + throw new IllegalArgumentException("And.apply must be called with at least 1 element"); + } + + return (And) conjunctions + .stream() + // we start off with And(true, true) + // then we get the 1st expression: And(And(true, true), expr1) + // then we get the 2nd expression: And(And(And(true, true), expr1), expr2) etc. + .reduce(new And(Literal.TRUE, Literal.TRUE), And::new); + } + + public And(Expression left, Expression right) { + super(left, right, "&&"); + if (!(left.dataType() instanceof BooleanType) || + !(right.dataType() instanceof BooleanType)) { + + throw new IllegalArgumentException( + String.format( + "'And' requires expressions of type boolean. Got %s and %s.", + left.dataType().typeName(), + right.dataType().typeName() + ) + ); + } + } + + @Override + public Object nullSafeEval(Object leftResult, Object rightResult) { + return (boolean) leftResult && (boolean) rightResult; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java new file mode 100644 index 00000000000..415f4cc64bd --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java @@ -0,0 +1,39 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Comparator; + +/** + * A {@link BinaryOperator} that compares the left and right {@link Expression}s and evaluates to a + * boolean value. + */ +public abstract class BinaryComparison extends BinaryOperator implements Predicate { + private final Comparator comparator; + + protected BinaryComparison(Expression left, Expression right, String symbol) { + super(left, right, symbol); + + // super asserted that left and right DataTypes were the same + + comparator = CastingComparator.forDataType(left.dataType()); + } + + protected int compare(Object leftResult, Object rightResult) { + return comparator.compare(leftResult, rightResult); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryExpression.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryExpression.java new file mode 100644 index 00000000000..82a67b328e1 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryExpression.java @@ -0,0 +1,77 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import io.delta.kernel.data.Row; + +/** + * An {@link Expression} with two inputs and one output. The output is by default evaluated to null + * if either input is evaluated to null. + */ +public abstract class BinaryExpression implements Expression { + protected final Expression left; + protected final Expression right; + + protected BinaryExpression(Expression left, Expression right) { + this.left = left; + this.right = right; + } + + public Expression getLeft() { + return left; + } + + public Expression getRight() { + return right; + } + + @Override + public final Object eval(Row row) { + Object leftResult = left.eval(row); + if (null == leftResult) return null; + + Object rightResult = right.eval(row); + if (null == rightResult) return null; + + return nullSafeEval(leftResult, rightResult); + } + + protected abstract Object nullSafeEval(Object leftResult, Object rightResult); + + @Override + public List children() { + return Arrays.asList(left, right); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BinaryExpression that = (BinaryExpression) o; + return Objects.equals(left, that.left) && + Objects.equals(right, that.right); + } + + @Override + public int hashCode() { + return Objects.hash(left, right); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java 
b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java new file mode 100644 index 00000000000..ccec858e612 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryOperator.java @@ -0,0 +1,46 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +/** + * A {@link BinaryExpression} that is an operator, meaning the string representation is + * {@code x symbol y}, rather than {@code funcName(x, y)}. + *

+ * Requires both inputs to be of the same data type. + */ +public abstract class BinaryOperator extends BinaryExpression { + protected final String symbol; + + protected BinaryOperator(Expression left, Expression right, String symbol) { + super(left, right); + this.symbol = symbol; + + if (!left.dataType().equivalent(right.dataType())) { + throw new IllegalArgumentException( + String.format( + "BinaryOperator left and right DataTypes must be the same. Found %s and %s.", + left.dataType().typeName(), + right.dataType().typeName()) + ); + } + } + + @Override + public String toString() { + return String.format("(%s %s %s)", left.toString(), symbol, right.toString()); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java new file mode 100644 index 00000000000..8d13647f696 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java @@ -0,0 +1,64 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.expressions; + +import java.util.Comparator; + +import io.delta.kernel.types.*; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.StringType; + +// TODO: exclude from public interfaces (move to "internal" somewhere?) +public class CastingComparator> implements Comparator { + + public static Comparator forDataType(DataType dataType) { + if (dataType instanceof IntegerType) { + return new CastingComparator(); + } + + if (dataType instanceof BooleanType) { + return new CastingComparator(); + } + + if (dataType instanceof LongType) { + return new CastingComparator(); + } + + if (dataType instanceof StringType) { + return new CastingComparator(); + } + + throw new IllegalArgumentException( + String.format("Unsupported DataType: %s", dataType.typeName()) + ); + } + + private final Comparator comparator; + + public CastingComparator() { + comparator = Comparator.naturalOrder(); + } + + @SuppressWarnings("unchecked") + @Override + public int compare(Object a, Object b) { + return comparator.compare((T) a, (T) b); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java new file mode 100644 index 00000000000..0af879bd573 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Column.java @@ -0,0 +1,114 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import io.delta.kernel.data.Row; +import io.delta.kernel.types.*; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructType; + +/** + * A column whose row-value will be computed based on the data in a {@link Row}. + *

+ * It is recommended that you instantiate using an existing table schema + * {@link StructType} with {@link StructType#column(int)}. + *

+ * Only supports primitive data types, see + * Delta Transaction Log Protocol: Primitive Types. + */ +public final class Column extends LeafExpression { + private final int ordinal; + private final String name; + private final DataType dataType; + private final RowEvaluator evaluator; + + /** + * Creates a column reference and picks the typed {@link Row} getter used to read its value. + * + * @param ordinal position of the column in the rows passed to {@link #eval(Row)} + * @param name name of the column, reported by {@link #name()} and {@link #references()} + * @param dataType data type of the column; {@link IntegerType}, {@link BooleanType}, + * {@link LongType} and {@link StringType} are accepted + * @throws UnsupportedOperationException if {@code dataType} is any other type + */ + public Column(int ordinal, String name, DataType dataType) { + this.ordinal = ordinal; + this.name = name; + this.dataType = dataType; + + if (dataType instanceof IntegerType) { + evaluator = (row -> row.getInt(ordinal)); + } else if (dataType instanceof BooleanType) { + evaluator = (row -> row.getBoolean(ordinal)); + } else if (dataType instanceof LongType) { + evaluator = (row -> row.getLong(ordinal)); + } else if (dataType instanceof StringType) { + evaluator = (row -> row.getString(ordinal)); + } else { + throw new UnsupportedOperationException( + String.format( + "The data type %s of column %s at ordinal %s is not supported", + dataType.typeName(), + name, + ordinal) + ); + } + } + + /** + * @return the name of this column. + */ + public String name() { + return name; + } + + /** + * @param row the input row to read from. + * @return {@code null} when the row is null at this column's ordinal, otherwise the value + * read with the getter that matches this column's data type. + */ + @Override + public Object eval(Row row) { + return row.isNullAt(ordinal) ? null : evaluator.nullSafeEval(row); + } + 
+ @Override + public DataType dataType() { + return dataType; + } + + @Override + public String toString() { + return "Column(" + name + ")"; + } + + /** + * @return a singleton set holding this column's name. + */ + @Override + public Set references() { + return Collections.singleton(name); + } + + /** + * Two columns are equal when their ordinal, name and data type are all equal. + */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Column column = (Column) o; + // ordinal is a primitive int: compare it directly instead of boxing through Objects.equals + return ordinal == column.ordinal && + Objects.equals(name, column.name) && + Objects.equals(dataType, column.dataType); + } + + @Override + public int hashCode() { + // hash exactly the fields that equals() compares + return Objects.hash(ordinal, name, dataType); + } + + /** + * Reads this column's value from a row that is known to be non-null at the column's ordinal. + */ + @FunctionalInterface + private interface RowEvaluator { + Object nullSafeEval(Row row); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/EqualTo.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/EqualTo.java new file mode 100644 index 00000000000..8f2076c56e5 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/EqualTo.java @@ -0,0 +1,33 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +/** + * Evaluates {@code expr1} = {@code expr2} for {@code new EqualTo(expr1, expr2)}. 
+ * The result is {@code true} exactly when {@code compare(leftResult, rightResult)} returns 0; + * {@code compare} is presumably inherited from {@link BinaryComparison} - confirm there. + */ +public final class EqualTo extends BinaryComparison implements Predicate { + + public EqualTo(Expression left, Expression right) { + super(left, right, "="); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) == 0; + } +} + diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Expression.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Expression.java new file mode 100644 index 00000000000..af4bc592ecc --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Expression.java @@ -0,0 +1,60 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import io.delta.kernel.data.Row; +import io.delta.kernel.types.DataType; + +/** + * Generic interface for all Expressions. + * An expression is evaluated against a {@link Row} with {@link #eval(Row)}, reports its result + * type with {@link #dataType()} and exposes its sub-expressions with {@link #children()}. + */ +public interface Expression { + + /** + * @param row the input row to evaluate. + * @return the result of evaluating this expression on the given input {@link Row}. + */ + Object eval(Row row); + + /** + * @return the {@link DataType} of the result of evaluating this expression. + */ + DataType dataType(); + + /** + * @return the String representation of this expression. 
+ */ + String toString(); + + /** + * @return a {@link List} of the immediate children of this node + */ + List children(); + + /** + * The default implementation collects, into a new {@link HashSet}, the references of every + * child returned by {@link #children()}. + * + * @return the names of columns referenced by this expression. + */ + default Set references() { + Set result = new HashSet<>(); + children().forEach(child -> result.addAll(child.references())); + return result; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/ExpressionEvaluator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/ExpressionEvaluator.java new file mode 100644 index 00000000000..18a55347628 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/ExpressionEvaluator.java @@ -0,0 +1,39 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.ColumnarBatch; + +/** + * Interface for implementing an {@link Expression} evaluator. + * It contains one {@link Expression} which can be evaluated on multiple {@link ColumnarBatch}es. + * Connectors can implement this interface to optimize the evaluation using the + * connector specific capabilities. + */ +public interface ExpressionEvaluator extends AutoCloseable +{ + /** + * Evaluate the expression on given {@link ColumnarBatch} data. + * + * @param input input data in columnar format. 
+ * @return Result of the expression as a {@link ColumnVector}. Contains one value for each + * row of the input. The data type of the output is the same as the output type of the + * expression this evaluator is using. + */ + ColumnVector eval(ColumnarBatch input); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/LeafExpression.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/LeafExpression.java new file mode 100644 index 00000000000..05f48b5c5a6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/LeafExpression.java @@ -0,0 +1,43 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + * An {@link Expression} with no children. 
+ * Supplies empty results for {@link #children()} and {@link #references()}, and re-declares + * {@code equals} and {@code hashCode} as abstract so that concrete subclasses must define them. + */ +public abstract class LeafExpression implements Expression { + + protected LeafExpression() {} + + @Override + public List children() { + return Collections.emptyList(); + } + + @Override + public Set references() { + return Collections.emptySet(); + } + + public abstract boolean equals(Object o); + + public abstract int hashCode(); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java new file mode 100644 index 00000000000..7710752e4f6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java @@ -0,0 +1,124 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import java.util.Objects; + +import io.delta.kernel.data.Row; +import io.delta.kernel.types.*; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.IntegerType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.StringType; + +/** + * A literal value. + *

+ * Only supports primitive data types, see + * Delta Transaction Log Protocol: Primitive Types. + * Instances are obtained through the {@code of(...)} factory methods; the constructor is private. + */ +public final class Literal extends LeafExpression { + + //////////////////////////////////////////////////////////////////////////////// + // Static Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + public static final Literal TRUE = of(true); + public static final Literal FALSE = of(false); + + /** + * Wraps an {@code int} in a {@link Literal}. + * @param value the integer to wrap + * @return a {@link Literal} whose data type is {@link IntegerType} + */ + public static Literal of(int value) { + final Object boxed = Integer.valueOf(value); + return new Literal(boxed, IntegerType.INSTANCE); + } + + /** + * Wraps a {@code boolean} in a {@link Literal}. + * @param value the boolean to wrap + * @return a {@link Literal} whose data type is {@link BooleanType} + */ + public static Literal of(boolean value) { + final Object boxed = Boolean.valueOf(value); + return new Literal(boxed, BooleanType.INSTANCE); + } + + /** + * Wraps a {@code long} in a {@link Literal}. + * @param value the long to wrap + * @return a {@link Literal} whose data type is {@link LongType} + */ + public static Literal of(long value) { + final Object boxed = Long.valueOf(value); + return new Literal(boxed, LongType.INSTANCE); + } + + /** + * Wraps a {@link String} in a {@link Literal}. + * @param value the string to wrap + * @return a {@link Literal} whose data type is {@link StringType} + */ + public static Literal of(String value) { + final Object wrapped = value; + return new Literal(wrapped, StringType.INSTANCE); + } + + //////////////////////////////////////////////////////////////////////////////// + // Instance Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + private final Object value; + private final DataType dataType; + + private Literal(Object value, DataType dataType) { + this.dataType = dataType; + this.value = value; + } + + /** + * @return the wrapped value. + */ + public Object value() { + return this.value; + } + + /** + * @return the wrapped value; the input row is not consulted. + */ + @Override + public Object eval(Row record) { + return this.value; + } + 
+ @Override + public DataType dataType() { + return this.dataType; + } + + @Override + public String toString() { + return Objects.toString(value); + } + + @Override + public boolean equals(Object o) { + if (o == this) { + return true; + } + if (o == null || o.getClass() != getClass()) { + return false; + } + final Literal other = (Literal) o; + if (!Objects.equals(value, other.value)) { + return false; + } + return Objects.equals(dataType, other.dataType); + } + + @Override + public int hashCode() { + // same value as Objects.hash(value, dataType), written out element by element + int result = 31 + Objects.hashCode(value); + return 31 * result + Objects.hashCode(dataType); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java new file mode 100644 index 00000000000..a26a01273f9 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java @@ -0,0 +1,30 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.expressions; + +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; + +/** + * An {@link Expression} that defines a relation on inputs. Evaluates to true, false, or null. 
+ * The default {@link #dataType()} implementation returns {@link BooleanType#INSTANCE}. + */ +public interface Predicate extends Expression { + @Override + default DataType dataType() { + return BooleanType.INSTANCE; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/package-info.java new file mode 100644 index 00000000000..d096df63b2a --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/package-info.java @@ -0,0 +1,21 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Expressions framework that defines the most common expressions which the connectors + * can use to pass predicates to Delta Kernel. + */ +package io.delta.kernel.expressions; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/fs/FileStatus.java b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/FileStatus.java new file mode 100644 index 00000000000..1695b87982c --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/FileStatus.java @@ -0,0 +1,74 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.fs; + +import java.util.Objects; + +/** + * Class for encapsulating metadata about a file in a Delta Lake table. + * Instances are created with {@link #of(String, long, long)}; the constructor is private. + * Only the path is null-checked; size and modification time are stored as given. + */ +public class FileStatus { + + private final String path; + private final long size; + private final long modificationTime; + + private FileStatus( + String path, + long size, + long modificationTime) { + this.path = Objects.requireNonNull(path, "path is null"); + this.size = size; // TODO: validation + this.modificationTime = modificationTime; // TODO: validation + } + + /** + * Get the path to the file. + * @return Fully qualified file path + */ + public String getPath() { + return path; + } + + /** + * Get the size of the file in bytes. + * @return File size in bytes. + */ + public long getSize() + { + return size; + } + + /** + * Get the modification time of the file in epoch millis. + * @return Modification time in epoch millis + */ + public long getModificationTime() + { + return modificationTime; + } + + /** + * Create a {@link FileStatus} with the given path, size and modification time. + * @param path Fully qualified file path. 
+ * @param size File size in bytes + * @param modificationTime Modification time of the file in epoch millis + * @return a new {@link FileStatus} holding the given values + * @throws NullPointerException if {@code path} is null + */ + public static FileStatus of(String path, long size, long modificationTime) { + return new FileStatus(path, size, modificationTime); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/fs/Path.java b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/Path.java new file mode 100644 index 00000000000..0bf69a267d6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/Path.java @@ -0,0 +1,555 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.fs; + +import java.io.InvalidObjectException; +import java.io.ObjectInputValidation; +import java.io.Serializable; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.regex.Pattern; + +/** + * Names a file or directory in a FileSystem. + * Path strings use slash as the directory separator. + * + * Taken from https://github.com/apache/hadoop/blob/branch-3.3.4/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java + * TODO: remove unused parts + */ +public class Path + implements Comparable, Serializable, ObjectInputValidation { + + /** + * The directory separator, a slash. + */ + public static final String SEPARATOR = "/"; + + /** + * The directory separator, a slash, as a character. 
+ */ + public static final char SEPARATOR_CHAR = '/'; + + /** + * The current directory, ".". + */ + public static final String CUR_DIR = "."; + + /** + * Whether the current host is a Windows machine. + */ + public static final boolean WINDOWS = + System.getProperty("os.name").startsWith("Windows"); + + /** + * Pre-compiled regular expressions to detect path formats. + */ + private static final Pattern HAS_DRIVE_LETTER_SPECIFIER = + Pattern.compile("^/?[a-zA-Z]:"); + + /** Pre-compiled regular expressions to detect duplicated slashes. */ + private static final Pattern SLASHES = Pattern.compile("/+"); + + private static final long serialVersionUID = 0xad00f; + + private URI uri; // a hierarchical uri + + /** + * Test whether this Path uses a scheme and is relative. + * Pathnames with scheme and relative path are illegal. + */ + void checkNotSchemeWithRelative() { + if (toUri().isAbsolute() && !isUriPathAbsolute()) { + throw new IllegalArgumentException( + "Unsupported name: has scheme but relative path-part"); + } + } + + void checkNotRelative() { + if (!isAbsolute() && toUri().getScheme() == null) { + throw new IllegalArgumentException("Path is relative"); + } + } + + /** + * Return a version of the given Path without the scheme information. + * + * @param path the source Path + * @return a copy of this Path without the scheme information; the given {@code path} itself + * when its URI path is not absolute + */ + public static Path getPathWithoutSchemeAndAuthority(Path path) { + // This code depends on Path.toString() to remove the leading slash before + // the drive specification on Windows. + Path newPath = path.isUriPathAbsolute() ? + new Path(null, null, path.toUri().getPath()) : + path; + return newPath; + } + + /** + * Create a new Path based on the child path resolved against the parent path. 
+ * + * @param parent the parent path + * @param child the child path + */ + public Path(String parent, String child) { + this(new Path(parent), new Path(child)); + } + + /** + * Create a new Path based on the child path resolved against the parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(Path parent, String child) { + this(parent, new Path(child)); + } + + /** + * Create a new Path based on the child path resolved against the parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(String parent, Path child) { + this(new Path(parent), child); + } + + /** + * Create a new Path based on the child path resolved against the parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(Path parent, Path child) { + // Add a slash to parent's path so resolution is compatible with URI's + URI parentUri = parent.uri; + String parentPath = parentUri.getPath(); + if (!(parentPath.equals("/") || parentPath.isEmpty())) { + try { + parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(), + parentUri.getPath()+"/", null, parentUri.getFragment()); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + URI resolved = parentUri.resolve(child.uri); + initialize(resolved.getScheme(), resolved.getAuthority(), + resolved.getPath(), resolved.getFragment()); + } + + private void checkPathArg( String path ) throws IllegalArgumentException { + // disallow construction of a Path from an empty string + if ( path == null ) { + throw new IllegalArgumentException( + "Can not create a Path from a null string"); + } + if( path.length() == 0 ) { + throw new IllegalArgumentException( + "Can not create a Path from an empty string"); + } + } + + /** + * Construct a path from a String. Path strings are URIs, but with + * unescaped elements and some additional normalization. 
+ * + * @param pathString the path string + */ + public Path(String pathString) throws IllegalArgumentException { + checkPathArg( pathString ); + + // We can't use 'new URI(String)' directly, since it assumes things are + // escaped, which we don't require of Paths. + + // add a slash in front of paths with Windows drive letters + if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') { + pathString = "/" + pathString; + } + + // parse uri components + String scheme = null; + String authority = null; + + int start = 0; + + // parse uri scheme, if any + int colon = pathString.indexOf(':'); + int slash = pathString.indexOf('/'); + if ((colon != -1) && + ((slash == -1) || (colon < slash))) { // has a scheme + scheme = pathString.substring(0, colon); + start = colon+1; + } + + // parse uri authority, if any + if (pathString.startsWith("//", start) && + (pathString.length()-start > 2)) { // has authority + int nextSlash = pathString.indexOf('/', start+2); + int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); + authority = pathString.substring(start+2, authEnd); + start = authEnd; + } + + // uri path is the rest of the string -- query & fragment not supported + String path = pathString.substring(start, pathString.length()); + + initialize(scheme, authority, path, null); + } + + /** + * Construct a path from a URI. The URI is stored in its normalized form. + * + * @param aUri the source URI + */ + public Path(URI aUri) { + uri = aUri.normalize(); + } + + /** + * Construct a Path from components. + * + * @param scheme the scheme + * @param authority the authority + * @param path the path + */ + public Path(String scheme, String authority, String path) { + checkPathArg( path ); + + // add a slash in front of paths with Windows drive letters + if (hasWindowsDrive(path) && path.charAt(0) != '/') { + path = "/" + path; + } + + // add "./" in front of Linux relative paths so that a path containing + // a colon e.g. "a:b" will not be interpreted as scheme "a". 
+ if (!WINDOWS && path.charAt(0) != '/') { + path = "./" + path; + } + + initialize(scheme, authority, path, null); + } + + private void initialize(String scheme, String authority, String path, + String fragment) { + try { + this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment) + .normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Merge 2 paths such that the second path is appended relative to the first. + * The returned path has the scheme and authority of the first path. On + * Windows, the drive specification in the second path is discarded. + * + * @param path1 the first path + * @param path2 the second path, to be appended relative to path1 + * @return the merged path + */ + public static Path mergePaths(Path path1, Path path2) { + String path2Str = path2.toUri().getPath(); + path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str)); + // Add path components explicitly, because simply concatenating two path + // string is not safe, for example: + // "/" + "/foo" yields "//foo", which will be parsed as authority in Path + return new Path(path1.toUri().getScheme(), + path1.toUri().getAuthority(), + path1.toUri().getPath() + path2Str); + } + + /** + * Normalize a path string to use non-duplicated forward slashes as + * the path separator and remove any trailing path separators. + * + * @param scheme the URI scheme. Used to deduce whether we + * should replace backslashes or not + * @param path the scheme-specific part + * @return the normalized path string + */ + private static String normalizePath(String scheme, String path) { + // Remove duplicated slashes. + path = SLASHES.matcher(path).replaceAll("/"); + + // Remove backslashes if this looks like a Windows path. Avoid + // the substitution if it looks like a non-local URI. 
+ if (WINDOWS && + (hasWindowsDrive(path) || + (scheme == null) || + (scheme.isEmpty()) || + (scheme.equals("file")))) { + path = path.replace("\\", "/"); + } + + // trim trailing slash from non-root path (ignoring windows drive) + int minLength = startPositionWithoutWindowsDrive(path) + 1; + if (path.length() > minLength && path.endsWith(SEPARATOR)) { + path = path.substring(0, path.length()-1); + } + + return path; + } + + private static boolean hasWindowsDrive(String path) { + return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find()); + } + + private static int startPositionWithoutWindowsDrive(String path) { + if (hasWindowsDrive(path)) { + return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2; + } else { + return 0; + } + } + + /** + * Determine whether a given path string represents an absolute path on + * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not. + * + * @param pathString the path string to evaluate + * @param slashed true if the given path is prefixed with "/"; not read by this + * implementation + * @return true if the supplied path looks like an absolute path with a Windows + * drive-specifier + */ + public static boolean isWindowsAbsolutePath(final String pathString, + final boolean slashed) { + int start = startPositionWithoutWindowsDrive(pathString); + return start > 0 + && pathString.length() > start + && ((pathString.charAt(start) == SEPARATOR_CHAR) || + (pathString.charAt(start) == '\\')); + } + + /** + * Convert this Path to a URI. + * + * @return this Path as a URI + */ + public URI toUri() { return uri; } + + /** + * Returns true if the path component (i.e. directory) of this URI is + * absolute and the scheme is null, and the authority + * is null. + * + * @return whether the path is absolute and the URI has no scheme nor + * authority parts + */ + public boolean isAbsoluteAndSchemeAuthorityNull() { + return (isUriPathAbsolute() && + uri.getScheme() == null && uri.getAuthority() == null); + } + + /** + * Returns true if the path component (i.e. 
directory) of this URI is + * absolute. + * + * @return whether this URI's path is absolute + */ + public boolean isUriPathAbsolute() { + int start = startPositionWithoutWindowsDrive(uri.getPath()); + return uri.getPath().startsWith(SEPARATOR, start); + } + + /** + * Returns true if the path component (i.e. directory) of this URI is + * absolute. This method is a wrapper for {@link #isUriPathAbsolute()}. + * + * @return whether this URI's path is absolute + */ + public boolean isAbsolute() { + return isUriPathAbsolute(); + } + + /** + * Returns true if and only if this path represents the root of a file system. + * + * @return true if and only if this path represents the root of a file system + */ + public boolean isRoot() { + return getParent() == null; + } + + /** + * Returns the final component of this path. + * + * @return the final component of this path + */ + public String getName() { + String path = uri.getPath(); + int slash = path.lastIndexOf(SEPARATOR); + return path.substring(slash+1); + } + + /** + * Returns the parent of a path or null if at root. + * @return the parent of a path or null if at root + */ + public Path getParent() { + String path = uri.getPath(); + int lastSlash = path.lastIndexOf('/'); + int start = startPositionWithoutWindowsDrive(path); + if ((path.length() == start) || // empty path + (lastSlash == start && path.length() == start+1)) { // at root + return null; + } + String parent; + if (lastSlash==-1) { + parent = CUR_DIR; + } else { + parent = path.substring(0, lastSlash==start?start+1:lastSlash); + } + return new Path(uri.getScheme(), uri.getAuthority(), parent); + } + + /** + * Adds a suffix to the final name in the path. 
+ * + * @param suffix the suffix to add + * @return a new path with the suffix added + */ + public Path suffix(String suffix) { + return new Path(getParent(), getName()+suffix); + } + + @Override + public String toString() { + // we can't use uri.toString(), which escapes everything, because we want + // illegal characters unescaped in the string, for glob processing, etc. + StringBuilder buffer = new StringBuilder(); + if (uri.getScheme() != null) { + buffer.append(uri.getScheme()) + .append(":"); + } + if (uri.getAuthority() != null) { + buffer.append("//") + .append(uri.getAuthority()); + } + if (uri.getPath() != null) { + String path = uri.getPath(); + if (path.indexOf('/')==0 && + hasWindowsDrive(path) && // has windows drive + uri.getScheme() == null && // but no scheme + uri.getAuthority() == null) { // or authority + path = path.substring(1); // remove slash before drive + } + buffer.append(path); + } + if (uri.getFragment() != null) { + buffer.append("#") + .append(uri.getFragment()); + } + return buffer.toString(); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Path)) { + return false; + } + Path that = (Path)o; + return this.uri.equals(that.uri); + } + + @Override + public int hashCode() { + return uri.hashCode(); + } + + @Override + public int compareTo(Path o) { + return this.uri.compareTo(o.uri); + } + + /** + * Returns the number of elements in this path. + * @return the number of elements in this path + */ + public int depth() { + String path = uri.getPath(); + int depth = 0; + int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0; + while (slash != -1) { + depth++; + slash = path.indexOf(SEPARATOR, slash+1); + } + return depth; + } + + /** + * Returns a qualified path object. 
+ * + * @param defaultUri if this path is missing the scheme or authority + * components, borrow them from this URI + * @param workingDir if this path isn't absolute, treat it as relative to this + * working directory + * @return this path if it contains a scheme and authority and is absolute, or + * a new path that includes a path and authority and is fully qualified + */ + public Path makeQualified(URI defaultUri, Path workingDir) { + Path path = this; + if (!isAbsolute()) { + path = new Path(workingDir, this); + } + + URI pathUri = path.toUri(); + + String scheme = pathUri.getScheme(); + String authority = pathUri.getAuthority(); + String fragment = pathUri.getFragment(); + + if (scheme != null && + (authority != null || defaultUri.getAuthority() == null)) { + return path; + } + + if (scheme == null) { + scheme = defaultUri.getScheme(); + } + + if (authority == null) { + authority = defaultUri.getAuthority(); + if (authority == null) { + authority = ""; + } + } + + URI newUri = null; + try { + newUri = new URI(scheme, authority , + normalizePath(scheme, pathUri.getPath()), null, fragment); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + return new Path(newUri); + } + + /** + * Validate the contents of a deserialized Path, so as + * to defend against malicious object streams. + * @throws InvalidObjectException if there's no URI + */ + @Override + public void validateObject() throws InvalidObjectException { + if (uri == null) { + throw new InvalidObjectException("No URI in deserialized Path"); + } + + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/fs/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/package-info.java new file mode 100644 index 00000000000..8767f5e64c7 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/fs/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities to represent files. + */ +package io.delta.kernel.fs; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/package-info.java new file mode 100644 index 00000000000..4aecea587ab --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/package-info.java @@ -0,0 +1,21 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Delta Kernel interfaces for constructing a table object representing a Delta Lake table, getting + * a snapshot from the table, and building a scan object to scan a subset of the data in the table.
+ */ +package io.delta.kernel; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java new file mode 100644 index 00000000000..a4ee9d28196 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ArrayType.java @@ -0,0 +1,37 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.types; + +public class ArrayType extends DataType { + public static ArrayType EMPTY_INSTANCE = new ArrayType(null, false); + + private final DataType elementType; + private final boolean containsNull; + + public ArrayType(DataType elementType, boolean containsNull) { + this.elementType = elementType; + this.containsNull = containsNull; + } + + public DataType getElementType() { + return elementType; + } + + public boolean containsNull() { + return containsNull; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java new file mode 100644 index 00000000000..745ad0e97a9 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java @@ -0,0 +1,23 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.types; + +public class BooleanType extends DataType { + public static final BooleanType INSTANCE = new BooleanType(); + + private BooleanType() { } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java new file mode 100644 index 00000000000..7f1223e00bf --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DataType.java @@ -0,0 +1,58 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +import java.util.Locale; + +public abstract class DataType { + + public static DataType createPrimitive(String typeName) { + if (typeName.equals(IntegerType.INSTANCE.typeName())) return IntegerType.INSTANCE; + if (typeName.equals(LongType.INSTANCE.typeName())) return LongType.INSTANCE; + if (typeName.equals(StringType.INSTANCE.typeName())) return StringType.INSTANCE; + if (typeName.equals(BooleanType.INSTANCE.typeName())) return BooleanType.INSTANCE; + + throw new IllegalArgumentException( + String.format("Can't create primitive for type type %s", typeName) + ); + } + + public String typeName() { + String name = this.getClass().getSimpleName(); + if (name.endsWith("Type")) { + name = name.substring(0, name.length() - 4); + } + return name.toLowerCase(Locale.ROOT); + } + public boolean equivalent(DataType dt) { + return this.equals(dt); + } + + @Override + public String toString() { + return typeName(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DataType that = (DataType) o; + return typeName().equals(that.typeName()); + } +} + diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java new file mode 100644 index 00000000000..f1802fd71b8 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java @@ -0,0 +1,23 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.types; + +public class IntegerType extends DataType { + public static final IntegerType INSTANCE = new IntegerType(); + + private IntegerType() { } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java new file mode 100644 index 00000000000..3bb545a313e --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java @@ -0,0 +1,23 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +public class LongType extends DataType { + public static final LongType INSTANCE = new LongType(); + + private LongType() { } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java new file mode 100644 index 00000000000..c6c9bd0bf8c --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MapType.java @@ -0,0 +1,44 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +public class MapType extends DataType { + + public static final MapType EMPTY_INSTANCE = new MapType(null, null, false); + + private final DataType keyType; + private final DataType valueType; + private final boolean valueContainsNull; + + public MapType(DataType keyType, DataType valueType, boolean valueContainsNull) { + this.keyType = keyType; + this.valueType = valueType; + this.valueContainsNull = valueContainsNull; + } + + public DataType getKeyType() { + return keyType; + } + + public DataType getValueType() { + return valueType; + } + + public boolean isValueContainsNull() { + return valueContainsNull; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java new file mode 100644 index 00000000000..9f8f6895745 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java @@ -0,0 +1,23 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +public class StringType extends DataType { + public static final StringType INSTANCE = new StringType(); + + private StringType() { } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java new file mode 100644 index 00000000000..d9d43f6c1a6 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java @@ -0,0 +1,99 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +import java.util.Map; + +import io.delta.kernel.data.Row; + +public class StructField { + + //////////////////////////////////////////////////////////////////////////////// + // Static Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + // TODO: docs + public static StructField fromRow(Row row) { + final String name = row.getString(0); + final DataType type = UnresolvedDataType.fromRow(row, 1); + final boolean nullable = row.getBoolean(2); + final Map metadata = row.getMap(3); + return new StructField(name, type, nullable, metadata); + } + + // TODO: docs + public static final StructType READ_SCHEMA = new StructType() + .add("name", StringType.INSTANCE) + .add("type", UnresolvedDataType.INSTANCE) + .add("nullable", BooleanType.INSTANCE) + .add("metadata", new MapType(StringType.INSTANCE, StringType.INSTANCE, false)); + + //////////////////////////////////////////////////////////////////////////////// + // Instance Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + private final String name; + private final DataType dataType; + private final boolean nullable; + private final Map metadata; + // private final FieldMetadata metadata; + + public StructField( + String name, + DataType dataType, + boolean nullable, + Map metadata) { + this.name = name; + this.dataType = dataType; + this.nullable = nullable; + this.metadata = metadata; + } + + /** + * @return the name of this field + */ + public String getName() { + return name; + } + + /** + * @return the data type of this field + */ + public DataType getDataType() { + return dataType; + } + + /** + * @return the metadata for this field + */ + public Map getMetadata() { + return metadata; + } + + /** + * @return whether this field allows to have a {@code null} value. 
+ */ + public boolean isNullable() { + return nullable; + } + + @Override + public String toString() { + return String.format("StructField(name=%s,type=%s,nullable=%s,metadata=%s)", + name, dataType, nullable, "empty(fix - this)"); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java new file mode 100644 index 00000000000..aca1f7b5dc0 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructType.java @@ -0,0 +1,159 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.kernel.types; + +import java.util.*; +import java.util.stream.Collectors; + +import io.delta.kernel.data.Row; +import io.delta.kernel.expressions.Column; +import io.delta.kernel.utils.Tuple2; + +public final class StructType extends DataType { + + //////////////////////////////////////////////////////////////////////////////// + // Static Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + public static StructType EMPTY_INSTANCE = new StructType(); + + // TODO: docs + public static StructType fromRow(Row row) { + final List fields = row.getList(0); + return new StructType( + fields + .stream() + .map(StructField::fromRow) + .collect(Collectors.toList()) + ); + } + + // TODO: docs + public static StructType READ_SCHEMA = new StructType() + .add("fields", new ArrayType(StructField.READ_SCHEMA, false /* contains null */ )); + + //////////////////////////////////////////////////////////////////////////////// + // Instance Fields / Methods + //////////////////////////////////////////////////////////////////////////////// + + private final Map> nameToFieldAndOrdinal; + private final List fields; + private final List fieldNames; + + public StructType() { + this(new ArrayList<>()); + } + + public StructType(List fields) { + this.fields = fields; + this.fieldNames = fields.stream().map(f -> f.getName()).collect(Collectors.toList()); + + this.nameToFieldAndOrdinal = new HashMap<>(); + for (int i = 0; i < fields.size(); i++) { + nameToFieldAndOrdinal.put(fields.get(i).getName(), new Tuple2<>(fields.get(i), i)); + } + } + + public StructType add(StructField field) { + final List fieldsCopy = new ArrayList<>(fields); + fieldsCopy.add(field); + + return new StructType(fieldsCopy); + } + + public StructType add(String name, DataType dataType) { + return add(new StructField(name, dataType, true /* nullable */, + new HashMap())); + } + + public StructType add(String name, DataType dataType, Map metadata) { 
+ return add(new StructField(name, dataType, true /* nullable */, metadata)); + } + + /** + * @return array of fields + */ + public List fields() { + return Collections.unmodifiableList(fields); + } + + /** + * @return array of field names + */ + public List fieldNames() { + return fieldNames; + } + + /** + * @return the number of fields + */ + public int length() { + return fields.size(); + } + + public int indexOf(String fieldName) { + return fieldNames.indexOf(fieldName); + } + + public StructField get(String fieldName) { + return nameToFieldAndOrdinal.get(fieldName)._1; + } + + public StructField at(int index) { + return fields.get(index); + } + + /** + * Creates a {@link Column} expression for the field at the given {@code ordinal} + * + * @param ordinal the ordinal of the {@link StructField} to create a column for + * @return a {@link Column} expression for the {@link StructField} with ordinal {@code ordinal} + */ + public Column column(int ordinal) { + final StructField field = at(ordinal); + return new Column(ordinal, field.getName(), field.getDataType()); + } + + /** + * Creates a {@link Column} expression for the field with the given {@code fieldName}. 
+ * + * @param fieldName the name of the {@link StructField} to create a column for + * @return a {@link Column} expression for the {@link StructField} with name {@code fieldName} + */ + public Column column(String fieldName) { + Tuple2 fieldAndOrdinal = nameToFieldAndOrdinal.get(fieldName); + System.out.println("Created column " + fieldName + " with ordinal " + fieldAndOrdinal._2); + return new Column(fieldAndOrdinal._2, fieldName, fieldAndOrdinal._1.getDataType()); + } + + @Override + public String toString() { + return String.format( + "%s(%s)", + getClass().getSimpleName(), + fields.stream().map(StructField::toString).collect(Collectors.joining(", ")) + ); + } + + /** + * @return a readable indented tree representation of this {@code StructType} + * and all of its nested elements + */ + public String treeString() { + return "TODO"; + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java new file mode 100644 index 00000000000..10ad336c2ca --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/UnresolvedDataType.java @@ -0,0 +1,40 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.types; + +import io.delta.kernel.data.Row; + +/** + * TODO: this needs to be removed, for now ignore this. 
+ */ +public class UnresolvedDataType extends DataType { + + public static final UnresolvedDataType INSTANCE = new UnresolvedDataType(); + + public static DataType fromRow(Row row, int ordinal) { + try { + // e.g. IntegerType -> {"name":"as_int","type":"integer","nullable":true,"metadata":{} + // e.g. LongType -> {"name":"as_long","type":"long","nullable":true,"metadata":{}} + final String typeName = row.getString(ordinal); + return DataType.createPrimitive(typeName); + } catch (RuntimeException ex) { + throw new RuntimeException("Failed to parse UnresolvedDataType"); + } + } + + private UnresolvedDataType() { } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/package-info.java new file mode 100644 index 00000000000..830064eb4a0 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/package-info.java @@ -0,0 +1,21 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Data types defined by the Delta Kernel to exchange the type info between the Delta Kernel and + * the connectors. 
/**
 * An {@link Iterator} that is also {@link Closeable}.
 *
 * @param <T> the type of the elements returned by this iterator
 */
public interface CloseableIterator<T> extends Iterator<T>, Closeable {

    /**
     * Returns a view of this iterator that applies {@code mapper} to every element as it is
     * consumed. The view shares state with this iterator: advancing, removing from or closing
     * the view does the same on this iterator.
     *
     * @param mapper function applied to each element
     * @param <U>    the element type of the returned iterator
     * @return an iterator over the mapped elements
     */
    default <U> CloseableIterator<U> map(Function<T, U> mapper) {
        final CloseableIterator<T> delegate = this;
        return new CloseableIterator<U>() {
            @Override
            public void remove() {
                delegate.remove();
            }

            @Override
            public void forEachRemaining(Consumer<? super U> action) {
                // Must go through the delegate: calling this.forEachRemaining(action) here
                // would recurse forever and end in a StackOverflowError.
                java.util.Objects.requireNonNull(action);
                delegate.forEachRemaining(item -> action.accept(mapper.apply(item)));
            }

            @Override
            public boolean hasNext() {
                return delegate.hasNext();
            }

            @Override
            public U next() {
                return mapper.apply(delegate.next());
            }

            @Override
            public void close() throws IOException {
                delegate.close();
            }
        };
    }
}
00000000000..0d8de078d4f --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Tuple2.java @@ -0,0 +1,43 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.utils; + +import java.util.Objects; + +public class Tuple2 { + + public final K _1; + public final V _2; + + public Tuple2(K _1, V _2){ + this._1 = _1; + this._2 = _2; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Tuple2 tuple2 = (Tuple2) o; + return Objects.equals(_1, tuple2._1) && Objects.equals(_2, tuple2._2); + } + + @Override + public int hashCode() { + return Objects.hash(_1, _2); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java new file mode 100644 index 00000000000..a7162528078 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java @@ -0,0 +1,125 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.kernel.utils; + +import java.io.IOException; + +import io.delta.kernel.Scan; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.Row; +import io.delta.kernel.fs.FileStatus; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructType; + +public class Utils { + /** + * Utility method to create a singleton {@link CloseableIterator}. + * + * @param elem Element to create iterator with. + * @param Element type. + * @return A {@link CloseableIterator} with just one element. + */ + public static CloseableIterator singletonCloseableIterator(T elem) { + return new CloseableIterator() { + private boolean accessed; + + @Override + public void close() throws IOException { + // nothing to close + } + + @Override + public boolean hasNext() { + return !accessed; + } + + @Override + public T next() { + accessed = true; + return elem; + } + }; + } + + /** + * Utility method to create a singleton string {@link ColumnVector} + * + * @param value the string element to create the vector with + * @return A {@link ColumnVector} with a single element {@code value} + */ + // TODO: add String to method name or make generic? 
+ public static ColumnVector singletonColumnVector(String value) { + return new ColumnVector() { + @Override + public DataType getDataType() + { + return StringType.INSTANCE; + } + + @Override + public int getSize() + { + return 1; + } + + @Override + public void close() {} + + @Override + public boolean isNullAt(int rowId) + { + return value == null; + } + + @Override + public String getString(int rowId) + { + if (rowId != 0) { + throw new IllegalArgumentException("Invalid row id: " + rowId); + } + return value; + } + }; + } + + /** + * Utility method to get the physical schema from the scan state {@link Row} returned by + * {@link Scan#getScanState(TableClient)}. + * + * @param scanState Scan state {@link Row} + * @return Physical schema to read from the data files. + */ + public static StructType getPhysicalSchema(Row scanState) { + // TODO needs io.delta.kernel.internal.data.ScanStateRow + throw new UnsupportedOperationException("not implemented yet"); + } + + /** + * Get the {@link FileStatus} from given scan file {@link Row}. The {@link FileStatus} contains + * file metadata about the scan file. + * + * @param scanFileInfo {@link Row} representing one scan file. + * @return a {@link FileStatus} object created from the given scan file row. + */ + public static FileStatus getFileStatus(Row scanFileInfo) { + String path = scanFileInfo.getString(0); + Long size = scanFileInfo.getLong(2); + + return FileStatus.of(path, size, 0); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/package-info.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/package-info.java new file mode 100644 index 00000000000..263b656fbf4 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/package-info.java @@ -0,0 +1,20 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utilities. + */ +package io.delta.kernel.utils; diff --git a/kernel/project/plugins.sbt b/kernel/project/plugins.sbt index 5f54199a54f..7f4c9acc2ad 100644 --- a/kernel/project/plugins.sbt +++ b/kernel/project/plugins.sbt @@ -26,9 +26,8 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.0.1") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.15") -// TODO: let's add java checkstyle in a separate PR -// addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") +addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") // By default, sbt-checkstyle-plugin uses checkstyle version 6.15, but we should set it to use the // same version as Spark -// dependencyOverrides += "com.puppycrawl.tools" % "checkstyle" % "8.43" +dependencyOverrides += "com.puppycrawl.tools" % "checkstyle" % "8.43"