-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce row ID write table feature and table property
This change adds knobs to enable Row IDs when creating a new Delta table, as defined in the Row ID specification (#1610): - Write table feature `rowIds`: Require writers to support row IDs. Used to enable row IDs on newly created tables. - Table property: `rowIds.enabled`: Indicate whether all rows have an assigned row ID. - SQL conf `rowIds.allowForDevOnly`: restrict the use of Row IDs to testing for now. Adding test suite RowIdSuite: - Test enabling Row IDs on a new table succeeds - Test enabling Row IDs on an existing table fails. Closes #1702 GitOrigin-RevId: 961ff72f1ae7abf1f08d53052062ce20669d4aad
- Loading branch information
1 parent
c47445e
commit 8272ee9
Showing
7 changed files
with
238 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
core/src/main/scala/org/apache/spark/sql/delta/RowId.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.delta | ||
|
||
import org.apache.spark.sql.delta.actions.{Metadata, Protocol} | ||
import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.propertyKey | ||
import org.apache.spark.sql.delta.sources.DeltaSQLConf | ||
|
||
import org.apache.spark.sql.SparkSession | ||
|
||
/**
 * Collection of helpers to handle Row IDs.
 */
object RowId {

  /**
   * Returns whether Row IDs can be written to Delta tables and read from Delta tables. This acts as
   * a feature flag during development: every Row ID code path should be hidden behind this flag and
   * behave as if Row IDs didn't exist when this returns false to avoid leaking an incomplete
   * implementation.
   *
   * @param spark session whose SQL conf holds [[DeltaSQLConf.ROW_IDS_ALLOWED]].
   */
  def rowIdsAllowed(spark: SparkSession): Boolean = {
    spark.conf.get(DeltaSQLConf.ROW_IDS_ALLOWED)
  }

  /**
   * Returns whether the protocol version supports the Row ID table feature. Whenever Row IDs are
   * supported, fresh Row IDs must be assigned to all newly committed files, even when Row IDs are
   * disabled in the current table version.
   *
   * @param protocol protocol of the table version to inspect.
   */
  def rowIdsSupported(protocol: Protocol): Boolean = {
    protocol.isFeatureSupported(RowIdFeature)
  }

  /**
   * Returns whether Row IDs are enabled on this table version. Checks that Row IDs are supported,
   * which is a pre-requisite for enabling Row IDs, throws an error if not.
   *
   * @param protocol protocol of the table version to inspect.
   * @param metadata metadata carrying the [[DeltaConfigs.ROW_IDS_ENABLED]] table property.
   * @throws IllegalStateException if the table property is set while the protocol does not
   *                               support the Row ID table feature.
   */
  def rowIdsEnabled(protocol: Protocol, metadata: Metadata): Boolean = {
    val isEnabled = DeltaConfigs.ROW_IDS_ENABLED.fromMetaData(metadata)
    if (isEnabled && !rowIdsSupported(protocol)) {
      throw new IllegalStateException(
        s"Table property '${DeltaConfigs.ROW_IDS_ENABLED.key}' is set on the table but this " +
        s"table version doesn't support table feature '${propertyKey(RowIdFeature)}'.")
    }
    isEnabled
  }

  /**
   * Marks row IDs as readable if the row ID writer feature is enabled on a new table and
   * verifies that row IDs are only set as readable when a new table is created.
   *
   * @param spark session used to check the Row ID development feature flag.
   * @param protocol protocol that the table will have after the commit.
   * @param oldMetadata metadata of the table before the change.
   * @param newMetadata metadata proposed by the change.
   * @param isCreatingNewTable whether the change creates a new table.
   * @return `newMetadata` untouched when Row IDs are not allowed; otherwise the metadata to
   *         commit, with the Row ID table property set to "true" when a new table is created
   *         with the Row ID table feature supported.
   * @throws UnsupportedOperationException if the change would enable Row IDs on an existing table.
   * @throws IllegalStateException if the Row ID table property is set in the old or the resulting
   *                               metadata while `protocol` does not support the table feature.
   */
  private[delta] def verifyAndUpdateMetadata(
      spark: SparkSession,
      protocol: Protocol,
      oldMetadata: Metadata,
      newMetadata: Metadata,
      isCreatingNewTable: Boolean): Metadata = {
    if (!rowIdsAllowed(spark)) {
      // Feature flag is off: skip every Row ID check and leave the metadata unchanged.
      newMetadata
    } else {
      val latestMetadata = if (isCreatingNewTable && rowIdsSupported(protocol)) {
        val newConfig = newMetadata.configuration + (DeltaConfigs.ROW_IDS_ENABLED.key -> "true")
        newMetadata.copy(configuration = newConfig)
      } else {
        newMetadata
      }

      val rowIdsEnabledBefore = rowIdsEnabled(protocol, oldMetadata)
      val rowIdsEnabledAfter = rowIdsEnabled(protocol, latestMetadata)

      // Flipping the property from disabled to enabled is only accepted at table creation.
      if (rowIdsEnabledAfter && !rowIdsEnabledBefore && !isCreatingNewTable) {
        throw new UnsupportedOperationException(
          "Cannot enable Row IDs on an existing table.")
      }
      latestMetadata
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
core/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.delta.rowid | ||
|
||
import org.apache.spark.sql.delta.{DeltaLog, RowId} | ||
import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.TABLE_FEATURES_MIN_WRITER_VERSION | ||
|
||
import org.apache.spark.sql.QueryTest | ||
import org.apache.spark.sql.catalyst.TableIdentifier | ||
import org.apache.spark.sql.test.SharedSparkSession | ||
|
||
/**
 * Checks when the Row ID table property ends up enabled: it is set when a table is created with
 * the Row ID table feature, and it stays unset when the feature is added to an existing table.
 */
class RowIdSuite extends QueryTest
  with SharedSparkSession
  with RowIdTestUtils {

  test("Creating a new table with row ID table feature sets row IDs as readable") {
    withRowIdsEnabled(enabled = false) {
      withTable("tbl") {
        spark.range(10).write.format("delta")
          .option(rowIdFeatureName, "supported").saveAsTable("tbl")

        val log = DeltaLog.forTable(spark, TableIdentifier("tbl"))
        // Read protocol and metadata from one snapshot so both describe the same table version.
        val snapshot = log.update()
        assert(RowId.rowIdsEnabled(snapshot.protocol, snapshot.metadata))
      }
    }
  }

  test("Enabling row IDs on existing table does not set row IDs as readable") {
    withRowIdsEnabled(enabled = false) {
      withTable("tbl") {
        spark.range(10).write.format("delta")
          .saveAsTable("tbl")

        sql(
          s"""
             |ALTER TABLE tbl
             |SET TBLPROPERTIES (
             |'$rowIdFeatureName' = 'supported',
             |'delta.minWriterVersion' = $TABLE_FEATURES_MIN_WRITER_VERSION)""".stripMargin)

        val log = DeltaLog.forTable(spark, TableIdentifier("tbl"))
        // Read protocol and metadata from one snapshot so both describe the same table version.
        val snapshot = log.update()
        assert(RowId.rowIdsSupported(snapshot.protocol))
        assert(!RowId.rowIdsEnabled(snapshot.protocol, snapshot.metadata))
      }
    }
  }
}
47 changes: 47 additions & 0 deletions
47
core/src/test/scala/org/apache/spark/sql/delta/rowid/RowIdTestUtils.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.delta.rowid | ||
|
||
import scala.collection.mutable.ArrayBuffer | ||
|
||
import org.apache.spark.sql.delta.RowIdFeature | ||
import org.apache.spark.sql.delta.actions.TableFeatureProtocolUtils.{defaultPropertyKey, propertyKey} | ||
import org.apache.spark.sql.delta.sources.DeltaSQLConf | ||
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest | ||
|
||
import org.apache.spark.SparkConf | ||
import org.apache.spark.sql.QueryTest | ||
import org.apache.spark.sql.test.SharedSparkSession | ||
|
||
/**
 * Shared fixtures for Row ID test suites: exposes the Row ID table feature property keys and
 * turns on the Row ID development flag for the test session.
 */
trait RowIdTestUtils extends QueryTest
  with SharedSparkSession
  with DeltaSQLCommandTest {

  /** Property key for the Row ID table feature, as produced by `propertyKey`. */
  val rowIdFeatureName: String = propertyKey(RowIdFeature)

  /** Default property key for the Row ID table feature, as produced by `defaultPropertyKey`. */
  val defaultRowIdFeatureProperty: String = defaultPropertyKey(RowIdFeature)

  /** Adds the Row ID development flag, set to "true", on top of the inherited Spark conf. */
  override protected def sparkConf: SparkConf = {
    val baseConf = super.sparkConf
    baseConf.set(DeltaSQLConf.ROW_IDS_ALLOWED.key, "true")
  }

  /**
   * Runs `f` with the default Row ID table feature property set to "enabled" when `enabled` is
   * true, and with no extra SQL conf otherwise.
   *
   * @param enabled whether the default Row ID table feature property is set while `f` runs.
   * @param f test body to execute.
   */
  def withRowIdsEnabled(enabled: Boolean)(f: => Unit): Unit = {
    // Row ID code paths are gated by this flag regardless of `enabled`, so it must always be on
    // for the test session.
    assert(spark.conf.get(DeltaSQLConf.ROW_IDS_ALLOWED.key) == "true")
    val overrides: Seq[(String, String)] =
      if (enabled) {
        Seq(defaultRowIdFeatureProperty -> "enabled")
      } else {
        Nil
      }
    withSQLConf(overrides: _*)(f)
  }
}