Skip to content

Commit

Permalink
[SPARK-38236][SQL] Treat table location as absolute when the first le…
Browse files Browse the repository at this point in the history
…tter of its path is slash in create/alter table

After apache#28527, we change to create table under the database location when the table location is relative. However the criteria to determine if a table location is relative/absolute is `URI.isAbsolute`, which basically checks if the table location URI has a scheme defined. So table URIs like `/table/path` are treated as relative and the scheme and authority of the database location URI are used to create the table. For example, when the database location URI is `s3a://bucket/db`, the table will be created at `s3a://bucket/table/path`, while it should be created under the file system defined in `SessionCatalog.hadoopConf` instead.

This change fixes that by treating table location as absolute when the first letter of its path is slash.

This also applies to alter table.

This is to fix the behavior described above.

Yes. When users try to create/alter a table with a location that starts with a slash but without a scheme defined, the table will be created under/altered to the file system defined in `SessionCatalog.hadoopConf`, instead of the one defined in the database location URI.

Updated unit tests.

Closes apache#35462 from bozhang2820/spark-31709.

Authored-by: Bo Zhang <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
  • Loading branch information
bozhang2820 committed Feb 21, 2022
1 parent a963b35 commit 89d4cc1
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ abstract class AvroSuite

override protected def beforeAll(): Unit = {
super.beforeAll()
// initialize SessionCatalog here so it has a clean hadoopConf
spark.sessionState.catalog
spark.conf.set(SQLConf.FILES_MAX_PARTITION_BYTES.key, 1024)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite =>
.appName("MLlibUnitTest")
.getOrCreate()
sc = spark.sparkContext
// initialize SessionCatalog here so it has a clean hadoopConf
spark.sessionState.catalog

checkpointDir = Utils.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString
sc.setCheckpointDir(checkpointDir)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,8 @@ class SessionCatalog(
private def makeQualifiedTablePath(locationUri: URI, database: String): URI = {
if (locationUri.isAbsolute) {
locationUri
} else if (new Path(locationUri).isAbsolute) {
makeQualifiedPath(locationUri)
} else {
val dbName = formatDatabaseName(database)
val dbLocation = makeQualifiedDBPath(getDatabaseMetadata(dbName).locationUri)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ CREATE TABLE `default`.`tbl` (
`b` STRING,
`c` INT)
USING parquet
LOCATION 'file:/path/to/table'
LOCATION 'file:///path/to/table'


-- !query
Expand Down Expand Up @@ -110,7 +110,7 @@ CREATE TABLE `default`.`tbl` (
`b` STRING,
`c` INT)
USING parquet
LOCATION 'file:/path/to/table'
LOCATION 'file:///path/to/table'


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.scalatest.BeforeAndAfter
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, TableCatalog, TableChange, V1Table}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange, V1Table}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructField, StructType, TimestampType}
import org.apache.spark.sql.util.CaseInsensitiveStringMap
Expand Down Expand Up @@ -60,7 +60,8 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
super.beforeAll()
val catalog = newCatalog()
catalog.createNamespace(Array("db"), emptyProps)
catalog.createNamespace(Array("db2"), emptyProps)
catalog.createNamespace(Array("db2"),
Map(SupportsNamespaces.PROP_LOCATION -> "file:///db2.db").asJava)
catalog.createNamespace(Array("ns"), emptyProps)
catalog.createNamespace(Array("ns2"), emptyProps)
}
Expand Down Expand Up @@ -186,10 +187,17 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path"))
catalog.dropTable(testIdent)

// absolute path
// absolute path without scheme
properties.put(TableCatalog.PROP_LOCATION, "/absolute/path")
val t3 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table]
assert(t3.catalogTable.location.toString === "file:/absolute/path")
assert(t3.catalogTable.location.toString === "file:///absolute/path")
catalog.dropTable(testIdent)

// absolute path with scheme
properties.put(TableCatalog.PROP_LOCATION, "file:/absolute/path")
val t4 = catalog.createTable(testIdent, schema, Array.empty, properties).asInstanceOf[V1Table]
assert(t4.catalogTable.location.toString === "file:/absolute/path")
catalog.dropTable(testIdent)
}

test("tableExists") {
Expand Down Expand Up @@ -686,10 +694,15 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite {
TableChange.setProperty(TableCatalog.PROP_LOCATION, "relative/path")).asInstanceOf[V1Table]
assert(t2.catalogTable.location === makeQualifiedPathWithWarehouse("db.db/relative/path"))

// absolute path
// absolute path without scheme
val t3 = catalog.alterTable(testIdent,
TableChange.setProperty(TableCatalog.PROP_LOCATION, "/absolute/path")).asInstanceOf[V1Table]
assert(t3.catalogTable.location.toString === "file:/absolute/path")
assert(t3.catalogTable.location.toString === "file:///absolute/path")

// absolute path with scheme
val t4 = catalog.alterTable(testIdent, TableChange.setProperty(
TableCatalog.PROP_LOCATION, "file:/absolute/path")).asInstanceOf[V1Table]
assert(t4.catalogTable.location.toString === "file:/absolute/path")
}

test("dropTable") {
Expand Down

0 comments on commit 89d4cc1

Please sign in to comment.