diff --git a/clade/proto/schema.proto b/clade/proto/schema.proto index d58cbd09..5d726731 100644 --- a/clade/proto/schema.proto +++ b/clade/proto/schema.proto @@ -11,16 +11,18 @@ message TableObject { string name = 1; // Path within the provided storage location, if any string path = 2; - // URL of the root storage location - optional string location = 3; + // Storage location identifier + optional string store = 4; } // A single root storage location, hosting many individual tables message StorageLocation { - // URL of the root storage location + // URL of the storage location root string location = 1; // Connection options for the object store client map options = 2; + // Unique storage location identifier + string name = 3; } message ListSchemaRequest { diff --git a/src/catalog/metastore.rs b/src/catalog/metastore.rs index 622d4946..98786df4 100644 --- a/src/catalog/metastore.rs +++ b/src/catalog/metastore.rs @@ -26,6 +26,9 @@ use std::str::FromStr; use std::sync::Arc; use url::Url; +// Root URL for a storage location alongside client connection options +type LocationAndOptions = (String, HashMap); + // This is the main entrypoint to all individual catalogs for various objects types. // The intention is to make it extensible and de-coupled from the underlying metastore // persistence mechanism (such as the presently used `Repository`). @@ -97,7 +100,7 @@ impl Metastore { let store_options = catalog_schemas .stores .into_iter() - .map(|store| (store.location, store.options)) + .map(|store| (store.name, (store.location, store.options))) .collect(); // Turn the list of all collections, tables and their columns into a nested map. @@ -119,7 +122,7 @@ impl Metastore { async fn build_schema( &self, schema: SchemaObject, - store_options: &HashMap>, + store_options: &HashMap, ) -> CatalogResult<(Arc, Arc)> { let schema_name = schema.name; @@ -140,7 +143,7 @@ impl Metastore { async fn build_table( &self, table: TableObject, - store_options: &HashMap>, + store_options: &HashMap, ) -> CatalogResult<(Arc, Arc)> { // Build a delta table but don't load it yet; we'll do that only for tables that are // actually referenced in a statement, via the async `table` method of the schema provider. @@ -148,13 +151,13 @@ impl Metastore { // delta tables present in the database. The real fix for this is to make DF use `TableSource` // for the information schema, and then implement `TableSource` for `DeltaTable` in delta-rs. - let table_log_store = match table.location { + let table_log_store = match table.store { // Use the provided customized location - Some(location) => { - let this_store_options = store_options - .get(&location) + Some(name) => { + let (location, this_store_options) = store_options + .get(&name) .ok_or(CatalogError::Generic { - reason: format!("Object store for location {location} not found"), + reason: format!("Object store with name {name} not found"), })? .clone(); diff --git a/src/catalog/repository.rs b/src/catalog/repository.rs index 07595429..278c8d07 100644 --- a/src/catalog/repository.rs +++ b/src/catalog/repository.rs @@ -130,7 +130,7 @@ impl SchemaStore for RepositoryStore { Some(TableObject { name: name.clone(), path: uuid.to_string(), - location: None, + store: None, }) } else { None diff --git a/tests/fixtures.rs b/tests/fixtures.rs index f50c71c4..46832963 100644 --- a/tests/fixtures.rs +++ b/tests/fixtures.rs @@ -29,7 +29,7 @@ pub fn schemas() -> ListSchemaResponse { tables: vec![TableObject { name: "file".to_string(), path: "delta-0.8.0-partitioned".to_string(), - location: None, + store: None, }], }, SchemaObject { @@ -37,7 +37,7 @@ pub fn schemas() -> ListSchemaResponse { tables: vec![TableObject { name: "minio".to_string(), path: "test-data/delta-0.8.0-partitioned".to_string(), - location: Some("s3://seafowl-test-bucket".to_string()), + store: Some("minio".to_string()), }], }, SchemaObject { @@ -45,12 +45,13 @@ pub fn schemas() -> ListSchemaResponse { tables: vec![TableObject { name: "fake".to_string(), path: "delta-0.8.0-partitioned".to_string(), - location: Some("gs://test-data".to_string()), + store: Some("fake-gcs".to_string()), }], }, ], stores: vec![ StorageLocation { + name: "minio".to_string(), location: "s3://seafowl-test-bucket".to_string(), options: HashMap::from([ ( @@ -74,6 +75,7 @@ pub fn schemas() -> ListSchemaResponse { ]), }, StorageLocation { + name: "fake-gcs".to_string(), location: "gs://test-data".to_string(), options: HashMap::from([( GoogleConfigKey::ServiceAccount.as_ref().to_string(),