From 56f35fa280c895469ebeb7bea1bdb12c24d6c5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:08:29 +0200 Subject: [PATCH 01/17] Implement crate object_store_factory --- Cargo.lock | 81 +++++++++-- Cargo.toml | 3 +- object_store_factory/Cargo.toml | 19 +++ object_store_factory/src/aws.rs | 225 +++++++++++++++++++++++++++++ object_store_factory/src/google.rs | 169 ++++++++++++++++++++++ object_store_factory/src/lib.rs | 70 +++++++++ object_store_factory/src/local.rs | 74 ++++++++++ object_store_factory/src/memory.rs | 17 +++ src/object_store/factory.rs | 4 + 9 files changed, 649 insertions(+), 13 deletions(-) create mode 100644 object_store_factory/Cargo.toml create mode 100644 object_store_factory/src/aws.rs create mode 100644 object_store_factory/src/google.rs create mode 100644 object_store_factory/src/lib.rs create mode 100644 object_store_factory/src/local.rs create mode 100644 object_store_factory/src/memory.rs diff --git a/Cargo.lock b/Cargo.lock index 3143bb76..92c7099c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1873,7 +1873,7 @@ dependencies = [ "itertools 0.12.1", "log", "num_cpus", - "object_store 0.10.1", + "object_store 0.10.2", "parking_lot", "parquet", "paste", @@ -1905,7 +1905,7 @@ dependencies = [ "instant", "libc", "num_cpus", - "object_store 0.10.1", + "object_store 0.10.2", "parquet", "sqlparser 0.47.0", ] @@ -1931,7 +1931,7 @@ dependencies = [ "futures", "hashbrown 0.14.5", "log", - "object_store 0.10.1", + "object_store 0.10.2", "parking_lot", "rand 0.8.5", "tempfile", @@ -2164,7 +2164,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-proto-common", - "object_store 0.10.1", + "object_store 0.10.2", "prost", ] @@ -2337,7 +2337,7 @@ dependencies = [ "num-bigint", "num-traits", "num_cpus", - "object_store 0.10.1", + "object_store 0.10.2", "once_cell", "parking_lot", "parquet", @@ -2465,6 +2465,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + [[package]] name = "either" version = "1.13.0" @@ -2721,6 +2727,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fragile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" + [[package]] name = "frunk" version = "0.4.2" @@ -4007,6 +4019,32 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mockall" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c28b3fb6d753d28c20e826cd46ee611fda1cf3cde03a443a974043247c065a" +dependencies = [ + "cfg-if", + "downcast", + "fragile", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "341014e7f530314e9a1fdbc7400b244efea7122662c96bfa248c31da5bfb2020" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "syn 2.0.70", +] + [[package]] name = "moka" version = "0.12.8" @@ -4345,9 +4383,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" +checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" dependencies = [ "async-trait", "base64 0.22.1", @@ -4356,7 +4394,7 @@ dependencies = [ "futures", "humantime", "hyper 1.4.1", - "itertools 0.12.1", + "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", @@ -4374,6 +4412,24 @@ dependencies = [ "walkdir", ] +[[package]] +name = "object_store_factory" +version = "0.1.0" +dependencies = [ + "async-trait", + "futures", + "mockall", + "object_store 0.10.2", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-log", + "tracing-subscriber", + "url", +] + [[package]] name = "once_cell" version = "1.19.0" @@ -4534,7 +4590,7 @@ dependencies = [ "lz4_flex", "num", "num-bigint", - "object_store 0.10.1", + "object_store 0.10.2", "paste", "seq-macro", "snap", @@ -5017,9 +5073,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.31.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +checksum = "4091e032efecb09d7b1f711f487b85ab925632a842627e3200fb088382cde32c" dependencies = [ "memchr", "serde", @@ -5902,7 +5958,8 @@ dependencies = [ "metrics", "metrics-exporter-prometheus", "moka", - "object_store 0.10.1", + "object_store 0.10.2", + "object_store_factory", "percent-encoding", "prost", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index f297c8ed..55a0c14f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["clade"] +members = ["clade", "object_store_factory"] [workspace.dependencies] arrow = { version = "52.0.0", features = ["test_utils"] } @@ -80,6 +80,7 @@ base64 = "0.21.0" bytes = "1.4.0" chrono = { version = "0.4", default-features = false } clade = { path = "clade" } +object_store_factory = { path = "object_store_factory" } clap = { version = "3.2.19", features = [ "derive" ] } config = "0.13.3" diff --git a/object_store_factory/Cargo.toml b/object_store_factory/Cargo.toml new file mode 100644 index 00000000..c3950bcc --- /dev/null +++ b/object_store_factory/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "object_store_factory" +version = "0.1.0" +edition = "2021" + +[dependencies] +async-trait = "0.1.64" +tokio = { version = "1", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +url = "2.5" + +tracing = { workspace = true } +tracing-log = "0.2" +tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] } +object_store = { version = "0.10.1", features = ["aws", "azure", "gcp"] } +mockall = "0.13.0" +futures = "0.3.30" +tempfile = "3.10.1" diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs new file mode 100644 index 00000000..1df5d5a8 --- /dev/null +++ b/object_store_factory/src/aws.rs @@ -0,0 +1,225 @@ +use object_store::{ + aws::resolve_bucket_region, aws::AmazonS3Builder, aws::AmazonS3ConfigKey, + ClientOptions, ObjectStore, +}; +use std::collections::HashMap; +use std::env; +use tracing::info; +use url::Url; + +pub struct Config { + pub region: Option, + pub access_key_id: Option, + pub secret_access_key: Option, + pub session_token: Option, + pub endpoint: Option, + pub bucket: String, + pub _prefix: Option, +} + +impl Config { + pub fn from_hashmap( + map: &HashMap, + ) -> Result { + Ok(Self { + region: map.get("region").map(|s| s.to_string()), + access_key_id: map.get("access_key_id").map(|s| s.to_string()), + secret_access_key: map.get("secret_access_key").map(|s| s.to_string()), + session_token: map.get("session_token").map(|s| s.to_string()), + endpoint: map.get("endpoint").map(|s| s.to_string()), + bucket: map.get("bucket").unwrap().clone(), + _prefix: map.get("prefix").map(|s| s.to_string()), + }) + } +} + +pub fn build_amazon_s3_from_config( + config: &Config, +) -> Result, object_store::Error> { + let mut builder = AmazonS3Builder::new() + .with_region(config.region.clone().unwrap_or_default()) + .with_bucket_name(config.bucket.clone()) + .with_allow_http(true); + + if let Some(endpoint) = &config.endpoint { + builder = builder.with_endpoint(endpoint.clone()); + } + + if let (Some(access_key_id), Some(secret_access_key)) = + (&config.access_key_id, &config.secret_access_key) + { + builder = builder + .with_access_key_id(access_key_id.clone()) + .with_secret_access_key(secret_access_key.clone()); + + if let Some(token) = &config.session_token { + builder = builder.with_token(token.clone()) + } + } else { + builder = builder.with_skip_signature(true) + } + + let store = builder.build()?; + Ok(Box::new(store)) +} + +pub async fn add_amazon_s3_specific_options( + url: &Url, + mut options: HashMap, +) -> HashMap { + if !options.contains_key(AmazonS3ConfigKey::Bucket.as_ref()) + && !options.contains_key(AmazonS3ConfigKey::Endpoint.as_ref()) + { + let region = detect_region(url).await.unwrap(); + options.insert("region".to_string(), region.to_string()); + } + options +} + +pub fn add_amazon_s3_environment_variables( + mut options: HashMap, +) -> HashMap { + let env_vars = &[ + ("AWS_ACCESS_KEY_ID", "access_key_id"), + ("AWS_SECRET_ACCESS_KEY", "secret_access_key"), + ("AWS_DEFAULT_REGION", "region"), + ("AWS_ENDPOINT", "endpoint"), + ("AWS_SESSION_TOKEN", "session_token"), + ("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "credentials_uri"), + ]; + + for &(env_var_name, config_key) in env_vars.iter() { + if let Ok(val) = env::var(env_var_name) { + options.insert(config_key.to_string(), val); + } + } + + if env::var("AWS_ALLOW_HTTP") == Ok("true".to_string()) { + options.insert("allow_http".to_string(), "true".to_string()); + } + + options +} + +// For "real" S3, if we don't have a region passed to us, we have to figure it out +// ourselves (note this won't work with HTTP paths that are actually S3, but those +// usually include the region already). +pub async fn detect_region(url: &Url) -> Result { + let bucket = url.host_str().ok_or(object_store::Error::Generic { + store: "parse_url", + source: format!("Could not find a bucket in S3 path {0}", url).into(), + })?; + + info!("Autodetecting region for bucket {}", bucket); + let region = resolve_bucket_region(bucket, &ClientOptions::new()).await?; + + info!("Using autodetected region {} for bucket {}", region, bucket); + + Ok(region) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn test_config_from_hashmap_with_all_fields() { + let mut map = HashMap::new(); + map.insert("region".to_string(), "us-west-2".to_string()); + map.insert("access_key_id".to_string(), "access_key".to_string()); + map.insert("secret_access_key".to_string(), "secret_key".to_string()); + map.insert("session_token".to_string(), "session_token".to_string()); + map.insert("endpoint".to_string(), "http://localhost:9000".to_string()); + map.insert("bucket".to_string(), "my-bucket".to_string()); + map.insert("prefix".to_string(), "my-prefix".to_string()); + + let config = + Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + assert_eq!(config.region, Some("us-west-2".to_string())); + assert_eq!(config.access_key_id, Some("access_key".to_string())); + assert_eq!(config.secret_access_key, Some("secret_key".to_string())); + assert_eq!(config.session_token, Some("session_token".to_string())); + assert_eq!(config.endpoint, Some("http://localhost:9000".to_string())); + assert_eq!(config.bucket, "my-bucket".to_string()); + assert_eq!(config._prefix, Some("my-prefix".to_string())); + } + + #[test] + fn test_config_from_hashmap_with_missing_optional_fields() { + let mut map = HashMap::new(); + map.insert("region".to_string(), "us-west-2".to_string()); + map.insert("bucket".to_string(), "my-bucket".to_string()); + + let config = + Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + assert_eq!(config.region, Some("us-west-2".to_string())); + assert!(config.access_key_id.is_none()); + assert!(config.secret_access_key.is_none()); + assert!(config.session_token.is_none()); + assert!(config.endpoint.is_none()); + assert_eq!(config.bucket, "my-bucket".to_string()); + assert!(config._prefix.is_none()); + } + + #[test] + #[should_panic(expected = "called `Option::unwrap()` on a `None` value")] + fn test_config_from_hashmap_without_required_fields() { + let map = HashMap::new(); + Config::from_hashmap(&map).unwrap(); // Missing "region" and "bucket" + } + + #[test] + fn test_build_amazon_s3_from_config_with_all_fields() { + let config = Config { + region: Some("us-west-2".to_string()), + access_key_id: Some("access_key".to_string()), + secret_access_key: Some("secret_key".to_string()), + session_token: Some("session_token".to_string()), + endpoint: Some("http://localhost:9000".to_string()), + bucket: "my-bucket".to_string(), + _prefix: Some("my-prefix".to_string()), + }; + + let result = build_amazon_s3_from_config(&config); + + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + + let store = result.unwrap(); + let debug_output = format!("{:?}", store); + + assert!(debug_output.contains("region: \"us-west-2\"")); + assert!(debug_output.contains("bucket: \"my-bucket\"")); + assert!(debug_output.contains("endpoint: Some(\"http://localhost:9000\")")); + assert!(debug_output.contains("key_id: \"access_key\"")); + assert!(debug_output.contains("secret_key: \"secret_key\"")); + assert!(debug_output.contains("token: Some(\"session_token\")")); + } + + #[test] + fn test_build_amazon_s3_from_config_with_missing_optional_fields() { + let config = Config { + region: Some("us-west-2".to_string()), + access_key_id: None, + secret_access_key: None, + session_token: None, + endpoint: None, + bucket: "my-bucket".to_string(), + _prefix: None, + }; + + let result = build_amazon_s3_from_config(&config); + + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + + let store = result.unwrap(); + let debug_output = format!("{:?}", store); + + assert!(debug_output.contains("region: \"us-west-2\"")); + assert!(debug_output.contains("bucket: \"my-bucket\"")); + assert!(debug_output.contains("endpoint: None")); + assert_eq!(debug_output.contains("key_id: \"\""), false); + assert_eq!(debug_output.contains("secret_key: \"\""), false); + assert_eq!(debug_output.contains("token: \"\""), false); + } +} diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs new file mode 100644 index 00000000..97420fcf --- /dev/null +++ b/object_store_factory/src/google.rs @@ -0,0 +1,169 @@ +use object_store::{gcp::GoogleCloudStorageBuilder, ObjectStore}; +use std::collections::HashMap; +use std::env; + +pub struct Config { + pub bucket: String, + pub _prefix: Option, + pub google_application_credentials: Option, +} + +impl Config { + pub fn from_hashmap( + map: &HashMap, + ) -> Result { + Ok(Self { + bucket: map.get("bucket").unwrap().clone(), + _prefix: map.get("prefix").map(|s| s.to_string()), + google_application_credentials: map + .get("google_application_credentials") + .map(|s| s.to_string()), + }) + } +} + +pub fn build_google_cloud_storage_from_config( + config: &Config, +) -> Result, object_store::Error> { + let mut builder: GoogleCloudStorageBuilder = + GoogleCloudStorageBuilder::new().with_bucket_name(&config.bucket.clone()); + + builder = if let Some(path) = &config.google_application_credentials { + builder.with_service_account_path(path.clone()) + } else { + builder + }; + + let store = builder.build()?; + Ok(Box::new(store)) +} + +pub fn add_google_cloud_storage_environment_variables( + mut options: HashMap, +) -> HashMap { + let env_vars = &[ + ("GOOGLE_SERVICE_ACCOUNT", "service_account_file"), + ("GOOGLE_SERVICE_ACCOUNT_PATH", "service_account_path"), + ("SERVICE_ACCOUNT", "service_account_file"), + ("GOOGLE_SERVICE_ACCOUNT_KEY", "service_account_key"), + ("GOOGLE_BUCKET", "bucket"), + ("GOOGLE_BUCKET_NAME", "bucket"), + ]; + + for &(env_var_name, config_key) in env_vars.iter() { + if let Ok(val) = env::var(env_var_name) { + options.insert(config_key.to_string(), val); + } + } + + options +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::fs; + use tempfile::NamedTempFile; + + #[test] + fn test_config_from_hashmap_with_all_fields() { + let mut map = HashMap::new(); + map.insert("bucket".to_string(), "my-bucket".to_string()); + map.insert("prefix".to_string(), "my-prefix".to_string()); + map.insert( + "google_application_credentials".to_string(), + "/path/to/credentials.json".to_string(), + ); + + let config = + Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + assert_eq!(config.bucket, "my-bucket"); + assert_eq!(config._prefix, Some("my-prefix".to_string())); + assert_eq!( + config.google_application_credentials, + Some("/path/to/credentials.json".to_string()) + ); + } + + #[test] + fn test_config_from_hashmap_with_missing_optional_fields() { + let mut map = HashMap::new(); + map.insert("bucket".to_string(), "my-bucket".to_string()); + + let config = + Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + assert_eq!(config.bucket, "my-bucket"); + assert!(config._prefix.is_none()); + assert!(config.google_application_credentials.is_none()); + } + + #[test] + #[should_panic(expected = "called `Option::unwrap()` on a `None` value")] + fn test_config_from_hashmap_without_bucket() { + let map = HashMap::new(); + Config::from_hashmap(&map).unwrap(); + } + + #[test] + fn test_build_google_cloud_storage_from_config_with_all_fields() { + // Create a temporary file for the credentials + let temp_file = NamedTempFile::new().expect("Failed to create temporary file"); + + // Write some mock credentials to the file + let credentials_content = r#"{ + "type": "service_account", + "project_id": "gcloud-123456", + "private_key_id": "123456", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCxjzFuu7kO+sfY\nXPq0EZo1Oth9YjCyrhIQr6XavJQyD/OT9gcd9Q5+/VvLwCXBijEgVdXFQf5Tcsh2\ndpp/hOjGuc7Lh9Kk+DtebUDZ9AIF92LvRX2yKJJ4a6zqV9iEqCfxAhSrwsYMLnp0\nGbxG0ACUR/VdLv8U2ctNDG4DL8jk6yYowABbsL/074GOFWtwW99w1BJb09+l0f2l\njIom15iY897W1gjOBskM7fsHm3WwlCwD/+4PPodp8PRIjvefnMwx7E0Lu6IcJ8Kg\n4Rhm1Rk5hJWKWEgQHmZ4ik4kc/FKdHRMGERkMY5VVYoZ6bUx7OdhF7Vt3HVZDA88\nsx9fbTBxAgMBAAECggEAAWSAHMA4KVfqLVY9WSAyN2yougMFIsGevqbCBD8qYmIh\npO1vDNsZLAHMsIJnSWdOD1TdAlkMJ5dk3xj7CTj/ol9esdX03vpbbNgqhAsX4PgZ\nvIqs+7K5w1wE1SmvNwsilQ9RHi++4eWTbEmvYlbLSl5uHDb8JSu4HniUfE3po3H5\nWDj01OMSe9dhaXrzhqOn2qo37XJ9xF1VCSkY3JRj3cY7W7crVE3UmDyYT+ZE1Tei\nyYhrZh1QDFeQVCFiHEP3RA1T/MYaFn1ylkwGcvgFvoB81vOJaVEXh1Xldwx/6KZC\nyrXBlnVqa//IuCtEE4zTl146G99kRdQFrAdqTadlSQKBgQDauQefH+zCpxTaO03E\nlzGoXr9mxo6Rzhim60e+uDgkCnDhElc3rqiuxFH6QNORa2/A/zvc7iHYZsu8QAvB\n776S9rrpxHoc1271fLqzMBR6gDkTzh/MjUJnsPNjnfehE2h6U8Zoeq755Xv9S85I\nuk9bIJzs5JH6xBEDxnIb/ier5wKBgQDP0i9jTb5TgrcqYYpjURsHGQRv+6lOaZrC\nD94vNDmhTLg3kW5b2BD0ZeZwGCwiSOSqL/5fjlRie94pPnIn6pm5uGgndgdRLQvw\nIdpRyvAUAOY7SnoLhZjVue4syzwV3k7+d4x7LrzpZclBH8uc3sLU3vOSsmFRIkf+\nfK9qcVv15wKBgQDL2fHRi/algQW9U9JqbKQakZwAVQThvd1aDSVECvxAEv8btnVV\nb1LF+DGTdUH6YdC5ZujLQ6KFx2ERZfvPV/wdixmv8LADG4LOB98WTLR5a/JGlDEs\n+2ctr01YxgzasnUItfXQwK8+N3U1Iab0P7jgbOf1Hh80QfK9uwH1Nw6QdwKBgCuP\nigFNpWxJxOzsPx6sPHcTZlu2q3lVJ2wv+Ul5r+7AbwiuwiwcMQmZZmDuoCmbj9qg\nbrhG1CdEgX+xqCn3wbstDR/gXI5GW+88mU91szbuLVQWO1i46x05eNQI0ZJf47zx\nABA97rkZbcLp0DsUclA+X13LaByii+aq6fXsxvLXAoGBALzkBzJ/SOvotz/UnBxl\nGU9QWmptZttaqtLKizPNQZpY1KO9VxeyoGbkTnN0M58ktpIp8LGlSJejk/tkRKBG\nUFRW/v49GW3eCgl4D+MOTFLCJDT68D2lp4F9hdBHsoH17ZdHy8rennmJN3QExIjx\n0xoq6OYjjzNwhFqkPl0H6HrM\n-----END PRIVATE KEY-----\n", + "client_email": "mail@gcloud-123456.iam.gserviceaccount.com", + "client_id": "102784232161964177687", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gcloud-acc%40gcloud-123456.iam.gserviceaccount.com" + }"#; + + fs::write(temp_file.path(), credentials_content) + .expect("Failed to write to temporary file"); + + let config = Config { + bucket: "my-bucket".to_string(), + _prefix: Some("my-prefix".to_string()), + google_application_credentials: Some( + temp_file.path().to_str().unwrap().to_string(), + ), + }; + + let result = build_google_cloud_storage_from_config(&config); + + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + + let store = result.unwrap(); + let debug_output = format!("{:?}", store); + + println!("{:?}", debug_output); + + // Check the configuration values + assert!(debug_output.contains("bucket_name: \"my-bucket\"")); + } + + #[test] + fn test_build_google_cloud_storage_from_config_with_missing_optional_fields() { + let config = Config { + bucket: "my-bucket".to_string(), + _prefix: None, + google_application_credentials: None, + }; + + let result = build_google_cloud_storage_from_config(&config); + + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + + let store = result.unwrap(); + let debug_output = format!("{:?}", store); + + // Check the configuration values + assert!(debug_output.contains("bucket_name: \"my-bucket\"")); + } +} diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs new file mode 100644 index 00000000..63b73ab0 --- /dev/null +++ b/object_store_factory/src/lib.rs @@ -0,0 +1,70 @@ +mod aws; +mod google; +mod local; +mod memory; + +use object_store::parse_url_opts; +use object_store::ObjectStore; +use object_store::ObjectStoreScheme; +use std::collections::HashMap; +use tracing::warn; +use url::Url; + +pub struct Config { + pub settings: HashMap, +} + +pub fn build_object_store_from_config( + scheme: ObjectStoreScheme, + config: HashMap, +) -> Result, object_store::Error> { + match scheme { + ObjectStoreScheme::Memory => memory::build_in_memory_storage(), + ObjectStoreScheme::Local => { + let file_config = local::Config::from_hashmap(&config)?; + local::build_local_storage(&file_config) + } + ObjectStoreScheme::AmazonS3 => { + let aws_config = aws::Config::from_hashmap(&config)?; + aws::build_amazon_s3_from_config(&aws_config) + } + ObjectStoreScheme::GoogleCloudStorage => { + let google_config = google::Config::from_hashmap(&config)?; + google::build_google_cloud_storage_from_config(&google_config) + } + _ => { + warn!("Unsupported scheme: {:?}", scheme); + return Err(object_store::Error::Generic { + store: "unsupported_url_scheme", + source: format!("Unsupported scheme: {:?}", scheme).into(), + }); + } + } +} + +pub async fn build_object_store_from_opts( + url: &Url, + mut options: HashMap, +) -> Result, object_store::Error> { + let (scheme, _) = ObjectStoreScheme::parse(&url).unwrap(); + + match scheme { + ObjectStoreScheme::AmazonS3 => { + options = aws::add_amazon_s3_specific_options(url, options).await; + options = aws::add_amazon_s3_environment_variables(options); + } + ObjectStoreScheme::GoogleCloudStorage => { + options = google::add_google_cloud_storage_environment_variables(options); + } + _ => { + warn!("Unsupported URL scheme: {}", url); + return Err(object_store::Error::Generic { + store: "unsupported_url_scheme", + source: format!("Unsupported URL scheme: {}", url).into(), + }); + } + } + + let store = parse_url_opts(&url, &options)?.0; + Ok(store) +} diff --git a/object_store_factory/src/local.rs b/object_store_factory/src/local.rs new file mode 100644 index 00000000..b33f73e8 --- /dev/null +++ b/object_store_factory/src/local.rs @@ -0,0 +1,74 @@ +use object_store::{local::LocalFileSystem, ObjectStore}; +use std::collections::HashMap; + +#[derive(Debug)] +pub struct Config { + pub data_dir: String, +} + +impl Config { + pub fn from_hashmap( + map: &HashMap, + ) -> Result { + Ok(Self { + data_dir: map.get("data_dir").unwrap().clone(), + }) + } +} + +pub fn build_local_storage( + config: &Config, +) -> Result, object_store::Error> { + let store = LocalFileSystem::new_with_prefix(config.data_dir.clone())?; + Ok(Box::new(store)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn test_config_from_hashmap_with_data_dir() { + let mut map = HashMap::new(); + map.insert("data_dir".to_string(), "/tmp/data".to_string()); + + let config = + Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + assert_eq!(config.data_dir, "/tmp/data".to_string()); + } + + #[test] + #[should_panic(expected = "called `Option::unwrap()` on a `None` value")] + fn test_config_from_hashmap_without_data_dir() { + let map = HashMap::new(); + // This test will panic because data_dir is missing, which causes unwrap() to panic. + Config::from_hashmap(&map).unwrap(); + } + + #[test] + fn test_build_local_storage_with_valid_config() { + let temp_dir = tempdir().expect("Failed to create temporary directory"); + let data_dir = temp_dir + .path() + .to_str() + .expect("Failed to convert path to string"); + + let config = Config { + data_dir: data_dir.to_string(), + }; + + let result = build_local_storage(&config); + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + } + + #[test] + fn test_build_local_storage_with_invalid_path() { + let config = Config { + data_dir: "".to_string(), + }; + + let result = build_local_storage(&config); + assert!(result.is_err(), "Expected Err due to invalid path, got Ok"); + } +} diff --git a/object_store_factory/src/memory.rs b/object_store_factory/src/memory.rs new file mode 100644 index 00000000..3f87e9b3 --- /dev/null +++ b/object_store_factory/src/memory.rs @@ -0,0 +1,17 @@ +use object_store::{memory::InMemory, ObjectStore}; + +pub fn build_in_memory_storage() -> Result, object_store::Error> { + let store = InMemory::new(); + Ok(Box::new(store)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_build_in_memory_storage() { + let result = build_in_memory_storage(); + assert!(result.is_ok(), "Expected Ok, got Err: {:?}", result); + } +} diff --git a/src/object_store/factory.rs b/src/object_store/factory.rs index e9faea5c..3f9bde7e 100644 --- a/src/object_store/factory.rs +++ b/src/object_store/factory.rs @@ -18,6 +18,7 @@ use object_store::{ prefix::PrefixStore, ClientOptions, ObjectStore, }; +use object_store_factory; use tracing::info; use url::Url; @@ -202,6 +203,9 @@ impl ObjectStoreFactory { used_options.insert("region".to_string(), region.to_string()); }; + let env_variables = + object_store_factory::parse_env_variables(&key.url); + used_options.extend(env_variables.into_iter()); let mut store = parse_url_opts(&key.url, &used_options)?.0.into(); if !(key.url.scheme() == "file" || key.url.scheme() == "memory") From 38e1f7f6d38aa1fc1e370fde06098aea1c4fc870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:09:16 +0200 Subject: [PATCH 02/17] Implement to_config() methods on ObjectStore schemas --- src/config/schema.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/config/schema.rs b/src/config/schema.rs index e4d01cb9..18ac03f2 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -65,6 +65,17 @@ pub enum ObjectStore { GCS(GCS), } +impl ObjectStore { + pub fn to_config(&self) -> HashMap { + match self { + ObjectStore::Local(local) => local.to_config(), + ObjectStore::InMemory(memory) => memory.to_config(), + ObjectStore::S3(s3) => s3.to_config(), + ObjectStore::GCS(gcs) => gcs.to_config(), + } + } +} + /// Build a default config file and struct pub fn build_default_config() -> (String, SeafowlConfig) { let dsn = Path::new(DEFAULT_DATA_DIR) @@ -119,9 +130,24 @@ pub struct Local { pub data_dir: String, } +impl Local { + pub fn to_config(&self) -> HashMap { + let mut config = HashMap::new(); + config.insert("data_dir".to_string(), self.data_dir.clone()); + config + } +} + #[derive(Deserialize, Debug, PartialEq, Eq, Clone)] pub struct InMemory {} +impl InMemory { + pub fn to_config(&self) -> HashMap { + // InMemory does not have any specific configurations. + HashMap::new() + } +} + #[derive(Deserialize, Debug, PartialEq, Eq, Clone)] pub struct S3 { pub region: Option, @@ -149,6 +175,30 @@ impl S3 { prefix: None, }) } + + pub fn to_config(&self) -> HashMap { + let mut config = HashMap::new(); + if let Some(region) = &self.region { + config.insert("region".to_string(), region.clone()); + } + if let Some(access_key_id) = &self.access_key_id { + config.insert("access_key_id".to_string(), access_key_id.clone()); + } + if let Some(secret_access_key) = &self.secret_access_key { + config.insert("secret_access_key".to_string(), secret_access_key.clone()); + } + if let Some(session_token) = &self.session_token { + config.insert("session_token".to_string(), session_token.clone()); + } + if let Some(endpoint) = &self.endpoint { + config.insert("endpoint".to_string(), endpoint.clone()); + } + config.insert("bucket".to_string(), self.bucket.clone()); + if let Some(prefix) = &self.prefix { + config.insert("prefix".to_string(), prefix.clone()); + } + config + } } #[derive(Deserialize, Debug, PartialEq, Eq, Clone)] @@ -170,6 +220,21 @@ impl GCS { .remove("format.google_application_credentials"), } } + + pub fn to_config(&self) -> HashMap { + let mut config = HashMap::new(); + config.insert("bucket".to_string(), self.bucket.clone()); + if let Some(prefix) = &self.prefix { + config.insert("prefix".to_string(), prefix.clone()); + } + if let Some(credentials) = &self.google_application_credentials { + config.insert( + "google_application_credentials".to_string(), + credentials.clone(), + ); + } + config + } } #[derive(Deserialize, Debug, PartialEq, Eq, Clone)] From d9a49f1091363b206c3cee79c653d94dfef5c9c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:09:36 +0200 Subject: [PATCH 03/17] Change factory to use new crate methods --- src/object_store/factory.rs | 132 ++++++------------------------------ 1 file changed, 21 insertions(+), 111 deletions(-) diff --git a/src/object_store/factory.rs b/src/object_store/factory.rs index 3f9bde7e..8e8f4f5f 100644 --- a/src/object_store/factory.rs +++ b/src/object_store/factory.rs @@ -9,20 +9,11 @@ use deltalake::{ storage::{FactoryRegistry, ObjectStoreRef, StorageOptions}, DeltaResult, DeltaTableError, Path, }; -use object_store::{ - aws::{resolve_bucket_region, AmazonS3Builder, AmazonS3ConfigKey}, - gcp::GoogleCloudStorageBuilder, - local::LocalFileSystem, - memory::InMemory, - parse_url_opts, - prefix::PrefixStore, - ClientOptions, ObjectStore, -}; +use object_store::{prefix::PrefixStore, ObjectStore, ObjectStoreScheme}; use object_store_factory; -use tracing::info; use url::Url; -use crate::config::schema::{self, ObjectCacheProperties, SeafowlConfig, GCS, S3}; +use crate::config::schema::{self, ObjectCacheProperties, SeafowlConfig}; use super::{cache::CachingObjectStore, wrapped::InternalObjectStore}; @@ -30,75 +21,22 @@ pub fn build_object_store( object_store_cfg: &schema::ObjectStore, cache_properties: &Option, ) -> Result, object_store::Error> { - Ok(match &object_store_cfg { - schema::ObjectStore::Local(schema::Local { data_dir }) => { - Arc::new(LocalFileSystem::new_with_prefix(data_dir)?) - } - schema::ObjectStore::InMemory(_) => Arc::new(InMemory::new()), + let scheme = match &object_store_cfg { + schema::ObjectStore::Local(_) => ObjectStoreScheme::Local, + schema::ObjectStore::InMemory(_) => ObjectStoreScheme::Memory, #[cfg(feature = "object-store-s3")] - schema::ObjectStore::S3(S3 { - region, - access_key_id, - secret_access_key, - session_token, - endpoint, - bucket, - .. - }) => { - let mut builder = AmazonS3Builder::new() - .with_region(region.clone().unwrap_or_default()) - .with_bucket_name(bucket) - .with_allow_http(true); - - if let Some(endpoint) = endpoint { - builder = builder.with_endpoint(endpoint); - } - - if let (Some(access_key_id), Some(secret_access_key)) = - (&access_key_id, &secret_access_key) - { - builder = builder - .with_access_key_id(access_key_id) - .with_secret_access_key(secret_access_key); - - if let Some(token) = session_token { - builder = builder.with_token(token) - } - } else { - builder = builder.with_skip_signature(true) - } - - let store = builder.build()?; - - if let Some(props) = cache_properties { - Arc::new(CachingObjectStore::new_from_config(props, Arc::new(store))) - } else { - Arc::new(store) - } - } + schema::ObjectStore::S3(_) => ObjectStoreScheme::AmazonS3, #[cfg(feature = "object-store-gcs")] - schema::ObjectStore::GCS(GCS { - bucket, - google_application_credentials, - .. - }) => { - let gcs_builder: GoogleCloudStorageBuilder = - GoogleCloudStorageBuilder::new().with_bucket_name(bucket); - - let gcs_builder = if let Some(path) = google_application_credentials { - gcs_builder.with_service_account_path(path) - } else { - gcs_builder - }; + schema::ObjectStore::GCS(_) => ObjectStoreScheme::GoogleCloudStorage, + }; - let store = gcs_builder.build()?; + let config = object_store_cfg.to_config(); + let store = object_store_factory::build_object_store_from_config(scheme, config)?; - if let Some(props) = cache_properties { - Arc::new(CachingObjectStore::new_from_config(props, Arc::new(store))) - } else { - Arc::new(store) - } - } + Ok(if let Some(props) = cache_properties { + Arc::new(CachingObjectStore::new_from_config(props, Arc::new(store))) + } else { + Arc::new(store) }) } @@ -164,7 +102,7 @@ impl ObjectStoreFactory { table_path: String, ) -> Result, object_store::Error> { let store = { - let mut used_options = options.clone(); + let used_options = options.clone(); let key = StoreCacheKey { url: url.clone(), options, @@ -173,40 +111,12 @@ impl ObjectStoreFactory { match self.custom_stores.get_mut(&key) { Some(store) => store.clone(), None => { - if (key.url.scheme() == "s3" || key.url.scheme() == "s3a") - && !used_options.contains_key(AmazonS3ConfigKey::Bucket.as_ref()) - && !used_options - .contains_key(AmazonS3ConfigKey::Endpoint.as_ref()) - { - // For "real" S3, if we don't have a region passed to us, we have to figure it out - // ourselves (note this won't work with HTTP paths that are actually S3, but those - // usually include the region already). - - let bucket = - key.url.host_str().ok_or(object_store::Error::Generic { - store: "parse_url", - source: format!( - "Could not find a bucket in S3 path {0}", - key.url - ) - .into(), - })?; - - info!("Autodetecting region for bucket {}", bucket); - let region = - resolve_bucket_region(bucket, &ClientOptions::new()).await?; - info!( - "Using autodetected region {} for bucket {}", - region, bucket - ); - - used_options.insert("region".to_string(), region.to_string()); - }; - - let env_variables = - object_store_factory::parse_env_variables(&key.url); - used_options.extend(env_variables.into_iter()); - let mut store = parse_url_opts(&key.url, &used_options)?.0.into(); + let mut store = object_store_factory::build_object_store_from_opts( + &url, + used_options, + ) + .await? + .into(); if !(key.url.scheme() == "file" || key.url.scheme() == "memory") && let Some(ref cache) = self.object_store_cache From 1c9f72a2e7afdf5082da0185d7d8aeca0781a69d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:22:17 +0200 Subject: [PATCH 04/17] Clean up --- Cargo.lock | 42 --------------------------------- object_store_factory/Cargo.toml | 12 ++++------ object_store_factory/src/lib.rs | 31 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 92c7099c..5a1f07de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2465,12 +2465,6 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" -[[package]] -name = "downcast" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" - [[package]] name = "either" version = "1.13.0" @@ -2727,12 +2721,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fragile" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" - [[package]] name = "frunk" version = "0.4.2" @@ -4019,32 +4007,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "mockall" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c28b3fb6d753d28c20e826cd46ee611fda1cf3cde03a443a974043247c065a" -dependencies = [ - "cfg-if", - "downcast", - "fragile", - "mockall_derive", - "predicates", - "predicates-tree", -] - -[[package]] -name = "mockall_derive" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "341014e7f530314e9a1fdbc7400b244efea7122662c96bfa248c31da5bfb2020" -dependencies = [ - "cfg-if", - "proc-macro2", - "quote", - "syn 2.0.70", -] - [[package]] name = "moka" version = "0.12.8" @@ -4418,12 +4380,8 @@ version = "0.1.0" dependencies = [ "async-trait", "futures", - "mockall", "object_store 0.10.2", - "serde", - "serde_json", "tempfile", - "tokio", "tracing", "tracing-log", "tracing-subscriber", diff --git a/object_store_factory/Cargo.toml b/object_store_factory/Cargo.toml index c3950bcc..89bbd593 100644 --- a/object_store_factory/Cargo.toml +++ b/object_store_factory/Cargo.toml @@ -5,15 +5,11 @@ edition = "2021" [dependencies] async-trait = "0.1.64" -tokio = { version = "1", features = ["full"] } -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -url = "2.5" +futures = "0.3.30" +object_store = { version = "0.10.1", features = ["aws", "azure", "gcp"] } +tempfile = "3.10.1" tracing = { workspace = true } tracing-log = "0.2" tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] } -object_store = { version = "0.10.1", features = ["aws", "azure", "gcp"] } -mockall = "0.13.0" -futures = "0.3.30" -tempfile = "3.10.1" +url = "2.5" diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index 63b73ab0..812e8689 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -10,10 +10,26 @@ use std::collections::HashMap; use tracing::warn; use url::Url; +/// Configuration for object storage, holding settings in a key-value format. pub struct Config { pub settings: HashMap, } +/// Builds an object store based on the specified scheme and configuration. +/// +/// # Arguments +/// +/// * `scheme` - The scheme to use for object storage, such as Memory, Local, AmazonS3, or GoogleCloudStorage. +/// * `config` - A hash map containing configuration settings needed to build the object store. +/// +/// # Returns +/// +/// Returns a `Result` that, on success, contains a boxed `ObjectStore` trait object. On failure, it returns an `object_store::Error` +/// indicating what went wrong, such as an unsupported scheme. +/// +/// # Errors +/// +/// * If the scheme is not supported or the configuration is invalid, an error is returned. pub fn build_object_store_from_config( scheme: ObjectStoreScheme, config: HashMap, @@ -42,6 +58,21 @@ pub fn build_object_store_from_config( } } +/// Builds an object store based on the URL and options provided. +/// +/// # Arguments +/// +/// * `url` - The URL that determines the object store scheme and configuration. +/// * `options` - A hash map containing configuration options for the object store, which may be modified based on the URL scheme. +/// +/// # Returns +/// +/// Returns a `Result` that, on success, contains a boxed `ObjectStore` trait object. On failure, it returns an `object_store::Error` +/// indicating what went wrong, such as an unsupported URL scheme. +/// +/// # Errors +/// +/// * If the URL scheme is unsupported or there is an error parsing the URL or options, an error is returned. pub async fn build_object_store_from_opts( url: &Url, mut options: HashMap, From 9b12d911f4e17bae27ad4bd98f0121bff0574cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:29:45 +0200 Subject: [PATCH 05/17] Fix toml formatting --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 55a0c14f..a60422d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,7 +80,6 @@ base64 = "0.21.0" bytes = "1.4.0" chrono = { version = "0.4", default-features = false } clade = { path = "clade" } -object_store_factory = { path = "object_store_factory" } clap = { version = "3.2.19", features = [ "derive" ] } config = "0.13.3" @@ -107,6 +106,7 @@ metrics = { version = "0.22.1" } metrics-exporter-prometheus = { version = "0.13.1" } moka = { version = "0.12.5", default-features = false, features = ["future", "atomic64", "quanta"] } object_store = "0.10.1" +object_store_factory = { path = "object_store_factory" } percent-encoding = "2.2.0" prost = "0.12.1" From 981338943bd1d82eb09e5e8fc02617e0e2b92fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:36:12 +0200 Subject: [PATCH 06/17] Address clippy warnings --- object_store_factory/src/google.rs | 2 +- object_store_factory/src/lib.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs index 97420fcf..cb23eae1 100644 --- a/object_store_factory/src/google.rs +++ b/object_store_factory/src/google.rs @@ -26,7 +26,7 @@ pub fn build_google_cloud_storage_from_config( config: &Config, ) -> Result, object_store::Error> { let mut builder: GoogleCloudStorageBuilder = - GoogleCloudStorageBuilder::new().with_bucket_name(&config.bucket.clone()); + GoogleCloudStorageBuilder::new().with_bucket_name(config.bucket.clone()); builder = if let Some(path) = &config.google_application_credentials { builder.with_service_account_path(path.clone()) diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index 812e8689..6e30857a 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -50,10 +50,10 @@ pub fn build_object_store_from_config( } _ => { warn!("Unsupported scheme: {:?}", scheme); - return Err(object_store::Error::Generic { + Err(object_store::Error::Generic { store: "unsupported_url_scheme", source: format!("Unsupported scheme: {:?}", scheme).into(), - }); + }) } } } @@ -77,7 +77,7 @@ pub async fn build_object_store_from_opts( url: &Url, mut options: HashMap, ) -> Result, object_store::Error> { - let (scheme, _) = ObjectStoreScheme::parse(&url).unwrap(); + let (scheme, _) = ObjectStoreScheme::parse(url).unwrap(); match scheme { ObjectStoreScheme::AmazonS3 => { @@ -96,6 +96,6 @@ pub async fn build_object_store_from_opts( } } - let store = parse_url_opts(&url, &options)?.0; + let store = parse_url_opts(url, &options)?.0; Ok(store) } From d11b61c1aae171e36887ba04d491cf09bb872b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 16:44:16 +0200 Subject: [PATCH 07/17] Fix clippy complaints --- object_store_factory/src/aws.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs index 1df5d5a8..e7ce0274 100644 --- a/object_store_factory/src/aws.rs +++ b/object_store_factory/src/aws.rs @@ -218,8 +218,8 @@ mod tests { assert!(debug_output.contains("region: \"us-west-2\"")); assert!(debug_output.contains("bucket: \"my-bucket\"")); assert!(debug_output.contains("endpoint: None")); - assert_eq!(debug_output.contains("key_id: \"\""), false); - assert_eq!(debug_output.contains("secret_key: \"\""), false); - assert_eq!(debug_output.contains("token: \"\""), false); + assert!(!debug_output.contains("key_id: \"\"")); + assert!(!debug_output.contains("secret_key: \"\"")); + assert!(!debug_output.contains("token: \"\"")); } } From f97f54e59a5b2dcb903f3f891714742c58a27eea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 21:35:44 +0200 Subject: [PATCH 08/17] Update Cargo.toml --- Cargo.lock | 4 ++-- Cargo.toml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a1f07de..778469d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5031,9 +5031,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.36.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4091e032efecb09d7b1f711f487b85ab925632a842627e3200fb088382cde32c" +checksum = "96a05e2e8efddfa51a84ca47cec303fac86c8541b686d37cac5efc0e094417bc" dependencies = [ "memchr", "serde", diff --git a/Cargo.toml b/Cargo.toml index a60422d9..493d1bd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["clade", "object_store_factory"] +members = ["clade"] [workspace.dependencies] arrow = { version = "52.0.0", features = ["test_utils"] } @@ -105,8 +105,8 @@ lazy_static = ">=1.4.0" metrics = { version = "0.22.1" } metrics-exporter-prometheus = { version = "0.13.1" } moka = { version = "0.12.5", default-features = false, features = ["future", "atomic64", "quanta"] } -object_store = "0.10.1" -object_store_factory = { path = "object_store_factory" } +object_store = "0.10.2" +object_store_factory = { version = "0.1.0", path = "object_store_factory" } percent-encoding = "2.2.0" prost = "0.12.1" From d6225ac946c5f078e19eefd54c0f99f43dd5fc8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 21:42:56 +0200 Subject: [PATCH 09/17] Update Cargo.toml on object_store_factory --- object_store_factory/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object_store_factory/Cargo.toml b/object_store_factory/Cargo.toml index 89bbd593..1a510d22 100644 --- a/object_store_factory/Cargo.toml +++ b/object_store_factory/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] async-trait = "0.1.64" futures = "0.3.30" -object_store = { version = "0.10.1", features = ["aws", "azure", "gcp"] } +object_store = { version = "0.10.2", features = ["aws", "azure", "gcp"] } tempfile = "3.10.1" tracing = { workspace = true } From c9b24b14208f0d67c17aaffc252a51339626d90d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Wed, 24 Jul 2024 23:55:34 +0200 Subject: [PATCH 10/17] Add object_store_factory to workspace --- Cargo.lock | 266 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 4 +- 2 files changed, 134 insertions(+), 136 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 778469d4..ed0a6719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,9 +131,9 @@ checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "arrayref" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" @@ -453,9 +453,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" +checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" dependencies = [ "bzip2", "flate2", @@ -499,7 +499,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -510,7 +510,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -622,9 +622,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.34.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdcfae7bf8b8f14cade7579ffa8956fcee91dc23633671096b4b5de7d16f682a" +checksum = "6acca681c53374bf1d9af0e317a41d12a44902ca0f2d1e10e5cb5bb98ed74f35" dependencies = [ "aws-credential-types", "aws-runtime", @@ -644,9 +644,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.35.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b30def8f02ba81276d5dbc22e7bf3bed20d62d1b175eef82680d6bdc7a6f4c" +checksum = "b79c6bdfe612503a526059c05c9ccccbf6bd9530b003673cb863e547fd7c0c9a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -666,9 +666,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.34.0" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0804f840ad31537d5d1a4ec48d59de5e674ad05f1db7d3def2c9acadaf1f7e60" +checksum = "32e6ecdb2bd756f3b2383e6f0588dc10a4e65f5d551e70a56e0bfe0c884673ce" dependencies = [ "aws-credential-types", "aws-runtime", @@ -775,7 +775,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "http-body 1.0.0", + "http-body 1.0.1", "httparse", "hyper 0.14.30", "hyper-rustls 0.24.2", @@ -817,7 +817,7 @@ dependencies = [ "http 0.2.12", "http 1.1.0", "http-body 0.4.6", - "http-body 1.0.0", + "http-body 1.0.1", "http-body-util", "itoa", "num-integer", @@ -909,7 +909,7 @@ dependencies = [ "cfg-if", "libc", "miniz_oxide", - "object 0.36.1", + "object 0.36.2", "rustc-demangle", ] @@ -1024,9 +1024,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" dependencies = [ "arrayref", "arrayvec", @@ -1064,7 +1064,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "syn_derive", ] @@ -1151,7 +1151,7 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1162,9 +1162,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" [[package]] name = "bytes-utils" @@ -1276,13 +1276,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.0" +version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" +checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" dependencies = [ "jobserver", "libc", - "once_cell", ] [[package]] @@ -2283,7 +2282,7 @@ checksum = "e4d2127a34b12919a6bce08225f0ca6fde8a19342a32675370edfc8795e7c38a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2385,7 +2384,7 @@ checksum = "61bb5a1014ce6dfc2a378578509abe775a5aa06bff584a547555d9efdb81b926" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2506,7 +2505,7 @@ checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2635,18 +2634,18 @@ dependencies = [ [[package]] name = "fix-hidden-lifetime-bug" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4ae9c2016a663983d4e40a9ff967d6dcac59819672f0b47f2b17574e99c33c8" +checksum = "40976db4d694ad47c9cdf86bd978cf4cfbe1c29469d5d1fc5b730a75e3d43a9b" dependencies = [ "fix-hidden-lifetime-bug-proc_macros", ] [[package]] name = "fix-hidden-lifetime-bug-proc_macros" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" +checksum = "75ab4fc77a21a7e7704c9ddbceaeb00ad8512b71b83738f3c0f17a0f44ee24a3" dependencies = [ "proc-macro2", "quote", @@ -2746,7 +2745,7 @@ checksum = "b0fa992f1656e1707946bbba340ad244f0814009ef8c0118eb7b658395f19a2e" dependencies = [ "frunk_proc_macro_helpers", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2758,7 +2757,7 @@ dependencies = [ "frunk_core", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2770,7 +2769,7 @@ dependencies = [ "frunk_core", "frunk_proc_macro_helpers", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -2872,7 +2871,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -3221,9 +3220,9 @@ dependencies = [ [[package]] name = "http-body" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", "http 1.1.0", @@ -3238,7 +3237,7 @@ dependencies = [ "bytes", "futures-util", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -3316,7 +3315,7 @@ dependencies = [ "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -3351,7 +3350,7 @@ dependencies = [ "http 1.1.0", "hyper 1.4.1", "hyper-util", - "rustls 0.23.11", + "rustls 0.23.12", "rustls-native-certs 0.7.1", "rustls-pki-types", "tokio", @@ -3394,7 +3393,7 @@ dependencies = [ "futures-channel", "futures-util", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "hyper 1.4.1", "pin-project-lite", "socket2 0.5.7", @@ -3583,9 +3582,9 @@ dependencies = [ [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -3724,9 +3723,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", "windows-targets 0.52.6", @@ -3962,7 +3961,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -3998,13 +3997,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" dependencies = [ + "hermit-abi 0.3.9", "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -4315,9 +4315,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.1" +version = "0.36.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" dependencies = [ "memchr", ] @@ -4396,9 +4396,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -4417,7 +4417,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -4437,9 +4437,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", @@ -4518,7 +4518,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.2", + "redox_syscall 0.5.3", "smallvec", "windows-targets 0.52.6", ] @@ -4641,7 +4641,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -4720,7 +4720,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -4764,15 +4764,15 @@ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" [[package]] name = "postgres" -version = "0.19.7" +version = "0.19.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7915b33ed60abc46040cbcaa25ffa1c7ec240668e0477c4f3070786f5916d451" +checksum = "6c9ec84ab55b0f9e418675de50052d494ba893fd28c65769a6e68fcdacbee2b8" dependencies = [ "bytes", "fallible-iterator 0.2.0", @@ -4810,11 +4810,11 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" +checksum = "acda0ebdebc28befa84bee35e651e4c5f09073d668c7aed4cf7e23c3cda84b23" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator 0.2.0", @@ -4828,9 +4828,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" +checksum = "02048d9e032fb3cc3413bbf7b83a15d84a5d419778e2628751896d856498eee9" dependencies = [ "bytes", "chrono", @@ -4887,7 +4887,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -4959,7 +4959,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.70", + "syn 2.0.72", "tempfile", ] @@ -4973,7 +4973,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -5050,7 +5050,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.11", + "rustls 0.23.12", "thiserror", "tokio", "tracing", @@ -5066,7 +5066,7 @@ dependencies = [ "rand 0.8.5", "ring", "rustc-hash", - "rustls 0.23.11", + "rustls 0.23.12", "slab", "thiserror", "tinyvec", @@ -5075,14 +5075,13 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.2" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9096629c45860fc7fb143e125eb826b5e721e10be3263160c7d60ca832cf8c46" +checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" dependencies = [ "libc", "once_cell", "socket2 0.5.7", - "tracing", "windows-sys 0.52.0", ] @@ -5226,9 +5225,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.2" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +checksum = "cb9ee317cfe3fbd54b36a511efc1edd42e216903c9cd575e686dd68a2ba90d8d" dependencies = [ "bitflags 2.6.0", ] @@ -5264,9 +5263,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags 2.6.0", ] @@ -5414,7 +5413,7 @@ dependencies = [ "futures-util", "h2 0.4.5", "http 1.1.0", - "http-body 1.0.0", + "http-body 1.0.1", "http-body-util", "hyper 1.4.1", "hyper-rustls 0.27.2", @@ -5427,7 +5426,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.11", + "rustls 0.23.12", "rustls-native-certs 0.7.1", "rustls-pemfile 2.1.2", "rustls-pki-types", @@ -5598,7 +5597,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.70", + "syn 2.0.72", "unicode-ident", ] @@ -5646,9 +5645,9 @@ dependencies = [ [[package]] name = "rust_decimal_macros" -version = "1.34.2" +version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e418701588729bef95e7a655f2b483ad64bb97c46e8e79fde83efd92aaab6d82" +checksum = "a05bf7103af0797dbce0667c471946b29b9eaea34652eff67324f360fec027de" dependencies = [ "quote", "rust_decimal", @@ -5704,14 +5703,14 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.11" +version = "0.23.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4828ea528154ae444e5a642dbb7d5623354030dc9822b83fd9bb79683c7399d0" +checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.102.5", + "rustls-webpki 0.102.6", "subtle", "zeroize", ] @@ -5778,9 +5777,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.102.5" +version = "0.102.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" +checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" dependencies = [ "ring", "rustls-pki-types", @@ -5964,9 +5963,9 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.6.0", "core-foundation", @@ -5977,9 +5976,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" dependencies = [ "core-foundation-sys", "libc", @@ -6023,7 +6022,7 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6082,7 +6081,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6299,7 +6298,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6551,7 +6550,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6583,9 +6582,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.70" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -6601,7 +6600,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6720,22 +6719,22 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6816,21 +6815,20 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.38.0" +version = "1.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" dependencies = [ "backtrace", "bytes", "libc", "mio", - "num_cpus", "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.5.7", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -6862,13 +6860,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -6895,9 +6893,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" +checksum = "03adcf0147e203b6032c0b2d30be1415ba03bc348901f3ff1cc0df6a733e60c3" dependencies = [ "async-trait", "byteorder", @@ -6935,7 +6933,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.11", + "rustls 0.23.12", "rustls-pki-types", "tokio", ] @@ -7039,7 +7037,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -7107,7 +7105,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -7373,7 +7371,7 @@ checksum = "b3fd98999db9227cf28e59d83e1f120f42bc233d4b152e8fab9bc87d5bb1e0f8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -7527,7 +7525,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -7561,7 +7559,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7583,9 +7581,9 @@ dependencies = [ [[package]] name = "wasm-encoder" -version = "0.212.0" +version = "0.214.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501940df4418b8929eb6d52f1aade1fdd15a5b86c92453cb696e3c906bd3fc33" +checksum = "ff694f02a8d7a50b6922b197ae03883fbf18cdb2ae9fbee7b6148456f5f44041" dependencies = [ "leb128", ] @@ -7711,7 +7709,7 @@ dependencies = [ "anyhow", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasmtime-component-util", "wasmtime-wit-bindgen", "wit-parser", @@ -7903,7 +7901,7 @@ checksum = "e04682ce587aa8fa9311d3c95148381f08a1db274ad6bcd3553f7c97c8c2debb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -7987,24 +7985,24 @@ dependencies = [ [[package]] name = "wast" -version = "212.0.0" +version = "214.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4606a05fb0aae5d11dd7d8280a640d88a63ee019360ba9be552da3d294b8d1f5" +checksum = "694bcdb24c49c8709bd8713768b71301a11e823923eee355d530f1d8d0a7f8e9" dependencies = [ "bumpalo", "leb128", "memchr", "unicode-width", - "wasm-encoder 0.212.0", + "wasm-encoder 0.214.0", ] [[package]] name = "wat" -version = "1.212.0" +version = "1.214.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c74ca7f93f11a5d6eed8499f2a8daaad6e225cab0151bc25a091fff3b987532f" +checksum = "347249eb56773fa728df2656cfe3a8c19437ded61a922a0b5e0839d9790e278e" dependencies = [ - "wast 212.0.0", + "wast 214.0.0", ] [[package]] @@ -8060,7 +8058,7 @@ dependencies = [ "proc-macro2", "quote", "shellexpand", - "syn 2.0.70", + "syn 2.0.72", "witx", ] @@ -8072,7 +8070,7 @@ checksum = "4a781d29bfd788595f4a392a6f606699e59577b7f4b2858da2ae4068f4d757c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wiggle-generate", ] @@ -8417,7 +8415,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 493d1bd5..471aa209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["clade"] +members = ["clade", "object_store_factory"] [workspace.dependencies] arrow = { version = "52.0.0", features = ["test_utils"] } @@ -105,7 +105,7 @@ lazy_static = ">=1.4.0" metrics = { version = "0.22.1" } metrics-exporter-prometheus = { version = "0.13.1" } moka = { version = "0.12.5", default-features = false, features = ["future", "atomic64", "quanta"] } -object_store = "0.10.2" +object_store = { version = "0.10.2", features = ["aws", "azure", "gcp"] } object_store_factory = { version = "0.1.0", path = "object_store_factory" } percent-encoding = "2.2.0" prost = "0.12.1" From 287b19465b2b79fcaed41e5fd5e76be7ec383b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Thu, 25 Jul 2024 12:12:48 +0200 Subject: [PATCH 11/17] Fix errors for LocalStorage and InMemory --- object_store_factory/src/aws.rs | 5 +++-- object_store_factory/src/google.rs | 5 +++-- object_store_factory/src/lib.rs | 7 ++++--- object_store_factory/src/local.rs | 5 +++-- object_store_factory/src/memory.rs | 5 +++-- src/object_store/factory.rs | 15 ++++++++------- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs index e7ce0274..65cd3a9e 100644 --- a/object_store_factory/src/aws.rs +++ b/object_store_factory/src/aws.rs @@ -4,6 +4,7 @@ use object_store::{ }; use std::collections::HashMap; use std::env; +use std::sync::Arc; use tracing::info; use url::Url; @@ -35,7 +36,7 @@ impl Config { pub fn build_amazon_s3_from_config( config: &Config, -) -> Result, object_store::Error> { +) -> Result, object_store::Error> { let mut builder = AmazonS3Builder::new() .with_region(config.region.clone().unwrap_or_default()) .with_bucket_name(config.bucket.clone()) @@ -60,7 +61,7 @@ pub fn build_amazon_s3_from_config( } let store = builder.build()?; - Ok(Box::new(store)) + Ok(Arc::new(store)) } pub async fn add_amazon_s3_specific_options( diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs index cb23eae1..56d2466b 100644 --- a/object_store_factory/src/google.rs +++ b/object_store_factory/src/google.rs @@ -1,6 +1,7 @@ use object_store::{gcp::GoogleCloudStorageBuilder, ObjectStore}; use std::collections::HashMap; use std::env; +use std::sync::Arc; pub struct Config { pub bucket: String, @@ -24,7 +25,7 @@ impl Config { pub fn build_google_cloud_storage_from_config( config: &Config, -) -> Result, object_store::Error> { +) -> Result, object_store::Error> { let mut builder: GoogleCloudStorageBuilder = GoogleCloudStorageBuilder::new().with_bucket_name(config.bucket.clone()); @@ -35,7 +36,7 @@ pub fn build_google_cloud_storage_from_config( }; let store = builder.build()?; - Ok(Box::new(store)) + Ok(Arc::new(store)) } pub fn add_google_cloud_storage_environment_variables( diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index 6e30857a..f791a4d7 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -7,6 +7,7 @@ use object_store::parse_url_opts; use object_store::ObjectStore; use object_store::ObjectStoreScheme; use std::collections::HashMap; +use std::sync::Arc; use tracing::warn; use url::Url; @@ -24,16 +25,16 @@ pub struct Config { /// /// # Returns /// -/// Returns a `Result` that, on success, contains a boxed `ObjectStore` trait object. On failure, it returns an `object_store::Error` +/// Returns a `Result` that, on success, contains an Arc `ObjectStore` trait object. On failure, it returns an `object_store::Error` /// indicating what went wrong, such as an unsupported scheme. /// /// # Errors /// /// * If the scheme is not supported or the configuration is invalid, an error is returned. pub fn build_object_store_from_config( - scheme: ObjectStoreScheme, + scheme: &ObjectStoreScheme, config: HashMap, -) -> Result, object_store::Error> { +) -> Result, object_store::Error> { match scheme { ObjectStoreScheme::Memory => memory::build_in_memory_storage(), ObjectStoreScheme::Local => { diff --git a/object_store_factory/src/local.rs b/object_store_factory/src/local.rs index b33f73e8..e5b8ce99 100644 --- a/object_store_factory/src/local.rs +++ b/object_store_factory/src/local.rs @@ -1,5 +1,6 @@ use object_store::{local::LocalFileSystem, ObjectStore}; use std::collections::HashMap; +use std::sync::Arc; #[derive(Debug)] pub struct Config { @@ -18,9 +19,9 @@ impl Config { pub fn build_local_storage( config: &Config, -) -> Result, object_store::Error> { +) -> Result, object_store::Error> { let store = LocalFileSystem::new_with_prefix(config.data_dir.clone())?; - Ok(Box::new(store)) + Ok(Arc::new(store)) } #[cfg(test)] diff --git a/object_store_factory/src/memory.rs b/object_store_factory/src/memory.rs index 3f87e9b3..9f88dcca 100644 --- a/object_store_factory/src/memory.rs +++ b/object_store_factory/src/memory.rs @@ -1,8 +1,9 @@ use object_store::{memory::InMemory, ObjectStore}; +use std::sync::Arc; -pub fn build_in_memory_storage() -> Result, object_store::Error> { +pub fn build_in_memory_storage() -> Result, object_store::Error> { let store = InMemory::new(); - Ok(Box::new(store)) + Ok(Arc::new(store)) } #[cfg(test)] diff --git a/src/object_store/factory.rs b/src/object_store/factory.rs index 8e8f4f5f..1e881ca9 100644 --- a/src/object_store/factory.rs +++ b/src/object_store/factory.rs @@ -24,20 +24,21 @@ pub fn build_object_store( let scheme = match &object_store_cfg { schema::ObjectStore::Local(_) => ObjectStoreScheme::Local, schema::ObjectStore::InMemory(_) => ObjectStoreScheme::Memory, - #[cfg(feature = "object-store-s3")] schema::ObjectStore::S3(_) => ObjectStoreScheme::AmazonS3, - #[cfg(feature = "object-store-gcs")] schema::ObjectStore::GCS(_) => ObjectStoreScheme::GoogleCloudStorage, }; let config = object_store_cfg.to_config(); - let store = object_store_factory::build_object_store_from_config(scheme, config)?; + let store = object_store_factory::build_object_store_from_config(&scheme, config)?; - Ok(if let Some(props) = cache_properties { - Arc::new(CachingObjectStore::new_from_config(props, Arc::new(store))) + if matches!(scheme, ObjectStoreScheme::Local | ObjectStoreScheme::Memory) { + Ok(store) } else { - Arc::new(store) - }) + Ok(match cache_properties { + Some(props) => Arc::new(CachingObjectStore::new_from_config(props, store)), + None => store, + }) + } } #[derive(PartialEq, Eq)] From bb268710e09c3040693ae498ee76575c6fd47515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Thu, 25 Jul 2024 12:38:37 +0200 Subject: [PATCH 12/17] Address review comments --- Cargo.toml | 9 +- object_store_factory/Cargo.toml | 14 +-- object_store_factory/src/aws.rs | 150 +++++++++++++++++++++++------ object_store_factory/src/google.rs | 141 ++++++++++++++++++++++----- object_store_factory/src/lib.rs | 31 +++--- object_store_factory/src/local.rs | 16 +-- 6 files changed, 280 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 471aa209..2ba7f5eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,9 +18,16 @@ datafusion = "39.0.0" datafusion-common = "39.0.0" datafusion-expr = "39.0.0" +futures = "0.3" + itertools = ">=0.10.0" +object_store = { version = "0.10.2", features = ["aws", "azure", "gcp"] } +tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "signal", "process"] } tracing = { version = "0.1", features = ["log"] } +tracing-log = "0.2" +tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] } +url = "2.5" [patch.crates-io] # Pick up backport of https://github.com/apache/arrow-datafusion/pull/11386 @@ -106,7 +113,7 @@ metrics = { version = "0.22.1" } metrics-exporter-prometheus = { version = "0.13.1" } moka = { version = "0.12.5", default-features = false, features = ["future", "atomic64", "quanta"] } object_store = { version = "0.10.2", features = ["aws", "azure", "gcp"] } -object_store_factory = { version = "0.1.0", path = "object_store_factory" } +object_store_factory = { path = "object_store_factory" } percent-encoding = "2.2.0" prost = "0.12.1" diff --git a/object_store_factory/Cargo.toml b/object_store_factory/Cargo.toml index 1a510d22..d858999d 100644 --- a/object_store_factory/Cargo.toml +++ b/object_store_factory/Cargo.toml @@ -4,12 +4,12 @@ version = "0.1.0" edition = "2021" [dependencies] -async-trait = "0.1.64" -futures = "0.3.30" -object_store = { version = "0.10.2", features = ["aws", "azure", "gcp"] } -tempfile = "3.10.1" +async-trait = { workspace = true } +futures = { workspace = true } +object_store = { workspace = true } +tempfile = { workspace = true } tracing = { workspace = true } -tracing-log = "0.2" -tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] } -url = "2.5" +tracing-log = { workspace = true } +tracing-subscriber = { workspace = true } +url = { workspace = true } diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs index 65cd3a9e..f406e008 100644 --- a/object_store_factory/src/aws.rs +++ b/object_store_factory/src/aws.rs @@ -1,14 +1,16 @@ +use object_store::ClientConfigKey; use object_store::{ aws::resolve_bucket_region, aws::AmazonS3Builder, aws::AmazonS3ConfigKey, ClientOptions, ObjectStore, }; use std::collections::HashMap; use std::env; +use std::str::FromStr; use std::sync::Arc; use tracing::info; use url::Url; -pub struct Config { +pub struct S3Config { pub region: Option, pub access_key_id: Option, pub secret_access_key: Option, @@ -18,7 +20,7 @@ pub struct Config { pub _prefix: Option, } -impl Config { +impl S3Config { pub fn from_hashmap( map: &HashMap, ) -> Result { @@ -35,7 +37,7 @@ impl Config { } pub fn build_amazon_s3_from_config( - config: &Config, + config: &S3Config, ) -> Result, object_store::Error> { let mut builder = AmazonS3Builder::new() .with_region(config.region.clone().unwrap_or_default()) @@ -64,48 +66,70 @@ pub fn build_amazon_s3_from_config( Ok(Arc::new(store)) } +pub fn map_options_into_amazon_s3_config_keys( + input_options: HashMap, +) -> Result, object_store::Error> { + let mut mapped_keys = HashMap::new(); + + for (key, value) in input_options { + match AmazonS3ConfigKey::from_str(&key) { + Ok(config_key) => { + mapped_keys.insert(config_key, value); + } + Err(err) => { + return Err(err); + } + } + } + + Ok(mapped_keys) +} + pub async fn add_amazon_s3_specific_options( url: &Url, - mut options: HashMap, -) -> HashMap { - if !options.contains_key(AmazonS3ConfigKey::Bucket.as_ref()) - && !options.contains_key(AmazonS3ConfigKey::Endpoint.as_ref()) + options: &mut HashMap, +) { + if !options.contains_key(&AmazonS3ConfigKey::Bucket) + && !options.contains_key(&AmazonS3ConfigKey::Endpoint) { let region = detect_region(url).await.unwrap(); - options.insert("region".to_string(), region.to_string()); + options.insert(AmazonS3ConfigKey::Region, region.to_string()); } - options } pub fn add_amazon_s3_environment_variables( - mut options: HashMap, -) -> HashMap { + options: &mut HashMap, +) { let env_vars = &[ - ("AWS_ACCESS_KEY_ID", "access_key_id"), - ("AWS_SECRET_ACCESS_KEY", "secret_access_key"), - ("AWS_DEFAULT_REGION", "region"), - ("AWS_ENDPOINT", "endpoint"), - ("AWS_SESSION_TOKEN", "session_token"), - ("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", "credentials_uri"), + ("AWS_ACCESS_KEY_ID", AmazonS3ConfigKey::AccessKeyId), + ("AWS_SECRET_ACCESS_KEY", AmazonS3ConfigKey::SecretAccessKey), + ("AWS_DEFAULT_REGION", AmazonS3ConfigKey::Region), + ("AWS_ENDPOINT", AmazonS3ConfigKey::Endpoint), + ("AWS_SESSION_TOKEN", AmazonS3ConfigKey::Token), + ( + "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", + AmazonS3ConfigKey::ContainerCredentialsRelativeUri, + ), ]; for &(env_var_name, config_key) in env_vars.iter() { if let Ok(val) = env::var(env_var_name) { - options.insert(config_key.to_string(), val); + options.insert(config_key, val); } } if env::var("AWS_ALLOW_HTTP") == Ok("true".to_string()) { - options.insert("allow_http".to_string(), "true".to_string()); + options.insert( + AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp), + "true".to_string(), + ); } - - options } // For "real" S3, if we don't have a region passed to us, we have to figure it out // ourselves (note this won't work with HTTP paths that are actually S3, but those // usually include the region already). -pub async fn detect_region(url: &Url) -> Result { +async fn detect_region(url: &Url) -> Result { let bucket = url.host_str().ok_or(object_store::Error::Generic { store: "parse_url", source: format!("Could not find a bucket in S3 path {0}", url).into(), @@ -136,7 +160,7 @@ mod tests { map.insert("prefix".to_string(), "my-prefix".to_string()); let config = - Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + S3Config::from_hashmap(&map).expect("Failed to create config from hashmap"); assert_eq!(config.region, Some("us-west-2".to_string())); assert_eq!(config.access_key_id, Some("access_key".to_string())); assert_eq!(config.secret_access_key, Some("secret_key".to_string())); @@ -153,7 +177,7 @@ mod tests { map.insert("bucket".to_string(), "my-bucket".to_string()); let config = - Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + S3Config::from_hashmap(&map).expect("Failed to create config from hashmap"); assert_eq!(config.region, Some("us-west-2".to_string())); assert!(config.access_key_id.is_none()); assert!(config.secret_access_key.is_none()); @@ -167,12 +191,12 @@ mod tests { #[should_panic(expected = "called `Option::unwrap()` on a `None` value")] fn test_config_from_hashmap_without_required_fields() { let map = HashMap::new(); - Config::from_hashmap(&map).unwrap(); // Missing "region" and "bucket" + S3Config::from_hashmap(&map).unwrap(); // Missing "region" and "bucket" } #[test] fn test_build_amazon_s3_from_config_with_all_fields() { - let config = Config { + let config = S3Config { region: Some("us-west-2".to_string()), access_key_id: Some("access_key".to_string()), secret_access_key: Some("secret_key".to_string()), @@ -199,7 +223,7 @@ mod tests { #[test] fn test_build_amazon_s3_from_config_with_missing_optional_fields() { - let config = Config { + let config = S3Config { region: Some("us-west-2".to_string()), access_key_id: None, secret_access_key: None, @@ -223,4 +247,76 @@ mod tests { assert!(!debug_output.contains("secret_key: \"\"")); assert!(!debug_output.contains("token: \"\"")); } + + #[test] + fn test_map_options_into_amazon_s3_config_keys_with_valid_keys() { + let mut input_options = HashMap::new(); + input_options.insert("access_key_id".to_string(), "ACCESS_KEY".to_string()); + input_options.insert("secret_access_key".to_string(), "SECRET_KEY".to_string()); + input_options.insert("region".to_string(), "us-west-2".to_string()); + input_options.insert("bucket".to_string(), "my-bucket".to_string()); + + let result = map_options_into_amazon_s3_config_keys(input_options); + assert!(result.is_ok()); + + let mapped_keys = result.unwrap(); + assert_eq!( + mapped_keys.get(&AmazonS3ConfigKey::AccessKeyId), + Some(&"ACCESS_KEY".to_string()) + ); + assert_eq!( + mapped_keys.get(&AmazonS3ConfigKey::SecretAccessKey), + Some(&"SECRET_KEY".to_string()) + ); + assert_eq!( + mapped_keys.get(&AmazonS3ConfigKey::Region), + Some(&"us-west-2".to_string()) + ); + assert_eq!( + mapped_keys.get(&AmazonS3ConfigKey::Bucket), + Some(&"my-bucket".to_string()) + ); + } + + #[test] + fn test_map_options_into_amazon_s3_config_keys_with_invalid_key() { + let mut input_options = HashMap::new(); + input_options.insert("invalid_key".to_string(), "some_value".to_string()); + + let result = map_options_into_amazon_s3_config_keys(input_options); + assert!(result.is_err()); + + let error = result.err().unwrap(); + assert_eq!( + error.to_string(), + "Configuration key: 'invalid_key' is not valid for store 'S3'." + ) + } + + #[test] + fn test_map_options_into_amazon_s3_config_keys_with_mixed_keys() { + let mut input_options = HashMap::new(); + input_options.insert("access_key_id".to_string(), "ACCESS_KEY".to_string()); + input_options.insert("invalid_key".to_string(), "some_value".to_string()); + input_options.insert("bucket".to_string(), "my-bucket".to_string()); + + let result = map_options_into_amazon_s3_config_keys(input_options); + assert!(result.is_err()); + + let error = result.err().unwrap(); + assert_eq!( + error.to_string(), + "Configuration key: 'invalid_key' is not valid for store 'S3'." + ) + } + + #[test] + fn test_map_options_into_amazon_s3_config_keys_empty_input() { + let input_options = HashMap::new(); + let result = map_options_into_amazon_s3_config_keys(input_options); + assert!(result.is_ok()); + + let mapped_keys = result.unwrap(); + assert!(mapped_keys.is_empty()); + } } diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs index 56d2466b..4e5a3018 100644 --- a/object_store_factory/src/google.rs +++ b/object_store_factory/src/google.rs @@ -1,15 +1,16 @@ -use object_store::{gcp::GoogleCloudStorageBuilder, ObjectStore}; +use object_store::{gcp::GoogleCloudStorageBuilder, gcp::GoogleConfigKey, ObjectStore}; use std::collections::HashMap; use std::env; +use std::str::FromStr; use std::sync::Arc; -pub struct Config { +pub struct GCSConfig { pub bucket: String, pub _prefix: Option, pub google_application_credentials: Option, } -impl Config { +impl GCSConfig { pub fn from_hashmap( map: &HashMap, ) -> Result { @@ -24,7 +25,7 @@ impl Config { } pub fn build_google_cloud_storage_from_config( - config: &Config, + config: &GCSConfig, ) -> Result, object_store::Error> { let mut builder: GoogleCloudStorageBuilder = GoogleCloudStorageBuilder::new().with_bucket_name(config.bucket.clone()); @@ -39,25 +40,48 @@ pub fn build_google_cloud_storage_from_config( Ok(Arc::new(store)) } +pub fn map_options_into_google_config_keys( + input_options: HashMap, +) -> Result, object_store::Error> { + let mut mapped_keys = HashMap::new(); + + for (key, value) in input_options { + match GoogleConfigKey::from_str(&key) { + Ok(config_key) => { + mapped_keys.insert(config_key, value); + } + Err(err) => { + return Err(err); + } + } + } + + Ok(mapped_keys) +} + pub fn add_google_cloud_storage_environment_variables( - mut options: HashMap, -) -> HashMap { + options: &mut HashMap, +) { let env_vars = &[ - ("GOOGLE_SERVICE_ACCOUNT", "service_account_file"), - ("GOOGLE_SERVICE_ACCOUNT_PATH", "service_account_path"), - ("SERVICE_ACCOUNT", "service_account_file"), - ("GOOGLE_SERVICE_ACCOUNT_KEY", "service_account_key"), - ("GOOGLE_BUCKET", "bucket"), - ("GOOGLE_BUCKET_NAME", "bucket"), + ("GOOGLE_SERVICE_ACCOUNT", GoogleConfigKey::ServiceAccount), + ( + "GOOGLE_SERVICE_ACCOUNT_PATH", + GoogleConfigKey::ServiceAccount, + ), + ("SERVICE_ACCOUNT", GoogleConfigKey::ServiceAccount), + ( + "GOOGLE_SERVICE_ACCOUNT_KEY", + GoogleConfigKey::ServiceAccountKey, + ), + ("GOOGLE_BUCKET", GoogleConfigKey::Bucket), + ("GOOGLE_BUCKET_NAME", GoogleConfigKey::Bucket), ]; for &(env_var_name, config_key) in env_vars.iter() { if let Ok(val) = env::var(env_var_name) { - options.insert(config_key.to_string(), val); + options.insert(config_key, val); } } - - options } #[cfg(test)] @@ -78,7 +102,7 @@ mod tests { ); let config = - Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + GCSConfig::from_hashmap(&map).expect("Failed to create config from hashmap"); assert_eq!(config.bucket, "my-bucket"); assert_eq!(config._prefix, Some("my-prefix".to_string())); assert_eq!( @@ -93,7 +117,7 @@ mod tests { map.insert("bucket".to_string(), "my-bucket".to_string()); let config = - Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + GCSConfig::from_hashmap(&map).expect("Failed to create config from hashmap"); assert_eq!(config.bucket, "my-bucket"); assert!(config._prefix.is_none()); assert!(config.google_application_credentials.is_none()); @@ -103,7 +127,7 @@ mod tests { #[should_panic(expected = "called `Option::unwrap()` on a `None` value")] fn test_config_from_hashmap_without_bucket() { let map = HashMap::new(); - Config::from_hashmap(&map).unwrap(); + GCSConfig::from_hashmap(&map).unwrap(); } #[test] @@ -128,7 +152,7 @@ mod tests { fs::write(temp_file.path(), credentials_content) .expect("Failed to write to temporary file"); - let config = Config { + let config = GCSConfig { bucket: "my-bucket".to_string(), _prefix: Some("my-prefix".to_string()), google_application_credentials: Some( @@ -143,15 +167,12 @@ mod tests { let store = result.unwrap(); let debug_output = format!("{:?}", store); - println!("{:?}", debug_output); - - // Check the configuration values assert!(debug_output.contains("bucket_name: \"my-bucket\"")); } #[test] fn test_build_google_cloud_storage_from_config_with_missing_optional_fields() { - let config = Config { + let config = GCSConfig { bucket: "my-bucket".to_string(), _prefix: None, google_application_credentials: None, @@ -164,7 +185,79 @@ mod tests { let store = result.unwrap(); let debug_output = format!("{:?}", store); - // Check the configuration values assert!(debug_output.contains("bucket_name: \"my-bucket\"")); } + + #[test] + fn test_map_options_into_google_config_keys_with_valid_keys() { + let mut input_options = HashMap::new(); + input_options.insert( + "google_service_account".to_string(), + "my_google_service_account".to_string(), + ); + input_options.insert("bucket".to_string(), "my-bucket".to_string()); + input_options.insert( + "google_service_account_key".to_string(), + "my_google_service_account_key".to_string(), + ); + + let result = map_options_into_google_config_keys(input_options); + assert!(result.is_ok()); + + let mapped_keys = result.unwrap(); + assert_eq!( + mapped_keys.get(&GoogleConfigKey::ServiceAccount), + Some(&"my_google_service_account".to_string()) + ); + assert_eq!( + mapped_keys.get(&GoogleConfigKey::Bucket), + Some(&"my-bucket".to_string()) + ); + assert_eq!( + mapped_keys.get(&GoogleConfigKey::ServiceAccountKey), + Some(&"my_google_service_account_key".to_string()) + ); + } + + #[test] + fn test_map_options_into_google_config_keys_with_invalid_key() { + let mut input_options = HashMap::new(); + input_options.insert("invalid_key".to_string(), "some_value".to_string()); + + let result = map_options_into_google_config_keys(input_options); + assert!(result.is_err()); + + let error = result.err().unwrap(); + print!("ERROR1: {:?}", error.to_string()); + assert_eq!( + error.to_string(), + "Configuration key: 'invalid_key' is not valid for store 'GCS'." + ) + } + + #[test] + fn test_map_options_into_google_config_keys_with_mixed_keys() { + let mut input_options = HashMap::new(); + input_options.insert("invalid_key".to_string(), "some_value".to_string()); + input_options.insert("bucket".to_string(), "my-bucket".to_string()); + + let result = map_options_into_google_config_keys(input_options); + assert!(result.is_err()); + + let error = result.err().unwrap(); + assert_eq!( + error.to_string(), + "Configuration key: 'invalid_key' is not valid for store 'GCS'." + ) + } + + #[test] + fn test_map_options_into_google_config_keys_empty_input() { + let input_options = HashMap::new(); + let result = map_options_into_google_config_keys(input_options); + assert!(result.is_ok()); + + let mapped_keys = result.unwrap(); + assert!(mapped_keys.is_empty()); + } } diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index f791a4d7..6317da18 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -12,9 +12,7 @@ use tracing::warn; use url::Url; /// Configuration for object storage, holding settings in a key-value format. -pub struct Config { - pub settings: HashMap, -} +pub type StorageConfig = HashMap; /// Builds an object store based on the specified scheme and configuration. /// @@ -33,20 +31,20 @@ pub struct Config { /// * If the scheme is not supported or the configuration is invalid, an error is returned. pub fn build_object_store_from_config( scheme: &ObjectStoreScheme, - config: HashMap, + config: StorageConfig, ) -> Result, object_store::Error> { match scheme { ObjectStoreScheme::Memory => memory::build_in_memory_storage(), ObjectStoreScheme::Local => { - let file_config = local::Config::from_hashmap(&config)?; + let file_config = local::LocalConfig::from_hashmap(&config)?; local::build_local_storage(&file_config) } ObjectStoreScheme::AmazonS3 => { - let aws_config = aws::Config::from_hashmap(&config)?; + let aws_config = aws::S3Config::from_hashmap(&config)?; aws::build_amazon_s3_from_config(&aws_config) } ObjectStoreScheme::GoogleCloudStorage => { - let google_config = google::Config::from_hashmap(&config)?; + let google_config = google::GCSConfig::from_hashmap(&config)?; google::build_google_cloud_storage_from_config(&google_config) } _ => { @@ -76,17 +74,25 @@ pub fn build_object_store_from_config( /// * If the URL scheme is unsupported or there is an error parsing the URL or options, an error is returned. pub async fn build_object_store_from_opts( url: &Url, - mut options: HashMap, + options: HashMap, ) -> Result, object_store::Error> { let (scheme, _) = ObjectStoreScheme::parse(url).unwrap(); match scheme { ObjectStoreScheme::AmazonS3 => { - options = aws::add_amazon_s3_specific_options(url, options).await; - options = aws::add_amazon_s3_environment_variables(options); + let mut s3_options = aws::map_options_into_amazon_s3_config_keys(options)?; + aws::add_amazon_s3_specific_options(url, &mut s3_options).await; + aws::add_amazon_s3_environment_variables(&mut s3_options); + + let store = parse_url_opts(url, s3_options)?.0; + Ok(store) } ObjectStoreScheme::GoogleCloudStorage => { - options = google::add_google_cloud_storage_environment_variables(options); + let mut gcs_options = google::map_options_into_google_config_keys(options)?; + google::add_google_cloud_storage_environment_variables(&mut gcs_options); + + let store = parse_url_opts(url, gcs_options)?.0; + Ok(store) } _ => { warn!("Unsupported URL scheme: {}", url); @@ -96,7 +102,4 @@ pub async fn build_object_store_from_opts( }); } } - - let store = parse_url_opts(url, &options)?.0; - Ok(store) } diff --git a/object_store_factory/src/local.rs b/object_store_factory/src/local.rs index e5b8ce99..1bf9bd7c 100644 --- a/object_store_factory/src/local.rs +++ b/object_store_factory/src/local.rs @@ -3,11 +3,11 @@ use std::collections::HashMap; use std::sync::Arc; #[derive(Debug)] -pub struct Config { +pub struct LocalConfig { pub data_dir: String, } -impl Config { +impl LocalConfig { pub fn from_hashmap( map: &HashMap, ) -> Result { @@ -18,7 +18,7 @@ impl Config { } pub fn build_local_storage( - config: &Config, + config: &LocalConfig, ) -> Result, object_store::Error> { let store = LocalFileSystem::new_with_prefix(config.data_dir.clone())?; Ok(Arc::new(store)) @@ -34,8 +34,8 @@ mod tests { let mut map = HashMap::new(); map.insert("data_dir".to_string(), "/tmp/data".to_string()); - let config = - Config::from_hashmap(&map).expect("Failed to create config from hashmap"); + let config = LocalConfig::from_hashmap(&map) + .expect("Failed to create config from hashmap"); assert_eq!(config.data_dir, "/tmp/data".to_string()); } @@ -44,7 +44,7 @@ mod tests { fn test_config_from_hashmap_without_data_dir() { let map = HashMap::new(); // This test will panic because data_dir is missing, which causes unwrap() to panic. - Config::from_hashmap(&map).unwrap(); + LocalConfig::from_hashmap(&map).unwrap(); } #[test] @@ -55,7 +55,7 @@ mod tests { .to_str() .expect("Failed to convert path to string"); - let config = Config { + let config = LocalConfig { data_dir: data_dir.to_string(), }; @@ -65,7 +65,7 @@ mod tests { #[test] fn test_build_local_storage_with_invalid_path() { - let config = Config { + let config = LocalConfig { data_dir: "".to_string(), }; From b6e6a42c7469958b3217f89398bda2530b420de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Thu, 25 Jul 2024 15:26:17 +0200 Subject: [PATCH 13/17] Remove unneeded `return` statement --- object_store_factory/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index 6317da18..bec11a5e 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -96,10 +96,10 @@ pub async fn build_object_store_from_opts( } _ => { warn!("Unsupported URL scheme: {}", url); - return Err(object_store::Error::Generic { + Err(object_store::Error::Generic { store: "unsupported_url_scheme", source: format!("Unsupported URL scheme: {}", url).into(), - }); + }) } } } From 853ee479042eeec200a97bacffc7e4dd5b9fe3cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Fri, 26 Jul 2024 10:54:15 +0200 Subject: [PATCH 14/17] Clean up build_object_store_form_opts --- object_store_factory/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index bec11a5e..e6454cc0 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -84,14 +84,14 @@ pub async fn build_object_store_from_opts( aws::add_amazon_s3_specific_options(url, &mut s3_options).await; aws::add_amazon_s3_environment_variables(&mut s3_options); - let store = parse_url_opts(url, s3_options)?.0; + let (store, _) = parse_url_opts(url, s3_options)?; Ok(store) } ObjectStoreScheme::GoogleCloudStorage => { let mut gcs_options = google::map_options_into_google_config_keys(options)?; google::add_google_cloud_storage_environment_variables(&mut gcs_options); - let store = parse_url_opts(url, gcs_options)?.0; + let (store, _) = parse_url_opts(url, gcs_options)?; Ok(store) } _ => { From 26bb61effb9d8bf70f7a90f4b2f390b45b7547ae Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 26 Jul 2024 14:23:26 +0200 Subject: [PATCH 15/17] Make inline metastore robust when local FS store with path is provided --- object_store_factory/src/lib.rs | 14 ++++++++++++++ tests/fixtures.rs | 25 ++++++++++++++++++++----- tests/flight/inline_metastore.rs | 1 + 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/object_store_factory/src/lib.rs b/object_store_factory/src/lib.rs index bec11a5e..266c0736 100644 --- a/object_store_factory/src/lib.rs +++ b/object_store_factory/src/lib.rs @@ -4,8 +4,10 @@ mod local; mod memory; use object_store::parse_url_opts; +use object_store::prefix::PrefixStore; use object_store::ObjectStore; use object_store::ObjectStoreScheme; +use object_store::{local::LocalFileSystem, memory::InMemory}; use std::collections::HashMap; use std::sync::Arc; use tracing::warn; @@ -79,6 +81,18 @@ pub async fn build_object_store_from_opts( let (scheme, _) = ObjectStoreScheme::parse(url).unwrap(); match scheme { + // `parse_url_opts` will swallow the URL path for memory/local FS stores + ObjectStoreScheme::Memory => { + let mut store: Box = Box::new(InMemory::new()); + if !url.path().is_empty() { + store = Box::new(PrefixStore::new(store, url.path())); + } + + Ok(store) + } + ObjectStoreScheme::Local => { + Ok(Box::new(LocalFileSystem::new_with_prefix(url.path())?)) + } ObjectStoreScheme::AmazonS3 => { let mut s3_options = aws::map_options_into_amazon_s3_config_keys(options)?; aws::add_amazon_s3_specific_options(url, &mut s3_options).await; diff --git a/tests/fixtures.rs b/tests/fixtures.rs index 46832963..4a05ec41 100644 --- a/tests/fixtures.rs +++ b/tests/fixtures.rs @@ -26,11 +26,18 @@ pub fn schemas() -> ListSchemaResponse { schemas: vec![ SchemaObject { name: "local".to_string(), - tables: vec![TableObject { - name: "file".to_string(), - path: "delta-0.8.0-partitioned".to_string(), - store: None, - }], + tables: vec![ + TableObject { + name: "file".to_string(), + path: "delta-0.8.0-partitioned".to_string(), + store: None, + }, + TableObject { + name: "file_with_store".to_string(), + path: "delta-0.8.0-partitioned".to_string(), + store: Some("local_fs".to_string()), + }, + ], }, SchemaObject { name: "s3".to_string(), @@ -82,6 +89,14 @@ pub fn schemas() -> ListSchemaResponse { fake_gcs_creds(), )]), }, + StorageLocation { + name: "local_fs".to_string(), + location: format!( + "file://{}/tests/data/", + std::env::current_dir().unwrap().display() + ), + options: HashMap::new(), + }, ], } } diff --git a/tests/flight/inline_metastore.rs b/tests/flight/inline_metastore.rs index 422c8efe..9d5fb889 100644 --- a/tests/flight/inline_metastore.rs +++ b/tests/flight/inline_metastore.rs @@ -3,6 +3,7 @@ use crate::flight::*; #[rstest] #[should_panic(expected = "External error: Not a Delta table: no log files")] #[case("local.file", false)] +#[case("local.file_with_store", false)] #[case("local.file", true)] #[case("s3.minio", true)] #[case("gcs.fake", true)] From 7fb20c657dbfff4a1c0df6a30912ab4cd88ce28b Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 26 Jul 2024 14:31:48 +0200 Subject: [PATCH 16/17] Make the test case invariant to the working directory that it is being run from --- tests/fixtures.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures.rs b/tests/fixtures.rs index 4a05ec41..7ba83c1c 100644 --- a/tests/fixtures.rs +++ b/tests/fixtures.rs @@ -93,7 +93,7 @@ pub fn schemas() -> ListSchemaResponse { name: "local_fs".to_string(), location: format!( "file://{}/tests/data/", - std::env::current_dir().unwrap().display() + std::env::var("CARGO_MANIFEST_DIR").unwrap() ), options: HashMap::new(), }, From 147c6f218840547efad60ba89988294c1f5f89ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Lizardo?= Date: Fri, 26 Jul 2024 15:14:14 +0200 Subject: [PATCH 17/17] Change add environment variable methods to a more robust algorithm --- object_store_factory/src/aws.rs | 29 +++++++++++++---------------- object_store_factory/src/google.rs | 29 +++++++++++------------------ 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/object_store_factory/src/aws.rs b/object_store_factory/src/aws.rs index f406e008..0869d434 100644 --- a/object_store_factory/src/aws.rs +++ b/object_store_factory/src/aws.rs @@ -100,25 +100,22 @@ pub async fn add_amazon_s3_specific_options( pub fn add_amazon_s3_environment_variables( options: &mut HashMap, ) { - let env_vars = &[ - ("AWS_ACCESS_KEY_ID", AmazonS3ConfigKey::AccessKeyId), - ("AWS_SECRET_ACCESS_KEY", AmazonS3ConfigKey::SecretAccessKey), - ("AWS_DEFAULT_REGION", AmazonS3ConfigKey::Region), - ("AWS_ENDPOINT", AmazonS3ConfigKey::Endpoint), - ("AWS_SESSION_TOKEN", AmazonS3ConfigKey::Token), - ( - "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", - AmazonS3ConfigKey::ContainerCredentialsRelativeUri, - ), - ]; - - for &(env_var_name, config_key) in env_vars.iter() { - if let Ok(val) = env::var(env_var_name) { - options.insert(config_key, val); + for (os_key, os_value) in std::env::vars_os() { + if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { + if key.starts_with("AWS_") { + if let Ok(config_key) = key.to_ascii_lowercase().parse() { + options.insert(config_key, value.to_string()); + } + } } } - if env::var("AWS_ALLOW_HTTP") == Ok("true".to_string()) { + if env::var( + AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp) + .as_ref() + .to_uppercase(), + ) == Ok("true".to_string()) + { options.insert( AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp), "true".to_string(), diff --git a/object_store_factory/src/google.rs b/object_store_factory/src/google.rs index 4e5a3018..a163ea39 100644 --- a/object_store_factory/src/google.rs +++ b/object_store_factory/src/google.rs @@ -62,24 +62,17 @@ pub fn map_options_into_google_config_keys( pub fn add_google_cloud_storage_environment_variables( options: &mut HashMap, ) { - let env_vars = &[ - ("GOOGLE_SERVICE_ACCOUNT", GoogleConfigKey::ServiceAccount), - ( - "GOOGLE_SERVICE_ACCOUNT_PATH", - GoogleConfigKey::ServiceAccount, - ), - ("SERVICE_ACCOUNT", GoogleConfigKey::ServiceAccount), - ( - "GOOGLE_SERVICE_ACCOUNT_KEY", - GoogleConfigKey::ServiceAccountKey, - ), - ("GOOGLE_BUCKET", GoogleConfigKey::Bucket), - ("GOOGLE_BUCKET_NAME", GoogleConfigKey::Bucket), - ]; - - for &(env_var_name, config_key) in env_vars.iter() { - if let Ok(val) = env::var(env_var_name) { - options.insert(config_key, val); + if let Ok(service_account_path) = env::var("SERVICE_ACCOUNT") { + options.insert(GoogleConfigKey::ServiceAccount, service_account_path); + } + + for (os_key, os_value) in env::vars_os() { + if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) { + if key.starts_with("GOOGLE_") { + if let Ok(config_key) = key.to_ascii_lowercase().parse() { + options.insert(config_key, value.to_string()); + } + } } } }