From 4be368637e1486141b563da5d08253e4175297a2 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Thu, 18 Apr 2024 12:38:37 -0700 Subject: [PATCH] fix: do not create directory when open an non-existing dataset (#2215) Closes https://github.com/lancedb/lancedb/issues/1223 --- Cargo.toml | 1 + rust/lance-io/Cargo.toml | 3 ++- rust/lance-io/src/object_store.rs | 30 ++++++++++++++++-------------- rust/lance/src/dataset.rs | 14 ++++++++++++++ 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index badfa41057..caad0c22ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,6 +106,7 @@ num_cpus = "1.0" object_store = { version = "0.9.0" } parquet = "49.0" pin-project = "1.0" +path_abs = "0.5" pprof = { version = "0.13", features = ["flamegraph", "criterion"] } proptest = "1.3.1" prost = "0.12.2" diff --git a/rust/lance-io/Cargo.toml b/rust/lance-io/Cargo.toml index 51e40f8225..47f65582b2 100644 --- a/rust/lance-io/Cargo.toml +++ b/rust/lance-io/Cargo.toml @@ -37,10 +37,11 @@ pin-project.workspace = true prost.workspace = true shellexpand.workspace = true snafu.workspace = true -tokio.workspace = true tokio-stream.workspace = true +tokio.workspace = true tracing.workspace = true url.workspace = true +path_abs.workspace = true [dev-dependencies] criterion.workspace = true diff --git a/rust/lance-io/src/object_store.rs b/rust/lance-io/src/object_store.rs index d29cfeb4a5..1b92de4434 100644 --- a/rust/lance-io/src/object_store.rs +++ b/rust/lance-io/src/object_store.rs @@ -4,7 +4,7 @@ //! Extend [object_store::ObjectStore] functionalities use std::collections::HashMap; -use std::path::{Path as StdPath, PathBuf}; +use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, SystemTime}; @@ -17,11 +17,9 @@ use futures::{future, stream::BoxStream, StreamExt, TryStreamExt}; use object_store::aws::{ AmazonS3ConfigKey, AwsCredential as ObjectStoreAwsCredential, AwsCredentialProvider, }; -use object_store::azure::AzureConfigKey; -use object_store::gcp::GoogleConfigKey; use object_store::{ - aws::AmazonS3Builder, local::LocalFileSystem, memory::InMemory, CredentialProvider, - Error as ObjectStoreError, Result as ObjectStoreResult, + aws::AmazonS3Builder, azure::AzureConfigKey, gcp::GoogleConfigKey, local::LocalFileSystem, + memory::InMemory, CredentialProvider, Error as ObjectStoreError, Result as ObjectStoreResult, }; use object_store::{parse_url_opts, ClientOptions, DynObjectStore, StaticCredentialProvider}; use object_store::{path::Path, ObjectMeta, ObjectStore as OSObjectStore}; @@ -31,7 +29,6 @@ use tokio::{io::AsyncWriteExt, sync::RwLock}; use url::Url; use super::local::LocalObjectReader; - mod tracing; use self::tracing::ObjectStoreTracingExt; use crate::{object_reader::CloudObjectReader, object_writer::ObjectWriter, traits::Reader}; @@ -371,28 +368,32 @@ impl ObjectStore { pub fn from_path(str_path: &str) -> Result<(Self, Path)> { let expanded = tilde(str_path).to_string(); - let expanded_path = StdPath::new(&expanded); - if !expanded_path.try_exists()? { - std::fs::create_dir_all(expanded_path)?; + let mut expanded_path = path_abs::PathAbs::new(expanded) + .unwrap() + .as_path() + .to_path_buf(); + // path_abs::PathAbs::new(".") returns an empty string. + if let Some(s) = expanded_path.as_path().to_str() { + if s.is_empty() { + expanded_path = std::env::current_dir()?.to_path_buf(); + } } - - let expanded_path = expanded_path.canonicalize()?; - Ok(( Self { inner: Arc::new(LocalFileSystem::new()).traced(), scheme: String::from("file"), - base_path: Path::from_absolute_path(&expanded_path)?, + base_path: Path::from_absolute_path(expanded_path.as_path())?, block_size: 4 * 1024, // 4KB block size }, - Path::from_filesystem_path(&expanded_path)?, + Path::from_absolute_path(expanded_path.as_path())?, )) } async fn new_from_url(url: Url, params: ObjectStoreParams) -> Result { configure_store(url.as_str(), params).await } + /// Local object store. pub fn local() -> Self { Self { @@ -870,6 +871,7 @@ mod tests { use parquet::data_type::AsBytes; use std::env::set_current_dir; use std::fs::{create_dir_all, write}; + use std::path::Path as StdPath; use std::sync::atomic::{AtomicBool, Ordering}; /// Write test content to file. diff --git a/rust/lance/src/dataset.rs b/rust/lance/src/dataset.rs index 3aa3b7d257..1d4f61efc8 100644 --- a/rust/lance/src/dataset.rs +++ b/rust/lance/src/dataset.rs @@ -4920,4 +4920,18 @@ mod tests { Ok(()) } + + // Bug: https://github.com/lancedb/lancedb/issues/1223 + #[tokio::test] + async fn test_open_nonexisting_dataset() { + let test_dir = tempdir().unwrap(); + let base_dir = test_dir.path(); + let dataset_dir = base_dir.join("non_existing"); + let dataset_uri = dataset_dir.to_str().unwrap(); + + let res = Dataset::open(dataset_uri).await; + assert!(res.is_err()); + + assert!(!dataset_dir.exists()); + } }