Skip to content

Commit

Permalink
fix: do not create directory when open an non-existing dataset (#2215)
Browse files Browse the repository at this point in the history
  • Loading branch information
eddyxu authored Apr 18, 2024
1 parent 9ae76fb commit 4be3686
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 15 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ num_cpus = "1.0"
object_store = { version = "0.9.0" }
parquet = "49.0"
pin-project = "1.0"
path_abs = "0.5"
pprof = { version = "0.13", features = ["flamegraph", "criterion"] }
proptest = "1.3.1"
prost = "0.12.2"
Expand Down
3 changes: 2 additions & 1 deletion rust/lance-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ pin-project.workspace = true
prost.workspace = true
shellexpand.workspace = true
snafu.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tokio.workspace = true
tracing.workspace = true
url.workspace = true
path_abs.workspace = true

[dev-dependencies]
criterion.workspace = true
Expand Down
30 changes: 16 additions & 14 deletions rust/lance-io/src/object_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! Extend [object_store::ObjectStore] functionalities
use std::collections::HashMap;
use std::path::{Path as StdPath, PathBuf};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
Expand All @@ -17,11 +17,9 @@ use futures::{future, stream::BoxStream, StreamExt, TryStreamExt};
use object_store::aws::{
AmazonS3ConfigKey, AwsCredential as ObjectStoreAwsCredential, AwsCredentialProvider,
};
use object_store::azure::AzureConfigKey;
use object_store::gcp::GoogleConfigKey;
use object_store::{
aws::AmazonS3Builder, local::LocalFileSystem, memory::InMemory, CredentialProvider,
Error as ObjectStoreError, Result as ObjectStoreResult,
aws::AmazonS3Builder, azure::AzureConfigKey, gcp::GoogleConfigKey, local::LocalFileSystem,
memory::InMemory, CredentialProvider, Error as ObjectStoreError, Result as ObjectStoreResult,
};
use object_store::{parse_url_opts, ClientOptions, DynObjectStore, StaticCredentialProvider};
use object_store::{path::Path, ObjectMeta, ObjectStore as OSObjectStore};
Expand All @@ -31,7 +29,6 @@ use tokio::{io::AsyncWriteExt, sync::RwLock};
use url::Url;

use super::local::LocalObjectReader;

mod tracing;
use self::tracing::ObjectStoreTracingExt;
use crate::{object_reader::CloudObjectReader, object_writer::ObjectWriter, traits::Reader};
Expand Down Expand Up @@ -371,28 +368,32 @@ impl ObjectStore {

pub fn from_path(str_path: &str) -> Result<(Self, Path)> {
let expanded = tilde(str_path).to_string();
let expanded_path = StdPath::new(&expanded);

if !expanded_path.try_exists()? {
std::fs::create_dir_all(expanded_path)?;
let mut expanded_path = path_abs::PathAbs::new(expanded)
.unwrap()
.as_path()
.to_path_buf();
// path_abs::PathAbs::new(".") returns an empty string.
if let Some(s) = expanded_path.as_path().to_str() {
if s.is_empty() {
expanded_path = std::env::current_dir()?.to_path_buf();
}
}

let expanded_path = expanded_path.canonicalize()?;

Ok((
Self {
inner: Arc::new(LocalFileSystem::new()).traced(),
scheme: String::from("file"),
base_path: Path::from_absolute_path(&expanded_path)?,
base_path: Path::from_absolute_path(expanded_path.as_path())?,
block_size: 4 * 1024, // 4KB block size
},
Path::from_filesystem_path(&expanded_path)?,
Path::from_absolute_path(expanded_path.as_path())?,
))
}

async fn new_from_url(url: Url, params: ObjectStoreParams) -> Result<Self> {
configure_store(url.as_str(), params).await
}

/// Local object store.
pub fn local() -> Self {
Self {
Expand Down Expand Up @@ -870,6 +871,7 @@ mod tests {
use parquet::data_type::AsBytes;
use std::env::set_current_dir;
use std::fs::{create_dir_all, write};
use std::path::Path as StdPath;
use std::sync::atomic::{AtomicBool, Ordering};

/// Write test content to file.
Expand Down
14 changes: 14 additions & 0 deletions rust/lance/src/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4920,4 +4920,18 @@ mod tests {

Ok(())
}

// Bug: https://github.com/lancedb/lancedb/issues/1223
#[tokio::test]
async fn test_open_nonexisting_dataset() {
let test_dir = tempdir().unwrap();
let base_dir = test_dir.path();
let dataset_dir = base_dir.join("non_existing");
let dataset_uri = dataset_dir.to_str().unwrap();

let res = Dataset::open(dataset_uri).await;
assert!(res.is_err());

assert!(!dataset_dir.exists());
}
}

0 comments on commit 4be3686

Please sign in to comment.