Skip to content

Commit

Permalink
fix: Expand brackets in async glob expansion (#17630)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Jul 15, 2024
1 parent 9a13c82 commit c6d6d73
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
5 changes: 2 additions & 3 deletions crates/polars-io/src/cloud/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ fn extract_prefix_expansion(url: &str) -> PolarsResult<(String, Option<String>)>
let mut expansion = String::new();
let mut last_split_was_wildcard = false;
for split in splits {
let has_star = split.contains('*');
if expansion.is_empty() && !has_star {
if expansion.is_empty() && memchr::memchr2(b'*', b'[', split.as_bytes()).is_none() {
// We are still gathering splits in the prefix.
if !prefix.is_empty() {
prefix.push(DELIMITER);
Expand All @@ -44,7 +43,7 @@ fn extract_prefix_expansion(url: &str) -> PolarsResult<(String, Option<String>)>
expansion.push(DELIMITER);
}
// Handle '.' inside a split.
if split.contains('.') || split.contains('*') {
if memchr::memchr2(b'.', b'*', split.as_bytes()).is_some() {
let processed = split.replace('.', "\\.");
expansion.push_str(&processed.replace('*', "([^/]*)"));
continue;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/utils/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ pub fn expand_paths_hive(
if path.extension() != ext {
polars_bail!(
InvalidOperation: r#"directory contained paths with different file extensions: \
first path: {}, second path: {}. Please use a glob pattern to explicitly specify
first path: {}, second path: {}. Please use a glob pattern to explicitly specify \
which files to read (e.g. "dir/**/*", "dir/**/*.parquet")"#,
out_paths[i - 1].to_str().unwrap(), path.to_str().unwrap()
);
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/io/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,3 +655,13 @@ def test_scan_include_file_name(

# Test codepaths that materialize empty DataFrames
assert_frame_equal(lf.head(0).collect(streaming=streaming), df.head(0))


@pytest.mark.write_disk()
def test_async_path_expansion_bracket_17629(tmp_path: Path) -> None:
    """Scanning with a bracket pattern must resolve to the matching file.

    A single-row frame is written to ``data.parquet`` and then scanned through
    the pattern ``[d]ata.parquet``; the collected result must equal the frame
    that was written.
    """
    expected = pl.DataFrame({"x": 1})

    # Write the fixture under its literal file name.
    parquet_file = tmp_path / "data.parquet"
    expected.write_parquet(parquet_file)

    # Scan through a pattern whose bracket class matches only the letter "d".
    bracket_pattern = tmp_path / "[d]ata.parquet"
    result = pl.scan_parquet(bracket_pattern).collect()

    assert_frame_equal(result, expected)

0 comments on commit c6d6d73

Please sign in to comment.