Skip to content

Commit

Permalink
fix(ingest/s3): incorrectly parsing path in s3_uri
Browse files (browse the repository at this point in the history)
  • Loading branch information
eagle-25 committed Dec 15, 2024
1 parent 2291c71 commit 932f39a
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/s3/source.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,7 @@
from itertools import groupby
from pathlib import PurePath
from typing import Any, Dict, Iterable, List, Optional, Tuple
+ from urllib.parse import urlparse

import smart_open.compression as so_compression
from more_itertools import peekable
Expand Down Expand Up @@ -993,9 +994,7 @@ def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePa
folders = []
for dir in dirs_to_process:
logger.info(f"Getting files from folder: {dir}")
- prefix_to_process = dir.rstrip("\\").lstrip(
- self.create_s3_path(bucket_name, "/")
- )
+ prefix_to_process = urlparse(dir).path.lstrip("/")

folders.extend(
self.get_folder_info(
Expand Down

0 comments on commit 932f39a

Please sign in to comment.