-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix empty path segments in Data Path transformations #453
base: main
Are you sure you want to change the base?
Changes from 34 commits
6aeb241
d040095
b943218
d3909c2
91015cd
7e8e55f
2ce1169
f847a23
8e68394
85afee0
7a17a79
9a571ff
4ed076d
bb012f8
2fa69e8
def765b
5a459da
8d6b230
f48d1ea
2c3d77e
2283dcb
01ff833
bb9b1bb
73464d7
cbf15e9
820f8a0
8c3bcea
3c0746b
c5770be
0f71abc
f4c4610
917199b
3e0f229
5e19b0e
80c9717
2183be5
fb9ba51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,8 @@ | |
# limitations under the License. | ||
# | ||
|
||
import re | ||
|
||
from dataclasses import dataclass | ||
from urllib.parse import urlparse | ||
|
||
|
@@ -33,19 +35,13 @@ def to_uri(self) -> str: | |
Converts the S3Path to a URI. | ||
:return: URI path | ||
""" | ||
if not self.bucket or not self.path: | ||
raise ValueError("Bucket and path must be defined") | ||
|
||
return f"s3://{self.bucket}/{self.path}" | ||
|
||
def base_uri(self) -> str: | ||
""" | ||
Returns the base URI of the S3Path. | ||
:return: URI path | ||
""" | ||
if not self.bucket: | ||
raise ValueError("Bucket must be defined") | ||
|
||
return f"https://{self.bucket}.s3.amazonaws.com" | ||
|
||
@classmethod | ||
|
@@ -62,6 +58,18 @@ def from_uri(cls, url: str) -> "S3Path": | |
path: str | ||
protocol: str = DataProtocols.S3.value | ||
|
||
def __post_init__(self): | ||
if not self.bucket: | ||
raise ValueError("Bucket must be defined") | ||
|
||
path_regex = r"^(?![0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$)[a-z0-9]([a-z0-9\-]{1,61}[a-z0-9])?(\/(?!.*(\/\/|\\))([^\/].{0,1022}\/?)?)?$" | ||
|
||
s3_path_without_prefix = f"{self.bucket}/{self.path}" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The regex should only be applied to … (the inline code naming the target was lost in extraction) |
||
match = re.match(path_regex, s3_path_without_prefix) | ||
|
||
if not match: | ||
raise ValueError(f"Invalid S3Path provided, must comply with : {path_regex}") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do not put the regex in the error message; explain the rule in human language :) |
||
|
||
@classmethod | ||
def from_hdfs_path(cls, hdfs_path: str) -> "S3Path": | ||
""" | ||
|
@@ -78,9 +86,6 @@ def to_hdfs_path(self) -> str: | |
Converts the S3Path to an HDFS compatible path. | ||
:return: HDFS path | ||
""" | ||
if not self.bucket or not self.path: | ||
raise ValueError("Bucket and path must be defined") | ||
|
||
return f"s3a://{self.bucket}/{self.path}" | ||
|
||
def to_delta_rs_path(self) -> str: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Simplify this regex to just check for `//` (i.e. an empty path segment).