Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: url and object splitting for local files #1007

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions src/uproot/_util.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,11 +5,14 @@
and may be changed without notice.
"""

from __future__ import annotations

import datetime
import glob
import itertools
import numbers
import os
import pathlib
import platform
import re
import warnings
Expand Down Expand Up @@ -280,9 +283,10 @@ def regularize_path(path):
_might_be_port = re.compile(r"^[0-9].*")
_remote_schemes = ["ROOT", "S3", "HTTP", "HTTPS"]
_schemes = ["FILE", *_remote_schemes]
_uri_scheme = re.compile("^[a-zA-Z][a-zA-Z0-9+.-]*://")


def file_object_path_split(path):
def file_object_path_split(path: str) -> tuple[str, str | None]:
"""
Split a path with a colon into a file path and an object-in-file path.

Expand All @@ -296,13 +300,19 @@ def file_object_path_split(path):
"""

path: str = regularize_path(path)
# remove whitespace
path = path.strip()

# split url into parts
parsed_url = urlparse(path)
if _uri_scheme.match(path):
parsed_url = urlparse(path)
parts = parsed_url.path.split(":")
else:
# local file path
parts = path.split(":")
if pathlib.PureWindowsPath(path).drive:
# Windows absolute path
assert len(parts) >= 2, f"could not split object from windows path {path}"
parts = [parts[0] + ":" + parts[1]] + parts[2:]

parts = parsed_url.path.split(":")
if len(parts) == 1:
obj = None
elif len(parts) == 2:
Expand All @@ -311,7 +321,7 @@ def file_object_path_split(path):
path = path[: -len(obj) - 1]
obj = obj.strip()
else:
raise ValueError(f"too many colons in file path: {path} for url {parsed_url}")
raise ValueError(f"could not split object from path {path}")

return path, obj

Expand All @@ -333,7 +343,7 @@ def file_path_to_source_class(file_path, options):
if out is not None:
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'handler' is not a class object inheriting from Source: " + repr(out)
f"'handler' is not a class object inheriting from Source: {out!r}"
)
# check if "object_handler" is set
if (
Expand Down Expand Up @@ -374,8 +384,7 @@ def file_path_to_source_class(file_path, options):
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'object_handler' is not a class object inheriting from Source: "
+ repr(out)
f"'object_handler' is not a class object inheriting from Source: {out!r}"
)

return out, file_path
Expand Down
45 changes: 38 additions & 7 deletions tests/test_0976_path_object_split.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,8 +4,9 @@
import pathlib


def test_url_split():
for input_url, result in [
@pytest.mark.parametrize(
"input_value, expected_output",
[
(
"https://github.com/scikit-hep/scikit-hep-testdata/raw/v0.4.33/src/skhep_testdata/data/uproot-issue121.root:Events",
(
Expand Down Expand Up @@ -51,11 +52,17 @@ def test_url_split():
(
r"C:\tmp\test\dir\file.root:Dir/Test",
(
# make it work on Windows and Linux
r"C:\tmp\test\dir\file.root",
"Dir/Test",
),
),
(
r"C:\tmp\test\dir\file.root",
(
r"C:\tmp\test\dir\file.root",
None,
),
),
(
"ssh://user@host:port/path/to/file:object",
(
Expand All @@ -77,7 +84,31 @@ def test_url_split():
"object",
),
),
]:
url, obj = uproot._util.file_object_path_split(input_url)
assert url == result[0]
assert obj == result[1]
(
"00376186-543E-E311-8D30-002618943857.root:Events",
(
"00376186-543E-E311-8D30-002618943857.root",
"Events",
),
),
(
"00376186-543E-E311-8D30-002618943857.root",
(
"00376186-543E-E311-8D30-002618943857.root",
None,
),
),
(
"local/file.root://Events",
(
"local/file.root",
"//Events",
),
),
],
)
def test_url_split(input_value, expected_output):
url, obj = uproot._util.file_object_path_split(input_value)
url_expected, obj_expected = expected_output
assert url == url_expected
assert obj == obj_expected