Skip to content

Commit

Permalink
test: cover more fsspec backends (#1015)
Browse files Browse the repository at this point in the history
* use paramiko instead of sshfs

* use specified port

* test default handler behaviour

* default to fsspec instead of error if scheme not found

* attempt to close socket

* fix ci

* Revert "fix ci"

This reverts commit e56e337.

* broader exception

* also handle socket exception

* make getting the user more robust

* enable the GitHub test, skipping it when the API rate limit is hit (so it runs whenever possible but never fails because of rate limits). TODO: update exception class

* add memory filesystem test

* add zip and tar tests

* fix memory test

* zip/tar tests

* rename to reading

* check if test works

* missing import

* remove parent dirs

* fix zip tar tests

* skip github if api limits hit

* attempt to fix windows paths

* use more complex uri with object in zip test

* debug

* add new test case to object url split

* add new failing test case: TODO make it work

* revert debug changes

* working in new test case
  • Loading branch information
lobis authored Nov 7, 2023
1 parent e458411 commit 98b1d48
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 13 deletions.
15 changes: 12 additions & 3 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,17 +311,26 @@ def file_object_path_split(path: str) -> tuple[str, str | None]:
path: str = regularize_path(path)
path = path.strip()

if "://" not in path:
# assume it's a local file path
def _split_path(path: str) -> list[str]:
parts = path.split(":")
if pathlib.PureWindowsPath(path).drive:
# Windows absolute path
assert len(parts) >= 2, f"could not split object from windows path {path}"
parts = [parts[0] + ":" + parts[1]] + parts[2:]
return parts

if "://" not in path:
# assume it's a local file path
parts = _split_path(path)
elif _uri_scheme.match(path):
# if not a local path, attempt to match a URI scheme
parsed_url = urlparse(path)
parts = parsed_url.path.split(":")
parsed_url_path = parsed_url.path
if parsed_url_path.startswith("//"):
# This can be a leftover from url chaining in fsspec
# TODO: replace this with str.removeprefix once Python 3.8 is dropped
parsed_url_path = parsed_url_path[2:]
parts = _split_path(parsed_url_path)
else:
# invalid scheme
scheme = path.split("://")[0]
Expand Down
84 changes: 74 additions & 10 deletions tests/test_0692_fsspec.py → tests/test_0692_fsspec_reading.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE

import pytest
import requests

import uproot
import uproot.source.fsspec

import skhep_testdata
import queue
import subprocess
import fsspec
import os


def test_open_fsspec_http(server):
Expand All @@ -22,16 +25,18 @@ def test_open_fsspec_http(server):


@pytest.mark.network
@pytest.mark.skip(
reason="skipping due to GitHub API rate limitations - this should work fine - see https://github.com/scikit-hep/uproot5/pull/973 for details"
)
def test_open_fsspec_github():
with uproot.open(
"github://scikit-hep:[email protected]/src/skhep_testdata/data/uproot-issue121.root",
handler=uproot.source.fsspec.FSSpecSource,
) as f:
data = f["Events/MET_pt"].array(library="np")
assert len(data) == 40
try:
with uproot.open(
"github://scikit-hep:[email protected]/src/skhep_testdata/data/uproot-issue121.root"
) as f:
data = f["Events/MET_pt"].array(library="np")
assert len(data) == 40
except requests.exceptions.HTTPError as e:
if e.response.status_code == 403:
pytest.skip("GitHub API limit has been reached")
else:
raise e


def test_open_fsspec_local():
Expand Down Expand Up @@ -140,3 +145,62 @@ def test_fsspec_chunks(server):

chunk_data_sum = {sum(chunk.raw_data) for chunk in chunks}
assert chunk_data_sum == {3967, 413, 10985}, "Chunk data does not match"


def test_fsspec_memory():
    """Read a ROOT file back through fsspec's ``memory://`` filesystem."""
    # load the reference file's raw bytes from disk
    source_path = skhep_testdata.data_path("uproot-issue121.root")
    with open(source_path, "rb") as source:
        payload = source.read()

    # start from an empty in-memory store and copy the bytes into it
    memory_fs = fsspec.filesystem(protocol="memory")
    memory_fs.store.clear()
    file_path = "skhep_testdata/uproot-issue121.root"
    memory_fs.touch(file_path)
    with memory_fs.open(file_path, "wb") as sink:
        sink.write(payload)

    # open the in-memory copy by URL and check the branch length
    with uproot.open(f"memory://{file_path}") as root_file:
        data = root_file["Events/MET_pt"].array(library="np")
        assert len(data) == 40


def test_fsspec_tar(tmp_path):
    """Read a ROOT file stored inside a tar archive via a chained fsspec URL."""
    import io
    import tarfile

    filename = "uproot-issue121.root"
    with open(skhep_testdata.data_path("uproot-issue121.root"), "rb") as source:
        payload = source.read()

    # pack the bytes into a single-member tar archive under tmp_path
    filename_tar = os.path.join(tmp_path, f"{filename}.tar")
    member = tarfile.TarInfo(name=filename)
    member.size = len(payload)
    with tarfile.open(filename_tar, mode="w") as archive:
        archive.addfile(member, fileobj=io.BytesIO(payload))

    # open the archive member through the chained tar:// ... ::file:// URL
    with uproot.open(f"tar://{filename}::file://{filename_tar}") as root_file:
        data = root_file["Events/MET_pt"].array(library="np")
        assert len(data) == 40


def test_fsspec_zip(tmp_path):
    """Read a branch from a ROOT file inside a zip archive via a chained URL."""
    import zipfile

    filename = "uproot-issue121.root"
    with open(skhep_testdata.data_path("uproot-issue121.root"), "rb") as source:
        payload = source.read()

    # write the bytes as a single entry of a zip archive under tmp_path
    filename_zip = os.path.join(tmp_path, f"{filename}.zip")
    with zipfile.ZipFile(filename_zip, mode="w") as archive:
        archive.writestr(filename, data=payload)

    # the trailing ":Events/MET_pt" selects the object inside the ROOT file
    url = f"zip://{filename}::file://{filename_zip}:Events/MET_pt"
    with uproot.open(url) as branch:
        data = branch.array(library="np")
        assert len(data) == 40
14 changes: 14 additions & 0 deletions tests/test_0976_path_object_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,20 @@
None,
),
),
(
"zip://uproot-issue121.root::file:///tmp/pytest-of-runner/pytest-0/test_fsspec_zip0/uproot-issue121.root.zip:Events/MET_pt",
(
"zip://uproot-issue121.root::file:///tmp/pytest-of-runner/pytest-0/test_fsspec_zip0/uproot-issue121.root.zip",
"Events/MET_pt",
),
),
(
r"zip://uproot-issue121.root::file://C:\Users\runneradmin\AppData\Local\Temp\pytest-of-runneradmin\pytest-0\test_fsspec_zip0\uproot-issue121.root.zip:Events/MET_pt",
(
r"zip://uproot-issue121.root::file://C:\Users\runneradmin\AppData\Local\Temp\pytest-of-runneradmin\pytest-0\test_fsspec_zip0\uproot-issue121.root.zip",
"Events/MET_pt",
),
),
],
)
def test_url_split(input_value, expected_output):
Expand Down

0 comments on commit 98b1d48

Please sign in to comment.