diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 6248c0dd5..c847e607d 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -116,3 +116,66 @@ jobs:
       - name: Run pytest
         run: |
           python -m pytest -vv tests --reruns 10 --reruns-delay 30 --only-rerun "(?i)http|ssl|timeout|expired|connection|socket"
+
+  pyodide-build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    env:
+      PYODIDE_VERSION: 0.26.2
+      PYODIDE_BUILD_VERSION: 0.28.0
+      AWKWARD_VERSION: v2.6.4
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install pyodide-build
+        run: python3 -m pip install pyodide-build==$PYODIDE_BUILD_VERSION
+
+      - name: Determine EMSDK version
+        id: compute-emsdk-version
+        run: |
+          pyodide config list
+          # Save EMSDK version
+          EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
+          echo "emsdk-version=$EMSCRIPTEN_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Install EMSDK
+        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: ${{ steps.compute-emsdk-version.outputs.emsdk-version }}
+
+      - name: Build the package
+        run: pyodide build
+
+      - name: Build an awkward wheel compatible with the awkward-cpp version in pyodide
+        run: |
+          git clone --depth 1 --branch $AWKWARD_VERSION https://github.com/scikit-hep/awkward.git dependencies/awkward
+          pyodide build dependencies/awkward
+          rm -rf dependencies/
+
+      - name: Download Pyodide
+        uses: pyodide/pyodide-actions/download-pyodide@v1
+        with:
+          version: ${{ env.PYODIDE_VERSION }}
+          to: pyodide-dist
+
+      - name: Install browser
+        uses: pyodide/pyodide-actions/install-browser@v1
+        with:
+          runner: selenium
+          browser: chrome
+          browser-version: latest
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install dependencies
+        run: pip install .[test-pyodide] pyodide-py==$PYODIDE_VERSION
+
+      - name: Run pytest
+        run: |
+          pytest -vv --dist-dir=./pyodide-dist/ --runner=selenium --runtime=chrome tests-wasm
diff --git a/.gitignore b/.gitignore
index 0aceed8b2..777961eb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,3 +136,10 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# Local copies of skhep_testdata files
+skhep_testdata/
+
+# Pyodide
+.pyodide*
+dist-pyodide/
diff --git a/pyproject.toml b/pyproject.toml
index 0a5b74418..08cda684e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,6 +80,12 @@ test = [
   "scikit-hep-testdata",
   "rangehttpserver"
 ]
+test-pyodide = [
+  "pytest>=6",
+  "pytest-pyodide",
+  "pytest-timeout",
+  "scikit-hep-testdata"
+]
 xrootd = ["fsspec-xrootd"]
 
 [project.urls]
diff --git a/src/uproot/_util.py b/src/uproot/_util.py
index 11c1d5bc4..96e9e8d21 100644
--- a/src/uproot/_util.py
+++ b/src/uproot/_util.py
@@ -13,6 +13,7 @@
 import numbers
 import os
 import re
+import sys
 import warnings
 from collections.abc import Iterable
 from pathlib import Path
@@ -27,6 +28,8 @@
 import uproot.source.fsspec
 import uproot.source.object
 
+wasm = sys.platform in ("emscripten", "wasi")
+
 
 def tobytes(array):
     """
diff --git a/src/uproot/source/coalesce.py b/src/uproot/source/coalesce.py
index 52cad9289..ab3ada9b7 100644
--- a/src/uproot/source/coalesce.py
+++ b/src/uproot/source/coalesce.py
@@ -33,6 +33,9 @@ def add_done_callback(self, callback, *, context=None):
         self._parent.add_done_callback(callback)
 
     def result(self, timeout=None):
+        if uproot._util.wasm:
+            # Pyodide futures don't support timeout
+            return self._parent.result()[self._s]
         return self._parent.result(timeout=timeout)[self._s]
 
 
@@ -126,7 +129,13 @@ def coalesce_requests(
 
     def chunkify(req: RangeRequest):
         chunk = uproot.source.chunk.Chunk(source, req.start, req.stop, req.future)
-        req.future.add_done_callback(uproot.source.chunk.notifier(chunk, notifications))
+        if uproot._util.wasm:
+            # Callbacks don't work in pyodide yet, so we call the notifier directly
+            uproot.source.chunk.notifier(chunk, notifications)()
+        else:
+            req.future.add_done_callback(
+                uproot.source.chunk.notifier(chunk, notifications)
+            )
         return chunk
 
     return list(map(chunkify, all_requests))
diff --git a/src/uproot/source/fsspec.py b/src/uproot/source/fsspec.py
index 588b6b31d..6f47890ba 100644
--- a/src/uproot/source/fsspec.py
+++ b/src/uproot/source/fsspec.py
@@ -164,6 +164,12 @@ def submit(request_ranges: list[tuple[int, int]]):
                     self._fs.cat_ranges, paths=paths, starts=starts, ends=ends
                 )
             )
+            if uproot._util.wasm:
+                # Threads can't be spawned in pyodide yet, so we run the function directly
+                # and return a future that is already resolved.
+                return uproot.source.futures.TrivialFuture(
+                    self._fs.cat_ranges(paths=paths, starts=starts, ends=ends)
+                )
             return self._executor.submit(coroutine)
 
         return coalesce_requests(
diff --git a/src/uproot/source/http.py b/src/uproot/source/http.py
index 2f0b64a03..e3fb1e617 100644
--- a/src/uproot/source/http.py
+++ b/src/uproot/source/http.py
@@ -239,6 +239,26 @@ def future(source: uproot.source.chunk.Source, start: int, stop: int):
         Returns a :doc:`uproot.source.futures.ResourceFuture` that calls
         :ref:`uproot.source.http.HTTPResource.get` with ``start`` and ``stop``.
         """
+        # The default implementation doesn't work in Pyodide
+        if uproot._util.wasm:
+
+            def task(resource):
+                import requests
+
+                r = requests.get(
+                    source._file_path,
+                    headers=dict(
+                        {"Range": f"bytes={start}-{stop - 1}"}, **source.auth_headers
+                    ),
+                    timeout=source.timeout,
+                )
+                # Fail loudly on 4xx/5xx instead of handing an error page's
+                # body to the caller as if it were file data.
+                r.raise_for_status()
+                return r.content
+
+            return uproot.source.futures.ResourceFuture(task)
+
         connection = make_connection(source.parsed_url, source.timeout)
         connection.request(
             "GET",
@@ -281,6 +301,14 @@ def multifuture(
         ``results`` and ``futures``. Subsequent attempts would immediately
         use the :ref:`uproot.source.http.HTTPSource.fallback`.
         """
+        # The default implementation doesn't work in Pyodide
+        if uproot._util.wasm:
+
+            def task(resource):
+                resource.handle_no_multipart(source, ranges, futures, results)
+
+            return uproot.source.futures.ResourceFuture(task)
+
         connection = make_connection(source.parsed_url, source.timeout)
 
         connection.request(
diff --git a/tests-wasm/__init__.py b/tests-wasm/__init__.py
new file mode 100644
index 000000000..f8c1f9969
--- /dev/null
+++ b/tests-wasm/__init__.py
@@ -0,0 +1 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
diff --git a/tests-wasm/test_1272_basic_functionality.py b/tests-wasm/test_1272_basic_functionality.py
new file mode 100644
index 000000000..68d14e575
--- /dev/null
+++ b/tests-wasm/test_1272_basic_functionality.py
@@ -0,0 +1,124 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+from __future__ import annotations
+
+import pytest
+from utils import run_test_in_pyodide
+
+
+# Taken from test_0034_generic_objects_in_ttrees.py
+@run_test_in_pyodide(test_file="uproot-HZZ-objects.root", packages=["pytest", "xxhash"])
+def test_read_ttree(selenium):
+    import pytest
+
+    import uproot
+
+    awkward = pytest.importorskip("awkward")
+
+    with uproot.open("uproot-HZZ-objects.root")["events"] as tree:
+        result = tree["muonp4"].array(library="ak")
+
+        assert (
+            str(awkward.type(result))
+            == "2421 * var * TLorentzVector[fP: TVector3[fX: float64, "
+            "fY: float64, fZ: float64], fE: float64]"
+        )
+
+        assert result[0, 0, "fE"] == 54.77949905395508
+        assert result[0, 0, "fP", "fX"] == -52.89945602416992
+        assert result[0, 0, "fP", "fY"] == -11.654671669006348
+        assert result[0, 0, "fP", "fZ"] == -8.16079330444336
+
+
+# Taken from test_0406_write_a_tree.py
+@run_test_in_pyodide()
+def test_write_ttree(selenium):
+    import numpy as np
+
+    import uproot
+
+    newfile = "newfile.root"
+
+    b1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+    b2 = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
+
+    with uproot.recreate(newfile, compression=None) as fout:
+        tree = fout.mktree("t", {"b1": np.int32, "b2": np.float64}, "title")
+
+        assert tree._cascading._basket_capacity == 10
+
+        for _ in range(5):
+            fout["t"].extend({"b1": b1, "b2": b2})
+
+        assert tree._cascading._basket_capacity == 10
+
+        for _ in range(10):
+            fout["t"].extend({"b1": b1, "b2": b2})
+
+        assert tree._cascading._basket_capacity == 100
+
+        for _ in range(90):
+            fout["t"].extend({"b1": b1, "b2": b2})
+
+        assert tree._cascading._basket_capacity == 1000
+
+    with uproot.open(newfile) as fin:
+        assert fin.keys() == ["t;1"]  # same cycle number
+        t2 = fin["t"]
+        assert t2.num_entries == len(b1) * 105
+        assert t2["b1"].array(library="np").tolist() == b1 * 105
+        assert t2["b2"].array(library="np").tolist() == b2 * 105
+
+
+# Taken from test_1191_rntuple_fixes.py
+@run_test_in_pyodide(test_file="test_ntuple_extension_columns.root")
+def test_read_rntuple(selenium):
+    import uproot
+
+    with uproot.open("test_ntuple_extension_columns.root") as f:
+        obj = f["EventData"]
+
+        assert len(obj.column_records) > len(obj.header.column_records)
+        assert len(obj.column_records) == 936
+        assert obj.column_records[903].first_ele_index == 36
+
+        arrays = obj.arrays()
+
+        pbs = arrays[
+            "HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux::fastDIPS20211215_pb"
+        ]
+        assert len(pbs) == 40
+        assert all(len(a) == 0 for a in pbs[:36])
+        assert next(i for i, a in enumerate(pbs) if len(a) != 0) == 36
+
+        jets = arrays["HLT_AntiKt4EMPFlowJets_subresjesgscIS_ftf_TLAAux:"]
+        assert len(jets.pt) == len(pbs)
+
+
+# Taken from test_0088_read_with_http.py
+@pytest.mark.network
+@run_test_in_pyodide(packages=["requests"])
+def test_read_ttree_http(selenium):
+    import uproot
+
+    with uproot.open(
+        "http://starterkit.web.cern.ch/starterkit/data/advanced-python-2019/dalitzdata.root",
+        handler=uproot.source.http.HTTPSource,
+    ) as f:
+        data = f["tree"].arrays(["Y1", "Y2"], library="np")
+        assert len(data["Y1"]) == 100000
+        assert len(data["Y2"]) == 100000
+
+
+# Taken from test_1191_rntuple_fixes.py
+@pytest.mark.network
+@run_test_in_pyodide(packages=["requests"])
+def test_read_rntuple_http(selenium):
+    import uproot
+
+    with uproot.open(
+        "https://github.com/scikit-hep/scikit-hep-testdata/raw/main/src/skhep_testdata/data/Run2012BC_DoubleMuParked_Muons_rntuple_1000evts.root",
+        handler=uproot.source.http.HTTPSource,
+    ) as f:
+        obj = f["Events"]
+        arrays = obj.arrays()
+        assert arrays["nMuon"].tolist() == [len(a) for a in arrays["Muon_pt"]]
diff --git a/tests-wasm/utils.py b/tests-wasm/utils.py
new file mode 100644
index 000000000..a703b13e2
--- /dev/null
+++ b/tests-wasm/utils.py
@@ -0,0 +1,83 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE
+from __future__ import annotations
+
+import os
+import pathlib
+import shutil
+from functools import wraps
+
+import pytest
+import skhep_testdata
+
+try:
+    import pytest_pyodide
+    from pytest_pyodide import run_in_pyodide
+    from pytest_pyodide.decorator import copy_files_to_pyodide
+except ImportError:
+    pytest.skip("Pyodide is not available", allow_module_level=True)
+
+# Disable CORS so that we can fetch files for http tests
+# Currently, this can only be done for Chrome
+selenium_config = pytest_pyodide.config.get_global_config()
+selenium_config.set_flags(
+    "chrome",
+    [
+        *selenium_config.get_flags("chrome"),
+        "--disable-web-security",
+        "--disable-site-isolation-trials",
+    ],
+)
+
+
+def ensure_testdata(filename):
+    """
+    Args:
+        filename (str): Name of a file provided by ``skhep_testdata``.
+
+    Copies the file into a local ``skhep_testdata/`` directory (relative to the
+    working directory) unless it is already there. ``copy_files_to_pyodide``
+    is given this relative path by ``run_test_in_pyodide``.
+    """
+    target = pathlib.Path("skhep_testdata") / filename
+    if not target.is_file():
+        # Create the file's own parent so names with subdirectories also work.
+        os.makedirs(target.parent, exist_ok=True)
+        shutil.copyfile(skhep_testdata.data_path(filename), target)
+
+
+def run_test_in_pyodide(test_file=None, **kwargs):
+    """
+    Args:
+        test_file (None or str): Name of a ``skhep_testdata`` file to make
+            available to the test under the same name, or None for no file.
+        **kwargs: Forwarded unchanged to ``run_in_pyodide``.
+
+    Returns a decorator that wraps a test taking the ``selenium`` fixture: the
+    ``dist`` directory (and ``test_file``, if given) is handed to
+    ``copy_files_to_pyodide`` with ``install_wheels=True``, then the test body
+    is run through ``run_in_pyodide``.
+    """
+
+    def decorator(test_func):
+        @wraps(test_func)
+        def wrapper(selenium):
+            if test_file is not None:
+                ensure_testdata(test_file)
+
+            @copy_files_to_pyodide(
+                file_list=[("dist", "dist")]
+                + (
+                    []
+                    if test_file is None
+                    else [("skhep_testdata/" + test_file, test_file)]
+                ),
+                install_wheels=True,
+            )
+            def inner_func(selenium):
+                run_in_pyodide(**kwargs)(test_func)(selenium)
+
+            return inner_func(selenium)
+
+        return wrapper
+
+    return decorator
diff --git a/tests/test_1191_rntuple_fixes.py b/tests/test_1191_rntuple_fixes.py
index 7b64e45d2..a1e259310 100644
--- a/tests/test_1191_rntuple_fixes.py
+++ b/tests/test_1191_rntuple_fixes.py
@@ -41,7 +41,7 @@ def test_rntuple_cardinality():
 def test_skip_recursively_empty_structs():
     filename = skhep_testdata.data_path("DAOD_TRUTH3_RC2.root")
     with uproot.open(filename) as f:
-        obj = uproot.open(filename)["RNT:CollectionTree"]
+        obj = f["RNT:CollectionTree"]
         arrays = obj.arrays()
         jets = arrays["AntiKt4TruthDressedWZJetsAux:"]
         assert len(jets[0].pt) == 5