Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type-check Pandas code and require newer pyarrow. #201

Merged
merged 1 commit into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ dependencies = [
"numpy>=1.21",
"pandas",
"pyarrow",
# TODO: once we no longer support Python 3.7, remove this and pin to pyarrow >= 14.0.1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Following on from my review comment: let's keep pyarrow-hotfix and update the comment explaining why it is needed (the current one has gone stale).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pyarrow-hotfix has been restored.

# https://github.com/single-cell-data/TileDB-SOMA/issues/1926
# TODO: pyarrow >= 14.0.1 doesn't play well with some other PyPI packages
# on macOS: https://github.com/apache/arrow/issues/42154
# Remove this once we can require a recent pyarrow.
"pyarrow-hotfix",
"scipy",
"typing-extensions>=4.1", # For LiteralString
"typing-extensions>=4.1", # For LiteralString (py3.11)
]
requires-python = "~=3.8"
urls = { repository = "https://github.com/single-cell-data/SOMA.git" }
Expand Down Expand Up @@ -59,5 +60,5 @@ warn_redundant_casts = true

[[tool.mypy.overrides]]
# These dependencies do not currently have canonical type stubs.
module = ["anndata", "numba", "pandas", "pyarrow", "pyarrow_hotfix", "scipy"]
module = ["anndata", "pyarrow", "pyarrow_hotfix", "scipy"]
ignore_missing_imports = true
26 changes: 13 additions & 13 deletions python-spec/requirements-py3.10.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
anndata==0.10.5.post1
array_api_compat==1.4.1
anndata==0.10.7
array_api_compat==1.7.1
attrs==23.2.0
exceptiongroup==1.2.0
h5py==3.10.0
llvmlite==0.42.0
exceptiongroup==1.2.1
h5py==3.11.0
llvmlite==0.43.0
natsort==8.4.0
numba==0.59.0
numpy==1.26.4
packaging==23.2
pandas==2.2.0
pyarrow==15.0.0
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
pytz==2024.1
scipy==1.12.0
scipy==1.13.1
six==1.16.0
typing_extensions==4.9.0
typing_extensions==4.12.2
tzdata==2024.1
24 changes: 12 additions & 12 deletions python-spec/requirements-py3.11.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
anndata==0.10.5.post1
array_api_compat==1.4.1
anndata==0.10.7
array_api_compat==1.7.1
attrs==23.2.0
h5py==3.10.0
llvmlite==0.42.0
h5py==3.11.0
llvmlite==0.43.0
natsort==8.4.0
numba==0.59.0
numpy==1.26.4
packaging==23.2
pandas==2.2.0
pyarrow==15.0.0
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
pytz==2024.1
scipy==1.12.0
scipy==1.13.1
six==1.16.0
typing_extensions==4.9.0
typing_extensions==4.12.2
tzdata==2024.1
28 changes: 14 additions & 14 deletions python-spec/requirements-py3.12.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
anndata==0.10.5.post1
array_api_compat==1.4.1
anndata==0.10.7
array_api_compat==1.7.1
attrs==23.2.0
h5py==3.10.0
llvmlite==0.42.0
h5py==3.11.0
llvmlite==0.43.0
natsort==8.4.0
numba==0.59.0
numpy==1.26.4
packaging==23.2
pandas==2.2.0
pyarrow==15.0.0
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
pytz==2024.1
scipy==1.12.0
setuptools==69.0.3
scipy==1.13.1
setuptools==70.0.0
six==1.16.0
typing_extensions==4.9.0
typing_extensions==4.12.2
tzdata==2024.1
wheel==0.42.0
wheel==0.43.0
18 changes: 0 additions & 18 deletions python-spec/requirements-py3.7.txt

This file was deleted.

32 changes: 17 additions & 15 deletions python-spec/requirements-py3.8-lint.txt
Original file line number Diff line number Diff line change
@@ -1,36 +1,38 @@
anndata==0.9.2
attrs==23.2.0
black==24.2.0
black==24.4.2
cfgv==3.4.0
click==8.1.7
distlib==0.3.8
filelock==3.13.1
h5py==3.10.0
identify==2.5.34
importlib-metadata==7.0.1
filelock==3.15.1
h5py==3.11.0
identify==2.5.36
importlib_metadata==7.1.0
isort==5.13.2
llvmlite==0.41.1
mypy==1.8.0
mypy==1.10.0
mypy-extensions==1.0.0
natsort==8.4.0
nodeenv==1.8.0
nodeenv==1.9.1
numba==0.58.1
numpy==1.24.4
packaging==23.2
packaging==24.1
pandas==2.0.3
pandas-stubs==2.0.3.230814
pathspec==0.12.1
platformdirs==4.2.0
platformdirs==4.2.2
pre-commit==3.5.0
pyarrow==15.0.0
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
ruff==0.2.1
ruff==0.4.9
scipy==1.10.1
six==1.16.0
tomli==2.0.1
typing_extensions==4.9.0
types-pytz==2024.1.0.20240417
typing_extensions==4.12.2
tzdata==2024.1
virtualenv==20.25.0
zipp==3.17.0
virtualenv==20.26.2
zipp==3.19.2
14 changes: 7 additions & 7 deletions python-spec/requirements-py3.8.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
anndata==0.9.2
attrs==23.2.0
h5py==3.10.0
importlib-metadata==7.0.1
h5py==3.11.0
importlib_metadata==7.1.0
llvmlite==0.41.1
natsort==8.4.0
numba==0.58.1
numpy==1.24.4
packaging==23.2
packaging==24.1
pandas==2.0.3
pyarrow==15.0.0
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
python-dateutil==2.9.0.post0
pytz==2024.1
scipy==1.10.1
six==1.16.0
typing_extensions==4.9.0
typing_extensions==4.12.2
tzdata==2024.1
zipp==3.17.0
zipp==3.19.2
24 changes: 12 additions & 12 deletions python-spec/requirements-py3.9.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
anndata==0.10.5.post1
array_api_compat==1.4.1
anndata==0.10.7
array_api_compat==1.7.1
attrs==23.2.0
docutils==0.16
exceptiongroup==1.2.0
h5py==3.10.0
exceptiongroup==1.2.1
h5py==3.11.0
jmespath==1.0.1
llvmlite==0.42.0
llvmlite==0.43.0
natsort==8.4.0
numba==0.59.0
numpy==1.26.4
packaging==23.2
pandas==2.2.0
pyarrow==15.0.0
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pyarrow==16.1.0
pyarrow-hotfix==0.6
python-dateutil==2.8.2
pytz==2024.1
rsa==4.7.2
s3transfer==0.6.0
scipy==1.12.0
scipy==1.13.1
six==1.16.0
typing_extensions==4.9.0
typing_extensions==4.12.2
tzdata==2024.1
8 changes: 4 additions & 4 deletions python-spec/src/somacore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

from typing import Tuple, Union

# TODO: once we no longer support Python 3.7, remove this and pin to pyarrow >= 14.0.1
# https://github.com/single-cell-data/TileDB-SOMA/issues/1926
# ruff: noqa
import pyarrow_hotfix
# TODO: pyarrow >= 14.0.1 doesn't play well with some other PyPI packages
# on macOS: https://github.com/apache/arrow/issues/42154
# Remove this once we can pin to a recent pyarrow.
import pyarrow_hotfix # noqa: F401

from .base import SOMAObject
from .collection import Collection
Expand Down
7 changes: 6 additions & 1 deletion python-spec/src/somacore/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from concurrent import futures
from typing import (
Any,
Callable,
Dict,
Generic,
Mapping,
Expand All @@ -10,6 +11,7 @@
Tuple,
TypeVar,
Union,
cast,
overload,
)

Expand Down Expand Up @@ -549,7 +551,10 @@ def _axism_inner(
def _convert_to_ndarray(
self, axis: "_Axis", table: pa.Table, n_row: int, n_col: int
) -> np.ndarray:
indexer: pd.Index = axis.getattr_from(self.indexer, pre="by_")
indexer = cast(
Callable[[_Numpyable], npt.NDArray[np.intp]],
axis.getattr_from(self.indexer, pre="by_"),
)
idx = indexer(table["soma_dim_0"])
z = np.zeros(n_row * n_col, dtype=np.float32)
np.put(z, idx * n_col + table["soma_dim_1"], table["soma_data"])
Expand Down
Loading