Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add typesafety checks #212

Merged
merged 16 commits into from
Jun 15, 2024
Merged
18 changes: 18 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -49,6 +49,24 @@ jobs:
- name: Run tests
run: nox -s tests-${{ matrix.fsspec || matrix.pyv }} -- --cov-report=xml

typesafety:
  runs-on: ubuntu-latest

  steps:
    - name: Check out the repository
      uses: actions/checkout@v4

    # This job defines no build matrix, so the step name states the Python
    # version literally instead of interpolating an undefined `matrix.pyv`
    # (which would render as an empty string in the workflow log).
    - name: Set up Python 3.8
      uses: actions/setup-python@v4
      with:
        python-version: '3.8'

    - name: Install nox
      run: python -m pip install --upgrade nox

    # Delegates to the `typesafety` session declared in noxfile.py.
    - name: Run typesafety checks
      run: nox -s typesafety

lint:
runs-on: ubuntu-latest

51 changes: 47 additions & 4 deletions dev/generate_flavours.py
Original file line number Diff line number Diff line change
@@ -43,6 +43,7 @@
import logging
import re
from typing import Any
from typing import Literal
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import urlsplit
@@ -67,6 +68,22 @@
class FileSystemFlavourBase:
"""base class for the fsspec flavours"""
protocol: str | tuple[str, ...]
root_marker: Literal["/", ""]
sep: Literal["/"]
@classmethod
def _strip_protocol(cls, path):
raise NotImplementedError
@staticmethod
def _get_kwargs_from_urls(path):
raise NotImplementedError
@classmethod
def _parent(cls, path):
raise NotImplementedError
def __init_subclass__(cls: Any, **kwargs):
if isinstance(cls.protocol, str):
protocols = (cls.protocol,)
@@ -99,12 +116,27 @@ def __init_subclass__(cls: Any, **kwargs):
}


def _fix_abstract_file_system(x: str) -> str:
x = re.sub(
"protocol = 'abstract'", "protocol: str | tuple[str, ...] = 'abstract'", x
)
x = re.sub("root_marker = ''", "root_marker: Literal['', '/'] = ''", x)
x = re.sub("sep = '/'", "sep: Literal['/'] = '/'", x)
return x


def _fix_azure_blob_file_system(x: str) -> str:
return re.sub(
r"host = ops.get\(\"host\", None\)",
'host: str | None = ops.get("host", None)',
x = re.sub(
r"if isinstance\(path, list\):",
"if isinstance(path, list): # type: ignore[unreachable]",
x,
)
x = re.sub(
r"(return \[.*\])",
r"\1 # type: ignore[unreachable]",
x,
)
return x


def _fix_memfs_file_system(x: str) -> str:
@@ -115,6 +147,15 @@ def _fix_memfs_file_system(x: str) -> str:
)


def _fix_oss_file_system(x: str) -> str:
x = re.sub(
r"path_string: str = stringify_path\(path\)",
"path_string = stringify_path(path)",
x,
)
return x


def _fix_xrootd_file_system(x: str) -> str:
x = re.sub(
r"client.URL",
@@ -129,8 +170,10 @@ def _fix_xrootd_file_system(x: str) -> str:


# Maps a filesystem class name to the function that patches that class's
# source text (each `_fix_*` function takes the source as a str and returns
# the rewritten str).
# NOTE(review): presumably looked up by class name while generating the
# flavour sources -- confirm against the caller, which is not visible here.
FIX_SOURCE = {
"AbstractFileSystem": _fix_abstract_file_system,
"AzureBlobFileSystem": _fix_azure_blob_file_system,
"MemFS": _fix_memfs_file_system,
"OSSFileSystem": _fix_oss_file_system,
"XRootDFileSystem": _fix_xrootd_file_system,
}

@@ -303,7 +346,7 @@ def create_source() -> str:
AbstractFileSystem,
["_strip_protocol", "_get_kwargs_from_urls", "_parent"],
{},
["protocol", "root_marker"],
["protocol", "root_marker", "sep"],
cls_suffix=BASE_CLASS_NAME_SUFFIX,
base_cls="FileSystemFlavourBase",
)
18 changes: 18 additions & 0 deletions dev/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
fsspec[git,hdfs,dask,http,sftp,smb]==2024.2.0

# these dependencies define their own filesystems
adlfs==2024.2.0
boxfs==0.2.1
dropboxdrivefs==1.3.1
gcsfs==2024.2.0
s3fs==2024.2.0
ocifs==1.3.1
webdav4[fsspec]==0.9.8
# gdrivefs @ git+https://github.com/fsspec/gdrivefs@master broken ...
morefs[asynclocalfs]==0.2.0
dvc==3.47.0
huggingface_hub==0.20.3
lakefs-spec==0.7.0
ossfs==2023.12.0
fsspec-xrootd==0.2.4
wandbfs==0.0.2
16 changes: 16 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -91,6 +91,22 @@ def type_checking(session):
session.run("python", "-m", "mypy")


@nox.session
def typesafety(session):
    """Install the package with its test extras and run the typesafety suite.

    pytest is invoked with the pytest-mypy-plugins plugin enabled explicitly
    and pointed at ``pyproject.toml`` for the mypy configuration.
    """
    session.install("-e", ".[tests]")
    pytest_args = [
        "-v",
        "-p",
        "pytest-mypy-plugins",
        "--mypy-pyproject-toml-file",
        "pyproject.toml",
        "typesafety",
    ]
    session.run("python", "-m", "pytest", *pytest_args)


@nox.session()
def smoke(session):
print("please tun `nox -s tests` instead")
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -37,7 +37,7 @@ force_single_line = true
line_length = 88

[tool.pytest.ini_options]
addopts = "-ra -m 'not hdfs'"
addopts = "-ra -m 'not hdfs' -p no:pytest-mypy-plugins"
markers = [
"hdfs: mark test as hdfs",
"pathlib: mark cpython pathlib tests",
@@ -61,7 +61,7 @@ exclude_lines = [

[tool.mypy]
# Error output
show_column_numbers = true
show_column_numbers = false
show_error_codes = true
show_error_context = true
show_traceback = true
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ python_requires = >=3.8
zip_safe = False
packages = find:
install_requires=
fsspec>=2022.1.0
fsspec >=2022.1.0,!=2024.3.1

[options.extras_require]
tests =
@@ -33,7 +33,8 @@ tests =
pytest-cov==4.1.0
pytest-mock==3.12.0
pylint==2.17.4
mypy==1.8.0
mypy==1.10.0
pytest-mypy-plugins==3.1.2
packaging
dev =
%(tests)s
567 changes: 567 additions & 0 deletions typesafety/test_upath_interface.yml

Large diffs are not rendered by default.

18 changes: 2 additions & 16 deletions upath/_compat.py
Original file line number Diff line number Diff line change
@@ -304,21 +304,6 @@ def parts(self):
else:
return tuple(self._tail)

def joinpath(self, *pathsegments):
return self.with_segments(self, *pathsegments)

def __truediv__(self, key):
try:
return self.joinpath(key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
try:
return self.with_segments(key, self)
except TypeError:
return NotImplemented

@property
def parent(self):
drv = self.drive
@@ -490,7 +475,8 @@ def mv(self, path, target, recursive=False, maxdepth=None, **kwargs):
)


F = TypeVar("F")
RT = TypeVar("RT")
F = Callable[..., RT]


def deprecated(*, python_version: tuple[int, ...]) -> Callable[[F], F]:
46 changes: 22 additions & 24 deletions upath/_flavour.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
TypeAlias = Any

from fsspec.registry import known_implementations
from fsspec.registry import registry as class_registry
from fsspec.registry import registry as _class_registry
from fsspec.spec import AbstractFileSystem

from upath._compat import deprecated
@@ -40,14 +40,14 @@
"upath_get_kwargs_from_url",
]

class_registry: Mapping[str, type[AbstractFileSystem]]
class_registry: Mapping[str, type[AbstractFileSystem]] = _class_registry
PathOrStr: TypeAlias = Union[str, "os.PathLike[str]"]


class AnyProtocolFileSystemFlavour(FileSystemFlavourBase):
sep: str = "/"
protocol: tuple[str, ...] = ()
root_marker: str = "/"
sep = "/"
protocol = ()
root_marker = "/"

@classmethod
def _strip_protocol(cls, path: str) -> str:
@@ -168,10 +168,11 @@ def from_protocol(
) -> WrappedFileSystemFlavour:
"""return the fsspec flavour for the given protocol"""

_c = cls.protocol_config
config = {
key: True
for key, protocols in cls.protocol_config.items()
if protocol in protocols
"netloc_is_anchor": protocol in _c["netloc_is_anchor"],
"supports_empty_parts": protocol in _c["supports_empty_parts"],
"meaningful_trailing_slash": protocol in _c["meaningful_trailing_slash"],
}

# first try to get an already imported fsspec filesystem class
@@ -227,16 +228,12 @@ def stringify_path(pth: PathOrStr) -> str:
out = pth.__fspath__()
elif isinstance(pth, os.PathLike):
out = str(pth)
elif hasattr(pth, "path"):
elif hasattr(pth, "path"): # type: ignore[unreachable]
out = pth.path
else:
out = str(pth)
return normalize_empty_netloc(out)

def empty_part_join(self, path: str, *paths: str) -> str:
sep = self.sep
return sep.join([str_remove_suffix(path, sep), *paths])

def strip_protocol(self, pth: PathOrStr) -> str:
pth = self.stringify_path(pth)
return self._spec._strip_protocol(pth)
@@ -270,21 +267,21 @@ def isabs(self, path: PathOrStr) -> bool:
return path.startswith(self.root_marker)

def join(self, path: PathOrStr, *paths: PathOrStr) -> str:
if self.supports_empty_parts:
_join = self.empty_part_join
else:
_join = posixpath.join
if self.netloc_is_anchor:
drv, p0 = self.splitdrive(path)
pN = list(map(self.stringify_path, paths))
if not drv and not p0:
path, *pN = pN
drv, p0 = self.splitdrive(path)
return drv + _join(p0 or self.sep, *pN)
p0 = p0 or self.sep
else:
p0 = str(self.strip_protocol(path))
pN = map(self.stringify_path, paths)
return _join(p0, *pN)
pN = list(map(self.stringify_path, paths))
drv = ""
if self.supports_empty_parts:
return drv + self.sep.join([str_remove_suffix(p0, self.sep), *pN])
else:
return drv + posixpath.join(p0, *pN)

def split(self, path: PathOrStr):
stripped_path = self.strip_protocol(path)
@@ -385,20 +382,21 @@ class LazyFlavourDescriptor:
"""descriptor to lazily get the flavour for a given protocol"""

def __init__(self) -> None:
self._owner = None
self._owner: type[UPath] | None = None

def __set_name__(self, owner: type[UPath], name: str) -> None:
# helper to provide a more informative repr
self._owner = owner
self._default_protocol: str | None
try:
self._default_protocol = self._owner.protocols[0]
self._default_protocol = self._owner.protocols[0] # type: ignore
except (AttributeError, IndexError):
self._default_protocol = None

def __get__(self, instance: UPath, owner: type[UPath]) -> WrappedFileSystemFlavour:
if instance is not None:
return WrappedFileSystemFlavour.from_protocol(instance.protocol)
elif self._default_protocol:
elif self._default_protocol: # type: ignore
return WrappedFileSystemFlavour.from_protocol(self._default_protocol)
else:
return default_flavour
@@ -465,7 +463,7 @@ def upath_urijoin(base: str, uri: str) -> str:
segments = base_parts + us.path.split("/")
segments[1:-1] = filter(None, segments[1:-1])

resolved_path = []
resolved_path: list[str] = []

for seg in segments:
if seg == "..":
31 changes: 25 additions & 6 deletions upath/_flavour_sources.py
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@
import logging
import re
from typing import Any
from typing import Literal
from typing import cast
from urllib.parse import parse_qs
from urllib.parse import urlsplit
@@ -54,7 +55,24 @@
class FileSystemFlavourBase:
"""base class for the fsspec flavours"""

protocol: str | tuple[str, ...]
root_marker: Literal["/", ""]
sep: Literal["/"]

@classmethod
def _strip_protocol(cls, path):
raise NotImplementedError

@staticmethod
def _get_kwargs_from_urls(path):
raise NotImplementedError

@classmethod
def _parent(cls, path):
raise NotImplementedError

def __init_subclass__(cls: Any, **kwargs):
protocols: tuple[str, ...]
if isinstance(cls.protocol, str):
protocols = (cls.protocol,)
else:
@@ -68,8 +86,9 @@ def __init_subclass__(cls: Any, **kwargs):
class AbstractFileSystemFlavour(FileSystemFlavourBase):
__orig_class__ = 'fsspec.spec.AbstractFileSystem'
__orig_version__ = '2024.2.0'
protocol = 'abstract'
root_marker = ''
protocol: str | tuple[str, ...] = 'abstract'
root_marker: Literal['', '/'] = ''
sep: Literal['/'] = '/'

@classmethod
def _strip_protocol(cls, path):
@@ -164,8 +183,8 @@ def _strip_protocol(cls, path: str):
str
Returns a path without the protocol
"""
if isinstance(path, list):
return [cls._strip_protocol(p) for p in path]
if isinstance(path, list): # type: ignore[unreachable]
return [cls._strip_protocol(p) for p in path] # type: ignore[unreachable]

STORE_SUFFIX = ".dfs.core.windows.net"
logger.debug(f"_strip_protocol for {path}")
@@ -197,7 +216,7 @@ def _get_kwargs_from_urls(urlpath):
"""Get the account_name from the urlpath and pass to storage_options"""
ops = infer_storage_options(urlpath)
out = {}
host: str | None = ops.get("host", None)
host = ops.get("host", None)
if host:
match = re.match(
r"(?P<account_name>.+)\.(dfs|blob)\.core\.windows\.net", host
@@ -675,7 +694,7 @@ def _strip_protocol(cls, path):
"""
if isinstance(path, list):
return [cls._strip_protocol(p) for p in path]
path_string: str = stringify_path(path)
path_string = stringify_path(path)
if path_string.startswith("oss://"):
path_string = path_string[5:]

4 changes: 2 additions & 2 deletions upath/_stat.py
Original file line number Diff line number Diff line change
@@ -45,7 +45,7 @@ def _get_stat_result_extra_fields() -> tuple[str, ...]:
sr = os.stat_result(range(os.stat_result.n_fields))
rd = sr.__reduce__()
assert isinstance(rd, tuple), "unexpected return os.stat_result.__reduce__"
_, (_, extra) = sr.__reduce__()
_, (_, extra) = rd
extra_fields = sorted(extra, key=extra.__getitem__)
return tuple(extra_fields)

@@ -317,7 +317,7 @@ def __iter__(self) -> Iterator[int]:
for field in self._fields:
yield int(getattr(self, field))

def index(self, value: int, start: int = 0, stop: int = None, /) -> int:
def index(self, value: int, start: int = 0, stop: int | None = None, /) -> int:
"""the sequence interface index method."""
if stop is None:
stop = len(self._seq)
218 changes: 148 additions & 70 deletions upath/core.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions upath/implementations/http.py
Original file line number Diff line number Diff line change
@@ -28,11 +28,11 @@ def _transform_init_args(
) -> tuple[tuple[str | os.PathLike, ...], str, dict[str, Any]]:
# allow initialization via a path argument and protocol keyword
if args and not str(args[0]).startswith(protocol):
args = (f"{protocol}://{args[0].lstrip('/')}", *args[1:])
args = (f"{protocol}://{str(args[0]).lstrip('/')}", *args[1:])
return args, protocol, storage_options

@property
def root(self) -> str:
def root(self) -> str: # type: ignore[override]
return super().root or "/"

def __str__(self):
18 changes: 9 additions & 9 deletions upath/implementations/local.py
Original file line number Diff line number Diff line change
@@ -101,17 +101,17 @@ def _upath_init(inst: PosixUPath | WindowsUPath) -> None:
"""helper to initialize the PosixPath/WindowsPath instance with UPath attrs"""
inst._protocol = ""
inst._storage_options = {}
if sys.version_info < (3, 10):
if sys.version_info < (3, 10) and hasattr(inst, "_init"):
inst._init()


class PosixUPath(PosixPath, LocalPath):
class PosixUPath(PosixPath, LocalPath): # type: ignore[misc]
__slots__ = ()

# assign all PosixPath methods/attrs to prevent multi inheritance issues
_set_class_attributes(locals(), src=PosixPath)

def open(
def open( # type: ignore[override]
self,
mode="r",
buffering=-1,
@@ -136,14 +136,14 @@ def open(

def __new__(
cls, *args, protocol: str | None = None, **storage_options: Any
) -> UPath:
) -> PosixUPath:
if os.name == "nt":
raise NotImplementedError(
f"cannot instantiate {cls.__name__} on your system"
)
obj = super().__new__(cls, *args)
obj._protocol = ""
return obj
return obj # type: ignore[return-value]

def __init__(
self, *args, protocol: str | None = None, **storage_options: Any
@@ -169,13 +169,13 @@ def path(self) -> str:
return PosixPath.__str__(self)


class WindowsUPath(WindowsPath, LocalPath):
class WindowsUPath(WindowsPath, LocalPath): # type: ignore[misc]
__slots__ = ()

# assign all WindowsPath methods/attrs to prevent multi inheritance issues
_set_class_attributes(locals(), src=WindowsPath)

def open(
def open( # type: ignore[override]
self,
mode="r",
buffering=-1,
@@ -200,14 +200,14 @@ def open(

def __new__(
cls, *args, protocol: str | None = None, **storage_options: Any
) -> UPath:
) -> WindowsUPath:
if os.name != "nt":
raise NotImplementedError(
f"cannot instantiate {cls.__name__} on your system"
)
obj = super().__new__(cls, *args)
obj._protocol = ""
return obj
return obj # type: ignore[return-value]

def __init__(
self, *args, protocol: str | None = None, **storage_options: Any