diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bd2190660..cc5975781 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,9 @@
 repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
   - repo: https://github.com/pycqa/isort
     rev: 5.10.1
     hooks:
@@ -11,5 +16,23 @@ repos:
     rev: 3.8.3
     hooks:
       - id: flake8
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
+        exclude: |
+          (?x)^(
+            .*test.*|
+            ^CHANGELOG.md$|
+            ^.*versioneer.py$
+          )
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: 'v0.991'
+    hooks:
+      - id: mypy
+        additional_dependencies: [types-cachetools]
+        args: ["--module=dask_cuda", "--ignore-missing-imports"]
+        pass_filenames: false
+
 default_language_version:
     python: python3
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 0b2ac73c0..fd5ccf688 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -4,4 +4,4 @@ sphinx:
    configuration: rtd/conf.py
 
 formats:
-  - htmlzip
\ No newline at end of file
+  - htmlzip
diff --git a/dask_cuda/benchmarks/utils.py b/dask_cuda/benchmarks/utils.py
index 28d43cc13..1d07df30c 100644
--- a/dask_cuda/benchmarks/utils.py
+++ b/dask_cuda/benchmarks/utils.py
@@ -648,7 +648,7 @@ def bandwidth_statistics(
 
     logs: the ``dask_worker.incoming_transfer_log`` object
     ignore_size: int (optional)
-        ignore messsages whose total byte count is smaller than this
+        ignore messages whose total byte count is smaller than this
         value (if provided)
 
     Returns
diff --git a/dask_cuda/cli.py b/dask_cuda/cli.py
index e2690f155..b7069d632 100644
--- a/dask_cuda/cli.py
+++ b/dask_cuda/cli.py
@@ -137,7 +137,7 @@ def cuda():
     "--rmm-async/--no-rmm-async",
     default=False,
     show_default=True,
-    help="""Initialize each worker withh RMM and set it to use RMM's asynchronous
+    help="""Initialize each worker with RMM and set it to use RMM's asynchronous
     allocator. See ``rmm.mr.CudaAsyncMemoryResource`` for more info.
 
     .. warning::
diff --git a/dask_cuda/disk_io.py b/dask_cuda/disk_io.py
index 7ccda0f3f..0427b77f0 100644
--- a/dask_cuda/disk_io.py
+++ b/dask_cuda/disk_io.py
@@ -96,8 +96,8 @@ class SpillToDiskProperties:
     def __init__(
         self,
         root_dir: Union[str, os.PathLike],
-        shared_filesystem: bool = None,
-        gds: bool = None,
+        shared_filesystem: Optional[bool] = None,
+        gds: Optional[bool] = None,
     ):
         """
         Parameters
diff --git a/dask_cuda/explicit_comms/dataframe/shuffle.py b/dask_cuda/explicit_comms/dataframe/shuffle.py
index 46c4bccb9..84bc55701 100644
--- a/dask_cuda/explicit_comms/dataframe/shuffle.py
+++ b/dask_cuda/explicit_comms/dataframe/shuffle.py
@@ -270,7 +270,7 @@ async def send_recv_partitions(
     myrank
         The rank of this worker.
     rank_to_out_part_ids
-        dict that for each worker rank specifices a set of output partition IDs.
+        dict that for each worker rank specifies a set of output partition IDs.
         If the worker shouldn't return any partitions, it is excluded from the
         dict. Partition IDs are global integers `0..npartitions` and corresponds
         to the dict keys returned by `group_split_dispatch`.
@@ -332,9 +332,9 @@ async def shuffle_task(
     stage_name: str
         Name of the stage to retrieve the input keys from.
     rank_to_inkeys: dict
-        dict that for each worker rank specifices the set of staged input keys.
+        dict that for each worker rank specifies the set of staged input keys.
     rank_to_out_part_ids: dict
-        dict that for each worker rank specifices a set of output partition IDs.
+        dict that for each worker rank specifies a set of output partition IDs.
         If the worker shouldn't return any partitions, it is excluded from the
         dict. Partition IDs are global integers `0..npartitions` and corresponds
         to the dict keys returned by `group_split_dispatch`.
diff --git a/dask_cuda/initialize.py b/dask_cuda/initialize.py
index 52a67e31b..0b9c92a59 100644
--- a/dask_cuda/initialize.py
+++ b/dask_cuda/initialize.py
@@ -30,7 +30,7 @@ def _create_cuda_context():
     try:
         distributed.comm.ucx.init_once()
     except ModuleNotFoundError:
-        # UCX intialization has to be delegated to Distributed, it will take care
+        # UCX initialization has to be delegated to Distributed, it will take care
         # of setting correct environment variables and importing `ucp` after that.
         # Therefore if ``import ucp`` fails we can just continue here.
         pass
diff --git a/dask_cuda/is_spillable_object.py b/dask_cuda/is_spillable_object.py
index 9e337aa82..cb85248e5 100644
--- a/dask_cuda/is_spillable_object.py
+++ b/dask_cuda/is_spillable_object.py
@@ -40,7 +40,7 @@ def is_device_object_cudf_index(s):
 
 
 def cudf_spilling_status() -> Optional[bool]:
-    """Check the status of cudf's build-in spilling
+    """Check the status of cudf's built-in spilling
 
     Returns:
         - True if cudf's internal spilling is enabled, or
diff --git a/dask_cuda/proxify_device_objects.py b/dask_cuda/proxify_device_objects.py
index 923e7cf8e..a8b8a45df 100644
--- a/dask_cuda/proxify_device_objects.py
+++ b/dask_cuda/proxify_device_objects.py
@@ -19,7 +19,7 @@ def _register_incompatible_types():
     """Lazy register types that ProxifyHostFile should unproxify on retrieval.
 
     It reads the config key "jit-unspill-incompatible"
-    (DASK_JIT_UNSPILL_INCOMPATIBLE), which should be a comma seperated
+    (DASK_JIT_UNSPILL_INCOMPATIBLE), which should be a comma separated
     list of types. The default value is:
     DASK_JIT_UNSPILL_INCOMPATIBLE="cupy.ndarray"
     """
@@ -51,8 +51,8 @@ def f(paths):
 
 def proxify_device_objects(
     obj: T,
-    proxied_id_to_proxy: MutableMapping[int, ProxyObject] = None,
-    found_proxies: List[ProxyObject] = None,
+    proxied_id_to_proxy: Optional[MutableMapping[int, ProxyObject]] = None,
+    found_proxies: Optional[List[ProxyObject]] = None,
     excl_proxies: bool = False,
     mark_as_explicit_proxies: bool = False,
 ) -> T:
@@ -135,7 +135,9 @@ def unproxify_device_objects(
         pxy = obj._pxy_get(copy=True)
         if only_incompatible_types:
             if incompatible_types and isinstance(obj, incompatible_types):
-                obj = obj._pxy_deserialize(maybe_evict=False, proxy_detail=pxy)
+                obj = obj._pxy_deserialize(  # type: ignore
+                    maybe_evict=False, proxy_detail=pxy
+                )
         elif not skip_explicit_proxies or not pxy.explicit_proxy:
             pxy.explicit_proxy = False
             obj = obj._pxy_deserialize(maybe_evict=False, proxy_detail=pxy)
diff --git a/dask_cuda/proxify_host_file.py b/dask_cuda/proxify_host_file.py
index 47bb3952a..724a08baa 100644
--- a/dask_cuda/proxify_host_file.py
+++ b/dask_cuda/proxify_host_file.py
@@ -164,7 +164,7 @@ class ProxiesOnDevice(Proxies):
     In this case the tally of the total device memory usage is incorrect.
     """
 
-    def __init__(self):
+    def __init__(self) -> None:
         super().__init__()
         self.proxy_id_to_dev_mems: Dict[int, Set[DeviceMemoryId]] = {}
         self.dev_mem_to_proxy_ids: DefaultDict[DeviceMemoryId, Set[int]] = defaultdict(
@@ -477,7 +477,7 @@ class ProxifyHostFile(MutableMapping):
     spill_on_demand: bool or None, default None
         Enables spilling when the RMM memory pool goes out of memory. If
         ``None``, the "spill-on-demand" config value are used, which defaults to True.
-        Notice, enabling this does nothing when RMM isn't availabe or not used.
+        Notice, enabling this does nothing when RMM isn't available or not used.
     gds_spilling: bool
         Enable GPUDirect Storage spilling. If ``None``, the "gds-spilling" config
         value are used, which defaults to ``False``.
@@ -497,10 +497,10 @@ def __init__(
         *,
         device_memory_limit: int,
         memory_limit: int,
-        shared_filesystem: bool = None,
-        compatibility_mode: bool = None,
-        spill_on_demand: bool = None,
-        gds_spilling: bool = None,
+        shared_filesystem: Optional[bool] = None,
+        compatibility_mode: Optional[bool] = None,
+        spill_on_demand: Optional[bool] = None,
+        gds_spilling: Optional[bool] = None,
     ):
         if cudf_spilling_status():
             warnings.warn(
@@ -635,7 +635,7 @@ def evict(self) -> int:
     def fast(self):
         """Alternative access to `.evict()` used by Dask
 
-        Dask expects `.fast.evict()` to be availabe for manually triggering
+        Dask expects `.fast.evict()` to be available for manually triggering
         of CPU-to-Disk spilling.
         """
         if len(self.manager._host) == 0:
diff --git a/dask_cuda/proxy_object.py b/dask_cuda/proxy_object.py
index 80aaa7c43..21dc15ea1 100644
--- a/dask_cuda/proxy_object.py
+++ b/dask_cuda/proxy_object.py
@@ -46,7 +46,9 @@
 
 
 def asproxy(
-    obj: object, serializers: Iterable[str] = None, subclass: Type["ProxyObject"] = None
+    obj: object,
+    serializers: Optional[Iterable[str]] = None,
+    subclass: Optional[Type["ProxyObject"]] = None,
 ) -> "ProxyObject":
     """Wrap `obj` in a ProxyObject object if it isn't already.
 
@@ -344,7 +346,7 @@ class ProxyObject:
     Attributes
     ----------
     _pxy: ProxyDetail
-        Details of all proxy information of the underlaying proxied object.
+        Details of all proxy information of the underlying proxied object.
         Access to _pxy is not pass-through to the proxied object, which is
         the case for most other access to the ProxyObject.
 
@@ -380,7 +382,7 @@ def __del__(self):
     def _pxy_serialize(
         self,
         serializers: Iterable[str],
-        proxy_detail: ProxyDetail = None,
+        proxy_detail: Optional[ProxyDetail] = None,
     ) -> None:
         """Inplace serialization of the proxied object using the `serializers`
 
@@ -410,7 +412,7 @@ def _pxy_serialize(
         self._pxy_cache.pop("device_memory_objects", None)
 
     def _pxy_deserialize(
-        self, maybe_evict: bool = True, proxy_detail: ProxyDetail = None
+        self, maybe_evict: bool = True, proxy_detail: Optional[ProxyDetail] = None
     ):
         """Inplace deserialization of the proxied object
 
diff --git a/dask_cuda/tests/test_cudf_builtin_spilling.py b/dask_cuda/tests/test_cudf_builtin_spilling.py
index c6548e422..d4c28ba06 100644
--- a/dask_cuda/tests/test_cudf_builtin_spilling.py
+++ b/dask_cuda/tests/test_cudf_builtin_spilling.py
@@ -34,7 +34,7 @@
 
 @pytest.fixture
 def manager(request):
-    """Fixture to enable and make a spilling manager availabe"""
+    """Fixture to enable and make a spilling manager available"""
    kwargs = dict(getattr(request, "param", {}))
     set_global_manager(manager=SpillManager(**kwargs))
     yield get_global_manager()
diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py
index 850006eac..1a24d80b0 100644
--- a/dask_cuda/utils.py
+++ b/dask_cuda/utils.py
@@ -682,7 +682,7 @@ def get_gpu_uuid_from_index(device_index=0):
 def get_worker_config(dask_worker):
     from .proxify_host_file import ProxifyHostFile
 
-    # assume homogenous cluster
+    # assume homogeneous cluster
     plugin_vals = dask_worker.plugins.values()
     ret = {}
 
diff --git a/docs/Makefile b/docs/Makefile
index 69fe55ecf..ba501f6f5 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -16,4 +16,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/source/api.rst b/docs/source/api.rst
index 7989fa5e9..b9d9d6dfa 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -33,4 +33,3 @@ Explicit-comms
 .. currentmodule:: dask_cuda.explicit_comms.comms
 .. autoclass:: CommsContext
     :members:
-
diff --git a/docs/source/examples/best-practices.rst b/docs/source/examples/best-practices.rst
index 242e90fff..84cc78b88 100644
--- a/docs/source/examples/best-practices.rst
+++ b/docs/source/examples/best-practices.rst
@@ -114,4 +114,3 @@ With UCX and NVLink, we greatly reduced the wall clock time to: ``347.43 ms +/-
     0  |         ucx://127.0.0.1:35954
     1  |         ucx://127.0.0.1:53584
     ================================================================================
-
diff --git a/docs/source/ucx.rst b/docs/source/ucx.rst
index 7463f0c18..d9cacdc77 100644
--- a/docs/source/ucx.rst
+++ b/docs/source/ucx.rst
@@ -127,8 +127,7 @@ therefore do something like the following:
 
 .. note::
 
-    To confirm that no bad fork calls are occuring, start jobs with
+    To confirm that no bad fork calls are occurring, start jobs with
     ``UCX_IB_FORK_INIT=n``. UCX will produce a warning ``UCX WARN IB:
     ibv_fork_init() was disabled or failed, yet a fork() has been issued.`` if
     the application calls ``fork()``.
-
diff --git a/rtd/Makefile b/rtd/Makefile
index 69fe55ecf..ba501f6f5 100644
--- a/rtd/Makefile
+++ b/rtd/Makefile
@@ -16,4 +16,4 @@ help:
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
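
A note on the recurring `Optional[...]` changes above: PEP 484 specifies that a `None` default does not implicitly make a parameter optional, and mypy (added as a pre-commit hook in this diff) rejects the implicit form by default since v0.990 via its `no_implicit_optional` setting. A minimal sketch of the pattern; the `connect` function below is hypothetical and not part of dask_cuda:

```python
from typing import Optional


def connect(timeout: Optional[float] = None) -> None:
    # `timeout: float = None` would be flagged by mypy, because `None` is not
    # a valid `float`; the annotation has to spell out `Optional[float]`.
    if timeout is None:
        print("waiting indefinitely")
    else:
        print(f"waiting up to {timeout}s")
```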
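
Similarly, the `exclude:` pattern added for the codespell hook uses Python's verbose regex flag `(?x)`, so the newlines and indentation inside the YAML block scalar are ignored when the pattern is compiled. A rough sketch of how such a pattern filters paths (the sample paths are illustrative, and pre-commit's actual filtering has more moving parts):

```python
import re

# Same pattern as in the codespell hook above; (?x) puts the regex in
# verbose mode, so whitespace and line breaks inside it are ignored.
EXCLUDE = re.compile(
    r"""(?x)^(
        .*test.*|
        ^CHANGELOG.md$|
        ^.*versioneer.py$
    )"""
)

for path in ["dask_cuda/tests/test_utils.py", "CHANGELOG.md", "dask_cuda/cli.py"]:
    status = "skipped" if EXCLUDE.search(path) else "spell-checked"
    print(f"{path}: {status}")
```

Once the hooks are installed with `pre-commit install`, all of the checks above (whitespace fixers, codespell, and mypy) can be run in one go with `pre-commit run --all-files`.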