From 27ef4ad17f4779017a8310d46268ad6ff643bd50 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 8 Nov 2023 12:23:16 +0100 Subject: [PATCH] io: On importing data with Dask, use `os.cpu_count()` for `npartitions` ... as a default value. --- cratedb_toolkit/util/database.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cratedb_toolkit/util/database.py b/cratedb_toolkit/util/database.py index 7206628..e5d435d 100644 --- a/cratedb_toolkit/util/database.py +++ b/cratedb_toolkit/util/database.py @@ -1,6 +1,7 @@ # Copyright (c) 2023, Crate.io Inc. # Distributed under the terms of the AGPLv3 license, see LICENSE. import io +import os import typing as t from pathlib import Path @@ -227,8 +228,7 @@ def import_csv_dask( from crate.client.sqlalchemy.support import insert_bulk # Set a few defaults. - # TODO: Use amount of CPU cores instead? - npartitions = npartitions or 4 + npartitions = npartitions or os.cpu_count() if progress: from dask.diagnostics import ProgressBar