From 2e6d1c0420cf7ce3a452025c613e629409d8931d Mon Sep 17 00:00:00 2001 From: Matt Aton Date: Thu, 5 Dec 2024 09:50:30 -0700 Subject: [PATCH] Speedup import time with lazy imports (#987) * Initial commit * Implement simple solution * Remove comments and spaces * Fix linting * Update Changelog --- ChangeLog.md | 4 ++++ biom/table.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 3e28019d..b2f048e6 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,6 +4,10 @@ BIOM-Format ChangeLog biom-2.1.16-dev --------------- +Performance improvements: + +* Decreased execution time of `import biom` by half with lazy imports. See PR[#987](https://github.com/biocore/biom-format/pull/987) + Maintenance: * Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR[#986](https://github.com/biocore/biom-format/pull/986). diff --git a/biom/table.py b/biom/table.py index 4b56ab56..2cd40967 100644 --- a/biom/table.py +++ b/biom/table.py @@ -172,8 +172,6 @@ # ----------------------------------------------------------------------------- import numpy as np -import scipy.stats -import h5py from copy import deepcopy from datetime import datetime from json import dumps as _json_dumps, JSONEncoder @@ -184,7 +182,6 @@ from numpy import ndarray, asarray, zeros, newaxis from scipy.sparse import (coo_matrix, csc_matrix, csr_matrix, isspmatrix, vstack, hstack, dok_matrix) -import pandas as pd import re from biom.exception import (TableException, UnknownAxisError, UnknownIDError, DisjointIDError) @@ -3234,6 +3231,8 @@ def rankdata(self, axis='sample', inplace=True, method='average'): o4 1.0 4.0 1.0 """ + import scipy.stats + def f(val, id_, _): return scipy.stats.rankdata(val, method=method) return self.transform(f, axis=axis, inplace=inplace) @@ -4108,6 +4107,7 @@ def from_hdf5(cls, h5grp, ids=None, axis='sample', parse_fs=None, >>> t = Table.from_hdf5(f, ids=["GG_OTU_1"], ... axis='observation') # doctest: +SKIP """ + import h5py if not isinstance(h5grp, (h5py.Group, h5py.File)): raise ValueError("h5grp does not appear to be an HDF5 file or " "group") @@ -4341,6 +4341,7 @@ def to_dataframe(self, dense=False): index = self.ids(axis='observation') columns = self.ids() + import pandas as pd if dense: mat = self.matrix_data.toarray() constructor = pd.DataFrame @@ -4442,6 +4443,7 @@ def metadata_to_dataframe(self, axis): O1 Bacteria Firmicutes O2 Bacteria Bacteroidetes """ + import pandas as pd md = self.metadata(axis=axis) if md is None: raise KeyError("%s does not have metadata" % axis)