Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multi unifrac to file #152

Merged
merged 11 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: flake8
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: 3.9
- name: install dependencies
Expand All @@ -29,11 +29,20 @@ jobs:
needs: lint
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, linux-gpu-cuda]
exclude:
- os: macos-latest
python-version: '3.9'
- os: macos-latest
python-version: '3.10'
- os: linux-gpu-cuda
python-version: '3.8'
- os: linux-gpu-cuda
python-version: '3.9'
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: conda-incubator/setup-miniconda@v2
with:
miniconda-version: "latest"
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ The library can be accessed directly from within Python. If operating in this mo
>>> import unifrac
>>> dir(unifrac)
['__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__',
'__package__', '__path__', '__spec__', '__version__', '_api', '_meta', '_methods', 'faith_pd',
'__package__', '__path__', '__spec__', '__version__', '_api', '_meta', '_methods', 'set_random_seed', 'faith_pd',
'generalized', 'generalized_fp32', 'generalized_fp32_to_file', 'generalized_fp64', 'generalized_fp64_to_file', 'generalized_to_file',
'h5pcoa', 'h5pcoa_all', 'h5permanova', 'h5permanova_dict', 'h5unifrac', 'meta', 'pkg_resources', 'ssu', 'ssu_fast', 'ssu_inmem', 'ssu_to_file', 'ssu_to_file_v2',
'unweighted', 'unweighted_fp32', 'unweighted_fp32_to_file', 'unweighted_fp64', 'unweighted_fp64_to_file', 'unweighted_to_file',
Expand Down Expand Up @@ -228,12 +228,15 @@ The library can be accessed directly from within Python. If operating in this mo
Bypass the tips of the tree in the computation. This reduces compute
by about 50%, but is an approximation.
format : str, optional
Output format to use. Defaults to "hdf5".
Output format to use.
Defaults to "hdf5" if n_subsamples<=1 else "hdf5_nodist"
buf_dirname : str, optional
If set, the directory where the disk buffer is hosted,
can be used to reduce the amount of memory needed.
n_substeps : int, optional
Internally split the problem in substeps for reduced memory footprint.
n_subsamples : int
If >1, perform multiple subsamples.
subsample_depth : int
Depth of subsampling, if >0
subsample_with_replacement : bool
Expand Down
4 changes: 2 additions & 2 deletions unifrac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
h5unifrac,
h5pcoa, h5pcoa_all,
h5permanova, h5permanova_dict)
from unifrac._api import ssu, ssu_fast, faith_pd
from unifrac._api import ssu, ssu_fast, faith_pd, set_random_seed
from unifrac._api import ssu_to_file, ssu_to_file_v2, ssu_inmem


Expand All @@ -46,7 +46,7 @@
'weighted_unnormalized_fp64', 'generalized_fp64',
'unweighted_fp32', 'weighted_normalized_fp32',
'weighted_unnormalized_fp32', 'generalized_fp32',
'meta',
'meta', 'set_random_seed',
'unweighted_to_file', 'weighted_normalized_to_file',
'weighted_unnormalized_to_file',
'generalized_to_file', 'unweighted_fp64_to_file',
Expand Down
11 changes: 11 additions & 0 deletions unifrac/_api.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ cdef extern from "api.hpp":
void destroy_mat_full_fp64(mat_full_fp64** result)
void destroy_results_vec(results_vec** result)

void ssu_set_random_seed(unsigned int new_seed)

compute_status unifrac_to_file_v2(const char* biom_filename, const char* tree_filename, const char* out_filename,
const char* unifrac_method, bool variance_adjust, double alpha,
bool bypass_tips, unsigned int n_substeps, const char* format,
Expand All @@ -101,3 +103,12 @@ cdef extern from "api.hpp":
bool bypass_tips, unsigned int n_substeps, const char* format,
unsigned int pcoa_dims, const char *mmap_dir)

compute_status unifrac_multi_to_file_v2(const char* biom_filename, const char* tree_filename, const char* out_filename,
const char* unifrac_method, bool variance_adjust, double alpha,
bool bypass_tips, unsigned int n_substeps, const char* format,
unsigned int n_subsamples, unsigned int subsample_depth, bool subsample_with_replacement,
unsigned int pcoa_dims,
unsigned int permanova_perms, const char *grouping_filename, const char *grouping_columns,
const char *mmap_dir)


49 changes: 39 additions & 10 deletions unifrac/_api.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ def check_status(compute_status status):
else:
raise Exception("Unknown Error: {}".format(status))

def set_random_seed(unsigned int new_seed):
"""Set random seed used by this library

Parameters
----------
new_seed : int
The new seed value. It is declared as a C ``unsigned int`` and is
passed unchanged to the library function ``ssu_set_random_seed``.

Notes
-----
Nothing is returned; the call only forwards the seed to the library.
NOTE(review): presumably this seed drives the subsampling controlled by
``subsample_depth`` / ``n_subsamples`` -- confirm against the C++ library.
"""
ssu_set_random_seed(new_seed)


#
# Functions that compute Unifrac and return a memory object
#
Expand Down Expand Up @@ -527,6 +532,7 @@ def faith_pd(str biom_filename, str tree_filename):
def ssu_to_file_v2(str biom_filename, str tree_filename, str out_filename,
str unifrac_method, bool variance_adjust, double alpha,
bool bypass_tips, unsigned int n_substeps, str format,
unsigned int n_subsamples,
unsigned int subsample_depth, bool subsample_with_replacement,
unsigned int pcoa_dims,
unsigned int permanova_perms,
Expand Down Expand Up @@ -560,7 +566,10 @@ def ssu_to_file_v2(str biom_filename, str tree_filename, str out_filename,
n_substeps : int
The number of substeps to use.
format : str
Output format to use; one of {hdf5, hdf5_fp32, hdf5_fp64}
Output format to use; one of {hdf5, hdf5_fp32, hdf5_fp64, hdf5_nodist}
If an empty string, use "hdf5" if n_subsamples<=1 else "hdf5_nodist"
n_subsamples : int
If >1, perform multiple subsamples.
subsample_depth : int
Depth of subsampling, if >0
subsample_with_replacement : bool
Expand Down Expand Up @@ -615,11 +624,16 @@ def ssu_to_file_v2(str biom_filename, str tree_filename, str out_filename,
char* dirbuf_c_string
list ids

if format=="":
real_format = "hdf5" if n_subsamples<=1 else "hdf5_nodist"
else:
real_format = format

biom_py_bytes = biom_filename.encode()
tree_py_bytes = tree_filename.encode()
out_py_bytes = out_filename.encode()
met_py_bytes = unifrac_method.encode()
format_py_bytes = format.encode()
format_py_bytes = real_format.encode()
grouping_filename_py_bytes = grouping_filename.encode()
grouping_columns_py_bytes = grouping_columns.encode()
dirbuf_py_bytes = buf_dirname.encode()
Expand All @@ -632,14 +646,29 @@ def ssu_to_file_v2(str biom_filename, str tree_filename, str out_filename,
grouping_columns_c_string = grouping_columns_py_bytes
dirbuf_c_string = dirbuf_py_bytes

status = unifrac_to_file_v2(biom_c_string, tree_c_string, out_c_string,
met_c_string,
variance_adjust, alpha, bypass_tips,
n_substeps, format_c_string,
subsample_depth, subsample_with_replacement,
pcoa_dims,
permanova_perms, grouping_filename_c_string, grouping_columns_c_string,
dirbuf_c_string)
if n_subsamples>1:
if subsample_depth==0:
raise ValueError("subsample_depth cannot be 0 if n_subsamples>1")
status = unifrac_multi_to_file_v2(biom_c_string, tree_c_string, out_c_string,
met_c_string,
variance_adjust, alpha, bypass_tips,
n_substeps, format_c_string,
n_subsamples,
subsample_depth, subsample_with_replacement,
pcoa_dims,
permanova_perms,
grouping_filename_c_string, grouping_columns_c_string,
dirbuf_c_string)
else:
status = unifrac_to_file_v2(biom_c_string, tree_c_string, out_c_string,
met_c_string,
variance_adjust, alpha, bypass_tips,
n_substeps, format_c_string,
subsample_depth, subsample_with_replacement,
pcoa_dims,
permanova_perms,
grouping_filename_c_string, grouping_columns_c_string,
dirbuf_c_string)
check_status(status)

return out_filename
Expand Down
Loading