diff --git a/.gitignore b/.gitignore index f534691..1af936f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ __pycache__/ output/ .parallel/ .pytest_cache +.cache +.vscode # Files to ignore *.sif diff --git a/Snakefile b/Snakefile index e3ad38b..c7a3515 100644 --- a/Snakefile +++ b/Snakefile @@ -10,87 +10,105 @@ Date: 07-03-2022 ################################################################################# from yaml import safe_load -import pathlib +from pathlib import Path from shutil import copyfile ################################################################################# ##### Load samplesheet and config params ##### ################################################################################# -# Loading sample sheet as dictionary +# Loading sample sheet as dictionary # ("R1" and "R2" keys for fastq) sample_sheet = config["sample_sheet"] SAMPLES = {} with open(sample_sheet) as sample_sheet_file: - SAMPLES = safe_load(sample_sheet_file) + SAMPLES = safe_load(sample_sheet_file) -GIVEN_REF=config['ref'] +GIVEN_REF = config["reference"] -#@################################################################################ -#@#### Expected output ##### -#@################################################################################ -output_dir = pathlib.Path(config["out"]) -log_dir = output_dir.joinpath('log') -db_dir = pathlib.Path(config["db_dir"]) -mash_db = db_dir.joinpath('bacteria-refseq', 'db.msh') -referenceseeker_md5 = str(db_dir.joinpath('bacteria-refseq', 'downloaded_db.txt')) +################################################################################# +##### Expected output ##### +################################################################################# +output_dir = Path(config["output_dir"]) +log_dir = output_dir.joinpath("log") +db_dir = Path(config["db_dir"]) +mash_db = db_dir.joinpath("bacteria-refseq", "db.msh") +referenceseeker_md5 = str(db_dir.joinpath("bacteria-refseq", "downloaded_db.txt")) -if config['dryrun'] is True and GIVEN_REF is not None: +if (config["dryrun"] is True) and (GIVEN_REF != "None"): ref_genome = GIVEN_REF else: - ref_genome = output_dir.joinpath('ref_genomes_used', 'cluster_1', 'ref_genome.fasta') + ref_genome = output_dir.joinpath( + "ref_genomes_used", "cluster_1", "ref_genome.fasta" + ) -if GIVEN_REF is not None and not ref_genome.exists(): +# GIVEN_REF is converted to str +if (GIVEN_REF != "None") and (not ref_genome.exists()): + print(f"Copying reference genome {GIVEN_REF} to {ref_genome}") output_dir.mkdir(exist_ok=True, parents=True) ref_dir = ref_genome.parent ref_dir.mkdir(exist_ok=True, parents=True) copyfile(GIVEN_REF, ref_genome) + def get_output_per_cluster(cluster): with open(checkpoints.preclustering.get(**cluster).output[0]) as file: SAMPLE_CLUSTERS = yaml.safe_load(file) - CLUSTERS = set([ cluster for sample, cluster in SAMPLE_CLUSTERS.items() ]) - output_files = expand(output_dir.joinpath('tree/cluster_{cluster}/{file}'), - cluster=CLUSTERS, - file=['newick_tree.txt', 'snp_matrix.csv']) - output_iqtree = expand(output_dir.joinpath('ml_tree', 'cluster_{cluster}'), - cluster=CLUSTERS) + CLUSTERS = set([cluster for sample, cluster in SAMPLE_CLUSTERS.items()]) + output_files = expand( + output_dir.joinpath("tree/cluster_{cluster}/{file}"), + cluster=CLUSTERS, + file=["newick_tree.txt", "snp_matrix.csv"], + ) + output_iqtree = expand( + output_dir.joinpath("ml_tree", "cluster_{cluster}"), cluster=CLUSTERS + ) return output_files + output_iqtree 
-#@################################################################################ -#@#### Processes ##### -#@################################################################################ +################################################################################# +##### Processes ##### +################################################################################# + +if GIVEN_REF != "None": -if GIVEN_REF is not None: include: "bin/rules/mock_cluster.smk" + else: + include: "bin/rules/pre_cluster.smk" include: "bin/rules/find_reference.smk" + include: "bin/rules/snp_analysis.smk" include: "bin/rules/dm_n_viz.smk" -#@################################################################################ -#@#### Finalize pipeline (error/success) ##### -#@################################################################################ + +################################################################################# +##### Finalize pipeline (error/success) ##### +################################################################################# + onerror: - shell(""" + shell( + """ rm -f tmp*npy rm -f tmp*_fastme_stat.txt rm -f tmp*_fastme_tree.nwk rm -f tmp*dist.list echo -e "Something went wrong with Juno-SNP pipeline. Please check the logging files in {output_dir}/log/" - """) + """ + ) ################################################################################# ##### Specify final output ##### ################################################################################# + localrules: - all + all, + rule all: input: diff --git a/envs/juno_snp.yaml b/envs/juno_snp.yaml new file mode 100644 index 0000000..bcb8d5e --- /dev/null +++ b/envs/juno_snp.yaml @@ -0,0 +1,19 @@ +name: juno_snp +channels: + - bioconda + - conda-forge + - nodefaults +dependencies: + - git=2.40.* + - mamba==1.3.* + - pip>=21.2 + - pandas==1.3.2 + - python>3.7.6 + - snakemake==7.32.0 + - pytest + - biopython + - pip: + - numpy + - "--editable=git+https://github.com/RIVM-bioinformatics/juno-library.git@v2.1.3#egg=juno_library" + + diff --git a/envs/master_env.yaml b/envs/master_env.yaml deleted file mode 100644 index dcae50a..0000000 --- a/envs/master_env.yaml +++ /dev/null @@ -1,23 +0,0 @@ -name: juno_snp -channels: - - bioconda - - conda-forge - - nodefaults -dependencies: - - git - - mamba==0.24.0 - - libmamba==0.24.0 - - libmambapy==0.24.0 - - libarchive==3.5.2 - - pip>=21.2 - - pandas==1.3.2 - - python>3.7.6 - - snakemake==7.18.2 - - pytest - - pytest-cov - - biopython - - pip: - - numpy - - "--editable=git+https://github.com/RIVM-bioinformatics/base_juno_pipeline.git@v0.2.7#egg=base_juno" - - diff --git a/juno_snp.py b/juno_snp.py index 0517249..6c4abde 100644 --- a/juno_snp.py +++ b/juno_snp.py @@ -11,117 +11,185 @@ """ # Dependencies -from base_juno_pipeline import base_juno_pipeline, helper_functions +from juno_library import Pipeline import argparse -import pathlib -import subprocess +from pathlib import Path +from typing import Union, Callable import yaml +from dataclasses import dataclass, field + from version import __package_name__, __version__, __description__ -class JunoSnpRun( - base_juno_pipeline.PipelineStartup, base_juno_pipeline.RunSnakemake -): - """Class with the arguments and specifications that are only for the Juno-typing pipeline but inherit from PipelineStartup and RunSnakemake""" +def main() -> None: + juno_snp = JunoSnp() + juno_snp.run() + +def check_number_within_range( + minimum: float = 0, maximum: float = 1 +) -> Union[Callable[[str], str], argparse.FileType]: + """ 
+ Creates a function to check whether a numeric value is within a range, inclusive. + + The generated function can be used by the `type` parameter in argparse.ArgumentParser. + See https://stackoverflow.com/a/53446178. + + Args: + value: the numeric value to check. + minimum: minimum of allowed range, inclusive. + maximum: maximum of allowed range, inclusive. + + Returns: + A function which takes a single argument and checks this against the range. + + Raises: + argparse.ArgumentTypeError: if the value is outside the range. + ValueError: if the value cannot be converted to float. + """ + + def generated_func_check_range(value: str) -> str: + value_f = float(value) + if (value_f < minimum) or (value_f > maximum): + raise argparse.ArgumentTypeError( + f"Supplied value {value} is not within expected range {minimum} to {maximum}." + ) + return str(value) + + return generated_func_check_range + + +@dataclass +class JunoSnp(Pipeline): pipeline_name: str = __package_name__ pipeline_version: str = __version__ + input_type: str = 'both' - def __init__(self, - input_dir, - ref, - output_dir, - db_dir='/mnt/db/juno/snp', - ani=0.95, - conserved_dna=0.69, - sliding_window=400, - kmer_length=21, - sketch_size=1000, - mash_threshold=0.01, - tree_algorithm='upgma', - cores=300, - time_limit=60, - local=False, - queue='bio', - unlock=False, - rerunincomplete=False, - dryrun=False, - run_in_container=True, - prefix=None, - **kwargs): - """Initiating Juno-SNP pipeline""" - - # Get proper file paths - output_dir = pathlib.Path(output_dir).resolve() - if ref is not None: - self.ref = pathlib.Path(ref).resolve() - else: - self.ref = None - self.db_dir = pathlib.Path(db_dir).resolve() - self.ani_threshold=float(ani) - self.conserved_dna_threshold=float(conserved_dna) - self.sliding_window=int(sliding_window) - self.kmer_length=int(kmer_length) - self.sketch_size=int(sketch_size) - self.mash_threshold=float(mash_threshold) - if tree_algorithm not in ['upgma', 'nj']: - raise ValueError( - f'The provided tree algorithm {tree_algorithm} is not supported.' \ - ' Please choose upgma or nj' - ) - self.tree_algorithm=tree_algorithm - workdir = pathlib.Path(__file__).parent.resolve() - self.path_to_audit = output_dir.joinpath('audit_trail') - base_juno_pipeline.PipelineStartup.__init__( - self, - input_dir=pathlib.Path(input_dir).resolve(), - input_type='both', - min_num_lines=2 - ) # Min for viable fasta - base_juno_pipeline.RunSnakemake.__init__( - self, - pipeline_name='Juno_SNP', - pipeline_version='v0.1', - output_dir=output_dir, - workdir=workdir, - cores=cores, - time_limit=time_limit, - local=local, - queue=queue, - unlock=unlock, - rerunincomplete=rerunincomplete, - dryrun=dryrun, - useconda=not run_in_container, - conda_prefix=prefix, - usesingularity=run_in_container, - singularityargs=f"--bind {self.input_dir}:{self.input_dir} --bind {output_dir}:{output_dir} --bind {self.db_dir}:{self.db_dir}", - singularity_prefix=prefix, - restarttimes=1, - latency_wait=60, - name_snakemake_report=str(self.path_to_audit.joinpath('juno_snp_report.html')), - **kwargs + def _add_args_to_parser(self) -> None: + super()._add_args_to_parser() + + self.parser.description = __description__ + + self.add_argument( + "-r", + "--reference", + type = Path, + required = False, + metavar = "FILE", + help = "Relative or absolute path to a reference fasta file." + ) + self.add_argument( + "-d", + "--db-dir", + type = Path, + metavar = "DIR", + default = "/mnt/db/juno/snp", + help = "Relative or absolute path to the database directory. 
If non is given, /mnt/db/juno/snp"\ + " (where the default db resides at the RIVM will be used)." + ) + self.add_argument( + "-a", + "--ani", + type = check_number_within_range(0, 1), + metavar = "INT", + default = 0.95, + help="ANI threshold. Passed to referenceseeker" + ) + self.add_argument( + "-cd", + "--conserved-dna", + type = check_number_within_range(0, 1), + metavar = "INT", + default = 0.69, + help="Conserved DNA threshold. Passed to referenceseeker" ) + self.add_argument( + "-sw", + "--sliding-window", + type = check_number_within_range(100, 1000), + metavar = "INT", + default = 400, + help="Sliding window - the lower the more accurate but also slower. Passed to referenceseeker" + ) + self.add_argument( + "-kl", + "--kmer-length", + type = check_number_within_range(1, 32), + metavar = "INT", + default = 21, + help="K-mer length - longer kmers increase specificity, shorter kmers increase sensitivity. Passed to mash sketch" + ) + self.add_argument( + "-ss", + "--sketch-size", + type = int, + metavar = "INT", + default = 1000, + help="Sketch size - larger sketch size better represents the original sequence," \ + " but leads to large files and longer running time. Passed to mash sketch" + ) + self.add_argument( + "-mt", + "--mash-threshold", + type = check_number_within_range(0, 1), + metavar = "FLOAT", + default = 0.01, + help="Mash threshold - maximum mash distance to consider genomes similar. Passed to preclustering script." + ) + self.add_argument( + "-t", + "--tree-algorithm", + type = str, + metavar = "ALGORITHM", + default = 'upgma', + choices=['upgma', 'nj'], + help="Algorithm to use for making the tree. It can be 'upgma' or 'nj' (neighbor-joining). Default is upgma" + ) + - # Start pipeline - self.run_juno_snp_pipeline() - def start_juno_snp_pipeline(self): - """ - Function to start the pipeline - """ - self.start_juno_pipeline() - with open(self.sample_sheet, 'w') as file_: - yaml.dump(self.sample_dict, file_, default_flow_style=False) + def _parse_args(self) -> argparse.Namespace: + args = super()._parse_args() + + self.reference: Path = args.reference + self.db_dir: Path = args.db_dir + self.ani: float = args.ani + self.conserved_dna: float = args.conserved_dna + self.sliding_window: int = args.sliding_window + self.kmer_length: int = args.kmer_length + self.sketch_size: int = args.sketch_size + self.mash_threshold: float = args.mash_threshold + self.tree_algorithm: str = args.tree_algorithm + self.dryrun: bool = args.dryrun + + return args - def write_userparameters(self): + def setup(self) -> None: + super().setup() + if self.snakemake_args["use_singularity"]: + self.snakemake_args["singularity_args"] = " ".join( + [ + self.snakemake_args["singularity_args"], + f"--bind {self.db_dir}:{self.db_dir}", + ] # paths that singularity should be able to read from can be bound by adding to the above list + ) - config_params = { - 'input_dir': str(self.input_dir), - 'ref': self.ref, - 'out': str(self.output_dir), - 'db_dir': str(self.db_dir), + with open( + Path(__file__).parent.joinpath("config/pipeline_parameters.yaml") + ) as f: + parameters_dict = yaml.safe_load(f) + self.snakemake_config.update(parameters_dict) + + self.user_parameters = { + "input_dir": str(self.input_dir), + "output_dir": str(self.output_dir), + "exclusion_file": str(self.exclusion_file), + "db_dir": str(self.db_dir), + "reference": str(self.reference), + "use_singularity": str(self.snakemake_args["use_singularity"]), + 'dryrun': self.dryrun, 'referenceseeker': { 'db': 
str(self.db_dir.joinpath('bacteria-refseq')), - 'ani_threshold': self.ani_threshold, - 'conserved_dna_threshold': self.conserved_dna_threshold, + 'ani_threshold': self.ani, + 'conserved_dna_threshold': self.conserved_dna, 'sliding_window': self.sliding_window }, 'mash': { @@ -132,249 +200,212 @@ def write_userparameters(self): 'tree': { 'algorithm' : self.tree_algorithm }, - 'dryrun': self.dryrun } + + +if __name__ == "__main__": + main() + + + +# class JunoSnpRun( +# base_juno_pipeline.PipelineStartup, base_juno_pipeline.RunSnakemake +# ): +# """Class with the arguments and specifications that are only for the Juno-typing pipeline but inherit from PipelineStartup and RunSnakemake""" +# pipeline_name: str = __package_name__ +# pipeline_version: str = __version__ + +# def __init__(self, +# input_dir, +# ref, +# output_dir, +# db_dir='/mnt/db/juno/snp', +# ani=0.95, +# conserved_dna=0.69, +# sliding_window=400, +# kmer_length=21, +# sketch_size=1000, +# mash_threshold=0.01, +# tree_algorithm='upgma', +# cores=300, +# time_limit=60, +# local=False, +# queue='bio', +# unlock=False, +# rerunincomplete=False, +# dryrun=False, +# run_in_container=True, +# prefix=None, +# **kwargs): +# """Initiating Juno-SNP pipeline""" - with open(self.user_parameters, 'w') as file_: - yaml.dump(config_params, file_, default_flow_style=False) +# # Get proper file paths +# output_dir = pathlib.Path(output_dir).resolve() +# if ref is not None: +# self.ref = pathlib.Path(ref).resolve() +# else: +# self.ref = None +# self.db_dir = pathlib.Path(db_dir).resolve() +# self.ani_threshold=float(ani) +# self.conserved_dna_threshold=float(conserved_dna) +# self.sliding_window=int(sliding_window) +# self.kmer_length=int(kmer_length) +# self.sketch_size=int(sketch_size) +# self.mash_threshold=float(mash_threshold) +# if tree_algorithm not in ['upgma', 'nj']: +# raise ValueError( +# f'The provided tree algorithm {tree_algorithm} is not supported.' 
\ +# ' Please choose upgma or nj' +# ) +# self.tree_algorithm=tree_algorithm +# workdir = pathlib.Path(__file__).parent.resolve() +# self.path_to_audit = output_dir.joinpath('audit_trail') +# base_juno_pipeline.PipelineStartup.__init__( +# self, +# input_dir=pathlib.Path(input_dir).resolve(), +# input_type='both', +# min_num_lines=2 +# ) # Min for viable fasta +# base_juno_pipeline.RunSnakemake.__init__( +# self, +# pipeline_name='Juno_SNP', +# pipeline_version='v0.1', +# output_dir=output_dir, +# workdir=workdir, +# cores=cores, +# time_limit=time_limit, +# local=local, +# queue=queue, +# unlock=unlock, +# rerunincomplete=rerunincomplete, +# dryrun=dryrun, +# useconda=not run_in_container, +# conda_prefix=prefix, +# usesingularity=run_in_container, +# singularityargs=f"--bind {self.input_dir}:{self.input_dir} --bind {output_dir}:{output_dir} --bind {self.db_dir}:{self.db_dir}", +# singularity_prefix=prefix, +# restarttimes=1, +# latency_wait=60, +# name_snakemake_report=str(self.path_to_audit.joinpath('juno_snp_report.html')), +# **kwargs +# ) + +# # Start pipeline +# self.run_juno_snp_pipeline() - return config_params +# def start_juno_snp_pipeline(self): +# """ +# Function to start the pipeline +# """ +# self.start_juno_pipeline() +# with open(self.sample_sheet, 'w') as file_: +# yaml.dump(self.sample_dict, file_, default_flow_style=False) + +# def write_userparameters(self): + +# config_params = { +# 'input_dir': str(self.input_dir), +# 'ref': self.ref, +# 'out': str(self.output_dir), +# 'db_dir': str(self.db_dir), +# 'referenceseeker': { +# 'db': str(self.db_dir.joinpath('bacteria-refseq')), +# 'ani_threshold': self.ani_threshold, +# 'conserved_dna_threshold': self.conserved_dna_threshold, +# 'sliding_window': self.sliding_window +# }, +# 'mash': { +# 'kmer_length': self.kmer_length, +# 'sketch_size': self.sketch_size, +# 'threshold': self.mash_threshold +# }, +# 'tree': { +# 'algorithm' : self.tree_algorithm +# }, +# 'dryrun': self.dryrun +# } - def run_juno_snp_pipeline(self): - self.start_juno_snp_pipeline() - self.user_params = self.write_userparameters() - self.get_run_info() - if not self.dryrun or self.unlock: - self.path_to_audit.mkdir(parents=True, exist_ok=True) - - self.successful_run = self.run_snakemake() - assert self.successful_run, f'Please check the log files' - if not self.dryrun or self.unlock: - subprocess.run( - ['find', self.output_dir, '-type', 'f', '-empty', '-exec', 'rm', '{}', ';'] - ) - subprocess.run( - ['find', self.output_dir, '-type', 'd', '-empty', '-exec', 'rm', '-rf', '{}', ';'] - ) - self.make_snakemake_report() - -def check_sliding_window(num): - num_int = int(num) - if num_int > 99 and num_int < 1001: - return num_int - else: - raise ValueError( - f'The provided input value for sliding window {str(num)} is not valid.' \ - ' Please provide a number between 100-1000' - ) +# with open(self.user_parameters, 'w') as file_: +# yaml.dump(config_params, file_, default_flow_style=False) -def check_between_zero_and_one(num): - num_f = float(num) - if num_f >= 0 and num_f <= 1: - return num_f - else: - raise ValueError( - f'The provided input value {str(num)} is not valid. 
'\ - 'Please provide a value between 0-1' - ) +# return config_params + +# def run_juno_snp_pipeline(self): +# self.start_juno_snp_pipeline() +# self.user_params = self.write_userparameters() +# self.get_run_info() +# if not self.dryrun or self.unlock: +# self.path_to_audit.mkdir(parents=True, exist_ok=True) -def check_kmer_length(num): - num_int = int(num) - if num_int >= 1 and num_int <= 32: - return num_int - else: - raise ValueError( - f'The provided input value for kmer length {str(num)} is not valid.' \ - ' Please provide a number between 1-32' - ) +# self.successful_run = self.run_snakemake() +# assert self.successful_run, f'Please check the log files' +# if not self.dryrun or self.unlock: +# subprocess.run( +# ['find', self.output_dir, '-type', 'f', '-empty', '-exec', 'rm', '{}', ';'] +# ) +# subprocess.run( +# ['find', self.output_dir, '-type', 'd', '-empty', '-exec', 'rm', '-rf', '{}', ';'] +# ) +# self.make_snakemake_report() + +# def check_sliding_window(num): +# num_int = int(num) +# if num_int > 99 and num_int < 1001: +# return num_int +# else: +# raise ValueError( +# f'The provided input value for sliding window {str(num)} is not valid.' \ +# ' Please provide a number between 100-1000' +# ) + +# def check_between_zero_and_one(num): +# num_f = float(num) +# if num_f >= 0 and num_f <= 1: +# return num_f +# else: +# raise ValueError( +# f'The provided input value {str(num)} is not valid. '\ +# 'Please provide a value between 0-1' +# ) -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description = __description__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "-i", - "--input", - type = pathlib.Path, - required = True, - metavar = "DIR", - help = "Relative or absolute path to the input directory. It must either be the output directory of the Juno-assembly pipeline or it must contain all the raw reads (fastq) and assemblies (fasta) files for all samples to be processed." - ) - parser.add_argument( - "-r", - "--reference", - type = pathlib.Path, - required = False, - metavar = "FILE", - help = "Relative or absolute path to a reference fasta file." - ) - parser.add_argument( - "-o", - "--output", - type = pathlib.Path, - metavar = "DIR", - default = "output", - help = "Relative or absolute path to the output directory. If non is given, an 'output' directory will be created in the current directory." - ) - parser.add_argument( - "-d", - "--db-dir", - type = pathlib.Path, - metavar = "DIR", - default = "/mnt/db/juno/snp", - help = "Relative or absolute path to the database directory. If non is given, /mnt/db/juno/snp"\ - " (where the default db resides at the RIVM will be used)." - ) - parser.add_argument( - "-a", - "--ani", - type = check_between_zero_and_one, - metavar = "INT", - default = 0.95, - help="ANI threshold. Passed to referenceseeker" - ) - parser.add_argument( - "-cd", - "--conserved-dna", - type = check_between_zero_and_one, - metavar = "INT", - default = 0.69, - help="Conserved DNA threshold. Passed to referenceseeker" - ) - parser.add_argument( - "-sw", - "--sliding-window", - type = check_sliding_window, - metavar = "INT", - default = 400, - help="Sliding window - the lower the more accurate but also slower. Passed to referenceseeker" - ) - parser.add_argument( - "-kl", - "--kmer-length", - type = check_kmer_length, - metavar = "INT", - default = 21, - help="K-mer length - longer kmers increase specificity, shorter kmers increase sensitivity. 
Passed to mash sketch" - ) - parser.add_argument( - "-ss", - "--sketch-size", - type = int, - metavar = "INT", - default = 1000, - help="Sketch size - larger sketch size better represents the original sequence," \ - " but leads to large files and longer running time. Passed to mash sketch" - ) - parser.add_argument( - "-mt", - "--mash-threshold", - type = check_between_zero_and_one, - metavar = "FLOAT", - default = 0.01, - help="Mash threshold - maximum mash distance to consider genomes similar. Passed to preclustering script." - ) - parser.add_argument( - "-t", - "--tree-algorithm", - type = str, - metavar = "ALGORITHM", - default = 'upgma', - choices=['upgma', 'nj'], - help="Algorithm to use for making the tree. It can be 'upgma' or 'nj' (neighbor-joining). Default is upgma" - ) - parser.add_argument( - "--no-containers", - action = 'store_false', - help = "Use conda environments instead of containers." - ) - parser.add_argument( - "-p", - "--prefix", - type = str, - metavar="PATH", - default=None, - help = "Conda or singularity prefix. Path to the place where you want to store the conda environments or the singularity images." - ) - parser.add_argument( - "-c", - "--cores", - type = int, - metavar = "INT", - default = 300, - help="Number of cores to use. Default is 300" - ) - #TODO: Get from ${irods_runsheet_sys__runsheet__lsf_queue} if it exists - parser.add_argument( - "-q", - "--queue", - type = str, - metavar = "STR", - default = 'bio', - help = 'Name of the queue that the job will be submitted to if working on a cluster.' - ) - parser.add_argument( - "-l", - "--local", - action='store_true', - help="Running pipeline locally (instead of in a computer cluster). Default is running it in a cluster." - ) - parser.add_argument( - "-w", - "--time-limit", - type = int, - metavar = "INT", - default = 60, - help="Time limit per job in minutes (passed as -W argument to bsub). Jobs will be killed if not finished in this time." - ) - # Snakemake arguments - parser.add_argument( - "-u", - "--unlock", - action = 'store_true', - help = "Unlock output directory (passed to snakemake)." - ) - parser.add_argument( - "-n", - "--dryrun", - action='store_true', - help="Dry run printing steps to be taken in the pipeline without actually running it (passed to snakemake)." - ) - parser.add_argument( - "--rerunincomplete", - action='store_true', - help="Re-run jobs if they are marked as incomplete (passed to snakemake)." - ) - parser.add_argument( - "--snakemake-args", - nargs='*', - default={}, - action=helper_functions.SnakemakeKwargsAction, - help="Extra arguments to be passed to snakemake API (https://snakemake.readthedocs.io/en/stable/api_reference/snakemake.html)." 
- ) - parser.add_argument('--version', action='version', version=f'{__package_name__} {__version__}') - args = parser.parse_args() - JunoSnpRun( - input_dir=args.input, - ref=args.reference, - output_dir=args.output, - db_dir=args.db_dir, - ani=args.ani, - conserved_dna=args.conserved_dna, - sliding_window=args.sliding_window, - kmer_length=args.kmer_length, - sketch_size=args.sketch_size, - mash_threshold=args.mash_threshold, - tree_algorithm=args.tree_algorithm, - cores=args.cores, - local=args.local, - time_limit=args.time_limit, - queue=args.queue, - unlock=args.unlock, - rerunincomplete=args.rerunincomplete, - dryrun=args.dryrun, - run_in_container=args.no_containers, - prefix=args.prefix, - **args.snakemake_args - ) +# def check_kmer_length(num): +# num_int = int(num) +# if num_int >= 1 and num_int <= 32: +# return num_int +# else: +# raise ValueError( +# f'The provided input value for kmer length {str(num)} is not valid.' \ +# ' Please provide a number between 1-32' +# ) + +# if __name__ == '__main__': +# parser = argparse.ArgumentParser( +# description = __description__, +# formatter_class=argparse.ArgumentDefaultsHelpFormatter +# ) + +# parser.add_argument('--version', action='version', version=f'{__package_name__} {__version__}') +# args = parser.parse_args() +# JunoSnpRun( +# input_dir=args.input, +# ref=args.reference, +# output_dir=args.output, +# db_dir=args.db_dir, +# ani=args.ani, +# conserved_dna=args.conserved_dna, +# sliding_window=args.sliding_window, +# kmer_length=args.kmer_length, +# sketch_size=args.sketch_size, +# mash_threshold=args.mash_threshold, +# tree_algorithm=args.tree_algorithm, +# cores=args.cores, +# local=args.local, +# time_limit=args.time_limit, +# queue=args.queue, +# unlock=args.unlock, +# rerunincomplete=args.rerunincomplete, +# dryrun=args.dryrun, +# run_in_container=args.no_containers, +# prefix=args.prefix, +# **args.snakemake_args +# )
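
Reviewer note on the argparse validator factory introduced in juno_snp.py: `check_number_within_range` returns a closure that argparse invokes via the `type=` parameter, so the range checks that used to live in `check_between_zero_and_one`, `check_sliding_window`, and `check_kmer_length` collapse into one declarative call per option. Below is a minimal, self-contained sketch of that pattern, assuming a standalone demo script; the `--ani` option name and the script itself are illustrative only and not part of this patch.

# demo_range_validator.py -- illustrative sketch, not part of the juno-snp patch
import argparse
from typing import Callable


def check_number_within_range(minimum: float = 0, maximum: float = 1) -> Callable[[str], str]:
    """Return an argparse `type=` callable enforcing minimum <= value <= maximum (inclusive)."""

    def generated_func_check_range(value: str) -> str:
        value_f = float(value)  # raises ValueError for non-numeric input
        if value_f < minimum or value_f > maximum:
            raise argparse.ArgumentTypeError(
                f"Supplied value {value} is not within expected range {minimum} to {maximum}."
            )
        return value  # argparse stores the validated string

    return generated_func_check_range


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Demo of the range-validator factory.")
    # e.g. "--ani 0.95" is accepted, "--ani 1.5" exits with an ArgumentTypeError message
    parser.add_argument("--ani", type=check_number_within_range(0, 1), default="0.95")
    print(parser.parse_args())

Because the generated callable returns the value as a string (matching the patch), downstream code that needs a float must still convert it; an alternative design would return `value_f` and annotate the option as `float`.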