Skip to content

Commit

Permalink
refactor!: update juno library to v2
Browse files Browse the repository at this point in the history
  • Loading branch information
boasvdp committed Feb 21, 2024
1 parent 396a8c9 commit 347971f
Show file tree
Hide file tree
Showing 5 changed files with 437 additions and 390 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ __pycache__/
output/
.parallel/
.pytest_cache
.cache
.vscode

# Files to ignore
*.sif
Expand Down
80 changes: 49 additions & 31 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,87 +10,105 @@ Date: 07-03-2022
#################################################################################

from yaml import safe_load
import pathlib
from pathlib import Path
from shutil import copyfile

#################################################################################
##### Load samplesheet and config params #####
#################################################################################

# Loading sample sheet as dictionary
# Loading sample sheet as dictionary
# ("R1" and "R2" keys for fastq)
sample_sheet = config["sample_sheet"]
SAMPLES = {}
with open(sample_sheet) as sample_sheet_file:
SAMPLES = safe_load(sample_sheet_file)
SAMPLES = safe_load(sample_sheet_file)

GIVEN_REF=config['ref']
GIVEN_REF = config["reference"]

#@################################################################################
#@#### Expected output #####
#@################################################################################
output_dir = pathlib.Path(config["out"])
log_dir = output_dir.joinpath('log')
db_dir = pathlib.Path(config["db_dir"])
mash_db = db_dir.joinpath('bacteria-refseq', 'db.msh')
referenceseeker_md5 = str(db_dir.joinpath('bacteria-refseq', 'downloaded_db.txt'))
#################################################################################
##### Expected output #####
#################################################################################
output_dir = Path(config["output_dir"])
log_dir = output_dir.joinpath("log")
db_dir = Path(config["db_dir"])
mash_db = db_dir.joinpath("bacteria-refseq", "db.msh")
referenceseeker_md5 = str(db_dir.joinpath("bacteria-refseq", "downloaded_db.txt"))

# Decide where the reference genome lives.
# GIVEN_REF is converted to str, so "no reference supplied" is the literal
# string "None" rather than the None object.
if (config["dryrun"] is True) and (GIVEN_REF != "None"):
    # Dry run with a user-supplied reference: point at the supplied file.
    # Wrapped in Path because the check below calls .exists() and .parent,
    # which a plain str does not provide; it also matches the type produced
    # by the else-branch.
    ref_genome = Path(GIVEN_REF)
else:
    ref_genome = output_dir.joinpath(
        "ref_genomes_used", "cluster_1", "ref_genome.fasta"
    )

# If a reference was supplied and nothing exists yet at the chosen location,
# create the needed directories and copy the supplied file there.
if (GIVEN_REF != "None") and (not ref_genome.exists()):
    print(f"Copying reference genome {GIVEN_REF} to {ref_genome}")
    output_dir.mkdir(exist_ok=True, parents=True)
    ref_genome.parent.mkdir(exist_ok=True, parents=True)
    copyfile(GIVEN_REF, ref_genome)

def get_output_per_cluster(cluster):
    """Build the list of per-cluster tree outputs from the preclustering result.

    The first output of the ``preclustering`` checkpoint is read as YAML and
    treated as a mapping whose values are cluster identifiers. For every
    distinct cluster the expected tree files and ML-tree directory under
    ``output_dir`` are generated.

    Args:
        cluster: Mapping unpacked as keyword arguments into
            ``checkpoints.preclustering.get``. NOTE(review): presumably the
            Snakemake wildcards object passed to an input function; confirm
            against the calling rule.

    Returns:
        The ``expand`` results for ``tree/cluster_{cluster}/{file}`` followed
        by those for ``ml_tree/cluster_{cluster}``.
    """
    with open(checkpoints.preclustering.get(**cluster).output[0]) as file:
        # Only `safe_load` is imported from yaml at the top of this file, so
        # it is called directly instead of through the unbound `yaml` name.
        sample_clusters = safe_load(file)

    # Distinct cluster ids; the sample names (keys) are not needed here.
    clusters = set(sample_clusters.values())

    output_files = expand(
        output_dir.joinpath("tree/cluster_{cluster}/{file}"),
        cluster=clusters,
        file=["newick_tree.txt", "snp_matrix.csv"],
    )
    output_iqtree = expand(
        output_dir.joinpath("ml_tree", "cluster_{cluster}"), cluster=clusters
    )
    return output_files + output_iqtree


#@################################################################################
#@#### Processes #####
#@################################################################################
#################################################################################
##### Processes #####
#################################################################################

if GIVEN_REF != "None":

if GIVEN_REF is not None:
include: "bin/rules/mock_cluster.smk"

else:

include: "bin/rules/pre_cluster.smk"
include: "bin/rules/find_reference.smk"


include: "bin/rules/snp_analysis.smk"
include: "bin/rules/dm_n_viz.smk"

#@################################################################################
#@#### Finalize pipeline (error/success) #####
#@################################################################################

#################################################################################
##### Finalize pipeline (error/success) #####
#################################################################################


onerror:
shell("""
shell(
"""
rm -f tmp*npy
rm -f tmp*_fastme_stat.txt
rm -f tmp*_fastme_tree.nwk
rm -f tmp*dist.list
echo -e "Something went wrong with Juno-SNP pipeline. Please check the logging files in {output_dir}/log/"
""")
"""
)


#################################################################################
##### Specify final output #####
#################################################################################


localrules:
all
all,


rule all:
input:
Expand Down
19 changes: 19 additions & 0 deletions envs/juno_snp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: juno_snp
channels:
- bioconda
- conda-forge
- nodefaults
dependencies:
- git=2.40.*
- mamba==1.3.*
- pip>=21.2
- pandas==1.3.2
- python>3.7.6
- snakemake==7.32.0
- pytest
- biopython
- pip:
- numpy
- "--editable=git+https://github.com/RIVM-bioinformatics/[email protected]#egg=juno_library"


23 changes: 0 additions & 23 deletions envs/master_env.yaml

This file was deleted.

Loading

0 comments on commit 347971f

Please sign in to comment.