Skip to content

Commit

Permalink
citations and clean-up
Browse files Browse the repository at this point in the history
  • Loading branch information
nsylvestertgen committed Jul 5, 2024
1 parent 6f6183c commit e98c6ce
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 41 deletions.
13 changes: 0 additions & 13 deletions q2_asap/_methods.py

This file was deleted.

26 changes: 11 additions & 15 deletions q2_asap/analyzeAmplicons.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,28 @@
import re
import time


# function to check if a job with the given job ID is still running.
# returns true if the job is still running, false otherwise.
def is_job_running(job_id):
    """Check whether Slurm's ``squeue`` output still mentions the given job.

    Runs ``squeue --job <job_id>`` and searches its standard output for the
    job ID. The ID must stand alone as a number: it is not matched when it is
    only part of a longer run of digits, so ``12`` does not match text that
    contains ``123``. Text such as ``123_4`` still matches ``123``.

    :param job_id: The Slurm job ID to look up; converted with ``str`` and
        stripped of surrounding whitespace.
    :return: True if the ID appears in the ``squeue`` output. False if it
        does not, if ``job_id`` is empty or whitespace-only, or if ``squeue``
        could not be run (the error is printed in that case).
    """
    wanted = str(job_id).strip()
    if not wanted:
        # An empty string is a substring of any output, so without this
        # guard a blank ID would be reported as running on every check.
        return False

    # Keep the try body to the one call that can fail.
    try:
        result = subprocess.run(
            ['squeue', '--job', wanted], capture_output=True, text=True
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Best effort: a failure to query the scheduler is reported and
        # treated the same as "job not listed".
        print(f"Error checking job status: {e}")
        return False

    # Digit lookarounds stop a short ID from matching inside a longer number.
    pattern = rf'(?<!\d){re.escape(wanted)}(?!\d)'
    return re.search(pattern, result.stdout) is not None

# function to pause the script until the job with the given job ID is completed
# checks if the job is still running every check interval
def wait_for_job_completion(job_id, check_interval=10):
    """Block until ``is_job_running`` stops reporting the job as running.

    Each poll that finds the job still running prints a progress line and
    then sleeps for ``check_interval`` seconds. Once a poll reports the job
    is not running, a completion line is printed and the function returns.

    :param job_id: The Slurm job ID to poll.
    :param check_interval: Seconds to sleep between consecutive polls.
    """
    while True:
        if not is_job_running(job_id):
            # The job is no longer reported as running; stop polling.
            break
        print(f"Job {job_id} is still running. Checking again in {check_interval} seconds...")
        time.sleep(check_interval)

    print(f"Job {job_id} has completed.")


# function that runs ASAP analyze amplicons using specified parameters. Sets up and executes command within a conda environment
# waits for job to complete, then organizes output files into designated output directories
def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: str=None, depth: int=10, breadth: float=0.9,
min_base_qual: int=20, consensus_proportion: float=0.8, fill_gaps: str="n", aligner: str="bwa", aligner_args: str='"-k 51 -L 20"'
) -> (
Expand All @@ -59,23 +55,23 @@ def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: s

# combine conda environment and command TODO: fix conda environment
shell_script= f"""
source /home/cjohnson/anaconda3/etc/profile.d/conda.sh
conda activate /home/dlemmer/.conda/envs/asap
{command}
conda run -p /home/dlemmer/.conda/envs/asap {command}
"""

# call asap command
result = subprocess.run(['bash', '-c', shell_script], capture_output=True, text=True)

# capture stdout
output = result.stdout

# find the job ID in the stdout
job_id_match = re.findall('(?<=final job id is: )\d+', output)[0]

# wait for the job to complete
wait_for_job_completion(job_id_match)
asap_output_dir = os.path.join(temp_dir, "asap_output")

# move output into artifact directories by looping through files, getting the file path
# and moving the file to correct directory TODO: search through multiple directories
# and moving the file to correct directory
for file_name in os.listdir(asap_output_dir):
file_path = os.path.join(asap_output_dir, file_name)
if re.search(r'\.(amb|ann|bwt|pac|sa|fasta)$', file_name):
Expand Down
7 changes: 7 additions & 0 deletions q2_asap/citations.bib
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@ @MISC{Caporaso-Bolyen-2024
year = 2024,
howpublished = "https://develop.qiime2.org"
}

@article{ASAP,
author = {Darrin Lemmer and others},
title = {The Amplicon Sequencing Analysis Pipeline (ASAP)},
year = {2015},
url = {https://github.com/TGenNorth/ASAP}
}
29 changes: 16 additions & 13 deletions q2_asap/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
# Please retain the plugin-level citation of 'Caporaso-Bolyen-2024'
# as attribution of the use of this template, in addition to any citations
# you add.
citations=[citations['Caporaso-Bolyen-2024']]
citations=[citations['Caporaso-Bolyen-2024'], citations['ASAP']]
)


Expand All @@ -55,22 +55,25 @@
],
input_descriptions={'sequences': 'The amplicon sequences to be analyzed'},
parameter_descriptions={
'name': 'Str',
'depth': 'Int',
'breadth': 'Float',
'min_base_qual': 'Int',
'consensus_proportion': 'Float',
'fill_gaps': 'Str',
'aligner': 'Str',
'aligner_args': 'Str'},
'name': 'Name of ASAP run',
'depth': 'minimum read depth required to consider a position covered. [default: 100]',
'breadth': 'minimum breadth of coverage required to consider an amplicon as present. [default: 0.8]',
'min_base_qual': 'what is the minimum base quality score (BQS) to use a position (Phred scale, i.e. 10=90, 20=99, 30=99.9 accuracy',
'consensus_proportion': 'minimum proportion required to call at base at that position, else 'N'. [default: 0.8]',
'fill_gaps': 'fill no coverage gaps in the consensus sequence [default: False], optional parameter is the character to use for filling [defaut: n]',
'aligner': 'aligner to use for read mapping, supports bowtie2, novoalign, and bwa. [default: bowtie2]',
'aligner_args': "additional arguments to pass to the aligner, enclosed in ''."},
output_descriptions={
'output_bams': 'SampleData[AlignmentMap]',
'bwa_index': 'BWAIndex',
'asap_xmls': 'ASAPXMLOutputDirFmt'
'output_bams': 'directory of bam files',
'bwa_index': 'directory of files that hold BWA indices used to align sequencing reads to the reference genome',
'asap_xmls': 'directory of XML files with complete details for each assay against each sample. \
These details include number of reads aligning to each target, any SNPs found above a user-defined threshold, \
and the nucleotide distribution at each of these SNP positions. For ROI assays, the output includes the sequence \
distribution at each of the regions of interest -- both the DNA sequences and translated into amino acid sequences.'
},
name='analyzeAmplicons',
description=(""),
citations=[]
citations=[citations['ASAP']]
)

plugin.register_formats( ASAPHTMLOutputDirFmt, ASAPXMLOutputDirFmt)
Expand Down

0 comments on commit e98c6ce

Please sign in to comment.