citations and clean-up

TGenNorth · Jul 5, 2024 · e98c6ce · e98c6ce
1 parent 6f6183c
commit e98c6ce
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 41 deletions.
diff --git a/q2_asap/_methods.py b/q2_asap/_methods.py
diff --git a/q2_asap/analyzeAmplicons.py b/q2_asap/analyzeAmplicons.py
@@ -11,32 +11,28 @@
 import re
 import time
 
-
+# Check whether the job with the given job ID is still listed by squeue.
+# Returns True if the job is still running; returns False otherwise,
+# including when the squeue call itself fails.
 def is_job_running(job_id):
-    """
-    Check if the Slurm job with the given job ID is still running.
-    Returns True if the job is running, False otherwise.
-    """
     try:
         result = subprocess.run(['squeue', '--job', str(job_id)], capture_output=True, text=True)
         return str(job_id) in result.stdout
     except Exception as e:
         print(f"Error checking job status: {e}")
         return False
 
+# Pause the script until the job with the given job ID has completed,
+# re-checking whether it is still running every check_interval seconds.
 def wait_for_job_completion(job_id, check_interval=10):
-    """
-    Pause the script until the Slurm job with the given job ID is completed.
-    :param job_id: The Slurm job ID to check.
-    :param check_interval: Time interval (in seconds) between status checks.
-    """
     while is_job_running(job_id):
         print(f"Job {job_id} is still running. Checking again in {check_interval} seconds...")
         time.sleep(check_interval)
 
     print(f"Job {job_id} has completed.")
 
 
+# Run ASAP analyzeAmplicons with the specified parameters: build the command
+# and execute it within a conda environment, wait for the submitted job to
+# complete, then organize the output files into their designated output directories.
 def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: str=None, depth: int=10, breadth: float=0.9,
                         min_base_qual: int=20, consensus_proportion: float=0.8, fill_gaps: str="n", aligner: str="bwa", aligner_args: str='"-k 51 -L 20"'
           ) -> (
@@ -59,23 +55,23 @@ def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: s
 
     # combine conda environment and command TODO: fix conda environment
     shell_script= f"""
-    source /home/cjohnson/anaconda3/etc/profile.d/conda.sh
-    conda activate /home/dlemmer/.conda/envs/asap
-    {command}
+    conda run -p /home/dlemmer/.conda/envs/asap {command}
     """
 
     # call asap command
     result = subprocess.run(['bash', '-c', shell_script], capture_output=True, text=True)
-
+    # capture stdout
     output = result.stdout
 
+    # find the job ID in the stdout
     job_id_match = re.findall('(?<=final job id is: )\d+', output)[0]
 
+    # wait for the job to complete
     wait_for_job_completion(job_id_match)
     asap_output_dir = os.path.join(temp_dir, "asap_output")
 
     # move output into artifact directories by looping through files, getting the file path
-    # and moving the file to correct directory TODO: search through multiple directories
+    # and moving the file to the correct directory
     for file_name in os.listdir(asap_output_dir):
         file_path = os.path.join(asap_output_dir, file_name)
         if re.search(r'\.(amb|ann|bwt|pac|sa|fasta)$', file_name):

diff --git a/q2_asap/citations.bib b/q2_asap/citations.bib
@@ -4,3 +4,10 @@ @MISC{Caporaso-Bolyen-2024
   year         =  2024,
   howpublished = "https://develop.qiime2.org"
 }
+
+@article{ASAP,
+  author = {Darrin Lemmer and others},
+  title = {The Amplicon Sequencing Analysis Pipeline (ASAP)},
+  year = {2015},
+  url = {https://github.com/TGenNorth/ASAP}
+}
diff --git a/q2_asap/plugin_setup.py b/q2_asap/plugin_setup.py
@@ -32,7 +32,7 @@
     # Please retain the plugin-level citation of 'Caporaso-Bolyen-2024'
     # as attribution of the use of this template, in addition to any citations
     # you add.
-    citations=[citations['Caporaso-Bolyen-2024']]
+    citations=[citations['Caporaso-Bolyen-2024'], citations['ASAP']]
 )
 
 
@@ -55,22 +55,25 @@
              ],
     input_descriptions={'sequences': 'The amplicon sequences to be analyzed'},
     parameter_descriptions={
-                'name': 'Str',
-                'depth': 'Int',
-                'breadth': 'Float',
-                'min_base_qual': 'Int',
-                'consensus_proportion': 'Float',
-                'fill_gaps': 'Str',
-                'aligner': 'Str',
-                'aligner_args': 'Str'},
+                'name': 'Name of ASAP run',
+                'depth': 'minimum read depth required to consider a position covered. [default: 10]',
+                'breadth': 'minimum breadth of coverage required to consider an amplicon as present. [default: 0.9]',
+                'min_base_qual': 'minimum base quality score (BQS) required to use a position (Phred scale, i.e. 10=90%, 20=99%, 30=99.9% accuracy)',
+                'consensus_proportion': "minimum proportion required to call a base at that position, else 'N'. [default: 0.8]",
+                'fill_gaps': 'fill no-coverage gaps in the consensus sequence [default: False], optional parameter is the character to use for filling [default: n]',
+                'aligner': 'aligner to use for read mapping, supports bowtie2, novoalign, and bwa. [default: bwa]',
+                'aligner_args': "additional arguments to pass to the aligner, enclosed in ''."},
     output_descriptions={
-        'output_bams': 'SampleData[AlignmentMap]',
-        'bwa_index': 'BWAIndex',
-        'asap_xmls': 'ASAPXMLOutputDirFmt'
+        'output_bams': 'directory of bam files',
+        'bwa_index': 'directory of files that hold BWA indices used to align sequencing reads to the reference genome',
+        'asap_xmls': 'directory of XML files with complete details for each assay against each sample. \
+                        These details include number of reads aligning to each target, any SNPs found above a user-defined threshold, \
+                        and the nucleotide distribution at each of these SNP positions. For ROI assays, the output includes the sequence \
+                        distribution at each of the regions of interest -- both the DNA sequences and translated into amino acid sequences.'
         },
     name='analyzeAmplicons',
     description=(""),
-    citations=[]
+    citations=[citations['ASAP']]
 )
 
 plugin.register_formats( ASAPHTMLOutputDirFmt, ASAPXMLOutputDirFmt)