OpenOmics · chenv3 · Nov 20, 2023 · Oct 11, 2023 · Oct 11, 2023 · Oct 11, 2023
diff --git a/cell-seek b/cell-seek
@@ -245,9 +245,10 @@ def parsed_arguments(name, description):
                 [--dry-run] [--job-name JOB_NAME] [--mode {{slurm,local}}] \\
                 [--sif-cache SIF_CACHE] [--singularity-cache SINGULARITY_CACHE] \\
                 [--silent] [--threads THREADS] [--tmp-dir TMP_DIR] \\
-                [--libraries LIBRARIES] [--features FEATURES] \\
-                [--cmo-reference CMOREFERENCE] [--cmo-sample CMOSAMPLE] \\
-                [--exclude-introns] \\
+                [--aggregate {{mapped,none}}] [--libraries LIBRARIES] \\
+                [--features FEATURES] [--cmo-reference CMOREFERENCE] \\
+                [--cmo-sample CMOSAMPLE] [--exclude-introns] [--filter FILTER] \\
+                [--create-bam] \\
                 --input INPUT [INPUT ...] \\
                 --output OUTPUT \\
                 --version {{gex, ...}} \\
@@ -288,10 +289,21 @@ def parsed_arguments(name, description):
                                 options: hg38, mm10.
                                   Example: --genome hg38
         {3}{4}Analysis options:{5}
+          --aggregate {{mapped,none}}
+                                Cell Ranger aggregate. This option defines the
+                                normalization mode that should be used. Mapped is what
+                                Cell Ranger would run by default, which subsamples reads
+                                from higher depth samples until each library type has an
+                                equal number of reads per cell that are confidently mapped.
+                                None means to not normalize at all. If this flag is not
+                                used then aggregate will not be run. To run Cell Ranger
+                                aggregate, please select one of the following options:
+                                mapped, none.
+                                  Example: --aggregate mapped
           --libraries LIBRARIES
                                 Libraries file. A CSV file containing information about
-                                each library.  This file is used in feature barcode (cite), 
-                                multi, and multiome analysis.It contains each sample's 
+                                each library.  This file is used in feature barcode (cite),
+                                multi, and multiome analysis. It contains each sample's
                                 name, flowcell, demultiplexed name, and library type.
                                   Here is an example libraries.csv file:
                                     Name,Flowcell,Sample,Type
@@ -359,7 +371,7 @@ def parsed_arguments(name, description):
                                       not contain whitespace.
                                     • sequence: Nucleotide barcode sequence associated
                                       with this hashtag.
-                                    • feature_type: Type of the feature. This should always be 
+                                    • feature_type: Type of the feature. This should always be
                                       multiplexing capture.
                                     • read: Specifies which RNA sequencing read contains
                                       the Feature Barcode sequence. Must be R1 or R2, but
@@ -386,15 +398,50 @@ def parsed_arguments(name, description):
                                     • sample_id: Unique sample ID for this hashtagged sample.
                                       Must not contain, whitespace, quote or comma characters.
                                       Each sample ID must be unique.
-                                    • cmo_ids: Unique CMO ID(s) that the sample is hashtagged 
+                                    • cmo_ids: Unique CMO ID(s) that the sample is hashtagged
                                       with. Must match either entries in cmo_reference.csv file
                                       or 10x CMO IDs.
                                   Example: --cmo-sample cmo_sample.csv
           --exclude-introns
-                                Exclude introns from the count alignment. This flag is 
-                                only applicable when dealing with gene expression related 
+                                Exclude introns from the count alignment. This flag is
+                                only applicable when dealing with gene expression related
                                 data.
                                   Example: --exclude-introns
+          --filter FILTER
+                                Filter threshold file. A CSV file containing the different
+                                thresholds to be applied for individual samples within the
+                                project during the QC analysis. The file should contain a
+                                header row with Sample as the column name for the sample IDs,
+                                and the name of each metric that will be filtered along with
+                                whether it is the high or low threshold for that metric. Each
+                                row then lists the manual thresholds to apply to one sample.
+                                If no file is provided, then the default thresholds will be
+                                used. If a cell is left blank for a sample, then that sample
+                                will not be filtered based on that criterion.
+                                This flag is currently only applicable when dealing with GEX
+                                projects.
+                                  Here is an example filter.csv file:
+                                    Sample,nFeature_RNA_low,nFeature_RNA_high,percent.mito_high
+                                    sample1,500,6000,15
+                                    sample2,500,6000,5
+                                    sample4,500,6000,5
+                                  where:
+                                    • Sample: Unique sample ID that should match the sample name
+                                      used for Cell Ranger count.
+                                    • nFeature_RNA_low,nFeature_RNA_high,percent.mito_high: Example
+                                      entries that can be used for manual thresholding. The column
+                                      names need to be formatted as metadataname_high/low. Entries
+                                      that end with high will be treated as the upper threshold.
+                                      Entries that end with low will be treated as the lower
+                                      threshold. Valid metadata names include nCount_RNA,
+                                      nFeature_RNA, and percent.mito.
+                                  Example: --filter filter.csv
+          --create-bam
+                                Create bam files. By default the no-bam flag is used when running
+                                Cell Ranger. Use this flag to ensure that a bam file is created for
+                                each sample during analysis. This flag is only applicable when
+                                dealing with gene expression related data.
+                                  Example: --create-bam
 
         {3}{4}Orchestration options:{5}
           --mode {{slurm,local}}
@@ -537,7 +584,7 @@ def parsed_arguments(name, description):
         '--version',
         type = str.lower,
         required = True,
-        default = "slurm",
+        default = "gex",
         choices = ['gex', 'cite', 'multi', 'vdj', 'atac', 'multiome'],
         help = argparse.SUPPRESS
     )
@@ -608,6 +655,34 @@ def parsed_arguments(name, description):
         help = argparse.SUPPRESS
     )
 
+    # How to run Cell Ranger aggregate
+    subparser_run.add_argument(
+        '--aggregate',
+        type = str.lower,
+        required = False,
+        default = "",
+        choices = ['none', 'mapped'],
+        help = argparse.SUPPRESS
+    )
+
+    # Thresholds to use for filtering in QC Analysis
+    subparser_run.add_argument(
+        '--filter',
+        # Check if the file exists and if it is readable
+        type = lambda file: permissions(parser, file, os.R_OK),
+        required = False,
+        help = argparse.SUPPRESS
+    )
+
+    # Create BAM file during run
+    subparser_run.add_argument(
+        '--create-bam',
+        action = 'store_true',
+        required = False,
+        default = False,
+        help = argparse.SUPPRESS
+    )
+
     # Orchestration Options
     # Execution Method, run locally
     # on a compute node or submit to

diff --git a/config/cluster.json b/config/cluster.json
@@ -15,5 +15,10 @@
         "threads": "16",
         "mem": "150g",
         "time": "2-00:00:00"
+    },
+    "seuratQC": {
+	"threads": "8",
+	"mem": "150g",
+	"time": "1-00:00:00"
     }
 }