manifest

#spatialGE.Preprocessing
#Tue June 25 2024
JVMLevel=
LSID=urn:lsid:genepattern.org:module.analysis:00461:999999999
author=Edwin Huang, Ted Liefeld, Thorin Tabor, Michael Reich;UCSD - Mesirov Lab
categories=spatial transcriptomics
#commandLine when run with source in Docker container:
commandLine=Rscript /spatialGE/spatialge_preprocessing_wrapper.R <input.data.archive> <input.clinical.data> <verbose> <transform.data> <output.filename> <pseudobulk> <pseudobulk.max.var.genes> <pseudobulk.plot.meta> <pseudobulk.heatmap.num.displayed.genes> <distribution.plot.meta> <spot.min.reads> <spot.min.genes> <spot.max.reads> <spot.max.genes> <transform.scale.f> <transform.num.regression.genes> <transform.min.spots.or.cells> <spot.min.percent> <spot.max.percent> <gene.min.reads> <gene.max.reads> <gene.min.spots> <gene.max.spots> <gene.min.percent> <gene.max.percent> <filter.samples> <rm.tissue> <rm.spots> <rm.genes> <rm.genes.regex> <spot.percentage.genes.regex> <filter.data>

job.docker.image=genepattern/spatialgp.preprocessing:0.9
src.repo=https://github.com/genepattern/spatialGE.Preprocessing
cpuType=any
description=The package spatialGE provides a collection of tools for the visualization of gene expression from spatially-resolved transcriptomic experiments. The data input methods have been designed so that any data can be analyzed as long as it contains gene expression counts per region of interest (ROI), spot, or cell, and the spatial coordinates of those ROIs, spots, or cells, as generated by platforms such as GeoMx, Visium, and CosMx-SMI. The spatialGE.Preprocessing module performs initial data ingestion, filtering, transformation, and pseudobulk operations to prepare data for further processing. The order of operations is: ingestion, filter, pseudobulk, transform.
documentationUrl=https://genepattern.github.io/spatialGE.Preprocessing/v1/
fileFormat=rds
language=R
name=spatialGE.Preprocessing
os=any
p1_MODE=IN
p1_TYPE=FILE
p1_default_value=
p1_description=Input data archive in gz format. Should match directory structure as defined by spatialGE as described <a href="https://fridleylab.github.io/spatialGE/reference/STlist.html">here</a>.
p1_fileFormat=gz
p1_flag=
p1_name=input.data.archive
p1_numValues=0..1
p1_optional=
p1_prefix=
p1_prefix_when_specified=-a 
p1_type=java.io.File
p1_value=

p2_MODE=
p2_TYPE=TEXT
p2_default_value=False
p2_description=Output additional files including summarized STList, distribution_plots before and after filtering, and pseudobulk PCA plot and heatmap.
p2_fileFormat=
p2_flag=
p2_name=verbose
p2_numValues=0..1
p2_optional=
p2_prefix=
p2_prefix_when_specified=-c 
p2_type=java.lang.String
p2_value=False\=False;True\=True

p3_MODE=
p3_TYPE=TEXT
p3_default_value=Log
p3_description=One of None, Log, or SCT. If Log, log-normalization is performed. If SCT, the <a href="https://rdrr.io/pkg/sctransform/man/vst.html">SCTransform</a> method is applied. SCT applies a variance-stabilizing transformation to UMI count data using a regularized negative binomial regression model.
p3_fileFormat=
p3_flag=
p3_name=transform.data
p3_numValues=0..1
p3_optional=
p3_prefix=
p3_prefix_when_specified=-d 
p3_type=java.lang.String
p3_value=None\=None;Log\=Log;SCT\=SCT

p4_MODE=
p4_TYPE=TEXT
p4_default_value=<input.data.archive_basename>
p4_description=The basename to use for the output file (no need to add a file extension at the end).
p4_fileFormat=
p4_flag=
p4_name=output.filename
p4_numValues=0..1
p4_optional=
p4_prefix=
p4_prefix_when_specified=-e 
p4_type=java.lang.String
p4_value=

p5_MODE=
p5_TYPE=TEXT
p5_default_value=False
p5_description=Perform pseudobulk analysis, which creates (pseudo) bulk RNA-seq data sets by combining all counts from each sample, then log-transforms the pseudobulk counts and performs PCA. Note that spatial coordinate information is not considered here; this is intended only as an exploratory analysis.
p5_fileFormat=
p5_flag=
p5_name=pseudobulk
p5_numValues=0..1
p5_optional=
p5_prefix=
p5_prefix_when_specified=-f 
p5_type=java.lang.String
p5_value=False\=False;True\=True

p6_MODE=
p6_TYPE=TEXT
p6_default_value=5000
p6_description=The number of most variable genes (standard deviation) to use in pseudobulk analysis.
p6_fileFormat=
p6_flag=
p6_name=pseudobulk.max.var.genes
p6_numValues=0..1
p6_optional=on
p6_prefix=
p6_prefix_when_specified=-g 
p6_type=java.lang.Integer
p6_value=

p7_MODE=
p7_TYPE=TEXT
p7_default_value=patient_id
p7_description=A string indicating the name of the variable in the sample metadata to color points in the PCA plot.
p7_fileFormat=
p7_flag=
p7_name=pseudobulk.plot.meta
p7_numValues=0..1
p7_optional=
p7_prefix=
p7_prefix_when_specified=-F 
p7_type=java.lang.String
p7_value=

# pseudobulk.heatmap.num.displayed.genes: integer passed with the "-i " prefix.
# p8_flag is left empty, as for every other parameter in this manifest; it
# previously held "-o", which is the prefix assigned to transform.scale.f.
p8_MODE=
p8_TYPE=TEXT
p8_default_value=30
p8_description=The number of genes to display in the pseudobulk heatmap, selected based on decreasing order of standard deviation across samples.
p8_fileFormat=
p8_flag=
p8_name=pseudobulk.heatmap.num.displayed.genes
p8_numValues=0..1
p8_optional=
p8_prefix=
p8_prefix_when_specified=-i 
p8_type=java.lang.Integer
p8_value=

# distribution.plot.meta: names spatial metadata variables (default
# "total_counts"), so it is declared as a string rather than an integer.
p9_MODE=
p9_TYPE=TEXT
p9_default_value=total_counts
p9_description=Vector of variables in x@spatial_meta to plot distributions. If 'total_counts', the function plots the counts per spot/cell. If 'total_genes', the function plots the number of genes per spot/cell.
p9_fileFormat=
p9_flag=
p9_name=distribution.plot.meta
p9_numValues=0..1
p9_optional=
p9_prefix=
p9_prefix_when_specified=-j 
p9_type=java.lang.String
p9_value=

p10_MODE=
p10_TYPE=TEXT
p10_default_value=5000
p10_description=The minimum number of total reads for a spot to be retained
p10_fileFormat=
p10_flag=
p10_name=spot.min.reads
p10_numValues=0..1
p10_optional=on
p10_prefix=
p10_prefix_when_specified=-k 
p10_type=java.lang.Integer
p10_value=

p11_MODE=
p11_TYPE=TEXT
p11_default_value=1000
p11_description=The minimum number of non-zero counts for a spot to be retained.
p11_fileFormat=
p11_flag=
p11_name=spot.min.genes
p11_numValues=0..1
p11_optional=on
p11_prefix=
p11_prefix_when_specified=-l 
p11_type=java.lang.Integer
p11_value=

p12_MODE=
p12_TYPE=TEXT
p12_default_value=150000
p12_description=The maximum number of total reads for a spot to be retained
p12_fileFormat=
p12_flag=
p12_name=spot.max.reads
p12_numValues=0..1
p12_optional=on
p12_prefix=
p12_prefix_when_specified=-m 
p12_type=java.lang.Integer
p12_value=

p13_MODE=
p13_TYPE=TEXT
p13_default_value=
p13_description=The maximum number of non-zero counts for a spot to be retained.
p13_fileFormat=
p13_flag=
p13_name=spot.max.genes
p13_numValues=0..1
p13_optional=on
p13_prefix=
p13_prefix_when_specified=-n 
p13_type=java.lang.Integer
p13_value=

p14_MODE=
p14_TYPE=TEXT
p14_default_value=10000
p14_description=The scale factor used in logarithmic transformation.
p14_fileFormat=
p14_flag=
p14_name=transform.scale.f
p14_numValues=0..1
p14_optional=on
p14_prefix=
p14_prefix_when_specified=-o 
p14_type=java.lang.Integer
p14_value=

p15_MODE=
p15_TYPE=TEXT
p15_default_value=3000
p15_description=The number of genes to be used in the regression model during SCTransform. The function sctransform::vst makes a random gene selection based on this number.
p15_fileFormat=
p15_flag=
p15_name=transform.num.regression.genes
p15_numValues=0..1
p15_optional=on
p15_prefix=
p15_prefix_when_specified=-p 
p15_type=java.lang.Integer
p15_value=

p16_MODE=
p16_TYPE=TEXT
p16_default_value=5
p16_description=The minimum number of spots/cells to be used in the regression model fit by sctransform::vst.
p16_fileFormat=
p16_flag=
p16_name=transform.min.spots.or.cells
p16_numValues=0..1
p16_optional=on
p16_prefix=
p16_prefix_when_specified=-q 
p16_type=java.lang.Integer
p16_value=

p17_MODE=
p17_TYPE=TEXT
p17_default_value=0
p17_description=The minimum percentage of counts for features matching spot.percentage.genes.regex for a spot to be retained.
p17_fileFormat=
p17_flag=
p17_name=spot.min.percent
p17_numValues=0..1
p17_optional=on
p17_prefix=
p17_prefix_when_specified=-r 
p17_type=java.lang.Integer
p17_value=

p18_MODE=
p18_TYPE=TEXT
p18_default_value=
p18_description=The maximum percentage of counts for features defined by spot_pct_expr for a spot to be retained.
p18_fileFormat=
p18_flag=
p18_name=spot.max.percent
p18_numValues=0..1
p18_optional=on
p18_prefix=
p18_prefix_when_specified=-s 
p18_type=java.lang.Integer
p18_value=

p19_MODE=
p19_TYPE=TEXT
p19_default_value=0
p19_description=The minimum number of total reads for a gene to be retained.
p19_fileFormat=
p19_flag=
p19_name=gene.min.reads
p19_numValues=0..1
p19_optional=on
p19_prefix=
p19_prefix_when_specified=-t 
p19_type=java.lang.Integer
p19_value=

p20_MODE=
p20_TYPE=TEXT
p20_default_value=
p20_description=The maximum number of total reads for a gene to be retained.
p20_fileFormat=
p20_flag=
p20_name=gene.max.reads
p20_numValues=0..1
p20_optional=on
p20_prefix=
p20_prefix_when_specified=-u 
p20_type=java.lang.Integer
p20_value=

p21_MODE=
p21_TYPE=TEXT
p21_default_value=0
p21_description=The minimum number of spots with non-zero counts for a gene to be retained.
p21_fileFormat=
p21_flag=
p21_name=gene.min.spots
p21_numValues=0..1
p21_optional=on
p21_prefix=
p21_prefix_when_specified=-v 
p21_type=java.lang.Integer
p21_value=

p22_MODE=
p22_TYPE=TEXT
p22_default_value=
p22_description=The maximum number of spots with non-zero counts for a gene to be retained.
p22_fileFormat=
p22_flag=
p22_name=gene.max.spots
p22_numValues=0..1
p22_optional=on
p22_prefix=
p22_prefix_when_specified=-w 
p22_type=java.lang.Integer
p22_value=

p23_MODE=
p23_TYPE=TEXT
p23_default_value=0
p23_description=The minimum percentage of spots with non-zero counts for a gene to be retained.
p23_fileFormat=
p23_flag=
p23_name=gene.min.percent
p23_numValues=0..1
p23_optional=on
p23_prefix=
p23_prefix_when_specified=-x 
p23_type=java.lang.Integer
p23_value=

p24_MODE=
p24_TYPE=TEXT
p24_default_value=
p24_description=The maximum percentage of spots with non-zero counts for a gene to be retained.
p24_fileFormat=
p24_flag=
p24_name=gene.max.percent
p24_numValues=0..1
p24_optional=on
p24_prefix=
p24_prefix_when_specified=-y 
p24_type=java.lang.Integer
p24_value=

p25_MODE=
p25_TYPE=TEXT
p25_default_value=
p25_description=Samples (as in names(x@counts)) on which to perform filtering.
p25_fileFormat=
p25_flag=
p25_name=filter.samples
p25_numValues=0..1
p25_optional=on
p25_prefix=
p25_prefix_when_specified=-z 
p25_type=java.lang.String
p25_value=

p26_MODE=
p26_TYPE=TEXT
p26_default_value=
p26_description=Sample (as in names(x@counts)) to remove from STlist. Removes samples in x@counts, x@tr_counts, x@spatial_meta, x@gene_meta, and x@sample_meta.
p26_fileFormat=
p26_flag=
p26_name=rm.tissue
p26_numValues=0..1
p26_optional=on
p26_prefix=
p26_prefix_when_specified=-A 
p26_type=java.lang.String
p26_value=

p27_MODE=
p27_TYPE=TEXT
p27_default_value=
p27_description=Vector of spot/cell IDs to remove. Removes spots/cells in x@counts, x@tr_counts, and x@spatial_meta.
p27_fileFormat=
p27_flag=
p27_name=rm.spots
p27_numValues=0..1
p27_optional=on
p27_prefix=
p27_prefix_when_specified=-B 
p27_type=java.lang.String
p27_value=

p28_MODE=
p28_TYPE=TEXT
p28_default_value=
p28_description=Vector of gene names to remove from STlist. Removes genes in x@counts, x@tr_counts, and x@gene_meta
p28_fileFormat=
p28_flag=
p28_name=rm.genes
p28_numValues=0..1
p28_optional=on
p28_prefix=
p28_prefix_when_specified=-C 
p28_type=java.lang.String
p28_value=

p29_MODE=
p29_TYPE=TEXT
p29_default_value=
p29_description=A regular expression that matches genes to remove. Removes genes in x@counts, x@tr_counts, and x@gene_meta.
p29_fileFormat=
p29_flag=
p29_name=rm.genes.regex
p29_numValues=0..1
p29_optional=on
p29_prefix=
p29_prefix_when_specified=-D 
p29_type=java.lang.String
p29_value=

# spot.percentage.genes.regex: a regular expression (e.g. '^MT-'), so it is
# declared as a string rather than an integer.
p30_MODE=
p30_TYPE=TEXT
p30_default_value=
p30_description=A regular expression selecting the genes used with spot.min.percent and spot.max.percent. By default '^MT-'.
p30_fileFormat=
p30_flag=
p30_name=spot.percentage.genes.regex
p30_numValues=0..1
p30_optional=on
p30_prefix=
p30_prefix_when_specified=-E 
p30_type=java.lang.String
p30_value=

p31_MODE=IN
p31_TYPE=FILE
p31_default_value=
p31_description=Metadata associated with each sample in a csv file. The sample names are in the first column, and they must match the names of the folders containing the data.
p31_fileFormat=tsv;csv;txt
p31_flag=
p31_name=input.clinical.data
p31_numValues=0..1
p31_optional=
p31_prefix=
p31_prefix_when_specified=-b 
p31_type=java.io.File
p31_value=

# filter.data: True/False drop-down, defaulting to True. The choice list uses
# the same value\=label form as the other boolean parameters in this manifest.
p32_MODE=
p32_TYPE=TEXT
p32_default_value=True
p32_description=Whether to filter the data (True or False).
p32_fileFormat=
p32_flag=
p32_name=filter.data
p32_numValues=0..1
p32_optional=
p32_prefix=
p32_prefix_when_specified=-G 
p32_type=java.lang.String
p32_value=True\=True;False\=False

privacy=public
quality=production
taskDoc=
taskType=spatial transcriptomics
userid=bhill@broadinstitute.org
version=Strip .tar from output filenames if it is part of the input filename. Stop putting .rds on the rds file twice.

job.memory=8Gb