generated from genepattern/ExampleModule
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanifest
476 lines (440 loc) · 12.6 KB
/
manifest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
#spatialGE.Preprocessing
#Tue June 25 2024
JVMLevel=
LSID=urn:lsid:genepattern.org:module.analysis:00461:999999999
author=Edwin Huang, Ted Liefeld, Thorin Tabor, Michael Reich;UCSD - Mesirov Lab
categories=spatial transcriptomics
#commandLine when run with source in Docker container:
commandLine=Rscript /spatialGE/spatialge_preprocessing_wrapper.R <input.data.archive> <input.clinical.data> <verbose> <transform.data> <output.filename> <pseudobulk> <pseudobulk.max.var.genes> <pseudobulk.plot.meta> <pseudobulk.heatmap.num.displayed.genes> <distribution.plot.meta> <spot.min.reads> <spot.min.genes> <spot.max.reads> <spot.max.genes> <transform.scale.f> <transform.num.regression.genes> <transform.min.spots.or.cells> <spot.min.percent> <spot.max.percent> <gene.min.reads> <gene.max.reads> <gene.min.spots> <gene.max.spots> <gene.min.percent> <gene.max.percent> <filter.samples> <rm.tissue> <rm.spots> <rm.genes> <rm.genes.regex> <spot.percentage.genes.regex> <filter.data>
job.docker.image=genepattern/spatialgp.preprocessing:0.9
src.repo=https://github.com/genepattern/spatialGE.Preprocessing
cpuType=any
description=The package spatialGE provides a collection of tools for the visualization of gene expression from spatially-resolved transcriptomic experiments. The data input methods have been designed so that any data can be analyzed as long as it contains gene expression counts per region of interest (ROI), spot, or cell, and the spatial coordinates of those ROIs, spots, or cells, as it is generated in platforms such as GeoMx, Visium, and CosMx-SMI. The spatialGE.Preprocessing module performs initial data ingestion, filtering, transformation, and pseudobulk operations to prepare data for further processing. The order of operations is: ingestion, filter, pseudobulk, transform.
documentationUrl=https://genepattern.github.io/spatialGE.Preprocessing/v1/
fileFormat=rds
language=R
name=spatialGE.Preprocessing
os=any
p1_MODE=IN
p1_TYPE=FILE
p1_default_value=
p1_description=Input data archive in gz format. Should match directory structure as defined by spatialGE as described <a href="https://fridleylab.github.io/spatialGE/reference/STlist.html">here</a>.
p1_fileFormat=gz
p1_flag=
p1_name=input.data.archive
p1_numValues=0..1
p1_optional=
p1_prefix=
p1_prefix_when_specified=-a
p1_type=java.io.File
p1_value=
p2_MODE=
p2_TYPE=TEXT
p2_default_value=False
p2_description=Output additional files including summarized STList, distribution_plots before and after filtering, and pseudobulk PCA plot and heatmap.
p2_fileFormat=
p2_flag=
p2_name=verbose
p2_numValues=0..1
p2_optional=
p2_prefix=
p2_prefix_when_specified=-c
p2_type=java.lang.String
p2_value=False\=False;True\=True
p3_MODE=
p3_TYPE=TEXT
p3_default_value=Log
p3_description=None, Log, or SCT. If Log, log-normalization is performed. If SCT, then the <a href="https://rdrr.io/pkg/sctransform/man/vst.html">SCTransform</a> method is applied. SCT is a variance stabilizing transformation to UMI count data using a regularized Negative Binomial regression model.
p3_fileFormat=
p3_flag=
p3_name=transform.data
p3_numValues=0..1
p3_optional=
p3_prefix=
p3_prefix_when_specified=-d
p3_type=java.lang.String
p3_value=None\=None;Log\=Log;SCT\=SCT
p4_MODE=
p4_TYPE=TEXT
p4_default_value=<input.data.archive_basename>
p4_description=The basename to use for output file (no need to add ".txt" at the end)
p4_fileFormat=
p4_flag=
p4_name=output.filename
p4_numValues=0..1
p4_optional=
p4_prefix=
p4_prefix_when_specified=-e
p4_type=java.lang.String
p4_value=
p5_MODE=
p5_TYPE=TEXT
p5_default_value=False
p5_description=Perform pseudobulk to create (pseudo) bulk RNAseq data sets by combining all counts from each sample. Then log transform the pseudo bulk and perform PCA. Note that the spatial coordinate information is not considered here; this is intended only as an exploratory analysis.
p5_fileFormat=
p5_flag=
p5_name=pseudobulk
p5_numValues=0..1
p5_optional=
p5_prefix=
p5_prefix_when_specified=-f
p5_type=java.lang.String
p5_value=False\=False;True\=True
p6_MODE=
p6_TYPE=TEXT
p6_default_value=5000
p6_description=The number of most variable genes (standard deviation) to use in pseudobulk analysis.
p6_fileFormat=
p6_flag=
p6_name=pseudobulk.max.var.genes
p6_numValues=0..1
p6_optional=on
p6_prefix=
p6_prefix_when_specified=-g
p6_type=java.lang.Integer
p6_value=
p7_MODE=
p7_TYPE=TEXT
p7_default_value=patient_id
p7_description=A string indicating the name of the variable in the sample metadata to color points in the PCA plot.
p7_fileFormat=
p7_flag=
p7_name=pseudobulk.plot.meta
p7_numValues=0..1
p7_optional=
p7_prefix=
p7_prefix_when_specified=-F
p7_type=java.lang.String
p7_value=
p8_MODE=
p8_TYPE=TEXT
p8_default_value=30
p8_description=The number of genes to display in the pseudobulk heatmap, selected based on decreasing order of standard deviation across samples.
p8_fileFormat=
p8_flag=-o
p8_name=pseudobulk.heatmap.num.displayed.genes
p8_numValues=0..1
p8_optional=
p8_prefix=
p8_prefix_when_specified=-i
p8_type=java.lang.Integer
p8_value=
p9_MODE=
p9_TYPE=TEXT
p9_default_value=total_counts
p9_description=Vector of variables in x@spatial_meta to plot distributions. If 'total_counts', the function plots the counts per spot/cell. If 'total_genes', the function plots the number of genes per spot/cell.
p9_fileFormat=
p9_flag=
p9_name=distribution.plot.meta
p9_numValues=0..1
p9_optional=
p9_prefix=
p9_prefix_when_specified=-j
p9_type=java.lang.String
p9_value=
p10_MODE=
p10_TYPE=TEXT
p10_default_value=5000
p10_description=The minimum number of total reads for a spot to be retained
p10_fileFormat=
p10_flag=
p10_name=spot.min.reads
p10_numValues=0..1
p10_optional=on
p10_prefix=
p10_prefix_when_specified=-k
p10_type=java.lang.Integer
p10_value=
p11_MODE=
p11_TYPE=TEXT
p11_default_value=1000
p11_description=The minimum number of non-zero counts for a spot to be retained.
p11_fileFormat=
p11_flag=
p11_name=spot.min.genes
p11_numValues=0..1
p11_optional=on
p11_prefix=
p11_prefix_when_specified=-l
p11_type=java.lang.Integer
p11_value=
p12_MODE=
p12_TYPE=TEXT
p12_default_value=150000
p12_description=The maximum number of total reads for a spot to be retained
p12_fileFormat=
p12_flag=
p12_name=spot.max.reads
p12_numValues=0..1
p12_optional=on
p12_prefix=
p12_prefix_when_specified=-m
p12_type=java.lang.Integer
p12_value=
p13_MODE=
p13_TYPE=TEXT
p13_default_value=
p13_description=The maximum number of non-zero counts for a spot to be retained.
p13_fileFormat=
p13_flag=
p13_name=spot.max.genes
p13_numValues=0..1
p13_optional=on
p13_prefix=
p13_prefix_when_specified=-n
p13_type=java.lang.Integer
p13_value=
p14_MODE=
p14_TYPE=TEXT
p14_default_value=10000
p14_description=The scale factor used in logarithmic transformation.
p14_fileFormat=
p14_flag=
p14_name=transform.scale.f
p14_numValues=0..1
p14_optional=on
p14_prefix=
p14_prefix_when_specified=-o
p14_type=java.lang.Integer
p14_value=
p15_MODE=
p15_TYPE=TEXT
p15_default_value=3000
p15_description=The number of genes to be used in the regression model during SCTransform. The function sctransform::vst makes a random gene selection based on this number.
p15_fileFormat=
p15_flag=
p15_name=transform.num.regression.genes
p15_numValues=0..1
p15_optional=on
p15_prefix=
p15_prefix_when_specified=-p
p15_type=java.lang.Integer
p15_value=
p16_MODE=
p16_TYPE=TEXT
p16_default_value=5
p16_description=The minimum number of spots/cells to be used in the regression model fit by sctransform::vst.
p16_fileFormat=
p16_flag=
p16_name=transform.min.spots.or.cells
p16_numValues=0..1
p16_optional=on
p16_prefix=
p16_prefix_when_specified=-q
p16_type=java.lang.Integer
p16_value=
p17_MODE=
p17_TYPE=TEXT
p17_default_value=0
p17_description=The minimum percentage of counts for features defined by spot_pct_expr for a spot to be retained.
p17_fileFormat=
p17_flag=
p17_name=spot.min.percent
p17_numValues=0..1
p17_optional=on
p17_prefix=
p17_prefix_when_specified=-r
p17_type=java.lang.Integer
p17_value=
p18_MODE=
p18_TYPE=TEXT
p18_default_value=
p18_description=The maximum percentage of counts for features defined by spot_pct_expr for a spot to be retained.
p18_fileFormat=
p18_flag=
p18_name=spot.max.percent
p18_numValues=0..1
p18_optional=on
p18_prefix=
p18_prefix_when_specified=-s
p18_type=java.lang.Integer
p18_value=
p19_MODE=
p19_TYPE=TEXT
p19_default_value=0
p19_description=The minimum number of total reads for a gene to be retained.
p19_fileFormat=
p19_flag=
p19_name=gene.min.reads
p19_numValues=0..1
p19_optional=on
p19_prefix=
p19_prefix_when_specified=-t
p19_type=java.lang.Integer
p19_value=
p20_MODE=
p20_TYPE=TEXT
p20_default_value=
p20_description=The maximum number of total reads for a gene to be retained.
p20_fileFormat=
p20_flag=
p20_name=gene.max.reads
p20_numValues=0..1
p20_optional=on
p20_prefix=
p20_prefix_when_specified=-u
p20_type=java.lang.Integer
p20_value=
p21_MODE=
p21_TYPE=TEXT
p21_default_value=0
p21_description=The minimum number of spots with non-zero counts for a gene to be retained.
p21_fileFormat=
p21_flag=
p21_name=gene.min.spots
p21_numValues=0..1
p21_optional=on
p21_prefix=
p21_prefix_when_specified=-v
p21_type=java.lang.Integer
p21_value=
p22_MODE=
p22_TYPE=TEXT
p22_default_value=
p22_description=The maximum number of spots with non-zero counts for a gene to be retained.
p22_fileFormat=
p22_flag=
p22_name=gene.max.spots
p22_numValues=0..1
p22_optional=on
p22_prefix=
p22_prefix_when_specified=-w
p22_type=java.lang.Integer
p22_value=
p23_MODE=
p23_TYPE=TEXT
p23_default_value=0
p23_description=The minimum percentage of spots with non-zero counts for a gene to be retained.
p23_fileFormat=
p23_flag=
p23_name=gene.min.percent
p23_numValues=0..1
p23_optional=on
p23_prefix=
p23_prefix_when_specified=-x
p23_type=java.lang.Integer
p23_value=
p24_MODE=
p24_TYPE=TEXT
p24_default_value=
p24_description=The maximum percentage of spots with non-zero counts for a gene to be retained.
p24_fileFormat=
p24_flag=
p24_name=gene.max.percent
p24_numValues=0..1
p24_optional=on
p24_prefix=
p24_prefix_when_specified=-y
p24_type=java.lang.Integer
p24_value=
p25_MODE=
p25_TYPE=TEXT
p25_default_value=
p25_description=Samples (as in names(x@counts)) on which to perform filtering.
p25_fileFormat=
p25_flag=
p25_name=filter.samples
p25_numValues=0..1
p25_optional=on
p25_prefix=
p25_prefix_when_specified=-z
p25_type=java.lang.String
p25_value=
p26_MODE=
p26_TYPE=TEXT
p26_default_value=
p26_description=Sample (as in names(x@counts)) to remove from STlist. Removes samples in x@counts, x@tr_counts, x@spatial_meta, x@gene_meta, and x@sample_meta.
p26_fileFormat=
p26_flag=
p26_name=rm.tissue
p26_numValues=0..1
p26_optional=on
p26_prefix=
p26_prefix_when_specified=-A
p26_type=java.lang.String
p26_value=
p27_MODE=
p27_TYPE=TEXT
p27_default_value=
p27_description=Vector of spot/cell IDs to remove. Removes spots/cells in x@counts, x@tr_counts, and x@spatial_meta.
p27_fileFormat=
p27_flag=
p27_name=rm.spots
p27_numValues=0..1
p27_optional=on
p27_prefix=
p27_prefix_when_specified=-B
p27_type=java.lang.String
p27_value=
p28_MODE=
p28_TYPE=TEXT
p28_default_value=
p28_description=Vector of gene names to remove from STlist. Removes genes in x@counts, x@tr_counts, and x@gene_meta
p28_fileFormat=
p28_flag=
p28_name=rm.genes
p28_numValues=0..1
p28_optional=on
p28_prefix=
p28_prefix_when_specified=-C
p28_type=java.lang.String
p28_value=
p29_MODE=
p29_TYPE=TEXT
p29_default_value=
p29_description=A regular expression that matches genes to remove. Removes genes in x@counts, x@tr_counts, and x@gene_meta.
p29_fileFormat=
p29_flag=
p29_name=rm.genes.regex
p29_numValues=0..1
p29_optional=on
p29_prefix=
p29_prefix_when_specified=-D
p29_type=java.lang.String
p29_value=
p30_MODE=
p30_TYPE=TEXT
p30_default_value=
p30_description=A regular expression to use with spot_minpct and spot_maxpct. By default '^MT-'.
p30_fileFormat=
p30_flag=
p30_name=spot.percentage.genes.regex
p30_numValues=0..1
p30_optional=on
p30_prefix=
p30_prefix_when_specified=-E
p30_type=java.lang.String
p30_value=
p31_MODE=IN
p31_TYPE=FILE
p31_default_value=
p31_description=Metadata associated with each sample in a csv file. The sample names are in the first column, and they must match the names of the folders containing the data.
p31_fileFormat=tsv;csv;txt
p31_flag=
p31_name=input.clinical.data
p31_numValues=0..1
p31_optional=
p31_prefix=
p31_prefix_when_specified=-b
p31_type=java.io.File
p31_value=
p32_MODE=
p32_TYPE=TEXT
p32_default_value=True
p32_description=Filter data, T/F
p32_fileFormat=
p32_flag=
p32_name=filter.data
p32_numValues=0..1
p32_optional=
p32_prefix=
p32_prefix_when_specified=-G
p32_type=java.lang.String
p32_value=True;False
privacy=public
quality=production
taskDoc=
taskType=spatial transcriptomics
version=Strip .tar from output filenames if it's part of the input filename. Stop putting .rds on the rds file twice.
job.memory=8Gb