generated from kevinrue/snakemake_cellranger_count
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathSnakefile
99 lines (84 loc) · 3.66 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
import os
import warnings
# -- Snakefile basic configuration -- #
report: "report/workflow.rst"
# Include rule files
include: "rules/common.smk"
# Variable declaration
OUTDIR = config["out"]
LOGDIR = config["log"]
# -- Read samples file -- #
try:
samples = pd.read_csv(config["samples"], sep="\t", comment="#").set_index("sample", drop=False)
validate(samples, schema="schemas/samples.schema.yaml")
except FileNotFoundError:
warning(f"ERROR: the samples file ({config['samples']}) does not exist. Please see the README file for details. Quitting now.")
sys.exit(1)
# -- Read units file -- #
try:
units = pd.read_csv(config["units"], dtype=str, sep="\t", comment="#").set_index(["sample"], drop=False)
validate(units, schema="schemas/units.schema.yaml")
except FileNotFoundError:
warning(f"ERROR: the units file ({config['units']}) does not exist. Please see the README file for details. Quitting now.")
sys.exit(1)
# -- Auxiliary functions -- #
def get_resource(rule,resource):
try:
return config["resources"][rule][resource]
except KeyError:
return config["resources"]["default"][resource]
def get_common_peaks(wc):
file = expand("{OUTDIR}/{sample}/cellranger_count/", sample = samples['sample'], OUTDIR=OUTDIR)
file = list(set(file))
return file
def get_cellranger_finish(wc):
file = expand("{OUTDIR}/{sample}/cellranger_count/cellranger.finish", sample = samples['sample'], OUTDIR=OUTDIR)
file = list(set(file))
return file
def get_integration_mgatk(wc):
file = expand("{OUTDIR}/{sample}/mgatk/final/", sample = samples['sample'], OUTDIR=OUTDIR)
file = list(set(file))
return file
def get_integration_amulet(wc):
file = expand("{OUTDIR}/{sample}/amulet/MultipletBarcodes_01.txt", alias = samples['sample'],OUTDIR=OUTDIR)
file = list(set(file))
return file
# Step 3 - Grouping samples by condition
condition_to_samples = samples.groupby('condition')['sample'].apply(list).to_dict()
def get_step4_input(wc):
file = expand("{OUTDIR}/integration/SeuratObjectBis_{condition}.rds", OUTDIR=OUTDIR, condition=samples['condition'])
file = list(set(file))
return file
# Create a rule that decides whether an integrated or merged object could be
# the output. For instance: if harmony = TRUE and exists a merged.rds file;
# then the output is mergedintegration.rds
# -- Final output -- #
def signac_output(wc):
if config["signac"]["enable"]:
if config["signac"]["integrate_samples"]:
file = expand("{OUTDIR}/integration/SeuratObjectBis_{condition}.rds", condition=samples['condition'],OUTDIR=OUTDIR)
file += expand("{OUTDIR}/integration/SeuratObject_Merge.rds", OUTDIR = OUTDIR)
if config["signac"]["individual_analysis"]:
file += expand("{OUTDIR}/{sample}/signac/01_preprocessing_{sample}.html", sample=samples['sample'],OUTDIR=OUTDIR)
else:
file = []
warnings.warn("Signac analysis disabled. Skipping...", category=UserWarning)
file = list(set(file))
return file
rule all:
input:
expand([ #"{OUTDIR}/{sample}/qc/multiqc_report.html",
"{OUTDIR}/{sample}/cellranger_count/cellranger.finish",
"{OUTDIR}/{sample}/mgatk/del/mgatkdel_find.clip.tsv",
"{OUTDIR}/{sample}/mgatk/final/{sample}.rds",
"{OUTDIR}/{sample}/amulet/MultipletSummary.txt"
], sample=samples['sample'], OUTDIR=OUTDIR, condition = samples['condition']),
signac_output
# -- Rule files -- #
#include: "rules/qc.smk"
include: "rules/cellranger.smk"
include: "rules/mgatk.smk"
include: "rules/amulet.smk"
include: "rules/signac.smk"
include: "rules/other.smk"