-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile.RNA
49 lines (43 loc) · 2.16 KB
/
Snakefile.RNA
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from os.path import join
import os
import re
import numpy as np
import pandas as pd
from snakemake.utils import validate
# Check the user-supplied configuration against "conf/snareR.config.yaml"
# before any value is read from it.
validate(config, "conf/snareR.config.yaml")

# Directory the workflow was launched from.
CURRENT_DIR = os.getcwd()

# --- parameters read from the config file ---
FASTQ_DIR = config["fastq_dir"]
# Tab-separated table; its first column becomes the index. The columns
# 'Experiment_ID_Short' and 'Tissue' are used below.
link_table = pd.read_table(config["link_table"], index_col=0)
ASSAY = config["assay_type"]

# Every "<sample>_R1.fastq.gz" found in FASTQ_DIR defines one entry of SAMPLES.
SAMPLES = glob_wildcards(join(FASTQ_DIR, "{sample}_R1.fastq.gz")).sample

# Patterns for the 1st mate and the 2nd mate using the 'sample' wildcard.
PATTERN_R1 = '{sample}_R1.fastq.gz'
PATTERN_R2 = '{sample}_R2.fastq.gz'

# Sample names: cut each FASTQ sample id at its first "_N" (dropping that and
# everything after it), then keep the sorted, de-duplicated result.
# Alternative scheme, currently disabled: cut at the first "." instead,
# i.e. use the pattern r'\..*'.
SAMPLE_NAME = sorted({re.sub(r'_N.*', '', fastq_sample) for fastq_sample in SAMPLES})

# Tissues of the link-table rows whose 'Experiment_ID_Short' is one of the
# sample names found above, as a sorted list without duplicates.
_rows_in_this_run = link_table['Experiment_ID_Short'].isin(SAMPLE_NAME)
TISSUE = np.unique(np.array(link_table.loc[_rows_in_this_run, 'Tissue'])).tolist()
# functions
def get_sample_wildcards(wildcards):
    """Return the 'tmp/QCs/<sample>.qc.txt' path of every entry in SAMPLES.

    The function has the signature of a Snakemake input function (presumably
    it is used that way by one of the included rule files -- not visible
    here). ``wildcards`` is accepted but never consulted: the result always
    covers all of ``SAMPLES``.
    """
    qc_report_pattern = 'tmp/QCs/{sample}.qc.txt'
    return expand(qc_report_pattern, sample=SAMPLES)
# Aggregate target rule: it has only inputs, so requesting it makes Snakemake
# build every file listed below.
rule all:
    input:
        # One set of targets per FASTQ sample found in FASTQ_DIR.
        expand(join(FASTQ_DIR, '{sample}_R1_filtered.fastq.gz'), sample=SAMPLES),
        expand('tmp/FastQC/{sample}_R1_filtered_fastqc.zip', sample=SAMPLES),
        expand('tmp/dropest_out/{sample}.mtx', sample=SAMPLES),
        expand('tmp/dropest_out/{sample}.genes.tsv', sample=SAMPLES),
        expand('tmp/dropest_out/{sample}.rds', sample=SAMPLES),
        expand('tmp/seurat_obj/{sample}.seurat.filtered.RData', sample=SAMPLES),
        expand('tmp/pagoda_RData/{sample}.filtered.RData', sample=SAMPLES),
        expand('tmp/reports/{sample}_dropEst_QC.html', sample=SAMPLES),
        # Sample-level outputs under <ASSAY>/<tissue>/samples/<sample_name>/.
        # NOTE(review): expand() combines every tissue with every sample name;
        # with more than one tissue in TISSUE this requests tissue/sample
        # pairs that may not exist in the link table -- confirm this is intended.
        expand(
            join(ASSAY, "{tissue}/samples/{sample_name}/Sample_output/obj/{sample_name}.sample_matrix.rds"),
            tissue=TISSUE,
            sample_name=SAMPLE_NAME
        ),
        sample_QCs = expand(
            join(ASSAY, "{tissue}/samples/{sample_name}/Sample_output/QCs/{sample_name}.qc.txt"),
            tissue=TISSUE,
            sample_name=SAMPLE_NAME
        )
# Rule definitions are kept in separate files under rules/ and pulled in here.
# The includes come after `rule all`, which therefore stays the first rule
# defined in this workflow.
# NOTE(review): the files requested by `rule all` are presumably produced by
# rules in these files -- their contents are not visible from here.
include: "rules/remove_contaminate.smk"
include: "rules/RNA_fastqc.smk"
include: "rules/dropest_alignment.smk"
include: "rules/merge_dT_N6_barcode.smk"
include: "rules/drop_report.smk"
include: "rules/RNA_QC.smk"
include: "rules/RNA_sample_level.smk"