-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path0--ConfigurePipeline.bash
executable file
·200 lines (172 loc) · 4.47 KB
/
0--ConfigurePipeline.bash
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/bin/bash
## Default parameters ##
# Each value below is offered as the bracketed default in the matching
# prompt further down and is kept when the user just presses Enter.
output='pipeline.conf'        # name of the configuration file to write
fastqsuffix='_R1.fastq.gz'    # suffix of the raw Fastq.gz files
ref='hg19'                    # reference assembly
emailtype='END'               # SLURM email type
########################
## Functions ######
#######################################
# Print a prompt and read one line of user input.
# Globals:   PARAM - written only when the answer is non-empty; otherwise it
#            is left untouched, so callers unset it before each call.
# Arguments: $1 - prompt text, printed verbatim without a trailing newline
# Outputs:   the prompt, on stdout
#######################################
get_response() {
  local reply
  # The prompt must be quoted: prompts embed "[default]" text, which the
  # shell treats as a glob character class when expanded unquoted. A file
  # with a matching one-character name in the current directory (e.g. 'a'
  # for "[_R1.fastq.gz]") would otherwise replace that part of the prompt.
  printf '%s' "$1"
  # -r keeps backslashes in the answer literal.
  read -r reply
  if [ -n "$reply" ]; then
    PARAM=$reply
  fi
}
#######################################
# Abort the script unless the given path is an existing regular file.
# Arguments: $1 - path to check; an empty value is accepted without checking
# Outputs:   "File does not exist." on stdout before exiting
# Exits:     1 when a non-empty path is not a regular file
#######################################
check_file() {
  # An empty argument has always passed this check (the unquoted test never
  # saw a path), and the optional GATK key prompt relies on that, so the
  # behaviour is kept explicitly.
  if [ -z "$1" ]; then
    return 0
  fi
  # Quoted so that paths containing spaces or glob characters are tested as
  # a single path instead of making the test itself fail.
  if [ ! -f "$1" ]; then
    echo "File does not exist."
    exit 1
  fi
}
#######################################
# Ask for confirmation before an existing output file gets overwritten.
# Arguments: $1 - path of the output file
# Outputs:   a y/n prompt on stdout when the file exists
# Exits:     1 when the file exists and the answer is anything but "y"
#######################################
check_output() {
  local answer
  # Quoted so that a path with spaces is tested as one path; unquoted, the
  # test errors out and an existing file would be overwritten without asking.
  if [ -f "$1" ]; then
    printf '%s' "Output file exists. Overwrite? [y/n] > "
    read -r answer
    if [ "$answer" != "y" ]; then
      echo "Exiting program."
      exit 1
    fi
  fi
}
#######################################
# Make sure a directory exists, creating it (with parents) when missing.
# Arguments: $1 - directory path; must not be empty
# Outputs:   status messages on stdout, creation failure on stderr
# Exits:     1 when no path was given or the directory cannot be created
#######################################
check_directory() {
  # The empty check comes first: testing or creating "" is meaningless.
  if [ -z "$1" ]; then
    echo "Please provide a directory."
    echo "Exiting program."
    exit 1
  fi
  # Quoted so that paths with spaces are handled as a single directory.
  if [ ! -d "$1" ]; then
    echo "Directory does not exist."
    echo "Creating directory."
    if ! mkdir -p -- "$1"; then
      echo "Could not create directory." >&2
      exit 1
    fi
  fi
}
###################
# --- Directories, output file, Fastq suffix and reference assembly ---
# Pattern used for every question: clear PARAM, ask, and keep the previous
# value (default or empty) when the answer is empty. All values are passed
# on quoted so that paths with spaces or glob characters stay intact.

unset PARAM
get_response "Enter the full path to store your custom pipeline scripts [no default] > "
scriptsdir=${PARAM:-$scriptsdir}
check_directory "$scriptsdir"

unset PARAM
get_response "Enter name of output configuration file [$output] > "
output=${PARAM:-$output}
check_output "$output"

unset PARAM
get_response "Enter the full path of the raw data directory [no default] > "
datadir=${PARAM:-$datadir}
check_directory "$datadir"

unset PARAM
get_response "Enter the full path of the directory for pipeline results [no default] > "
resdir=${PARAM:-$resdir}
check_directory "$resdir"

# NOTE: the bracketed default in this prompt ("[_R1.fastq.gz]") is a glob
# character class. If the prompt text is ever expanded unquoted, a file with
# a matching one-character name (such as 'a') in the current directory
# replaces that part of the prompt.
unset PARAM
get_response "Enter name of raw Fastq.gz suffix (paired-end expected) [$fastqsuffix] > "
fastqsuffix=${PARAM:-$fastqsuffix}
printf '%s\n' "$fastqsuffix"

unset PARAM
get_response "Enter name of reference assembly [$ref] > "
ref=${PARAM:-$ref}
if [ "$ref" != "hg19" ]; then
  echo "Only hg19 is implemented yet. For other assemblies, you should change the pipeline configuration file by hand and at your own risk."
fi
# --- Analysis mode and, for EXOME, the target/bait files ---
unset PARAM
get_response "Please define analysis mode: (1)GENOME or (2)EXOME [2] > "
# The prompt advertises 2 (EXOME) as the default, so an empty answer must
# select it; leaving mode unset would fall through to GENOME.
mode=${PARAM:-2}

if [ "$mode" == "2" ]; then
  # NOTE(review): check_file lets an empty answer through, so these
  # "[no default]" files are not enforced as mandatory here.
  unset PARAM
  get_response "Enter full path of target file [no default] > "
  targets=${PARAM:-$targets}
  check_file "$targets"

  unset PARAM
  get_response "Enter full path of baits (Picard) file [no default] > "
  baitsPicard=${PARAM:-$baitsPicard}
  check_file "$baitsPicard"

  unset PARAM
  get_response "Enter full path of target (Picard) file [no default] > "
  targetsPicard=${PARAM:-$targetsPicard}
  check_file "$targetsPicard"
fi
# --- SLURM settings and optional GATK key ---
unset PARAM
get_response "Email address for SLURM [no default] > "
email=${PARAM:-$email}

unset PARAM
get_response "Email type for SLURM [$emailtype] > "
emailtype=${PARAM:-$emailtype}

unset PARAM
get_response "Log directory for SLURM [no default] > "
slurmlogdir=${PARAM:-$slurmlogdir}
check_directory "$slurmlogdir"

echo "Estimating array value for SLURM ..."
# Count the entries matching *R1* in the raw data directory with a glob
# instead of parsing `ls | wc -l`: this is safe for names with spaces, does
# not list the contents of matching directories, and yields a number without
# the padding some `wc` implementations add.
shopt -s nullglob   # an unmatched pattern expands to nothing -> count 0
r1_files=("$datadir"/*R1*)
shopt -u nullglob
slurmarray="1-${#r1_files[@]}"
if [ "${#r1_files[@]}" -eq 0 ]; then
  echo "WARNING: no files matching '*R1*' found in '$datadir'." >&2
fi
echo "Done."

unset PARAM
get_response "(optional) No Phone Home (GATK) key [no default] > "
noET=${PARAM:-$noET}
# The key is optional: only validate the path when one was given.
if [ -n "$noET" ]; then
  check_file "$noET"
fi
# # Read Groups should follow:
# # They are assumed to have been run on a hiseq
## Preparing output ##
#######################################
# Print the pipeline configuration as tab-separated "key<TAB>value" lines.
# Values are passed as printf arguments, so spaces, glob characters and
# backslashes in user input are written out literally.
# Globals (read): scriptsdir, datadir, resdir, fastqsuffix, ref, mode,
#                 targets, baitsPicard, targetsPicard, email, emailtype,
#                 slurmlogdir, slurmarray, noET
# Outputs:        the configuration on stdout
#######################################
print_pipeline_config() {
  printf 'ScriptsDir\t%s\n' "$scriptsdir"
  printf 'RawDataDir\t%s\n' "$datadir"
  printf 'ResultsDir\t%s\n' "$resdir"
  printf 'FastqGzSuffixPE\t%s\n' "$fastqsuffix"
  printf 'ReferenceAssembly\t%s\n' "$ref"
  if [ "$mode" == "2" ]; then # EXOME mode
    printf 'AnalysisMode\tEXOME\n'
    printf 'TargetFile\t%s\n' "$targets"
    printf 'BaitsFilePicard\t%s\n' "$baitsPicard"
    printf 'TargetFilePicard\t%s\n' "$targetsPicard"
  else
    printf 'AnalysisMode\tGENOME\n'
  fi
  printf 'SLURMemailaddress\t%s\n' "$email"
  printf 'SLURMemailtype\t%s\n' "$emailtype"
  printf 'SLURMlog\t%s\n' "$slurmlogdir"
  printf 'SLURMarray\t%s\n' "$slurmarray"
  # The GATK key line is only written when a key was provided.
  if [ -n "$noET" ]; then
    printf 'noPhoneHome\t%s\n' "$noET"
  fi
}

# Show the configuration to the user ...
echo ""
echo "######### Pipeline configuration ########"
print_pipeline_config
echo "#########################################"
echo ""

# ... and write the same content to the output file.
if ! print_pipeline_config > "$output"; then
  echo "ERROR: could not write configuration to '$output'" >&2
  exit 1
fi
######################
echo "This pipeline configuration has been written to: '$output'"
echo "WARNING: currently, Read Groups have to be modified manually"
# Reaching this point means the configuration was written: report success.
exit 0