-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathsp_scatterplotColor.sh
executable file
·276 lines (235 loc) · 6.12 KB
/
sp_scatterplotColor.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#!/bin/bash
#set -x
# Print the help text for this script to stdout.
# The ${txt*}/${bld*} colour variables are not assigned anywhere in this
# script; when they are unset they expand to empty strings and the help is
# printed without colours.
# The option descriptions are kept in sync with the getopts string and the
# argument check further down: -c is mandatory there, and -S is not parsed.
usage()
{
cat <<EOF
${txtcyn}
***CREATED BY Chen Tong ([email protected])***
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This script is used to do scatter plot and color them by the third
column data using ggplot2.
It is designed for representing the expression data which
may be affected by multiple factors(here for two).
The parameters for logical variable are either TRUE or FALSE.
Input file:
Gene hmC Kme Expr Size
1_NM_001001130_23818 0.342364 0.387972 0.562945535966746 expr3
2_NM_001001144_16662 1.09501 0.927882 10.6244189482162 expr7
3_NM_001001152_23797 0.14429 0.375741 0 unexpr
4_NM_001001160_10503 0.991374 1.07919 0.0878474532737287 expr1
5_NM_001001176_17970 0.184586 0.202106 0.9731593253037 expr3
6_NM_001001177_28078 0.351389 0.411244 0 unexpr
7_NM_001001178_1650 0.328352 0.295332 0.0490132479669711 expr1
8_NM_001001179_10881 0.693106 0.55201 0 unexpr
9_NM_001001180_13669 0.533143 0.682877 3.73548640439016 expr5
**********************A potential bug******************************
If -c column have only 1 value, program will be aborted by no reasons.
${txtbld}OPTIONS${txtrst}:
-f Data file (with header line, the first column is the
colname, tab seperated)${bldred}[NECESSARY]${txtrst}
-t Title of picture[${txtred}Default empty title${txtrst}]
[Scatter plot of horizontal and vertical variable]
-x xlab of picture[${txtred}Default empty xlab${txtrst}]
[The description for horizontal variable]
-y ylab of picture[${txtred}Default empty ylab${txtrst}]
[The description for vertical variable]
-l The legend for color scale.[${txtred}Default the
variable for color value${txtrst}]
-P Legend position[${txtred}Default right. Accept
top, bottom, left, none, or c(0.08, 0.8).${txtrst}]
-o The variable for horizontal axis.${bldred}[NECESSARY, such hmC]${txtrst}
-v The variable for vertical axis.${bldred}[NECESSARY, such as Kme]${txtrst}
-c The variable for color value.${bldred}[NECESSARY, such as Expr]${txtrst}
-S The variable for shape.${bldred}[Currently NOT supported by this script]${txtrst}
-g Log transfer[${bldred}Default none, accept log, log2${txtrst}].
-w The width of output picture.[${txtred}Default 20${txtrst}]
-a The height of output picture.[${txtred}Default 20${txtrst}]
-E The type of output figures.[${txtred}Default png, accept
eps/ps, tex (pictex), pdf, jpeg, tiff, bmp, svg and wmf)${txtrst}]
-r The resolution of output picture.[${txtred}Default 300 ppi${txtrst}]
-b The formula for facets.[${bldred}Default no facets,
+facet_grid(level ~ .) means divide by levels of 'level' vertcally.
+facet_grid(. ~ level) means divide by levels of 'level' horizontally.
+facet_grid(lev1 ~ lev2) means divide by lev1 vertically and lev2
horizontally.
+facet_wrap(~level, ncol=2) means wrap horizontally with 2
columns.
Example: +facet_wrap(~Size,ncol=6,scale='free')
${txtrst}]
-d If facet is given, you may want to specifize the order of
variable in your facet, default alphabetically.
[${txtred}Accept sth like
(one level one sentence, separate by';')
data\$size <- factor(data\$size, levels=c("l1",
"l2",...,"l10"), ordered=T) ${txtrst}]
-s smoothed fit curve with confidence region or not.
[${bldred}Default loess smooth, one can give 'lm' to
get linear smooth. FALSE for no smooth.${txtrst}]
-z Other parameters in ggplot format.[${bldred}selection${txtrst}]
-e Execute or not[${bldred}Default TRUE${txtrst}]
-i Install the required packages[${bldred}Default FALSE${txtrst}]
EOF
}
# ---- Option defaults --------------------------------------------------------
# Input table and the columns mapped to x, y and colour.
file=''
xval=''
yval=''
color=''

# Text shown on the figure.
title=''
xlab=''
ylab=''
col_legend=''
legend_pos='right'

# Plot tweaks spliced into the generated R code.
log=''
facet=''
facet_o=''
smooth='geom_smooth'
other=''

# Output geometry and format.
width=20
height=20
res=300
ext='png'

# Run-time switches (written into the R script / tested by the shell).
execute='TRUE'
ist='FALSE'

# ---- Command-line parsing ---------------------------------------------------
# Every option except -h takes an argument. -h and any option getopts rejects
# both print the help text and terminate with status 1.
while getopts "hf:t:x:y:o:P:v:c:l:g:w:a:r:E:s:b:d:z:e:i:" opt; do
    case "$opt" in
        f) file="$OPTARG" ;;
        t) title="$OPTARG" ;;
        x) xlab="$OPTARG" ;;
        y) ylab="$OPTARG" ;;
        P) legend_pos="$OPTARG" ;;
        o) xval="$OPTARG" ;;
        v) yval="$OPTARG" ;;
        c) color="$OPTARG" ;;
        l) col_legend="$OPTARG" ;;
        g) log="$OPTARG" ;;
        w) width="$OPTARG" ;;
        a) height="$OPTARG" ;;
        r) res="$OPTARG" ;;
        E) ext="$OPTARG" ;;
        b) facet="$OPTARG" ;;
        d) facet_o="$OPTARG" ;;
        s) smooth="$OPTARG" ;;
        z) other="$OPTARG" ;;
        e) execute="$OPTARG" ;;
        i) ist="$OPTARG" ;;
        h|*)
            usage
            exit 1
            ;;
    esac
done
# ---- Validate arguments, write the R script, optionally run it --------------
# Suffix appended to the input file name for both the generated R script and
# the output figure.
mid=".scatterplot.color"

# file, xval, yval and color are all interpolated into the R code below, so
# each one must be non-empty. The expansions are quoted so that a value
# containing whitespace cannot break the test itself.
if [ -z "$file" ] || [ -z "$xval" ] || [ -z "$yval" ] || [ -z "$color" ]; then
    echo 1>&2 "Please give filename, xval, yval and color variable (-f, -o, -v, -c)."
    usage
    exit 1
fi

# The colour-scale name falls back to the name of the colour variable.
if [ -z "$col_legend" ]; then
    col_legend="$color"
fi

# Turn a bare transformation name (e.g. log2) into the extra argument that is
# appended inside scale_colour_gradient(...) in the generated R code.
if [ -n "$log" ]; then
    log=", trans=\"${log}\""
fi

# Path of the generated R script; always used quoted so that input paths
# containing spaces work for the redirect, Rscript and rm alike.
rscript="${file}${mid}.r"

# The heredoc delimiter is unquoted on purpose: every shell variable below is
# expanded now, producing a self-contained R script.
cat <<END >"$rscript"
if ($ist){
    install.packages("ggplot2", repos="http://cran.us.r-project.org")
}
library(ggplot2)
library(grid)
data <- read.table(file="$file", sep="\t", header=TRUE, row.names=1, quote="")
#if ("$width" != "" && "$height" != "" && "$res" != ""){
# png(filename="${file}${mid}.png", width=$width, height=$height,
# res=$res)
#}else{
# png(filename="${file}${mid}.png")
#}
$facet_o
p <- ggplot(data, aes(x=${xval},y=${yval})) +
    geom_point(aes(color=${color})) +
    scale_colour_gradient(low="green", high="red",
        name="$col_legend" ${log}) +
    labs(x="$xlab", y="$ylab") + labs(title="$title")
#if ("$facet" != ""){
# facet=$facet
p <- p ${facet}
#}
if ("$smooth" == "geom_smooth"){
    p <- p + geom_smooth()
} else if ("$smooth" == 'lm'){
    p <- p + geom_smooth(method=lm)
}
#if ("$other" != ''){
#other=$other
p <- p $other
#}
p <- p + theme_bw() + theme(legend.title=element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# The keyword positions are defined as R variables so that the value given
# with -P can be pasted in unquoted, whether it is a keyword or c(x, y).
top='top'
bottom='bottom'
left='left'
right='right'
none='none'
legend_pos_par <- ${legend_pos}
p <- p + theme(legend.position=legend_pos_par)
ggsave(p, filename="${file}${mid}.${ext}", dpi=$res, width=${width},
    height=${height}, units=c("cm"))
#p
#dev.off()
#+ geom_point(alpha=1/10)
END

# Run the generated script unless -e was set to something other than TRUE.
# The script is removed only after a successful run; on failure it is kept
# for inspection and its location is reported on stderr.
if [ "$execute" == "TRUE" ]; then
    if Rscript "$rscript"; then
        /bin/rm -f "$rscript"
    else
        echo 1>&2 "Rscript failed; the generated script is kept at ${rscript}."
    fi
fi
#convert -density 200 -flatten ${file}${mid}.eps ${first}${mid}.png