-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbasic_plots.R
103 lines (85 loc) · 4.02 KB
/
basic_plots.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Katia Lopes
# August 02, 2019
# Basic plots for RNASeq data
# The data used here was downloaded from Embl Expression Atlas: https://www.ebi.ac.uk/gxa/home
# We are using TPM, but you should consider another normalizations and correction by effects.
# For demonstration purpose only!
#This command clean all variables. BE CAREFULL!!!
rm(list = setdiff(ls(), lsf.str()))
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
if(!require("RColorBrewer")) install.packages("RColorBrewer"); library("RColorBrewer")
if(!require("gplots")) install.packages("gplots"); library("gplots")
if(!require("Hmisc")) install.packages("Hmisc"); library("Hmisc")
if(!require("amap")) BiocManager::install("amap"); library("amap")
if(!require("ggfortify")) BiocManager::install("ggfortify"); library("ggfortify")
if(!require("tidyverse")) install.packages("tidyverse"); library("tidyverse")
if(!require("factoextra")) install.packages("factoextra"); library("factoextra")
work_plots = "/Users/katia/Desktop/kt/rnaseq/"
# Input data
tpm_data = as.matrix(read.table("/Users/katia/Desktop/kt/rnaseq/fantom5_2016_tpm.txt", header = TRUE, row.names = 1, check.names = FALSE))
dim(tpm_data)
#####################################
# Trees to check outliers samples
#####################################
tpm_data_t = t(tpm_data)
#Euclidean distance with the transposed matrix
sampleTree = hclust(dist(tpm_data_t), method = "average")
#png(paste0(work_plots, "tree_euclidean.png"), width = 20, height = 10, res = 300, units = "in")
plot(sampleTree, main = "Sample clustering to detecting outliers",
sub = "Function: hclust, Method: average from Euclidean distance", xlab = "samples",
cex.lab = 1.5, cex.axis = 1.5, cex.main = 1.5, cex.sub = 1.5)
#dev.off()
#Spearman correlation
#png(paste0(work_plots, "tree_Spearman.png"), width = 20, height = 10, res = 300, units = "in")
plot(hcluster(tpm_data_t, method= "spearman", link="average" ),
cex.lab = 1.5, cex.axis = 1.5, cex.main = 2, main= "Hierarchical Clustering",
sub = "Function: hcluster, Method: Spearman", xlab = "samples")
#dev.off()
#####################################
# PCA
#####################################
res.pca = prcomp(tpm_data_t)
#fviz_eig(res.pca)
fviz_pca_ind(res.pca,
col.ind = "cos2", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = F # Avoid text overlapping
)
#####################################
# MDS
#####################################
d <- dist(tpm_data_t)
mds <- cmdscale(d)
plot(mds[,1],mds[,2],pch=21,xlab="First dimension",ylab="Second dimension")
title("MDS - Euclidean Distance")
#####################################
# Boxplots
#####################################
boxplot(log2(tpm_data[, 1:56] +1), col=rainbow(56), main="TPM", ylab="Log2(TPM+1)",las=2,cex.axis=0.8)
#####################################
# Heatmaps
#####################################
#Spearman
cormatrix = rcorr(as.matrix(tpm_data), type='spearman')
corrdata = as.matrix(cormatrix$r)
heatmap.2(corrdata, main="TPM - Spearman",trace="none", col = c(sort(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")), margins = c(8,8))
#Pearson
cormatrix = rcorr(as.matrix(tpm_data), type='pearson')
corrdata = as.matrix(cormatrix$r)
heatmap.2(corrdata, main="TPM - Pearson", trace="none", col = c(sort(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")), margins = c(8,8))
#####################################
# Density plot
#####################################
ltpm = log2(tpm_data)
nsamples <- ncol(tpm_data)
samplenames <- colnames(tpm_data)
colfunc <- colorRampPalette(c("#4DBBD5FF", "#3C5488FF")) # You can specify other colors
col = alpha(colfunc(nsamples), alpha = 0.1) # To put all samples in light blue
plot(density(ltpm[,1]), col=col[1], lwd=2, ylim=c(0,0.26), las=2, main="", xlab="")
title(main="TPM ", xlab="Log2(tpm)")
for (i in 2:nsamples){
den <- density(ltpm[,i])
lines(den$x, den$y, col=col[i], lwd=2)
}
#legend("right", samplenames, text.col=col, bty="n") # It's not a good idea when you have 56 samples!