-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmcmcotu_bat_bacteria.R
146 lines (115 loc) · 5.61 KB
/
mcmcotu_bat_bacteria.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
library(MCMC.OTU)
#Data needs to look like this:
#Rowname Data Data sample otu otu
#SampleID data data SampleID otu otu
#Loads your own data, use functions read.table or read.csv or read.delim
setwd("~/Desktop/bat_microbiota/")
func_level1 <- read.delim("~/Desktop/bat_fungal/bat_fungal_mcmcotu.txt",sep="\t",row.names=1)
str(func_level1)
head(func_level1)
# removing low-count samples and OTUs (?purgeOutliers to see how to adjust cutoffs)
# also adjust otu.columns value for your dataset
func_level1=purgeOutliers(func_level1,count.columns=6:39,otu.cut=0.001)
# this is how the format of the data table should look
# note the 'sample' column giving a unique name for each sample - this one is required
# (row names are not required)
head(func_level1)
# what is the proportion of samples with data for these OTUs?
apply(func_level1[,5:length(func_level1[1,])],2,function(x){sum(x>0)/length(x)})
# what percentage of global total counts each OTU represents?
apply(func_level1[,6:length(func_level1[1,])],2,function(x){sum(x)/sum(func_level1[,6:length(func_level1[1,])])})
# stacking the data; adjust otu.columns and condition.columns values for your data
gss=otuStack(func_level1,count.columns=c(6:length(func_level1[1,])),condition.columns=c(1:5))
head(gss)
# fitting the model. Replace the formula specified in 'fixed' with yours, add random effects if present.
# See ?mcmc.otu for these and other options.
mm=mcmc.otu(
fixed="REGION",
data=gss,
nitt=55000,thin=50,burnin=15000 # a long MCMC chain to improve modeling of rare OTUs
)
summary(mm)
# selecting the OTUs that were modeled reliably
# (OTUs that are too rare for confident parameter estimates are discarded)
acpass=otuByAutocorr(mm,gss)
head(acpass)
# calculating differences and p-values between all pairs of factor combinations
smm0=OTUsummary(mm,gss,otus=acpass,summ.plot=TRUE)
# adjusting p-values for multiple comparisons:
#?p.adjust for different methods
smmA=padjustOTU(smm0, method="BH")
# significant OTUs at FDR<0.05:
sigs=signifOTU(smmA,p.cutoff=0.05)
sigs
# plotting the significant ones
smm1=OTUsummary(mm,gss,otus=sigs)
# now plotting them by species
smm1=OTUsummary(mm,gss,otus=sigs,xgroup="color")
# table of log10-fold changes and p-values: this one goes into supplementary info in the paper
smmA$otuWise[sigs]
write.table(smmA$otuWise[sigs],file = "log10_fold_pvalues_by_place.csv",sep=",")
summary(mm)
#eff.samp values much smaller than the nominal sample size indicate poor mixing
#post.mean = posterier mean
#Intercept = intercept of the post
autocorr(mm$VCV)
#Ideally, all samples of the posterior distribution should be independent,
#and the autocorrelation for all lag values greater than zero should be near zero
#If not near zero you may want to run the model longer
posterior.mode(mm$VCV)
#Obtain estimates of the additive phylogenetic and residual variance
#by calculating the modes of the posterior distributions
#See proportion!
posterior.mode(mm$Sol)
#Most likely value is where the posterior distribution peaks
#############################################
# Principal coordinate analysis
library(vegan)
library(MCMC.OTU)
# purging under-sequenced samples and OTUs represented in less than 10% of all samples
func_level1=purgeOutliers(func_level1,count.columns=3:56,zero.cut=0.1,otu.cut=0)
# creating a log-transfromed normalized dataset for PCoA:
func_level1.log=logLin(data=func_level1,count.columns=4:length(names(func_level1)))
# computing Bray-Curtis distances and performing PCoA:
func_level1.dist=vegdist(func_level1.log,method="bray")
func_level1.pcoa=pcoa(func_level1.dist)
# plotting by bank:
scores=func_level1.pcoa$vectors
conditions=func_level1[,1:3]
head(conditions)
margin=0.01
plot(scores[,1], scores[,2],type="n",
xlim=c(min(scores[,1])-margin,max(scores[,1])+margin),
ylim=c(min(scores[,2])-margin,max(scores[,2])+margin),
mgp=c(2.3,1,0),
xlab=paste("Axis1 (",round(func_level1.pcoa$values$Relative_eig[1]*100,1),"%)",sep=""),
ylab=paste("Axis2 (",round(func_level1.pcoa$values$Relative_eig[2]*100,1),"%)",sep=""),
main="PCoA colored by bank")
points(scores[conditions$type=="Lavatubes",1],scores[conditions$bank=="Lavatubes",2])
points(scores[conditions$type=="Surface Soil",1],scores[conditions$bank=="Surface Soil",2],pch=19)
legend("bottomright", c("East","West"), pch=c(1, 19), cex=0.8)
# plotting by species:
margin=0.01
conditions=func_level1[,1:3]
plot(scores[,1], scores[,2],type="n",
xlim=c(min(scores[,1])-margin,max(scores[,1])+margin),
ylim=c(min(scores[,2])-margin,max(scores[,2])+margin),
mgp=c(2.3,1,0),
xlab=paste("Axis1 (",round(func_level1.pcoa$values$Relative_eig[1]*100,1),"%)",sep=""),
ylab=paste("Axis2 (",round(func_level1.pcoa$values$Relative_eig[2]*100,1),"%)",sep=""),
main="PCoA colored by species")
points(scores[conditions$species=="franksi",1],scores[conditions$species=="franksi",2])
points(scores[conditions$species=="faveolata",1],scores[conditions$species=="faveolata",2],pch=19)
legend("bottomright", c("franksi","faveolata"), pch=c(1, 19), cex=0.8)
#############################################
# exploring correlations between OTUs
library(MCMC.OTU)
#data(green.data)
#func_level1=purgeOutliers(green.data,count.columns=4:156,otu.cut=0.001,zero.cut=0.2)
# creating a log-transformed normalized dataset ignoring zero counts:
nl=startedLog(data=func_level1,count.columns=4:length(names(func_level1)),logstart=0)
names(nl)=c("I","II","III","IV","V","VI","VII")
# displaying a matrix of scatterplots and p-values of OTU correlations
# (onlu p-values better than 0.1 are displayed)
pairs(nl,lower.panel=panel.smooth,upper.panel=panel.cor.pval)
#----------------------------------------