-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path06_PGLS-models.Rmd
137 lines (116 loc) · 8.62 KB
/
06_PGLS-models.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
---
editor_options:
chunk_output_type: console
---
# Phylogenetic generalized least squares regressions
Here, we used phylogenetic generalized least squares regression to test if thermal regime, dispersal ability, and diet significantly drive Himalayan bird elevational shift.
## Remove non-Himalayan species
```{r}
# first we remove species that are non-Himalayan
rem <-read.csv("data/SpeciesList_nonhimalayan.csv.csv")
```
## PGLS models for the Eastern Himalayas
```{r}
for (ds in c("05.q1.sam30", "05.q1.sam60", "05.q2.sam30","05.q2.sam60", "05.q3.sam30","05.q3.sam60", "95.q1.sam30","95.q1.sam60", "95.q2.sam30","95.q2.sam60" ,"95.q3.sam30","95.q3.sam60" ,"50.q1.sam30","50.q1.sam60" ,"50.q2.sam30","50.q2.sam60" ,"50.q3.sam30", "50.q3.sam60")) {
x<-read.csv(paste("results/final_birdlist_east_", ds, ".csv", sep = ""))
x<-x[!(x$Species %in% rem$Species),]
write.csv(x,file = paste("results/birdlist_east",ds,".csv", sep = ""), row.names = F)
# birdlist without non-himalayan species
}
# load necessary data for running statistical models
east_100 <-read.csv("results/eastHim_100.csv") #temperature data
east_100_S <- east_100 %>% select(-contains("Jan")) %>% rename(elev_roundS = elev_round) #winter
east_100_W <- east_100 %>% select(-contains("June"))%>% rename(elev_roundW = elev_round) #summer
jetzname <-read.csv("data/for_PGLS_list3.csv") #taxonomy used by birdtree.org
sheard_trait <- read.csv("data/2020-sheard et al-species-trait-dat - speciesdata.csv") %>% #trait data set
select(HWI,Diet,Tree.name)
tree <-read.nexus("data/birdtree.nex") #nex file from birdtree.org (hackett all species)
tree <- mcc(tree) ## maximum clade credibility tree
# create a single dataframe of summaries of all model combinations
compile_eastmodels<-setNames(data.frame(matrix(ncol = 6, nrow = 0)), c("Variable", "Value", "Std.Error", "t-value", "p-value", "ds"))
for (ds in c("05.q1.sam30", "05.q1.sam60", "05.q2.sam30","05.q2.sam60", "05.q3.sam30","05.q3.sam60", "95.q1.sam30","95.q1.sam60", "95.q2.sam30","95.q2.sam60" ,"95.q3.sam30","95.q3.sam60" ,"50.q1.sam30","50.q1.sam60" ,"50.q2.sam30","50.q2.sam60" ,"50.q3.sam30", "50.q3.sam60")) {
x<-read.csv(paste("results/birdlist_east", ds, ".csv", sep = ""))
x$elev_roundS<-round(x$Median_S,-2)
x$elev_roundW<-round(x$Median_W,-2)
x<-left_join(x,east_100_S, by = "elev_roundS") #aligning temperature data to elevational range data
x<-left_join(x,east_100_W, by = "elev_roundW")
x$ThermalRegime<-x$maxTemp_June_mean-x$minTemp_Jan_mean #calculating species thermal regimes
x<-left_join(x,jetzname, by = "Species") #adding jetz taxonomy
x<-left_join(x, sheard_trait, by = "Tree.name")%>% mutate(Diet = fct_recode(Diet, "omnivore" = "nectar", "omnivore" = "scav", "seeds"="plants")) #adding trait info
pruned.tree<-drop.tip(tree1,tree1$tip.label[-match(x$Tree.name, tree1$tip.label)]) #setting up the PGLS model
glsdata<-x[match(pruned.tree$tip.label, x$Tree.name),] #setting up the PGLS model
pgls2<-gls(Median_diff~ThermalRegime+HWI+relevel(Diet, ref = "invertebrates"),correlation=corPagel(1,pruned.tree),data=glsdata) #setting up the PGLS model
k<-summary(pgls2)
outputk<-as.data.frame(k$tTable)
outputk$dataset<-ds
outputk$Variable<-rownames(outputk)
compile_eastmodels<-rbind(compile_eastmodels,outputk)
write.csv(glsdata, file = paste("results/east_ordered",ds,".csv", sep = ""), row.names = F) # species ordered by taxonomy
write.csv(compile_eastmodels, "results/compile_eastmodels.csv", row.names = F)
}
## calculating adjusted p values using the Benjamini Hochberg correction for false discovery rates
east_p<-read.csv("results/compile_eastmodels.csv")
east_p$adjP<-p.adjust(east_p$p.value, method = "fdr")
write.csv(east_p, "results/compile_eastmodels_padj.csv")
```
## PGLS models for the Western Himalayas
```{r}
for (ds in c("05.q1.sam30", "05.q1.sam60", "05.q2.sam30","05.q2.sam60", "05.q3.sam30","05.q3.sam60", "95.q1.sam30","95.q1.sam60", "95.q2.sam30","95.q2.sam60" ,"95.q3.sam30","95.q3.sam60" ,"50.q1.sam30","50.q1.sam60" ,"50.q2.sam30","50.q2.sam60" ,"50.q3.sam30", "50.q3.sam60")) {
x<-read.csv(paste("results/final_birdlist_west", ds, ".csv", sep = ""))
x<-x[!(x$Species %in% rem$Species),]
write.csv(x,file = paste("results/birdlist_west",ds,".csv", sep = ""), row.names = F) ## final list after removing non himalayan species
}
west_100<-read.csv("results/westHim_100.csv") #temperature data
west_100_S<-west_100 %>% select(-contains("Jan")) %>% rename(elev_roundS = elev_round) #winter
west_100_W<-west_100 %>% select(-contains("June"))%>% rename(elev_roundW = elev_round) #summer
# create a single dataframe of summaries of all model combinations
compile_westmodels<-setNames(data.frame(matrix(ncol = 5, nrow = 0)), c("Value", "Std.Error", "t-value", "p-value", "ds"))
for (ds in c("05.q1.sam30", "05.q1.sam60", "05.q2.sam30","05.q2.sam60", "05.q3.sam30","05.q3.sam60", "95.q1.sam30","95.q1.sam60", "95.q2.sam30","95.q2.sam60" ,"95.q3.sam30","95.q3.sam60" ,"50.q1.sam30","50.q1.sam60" ,"50.q2.sam30","50.q2.sam60" ,"50.q3.sam30", "50.q3.sam60")) {
x<-read.csv(paste("results/birdlist_west", ds, ".csv", sep = ""))
x$elev_roundS<-round(x$Median_S,-2)
x$elev_roundW<-round(x$Median_W,-2)
x<-left_join(x,west_100_S, by = "elev_roundS") #aligning temperature data to elevational range data
x<-left_join(x,west_100_W, by = "elev_roundW")
x$ThermalRegime<-x$maxTemp_June_mean-x$minTemp_Jan_mean #calculating species thermal regimes
x<-left_join(x,jetzname, by = "Species") #adding jetz taxonomy
x<-left_join(x, sheard_trait, by = "Tree.name")%>% mutate(Diet = fct_recode(Diet, "omnivore" = "nectar", "omnivore" = "scav", "seeds"="plants"))
pruned.tree<-drop.tip(tree1,tree1$tip.label[-match(x$Tree.name, tree1$tip.label)]) #setting up the PGLS model
glsdata<-x[match(pruned.tree$tip.label, x$Tree.name),] #setting up the PGLS model
pgls2<-gls(Median_diff~ThermalRegime+HWI+relevel(Diet, ref = "invertebrates"),correlation=corPagel(1,pruned.tree),data=glsdata) #setting up the PGLS model
k<-summary(pgls2)
outputk<-as.data.frame(k$tTable)
outputk$dataset<-ds
outputk$Variable<-rownames(outputk)
compile_westmodels<-rbind(compile_westmodels,outputk)
write.csv(glsdata, file = paste("results/west_ordered",ds,".csv", sep = ""), row.names = F) # species ordered by taxonomy
write.csv(compile_westmodels, "results/compile_westmodels.csv", row.names = F)
}
## calculating adjusted p values using the Benjamini Hochberg correction for false discovery rates
west_p<-read.csv("results/compile_westmodels.csv")
west_p$adjP<-p.adjust(west_p$p.value, method = "fdr")
write.csv(east_p, "results/compile_westmodels_padj.csv")
```
## Phylogenetic paired t-test for species common to the east and west
```{r}
diff_ttest<-setNames(data.frame(matrix(ncol = 3, nrow = 0)), c("t", "df", "pvalue")) # difference in extent of elevation shift
niche_ttest<-setNames(data.frame(matrix(ncol = 3, nrow = 0)), c("t", "df", "pvalue")) #difference in thermal regimes
for (ds in c("05.q1.sam30", "05.q1.sam60", "05.q2.sam30","05.q2.sam60", "05.q3.sam30","05.q3.sam60", "95.q1.sam30","95.q1.sam60", "95.q2.sam30","95.q2.sam60" ,"95.q3.sam30","95.q3.sam60" ,"50.q1.sam30","50.q1.sam60" ,"50.q2.sam30","50.q2.sam60" ,"50.q3.sam30", "50.q3.sam60")) {
west<-read.csv(paste("results/west_ordered",ds,".csv", sep = ""))
east<-read.csv(paste("results/east_ordered",ds,".csv", sep = ""))
commonW<-west[(west$Species %in% east$Species),] # subset of species in the west that are found in the east
commonE<-east[(east$Species %in% west$Species),] # subset of species in the east that are found in the west
common<-data.frame(Median_diffW = commonW$Median_diff,Median_diffE = commonE$Median_diff, ThermalRegimeW = commonW$ThermalRegime,ThermalRegimeE = commonE$ThermalRegime ,Tree.name = commonE$Tree.name) # combined dataframe of common species with their extent of shift and thermal regimes in both the eastern and western himalayas
# setting up and running the phylogenetic t-tests and outputting results on csvs
pruned.tree<-drop.tip(tree1,tree1$tip.label[-match(common$Tree.name, tree1$tip.label)])
comdata<-common[match(pruned.tree$tip.label, common$Tree.name),]
row.names(comdata)<-comdata$Tree.name
a<-phyl.pairedttest(pruned.tree,comdata[,c(1,2)])
b<-phyl.pairedttest(pruned.tree,comdata[,c(3,4)])
a_diff_ttest<-data.frame(t = a$t, pvalue = a$P, df = a$df, dataset = ds)
b_niche_ttest<-data.frame(t = b$t, pvalue = b$P, df = b$df, dataset = ds)
diff_ttest<-rbind(diff_ttest,a_diff_ttest)
niche_ttest<-rbind(niche_ttest,b_niche_ttest)
write.csv(diff_ttest, "results/diff_ttest.csv", row.names = F) # difference in extent of elevation shift
write.csv(niche_ttest, "results/niche_ttest.csv", row.names = F) #difference in thermal regimes
}
```