-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMuSSE_Angiosperm.R
137 lines (105 loc) · 4.98 KB
/
MuSSE_Angiosperm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#perform a MuSSE analysis for WGD events in the Angiosperms
library(diversitree)
#due to changes in ape and how an ultrametric tree is determined, need to have ape version 3.5 installed
library(ape)
library(geiger)
library(nlme)
#starting files of the tree and the data file
#the data file has two columns, one with the species name, the second with the character state starting at 1
mydata <- read.csv("Angiosperm_only.csv",header=TRUE, row.names=1)
mytree <- read.tree("Vascular_Plants_rooted.dated.tre")
#compare data file and tree file to see if any taxa are not represented in both
comparison <- name.check(phy=mytree,data=mydata)
# prune taxa that don't have data but are present in the tree
mytree <- drop.tip(mytree,comparison$tree_not_data)
#double check to make sure that taxa all match with tree and data
name.check(phy=mytree,data=mydata)
comparison <- name.check(phy=mytree,data=mydata)
#create vector for traits
# if the vector names are changed, then commands following this will need to be altered to maintain the procedure
states <- mydata[,1]
names(states) <- row.names(mydata)
#proportion of tips sampled. Angiosperms species estimated from Plant List data base and species richness based on Zanne et al. 2014
# is 283,715, but more general estimates around 352,000
sampling.f <- 30458 / 283715
sampling.f
#make.musse needs two arguments, a tree and character states
lik <- make.musse(mytree,states,6,sampling.f=sampling.f)
lik
#argument names
argnames(lik)
#constrain transition rates so that events 1 <> 2 <> 3 <> 4
lik.base <- constrain(lik, q13 ~ 0, q14 ~ 0, q15 ~0, q16 ~ 0, q24~ 0, q25 ~ 0, q26 ~ 0, q31~ 0, q35 ~ 0, q36 ~ 0, q41~ 0, q42~ 0, q46 ~ 0, q51 ~ 0, q52 ~ 0, q53 ~ 0, q61 ~ 0, q62 ~ 0, q63 ~ 0, q64 ~ 0)
argnames(lik.base)
#heuristic guess as to sensible starting point
p <- starting.point.musse(mytree,6)
#full model allowing all variables to change at any time
fit <-find.mle(lik, p[argnames(lik)])
fit
round(coef(fit),3)
#fit the base model constraining WGD to only change sequentially in either direction
fit.base <- find.mle(lik.base, p[argnames(lik.base)])
fit.base
#round to three decimal places
round(coef(fit.base),3)
#save data for future use if necessary
save(fit, file="fit.RData")
save.image()
#load(file="fit.base.RData")
#test hypothesis of speciation rates are different for different states
lik.lambda <- constrain(lik, q13 ~ 0, q14 ~ 0, q15 ~0, q16 ~ 0, q24~ 0, q25 ~ 0, q26 ~ 0, q31~ 0, q35 ~ 0, q36 ~ 0, q41~ 0, q42~ 0, q46 ~ 0, q51 ~ 0, q52 ~ 0, q53 ~ 0, q61 ~ 0, q62 ~ 0, q63 ~ 0, q64 ~ 0, lambda2 ~ lambda1, lambda3 ~ lambda1, lambda4 ~ lambda1, lambda5 ~ lambda1, lambda6 ~ lambda1)
#then start search again
fit.lambda <- find.mle(lik.lambda, p[argnames(lik.lambda)])
fit.lambda
#round to three decimal places
round(coef(fit.lambda),3)
#save data with constraining lambda
save(fit.lambda, file="fit.lambda.RData")
save.image()
#perform statistical test to get a p-value
anova(fit.base, free.lambda=fit.lambda)
anova
#then start search again
lik.mu <- constrain(lik, q13 ~ 0, q14 ~ 0, q15 ~0, q16 ~ 0, q24~ 0, q25 ~ 0, q26 ~ 0, q31~ 0, q35 ~ 0, q36 ~ 0, q41~ 0, q42~ 0, q46 ~ 0, q51 ~ 0, q52 ~ 0, q53 ~ 0, q61 ~ 0, q62 ~ 0, q63 ~ 0, q64 ~ 0, mu2 ~ mu1, mu3 ~ mu1, mu4 ~ mu1, mu5 ~ mu1, mu6 ~ mu1)
#then start search again
fit.mu <- find.mle(lik.mu, p[argnames(lik.mu)])
fit.mu
#round results
round(coef(fit.mu),3)
#save data when mu is constrained
save(fit.mu, file="fit.mu.RData")
save.image()
#perform statistical test to get a p-value
anova(fit.base, free.lambda=fit.mu)
anova
#mcmc analysis
prior.exp <- make.prior.exponential(2)
set.seed(1)
#run MCMC analysis for 5,000 generations
samples <- mcmc(lik, coef(fit), fail.value=NULL, nsteps=5000, prior=prior.exp, w=1, print.every=100)
#save distribution
save(samples, file="Samples_distribution.RData")
#if need be, can reload file
#load("Samples_distribution.RData")
#to visualize speciation rate
pdf("Speciation.pdf")
col <- c("snow2","blue", "yellow", "green", "red", "black")
profiles.plot(samples[c("lambda1", "lambda2", "lambda3", "lambda4", "lambda5", "lambda6")], col.line=col, las=1,
xlab="Speciation rate", legend="topright", font=2, cex.lab=0.5, font.lab=1, cex.legend=1, margin=1/4)
dev.off()
#to visualize extinction rate
pdf("Extinction_rate.pdf")
col <- c("snow2","blue", "yellow", "green", "red", "black")
profiles.plot(samples[c("mu1", "mu2", "mu3", "mu4", "mu5", "mu6")], col.line=col, las=1,
xlab="Extinction rate", legend="topright", font=2, cex.lab=0.5, font.lab=1, cex.legend=1, margin=1/4)
dev.off()
#visualize net diversification
pdf("Net_diversification.pdf")
samples$net1 <- samples$lambda1 - samples$mu1
samples$net2 <- samples$lambda2 - samples$mu2
samples$net3 <- samples$lambda3 - samples$mu3
samples$net4 <- samples$lambda4 - samples$mu4
col <- c("snow2","blue", "yellow", "green", "red", "black")
profiles.plot(samples[c("net1", "net2", "net3", "net4", "net5", "net6")], col.line=col, las=1,
xlab="Net diversification rate", legend="topright", font=2, cex.lab=0.5, font.lab=1, cex.legend=1, margin=1/4)
dev.off()