#### Packages library(ade4) library(adegraphics) library(adespatial) library(vegan) library(MASS) ## read data "Site1.Minqing(Div.index).txt" Minqing <- read.table(choose.files(), header=T) summary(Minqing) names(Minqing) # Minqing.xy <- Minqing[,c(4,5)] Minqing.xy ## model matrix Minqing.mm = model.matrix(~ veg.,Minqing)[,-1] # DATA Minqing.N <- Minqing[,c(6,8,10,12,14,16,18,20)] names(Minqing.N) colSums(Minqing.N) rowSums(Minqing.N) # Minqing.N.mat <- as.matrix(Minqing.N) Minqing.N.mat Minqing.N.hel <- (decostand(Minqing.N.mat, "hell")) Minqing.N.hell <- as.matrix(scale(Minqing.N.hel, scale = FALSE, center = TRUE)) summary(Minqing.N.hell) ## pcnm Minqing.N.rs <- rowSums(Minqing.N.mat)/sum(Minqing.N.mat) Minqing.pcn <- pcnm(dist(Minqing.xy), w = Minqing.N.rs) attributes(Minqing.pcn$vectors) Minqing.N.pcnm = as.matrix(Minqing.pcn$vectors) ## varpart of two explanatory variables; vegetation and PCNM Minqing.N.var <- varpart(Minqing.N.hell, Minqing.mm, Minqing.N.pcnm) Minqing.N.var showvarparts(2) plot(Minqing.N.var,bg=1:3) ## rda to show the relative imporatnce between surrouding vegetation and geographical distance Minqing.N.rda = rda(Minqing.N.hell, Minqing.mm, Minqing.N.pcnm) Minqing.N.rda summary(Minqing.N.rda) # regression coefficients for each explanatory variable coef(Minqing.N.rda) # retrieve R2 from rda results (R2 <- RsquareAdj(Minqing.N.rda)$r.squared) # which is "[1] 0.2622423" # Adjusted R2 retrived from the rda object (R2adj <- RsquareAdj(Minqing.N.rda)$adj.r.squared) # which is "[1] 0.1323941" # Triplot of rda results plot(Minqing.N.rda, scaling =2, main = "Triplot RDA for spider fami. abundace.hell in Minqing", xlab = "RDA1(proportion exlpained=17%)", ylab = "RDA2(proportion explained=10%)") # This plot displays all our entities: sites, species # explanatory variables as arrows (heads) # species are in red colour without the arrows # Let's add arrows to the species without heads to make them different from the output object Minqing.spe.scor <- scores(Minqing.N.rda, choices=1:2, scaling=1, display= "sp") arrows(0, 0, Minqing.spe.scor[,1], Minqing.spe.scor[,2], length=0, lty=1, col="red") # RDA triplot of the Hellinger-transformed spider abundance data in Minqing # constrained by all environmental variables, scaling 2. # The bottom and left-hand scales are for the objects and the # response variables, the top and right-hand scales # are for the explanatory variables # Let's test the significance of "rda" resluts anova(Minqing.N.rda, step=1000) # F=1.564, Pr(>F)=0.069 ######## Minqing Diversity Minqing.H <- Minqing[,c(7,9,11,13,15,17,19)] names(Minqing.H) colSums(Minqing.H) rowSums(Minqing.H) # Minqing.xy <- Minqing[,c(4,5)] Minqing.xy ## model matrix Minqing.mm = model.matrix(~ veg.,Minqing)[,-1] # DATA matrix Minqing.H.mat <- as.matrix(Minqing.H) Minqing.H.mat Minqing.H.hel <- (decostand(Minqing.H.mat, "hell")) Minqing.H.hell <- as.matrix(scale(Minqing.H.hel, center = TRUE, scale = FALSE)) summary(Minqing.H.hell) ## pcnm Minqing.H.rs <- rowSums(Minqing.H.mat)/sum(Minqing.H.mat) Minqing.pcn <- pcnm(dist(Minqing.xy), w = Minqing.H.rs) attributes(Minqing.pcn$vectors) Minqing.H.pcnm = as.matrix(Minqing.pcn$vectors) ## varpart of two explanatory variables; vegetation and euclidean distance Minqing.H.var <- varpart(Minqing.H.hell, Minqing.mm, Minqing.H.pcnm) Minqing.H.var showvarparts(2) plot(Minqing.H.var,bg=1:3) ## rda to show the relative imporatnce between surrouding vegetation and geographical distance Minqing.H.rda = rda(Minqing.H.hell, Minqing.mm, Minqing.H.pcnm) summary(Minqing.H.rda) Minqing.H.rda # regression coefficients for each explanatory variable coef(Minqing.H.rda) # retrieve R2 from rda results (R2 <- RsquareAdj(Minqing.H.rda)$r.squared) # which is "[1] 0.1823243" # Adjusted R2 retrived from the rda object (R2adj <- RsquareAdj(Minqing.H.rda)$adj.r.squared) # which is "[1] 0.04811967" # Triplot of rda results plot(Minqing.H.rda, scaling =2, main = "Triplot RDA for spider fami. Shannon-diversity in Minqing.hell", xlab = "RDA1(proportion exlpained=25%)", ylab = "RDA2(proportion explained=04%)") # This plot displays all our entities: sites, species # explanatory variables as arrows (heads) # species are in red colour without the arrows # Let's add arrows to the species without heads to make them different from the output object Minqing.H.scor <- scores(Minqing.H.rda, choices=1:2, scaling=1, display= "sp") arrows(0, 0, Minqing.H.scor[,1], Minqing.H.scor[,2], length=0, lty=1, col="red") # RDA triplot of the Hellinger-transformed spiders shannon diversity data in Minqing # constrained by all environmental variables, scaling 2. # The bottom and left-hand scales are for the objects and the # response variables, the top and right-hand scales # are for the explanatory variables # Let's test the significance of "rda" resluts anova(Minqing.H.rda, step=1000) # F=1.2182, Pr(>F)=0.277 ### HEATMAPS OF BETA DISSIMILARITY IN MINQING # HELP WEBSITE # http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/ library(gplots) # for heatmap.2 library(Heatplus) # load the vegan package for hierachical clustering if you want to use distance functions not specified in dist. library(vegan) # load the RColorBrewer package for better colour options library(RColorBrewer) library(pvclust) library(ComplexHeatmap) # load Minqing.Div dataset row.names <- Minqing[,c(3)] Minqing.N <- Minqing[, c(6,8,10,12,14,16,18,20)] names(Minqing.N) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Minqing.N), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Minqing.N, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. # The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Minqing.N), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Minqing.N), method = "bray") data.dist.g # do the complete linkage hierarchical clustering col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Minqing.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 29 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 29, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Minqing.N), var1) heatmap.2(as.matrix(Minqing.N),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Minqing.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Minqing Abundance", lhei = c(2, 8)) # pvclust to test the goodness of cluster result <- pvclust(Minqing.N, method.dist="correlation", method.hclust="ward.D2", nboot=1000) result plot(result) # Minqing Shannon Diversity Minqing.Div <- Minqing[, c(7,9,11,13,15,17,19)] names(Minqing.Div) colSums(Minqing.Div) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Minqing.Div), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Minqing.Div, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. #The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Minqing.Div), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Minqing.Div), method = "bray") data.dist.g # Do complete linkage hierarchical clustering. col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Minqing.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 29 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 29, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Minqing.Div), var1) heatmap.2(as.matrix(Minqing.Div),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Minqing.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Minqing Diversity", lhei = c(2, 8)) # result.d <- pvclust(Minqing.Div, method.dist="correlation", method.hclust="ward.D2", nboot=1000) result.d plot(result.d) ### dbMEM ANALSYSIS FOR MINQING #### Load packages library(lattice) library(HSAUR) library(vegan) library(MASS) library(graphics) library(ade4) library(adegraphics) library(adespatial) ## Minqing XY Minqing.N.xy <- Minqing[,c(4,5)] Minqing.N.xy Minqing.N <- Minqing[,c(6,8,10,12,14,16,18,20)] names(Minqing.N) Minqing.N.N <- as.matrix(Minqing.N) Y = as.matrix(Minqing.N.N) X = as.matrix(Minqing.N.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Minqing.N.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) Minqing.N.res ## Minqing.N.lm = lm(as.matrix(Minqing.N.N) ~ as.matrix(Minqing.N.xy)) Minqing.N.resid = residuals(Minqing.N.lm) head(Minqing.N.resid) # Check the results sum(abs(Minqing.N.res - Minqing.N.resid)) # The two sets of residuals do not differ diag(cor(Minqing.N.res, Minqing.N.resid)) # The two sets of residuals have correlations of 1 # Minqing.N.hel <- decostand(Minqing.N.N, "hellinger", scale = FALSE, center = TRUE) # Plot a rough map of the 29 sampling points plot(Minqing.N.xy, asp=1) text(Minqing.N.xy, labels=rownames(Minqing.N.xy), pos=3) #or rotate it plot(Minqing.N.xy$y,Minqing.N.xy$x) text(Minqing.N.xy$y,Minqing.N.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Minqing.N.lm = lm(as.matrix(Minqing.N.hel) ~ as.matrix(Minqing.N.xy)) Minqing.N.resid = residuals(Minqing.N.lm) Minqing.N.resid # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Minqing.N.dbmem = dbmem(Minqing.N.xy, silent=FALSE) # Note the truncation value Minqing.N.dbmem summary(Minqing.N.dbmem) # Examine the positive eigenvalues attributes(Minqing.N.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Minqing.N.mem = as.matrix(Minqing.N.dbmem) dim(Minqing.N.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Minqing.N.xy, nb = attr(Minqing.N.dbmem, "listw")$neighbours) # Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Minqing.N.xy, Minqing.N.dbmem[,1:11]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(3,3)) for(i in 1:9) { ade4::s.value(Minqing.N.xy, Minqing.N.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Minqing.N.dbmem, nrepet = 9999) ) par(mfrow=c(1,2)) plot(test$obs, attr(Minqing.N.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Minqing.N.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Minqing.N.resid, Minqing.N.mem)) # $r.squared [1] 0.4520056, $adj.r.squared [1] 0.09742101 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Minqing.N.sel = forward.sel(Minqing.N.resid, Minqing.N.mem, adjR2thresh=0.4520056)) # Procedure stopped (alpha criteria): pvalue for variable 2 is 0.094000 (> 0.050000) # variables order R2 R2Cum AdjR2Cum F pval #1 MEM10 10 0.1398653 0.1398653 0.1080085 4.390432 0.002 # The selected dbMEM variables are listed in vector "Minqing.N.sel$order"(column 2 of output table) ( Minqing.N.sel$order ) # Obtain an ordered list of the selected variable numbers: ( Minqing.N.sel.dbmem = sort(Minqing.N.sel$order) ) # Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table): Minqing.N.sel[1,] # 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics: s.value(Minqing.N.xy, Minqing.N.mem[ ,Minqing.N.sel.dbmem]) # This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop: par(mfrow=c(1,1)) for(i in 1:length(Minqing.N.sel.dbmem)) { ade4::s.value(Minqing.N.xy, Minqing.N.mem[ ,Minqing.N.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #", Minqing.N.sel.dbmem[i]), csub=1.5) } # 6. Canonical analysis (RDA): compute the dbMEM spatial model Minqing.N.rda = rda(Minqing.N.resid ~ ., data=as.data.frame(Minqing.N.mem[,Minqing.N.sel.dbmem])) summary(Minqing.N.rda) anova(Minqing.N.rda) # model F= 4.3904, Pr = 0.002*** anova(Minqing.N.rda, by="axis") # RDA1 F= 4.3904 , Pr = 0.001*** # How many axes of the canonical model are significant at level alpha = 0.05? anova(Minqing.N.rda, by="axis", cutoff=0.05) #RDA1 F= 4.3904 , Pr = 0.001*** # dbMEM for Shannon diveristy in Minqing Minqing.H.xy <- Minqing[,c(4,5)] Minqing.H.xy Minqing.H <- Minqing[,c(7,9,11,13,15,17,19)] names(Minqing.H) Minqing.H.N <- as.matrix(Minqing.H) Y = as.matrix(Minqing.H.N) X = as.matrix(Minqing.H.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Minqing.H.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) ## Minqing.H.lm = lm(as.matrix(Minqing.H.N) ~ as.matrix(Minqing.H.xy)) Minqing.H.resid = residuals(Minqing.H.lm) head(Minqing.H.resid) # Check the results sum(abs(Minqing.H.res - Minqing.H.resid)) # The two sets of residuals do not differ diag(cor(Minqing.H.res, Minqing.H.resid)) # The two sets of residuals have correlations of 1 # Minqing.H.hel <- decostand(Minqing.H.N, "hellinger", center = TRUE, scale = FALSE) # Plot a rough map of the 29 sampling points plot(Minqing.H.xy, asp=1) text(Minqing.H.xy, labels=rownames(Minqing.H.xy), pos=3) #or rotate it plot(Minqing.H.xy$y,Minqing.H.xy$x) text(Minqing.H.xy$y,Minqing.H.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Minqing.H.lm = lm(as.matrix(Minqing.H.hel) ~ as.matrix(Minqing.H.xy)) Minqing.H.resid = residuals(Minqing.H.lm) # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Minqing.H.dbmem = dbmem(Minqing.H.xy, silent=FALSE) # Note the truncation value Minqing.H.dbmem # Examine the positive eigenvalues attributes(Minqing.H.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Minqing.H.mem = as.matrix(Minqing.H.dbmem) dim(Minqing.H.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Minqing.H.xy, nb = attr(Minqing.H.dbmem, "listw")$neighbours) # Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Minqing.H.xy, Minqing.H.dbmem[,1:11]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(3,3)) for(i in 1:9) { ade4::s.value(Minqing.H.xy, Minqing.H.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Minqing.H.dbmem, nrepet = 9999) ) # par(mfrow=c(1,2)) plot(test$obs, attr(Minqing.H.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Minqing.H.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Minqing.H.resid, Minqing.H.mem)) # $r.squared [1] 0.4633128, $adj.r.squared [1] 0.1133612 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Minqing.H.sel = forward.sel(Minqing.H.resid, Minqing.H.mem, adjR2thresh=0.4633128)) # Testing variable 1 # Testing variable 2 # Testing variable 3 # Procedure stopped (alpha criteria): pvalue for variable 3 is 0.100000 (> 0.050000) # variables order R2 R2Cum AdjR2Cum F pval # 1 MEM10 10 0.08323120 0.0832312 0.0492768 2.451264 0.046 # 2 MEM5 5 0.08276055 0.1659917 0.1018373 2.580039 0.044 # The selected dbMEM variables are listed in vector "Minqing.H.sel$order"(column 2 of output table) ( Minqing.H.sel$order ) # Obtain an ordered list of the selected variable numbers: ( Minqing.H.sel.dbmem = sort(Minqing.H.sel$order) ) # Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table): Minqing.H.sel[2,] # 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics: s.value(Minqing.H.xy, Minqing.H.mem[ ,Minqing.H.sel.dbmem]) # This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop: par(mfrow=c(1,1)) for(i in 1:length(Minqing.H.sel.dbmem)) { ade4::s.value(Minqing.H.xy, Minqing.H.mem[ ,Minqing.H.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #", Minqing.H.sel.dbmem[i]), csub=1.5) } # 6. Canonical analysis (RDA): compute the dbMEM spatial model Minqing.H.rda = rda(Minqing.H.resid ~ ., data=as.data.frame(Minqing.H.mem[,Minqing.H.sel.dbmem])) anova(Minqing.H.rda) # model F= 2.5874, Pr = 0.011* anova(Minqing.H.rda, by="axis") #RDA1 (F= 4.4019 , Pr = 0.008**), RDA2 (F= 0.7728, Pr= 0.531) # How many axes of the canonical model are significant at level alpha = 0.05? anova(Minqing.H.rda, by="axis", cutoff=0.05) #RDA1 (F= 4.4019 , Pr = 0.004**), RDA2 (F= 0.7728, Pr= 0.545)