# Load pacakages library(ade4) library(adegraphics) library(adespatial) library(vegan) library(MASS) ## read data file "Site3.Nantong2(Div.index).txt" Nantong2 <- read.table(choose.files(), header=T) summary(Nantong2) names(Nantong2) # Nantong2.xy <- Nantong2[,c(4,5)] Nantong2.xy ## model matrix Nantong2.mm = model.matrix(~ veg.,Nantong2)[,-1] # DATA Nantong2.N <- Nantong2[,c(6,8,10,12,14,16,18,20)] names(Nantong2.N) summary(Nantong2.N) colSums(Nantong2.N) rowSums(Nantong2.N) # Nantong2.N.mat <- as.matrix(Nantong2.N) Nantong2.N.mat Nantong2.N.hel <- (decostand(Nantong2.N.mat, "hell")) Nantong2.N.hell <- as.matrix(scale(Nantong2.N.hel, center = TRUE, scale = FALSE)) summary(Nantong2.N.hell) ## pcnm Nantong2.N.rs <- rowSums(Nantong2.N.mat)/sum(Nantong2.N.mat) Nantong2.pcn <- pcnm(dist(Nantong2.xy), w = Nantong2.N.rs) attributes(Nantong2.pcn$vectors) Nantong2.N.pcnm = as.matrix(Nantong2.pcn$vectors) ## varpart of two explanatory variables; vegetation and euclidean distance Nantong2.N.var <- varpart(Nantong2.N.hell, Nantong2.mm, Nantong2.N.pcnm) ######## Nantong2 Diversity Nantong2.H <- Nantong2[,c(9,11,13,15,17)] names(Nantong2.H) colSums(Nantong2.H) rowSums(Nantong2.H) # Nantong2.xy <- Nantong2[,c(4,5)] Nantong2.xy ## model matrix Nantong2.mm = model.matrix(~ veg.,Nantong2)[,-1] # DATA matrix Nantong2.H.mat <- as.matrix(Nantong2.H) Nantong2.H.mat Nantong2.H.hel <- (decostand(Nantong2.H.mat, "hell")) Nantong2.H.hell <- as.matrix(scale(Nantong2.H.hel, center = TRUE, scale = FALSE)) summary(Nantong2.H.hell) ## pcnm Nantong2.H.rs <- rowSums(Nantong2.H.mat)/sum(Nantong2.H.mat) Nantong2.pcn <- pcnm(dist(Nantong2.xy), w = Nantong2.H.rs) attributes(Nantong2.pcn$vectors) Nantong2.H.pcnm = as.matrix(Nantong2.pcn$vectors) ## varpart of two explanatory variables; vegetation and euclidean distance Nantong2.H.var <- varpart(Nantong2.H.hell, Nantong2.mm, Nantong2.H.pcnm) Nantong2.H.var showvarparts(2) plot(Nantong2.H.var,bg=1:3) ## rda to show the relative imporatnce between surrouding vegetation and geographical distance Nantong2.H.rda = rda(Nantong2.H.hell, Nantong2.mm, Nantong2.H.pcnm) summary(Nantong2.H.rda) Nantong2.H.rda # regression coefficients for each explanatory variable coef(Nantong2.H.rda) # retrieve R2 from rda results (R2 <- RsquareAdj(Nantong2.H.rda)$r.squared) # which is "[1] 0.1043813" # Adjusted R2 retrived from the rda object (R2adj <- RsquareAdj(Nantong2.H.rda)$adj.r.squared) # which is "[1] -0.03174952" # Triplot of rda results plot(Nantong2.H.rda, scaling =2, main = "Triplot RDA for spider fami. Shannon-diversity in Nantong 2 ~ surrouding veg.+pcnm", xlab = "RDA1(proportion exlpained=12%)", ylab = "RDA2(proportion explained=02%)") # This plot displays all our entities: sites, species # explanatory variables as arrows (heads) # species are in red colour without the arrows # Let's add arrows to the species without heads to make them different from the output object Nantong2.H.scor <- scores(Nantong2.H.rda, choices=1:2, scaling=1, display= "sp") arrows(0, 0, Nantong2.H.scor[,1], Nantong2.H.scor[,2], length=0, lty=1, col="red") # RDA triplot of the Hellinger-transformed spider shannon diversity data in Nantong 2 # constrained by all environmental variables, scaling 2. # The bottom and left-hand scales are for the objects and the # response variables, the top and right-hand scales # are for the explanatory variables # Let's test the significance of "rda" resluts anova(Nantong2.H.rda, step=1000) # F=0.8261, Pr(>F)=0.574 # ### HEATMAPS OF BETA DISSIMILARITY IN Nantong2 # HELP WEBSITE # http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/ # library(gplots) # for heatmap.2 library(Heatplus) # load the vegan package for hierachical clustering if you want to use distance functions not specified in dist. library(vegan) # load the RColorBrewer package for better colour options library(RColorBrewer) library(pvclust) library(ComplexHeatmap) # load Nantong2.Div dataset row.names <- Nantong2[,c(3)] Nantong2.N <- Nantong2[, c(6,8,10,12,14,16,18,20)] names(Nantong2.N) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Nantong2.N), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Nantong2.N, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. # The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Nantong2.N), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Nantong2.N), method = "bray") data.dist.g # Do complete linkage hierarchical clustering. Other options are 'average' or 'single'. # do the complete linkage hierarchical clustering col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Nantong2.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 27 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 27, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Nantong2.N), var1) heatmap.2(as.matrix(Nantong2.N),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Nantong2.N), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Nantong2 Abundance", lhei = c(2, 8)) # pvclust to test the goodness of cluster result <- pvclust(Nantong2.N, method.dist="correlation", method.hclust="ward.D2", nboot=1000) result plot(result) # Nantong2 Shannon Diversity Nantong2.Div <- Nantong2[, c(9,11,13,15,17)] names(Nantong2.Div) colSums(Nantong2.Div) # colorRampPalette is in the RColorBrewer package. #This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100) # The most basic of heatmap heatmap(as.matrix(Nantong2.Div), Rowv = NA, Colv = NA, col = scaleyellowred) #determine the maximum relative abundance for each column maxab <- apply(Nantong2.Div, 2, max) head(maxab) # the margins command sets the width of the white space around the plot. #The first element is the bottom margin and the second is the right margin heatmap(as.matrix(Nantong2.Div), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2)) # you have to transpose the dataset to get the genera as rows data.dist.g <- vegdist(t(Nantong2.Div), method = "bray") data.dist.g # Do complete linkage hierarchical clustering. col.clus <- hclust(data.dist.g, "complete") col.clus # make the heatmap with Rowv = NULL heatmap(as.matrix(Nantong2.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3)) # There are 27 samples in the dataset, # so let's assign them randomly to one of two fictional categories var1 <- round(runif(n = 27, min = 1, max = 2)) # this randomly samples from a uniform distribution and rounds the result to an integer value # change the 10s and 1s to the names of colours: var1 <- replace(var1, which(var1 == 1), "deepskyblue") var1 <- replace(var1, which(var1 == 2), "magenta") # if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g. cbind(row.names(Nantong2.Div), var1) heatmap.2(as.matrix(Nantong2.Div),Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(10, 3)) # this puts in the annotation for the samples heatmap.2(as.matrix(Nantong2.Div), Rowv = NULL, Colv = as.dendrogram(col.clus), col = scaleyellowred, RowSideColors = var1, margins = c(11, 5), trace = "none", density.info = "none", xlab = "genera", ylab = "Samples", main = "Nantong2 Diversity", lhei = c(2, 8)) # result.d <- pvclust(Nantong2.Div, method.dist="correlation", method.hclust="ward.D2", nboot=1000) result.d plot(result.d) ### dbMEM ANALSYSIS FOR Nantong2 #### Load packages library(lattice) library(HSAUR) library(vegan) library(MASS) library(graphics) library(ade4) library(adegraphics) library(adespatial) ## Nantong2 XY Nantong2.N.xy <- Nantong2[,c(4,5)] Nantong2.N.xy Nantong2.N <- Nantong2[,c(6,8,10,12,14,16,18,20)] names(Nantong2.N) Nantong2.N.N <- as.matrix(Nantong2.N) Y = as.matrix(Nantong2.N.N) X = as.matrix(Nantong2.N.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Nantong2.N.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) Nantong2.N.res ## Nantong2.N.lm = lm(as.matrix(Nantong2.N.N) ~ as.matrix(Nantong2.N.xy)) Nantong2.N.resid = residuals(Nantong2.N.lm) head(Nantong2.N.resid) # Check the results sum(abs(Nantong2.N.res - Nantong2.N.resid)) # The two sets of residuals do not differ diag(cor(Nantong2.N.res, Nantong2.N.resid)) # The two sets of residuals have correlations of 1 # Nantong2.N.hel <- decostand(Nantong2.N.N, "hellinger", scale = FALSE, center = TRUE) # Plot a rough map of the 27 sampling points plot(Nantong2.N.xy, asp=1) text(Nantong2.N.xy, labels=rownames(Nantong2.N.xy), pos=3) #or rotate it plot(Nantong2.N.xy$y,Nantong2.N.xy$x) text(Nantong2.N.xy$y,Nantong2.N.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Nantong2.N.lm = lm(as.matrix(Nantong2.N.hel) ~ as.matrix(Nantong2.N.xy)) Nantong2.N.resid = residuals(Nantong2.N.lm) Nantong2.N.resid # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Nantong2.N.dbmem = dbmem(Nantong2.N.xy, silent=FALSE) # Note the truncation value Nantong2.N.dbmem summary(Nantong2.N.dbmem) # Examine the positive eigenvalues attributes(Nantong2.N.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Nantong2.N.mem = as.matrix(Nantong2.N.dbmem) dim(Nantong2.N.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Nantong2.N.xy, nb = attr(Nantong2.N.dbmem, "listw")$neighbours) # Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong2.N.xy, Nantong2.N.dbmem[,1:9]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(3,3)) for(i in 1:9) { ade4::s.value(Nantong2.N.xy, Nantong2.N.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Nantong2.N.dbmem, nrepet = 9999) ) par(mfrow=c(1,2)) plot(test$obs, attr(Nantong2.N.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Nantong2.N.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Nantong2.N.resid, Nantong2.N.mem)) # $r.squared [1] 0.3349403, $adj.r.squared [1] -0.0171501 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Nantong2.N.sel = forward.sel(Nantong2.N.resid, Nantong2.N.mem, adjR2thresh=0.3349403)) # Testing variable 1 # Testing variable 2 # Procedure stopped (alpha criteria): pvalue for variable 2 is 0.164000 (> 0.050000) # variables order R2 R2Cum AdjR2Cum F pval # 1 MEM7 7 0.08943509 0.08943509 0.0530125 2.455484 0.02 # The selected dbMEM variables are listed in vector "Nantong2.$order"(column 2 of output table) ( Nantong2.N.sel$order ) # Obtain an ordered list of the selected variable numbers: ( Nantong2.N.sel.dbmem = sort(Nantong2.N.sel$order) ) # Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table): Nantong2.N.sel[1,] # 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong2.N.xy, Nantong2.N.mem[ ,Nantong2.N.sel.dbmem]) # This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop: par(mfrow=c(1,1)) for(i in 1:length(Nantong2.N.sel.dbmem)) { ade4::s.value(Nantong2.N.xy, Nantong2.N.mem[ ,Nantong2.N.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #", Nantong2.N.sel.dbmem[i]), csub=1.5) } # 6. Canonical analysis (RDA): compute the dbMEM spatial model Nantong2.N.rda = rda(Nantong2.N.resid ~ ., data=as.data.frame(Nantong2.N.mem[,Nantong2.N.sel.dbmem])) anova(Nantong2.N.rda) #MODEL F=2.4555, Pr =0.031* anova(Nantong2.N.rda, by="axis") #RDA1 F= 2.4555 , Pr = 0.025 * # How many axes of the canonical model are significant at level alpha = 0.05? anova(Nantong2.N.rda, by="axis", cutoff=0.05) #RDA1 F= 2.4555 , Pr = 0.024 * # dbMEM for Shannon diveristy in Nantong2 Nantong2.H.xy <- Nantong2[,c(4,5)] Nantong2.H.xy Nantong2.H <- Nantong2[,c(9,11,13,15,17)] names(Nantong2.H) Nantong2.H.N <- as.matrix(Nantong2.H) Y = as.matrix(Nantong2.H.N) X = as.matrix(Nantong2.H.xy) ## Y = scale(Y, center=TRUE, scale= FALSE) X = scale(X, center= TRUE, scale= FALSE) # Matrix algebra equation for multivariate regression and calculation of residuals Nantong2.H.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y) ## Nantong2.H.lm = lm(as.matrix(Nantong2.H.N) ~ as.matrix(Nantong2.H.xy)) Nantong2.H.resid = residuals(Nantong2.H.lm) head(Nantong2.H.resid) # Check the results sum(abs(Nantong2.H.res - Nantong2.H.resid)) # The two sets of residuals do not differ diag(cor(Nantong2.H.res, Nantong2.H.resid)) # The two sets of residuals have correlations of 1 # Nantong2.H.hel <- decostand(Nantong2.H.N, "hellinger", scale = FALSE, center = TRUE) # Plot a rough map of the 27 sampling points plot(Nantong2.H.xy, asp=1) text(Nantong2.H.xy, labels=rownames(Nantong2.H.xy), pos=3) #or rotate it plot(Nantong2.H.xy$y,Nantong2.H.xy$x) text(Nantong2.H.xy$y,Nantong2.H.xy$x) # ======== # dbMEM analysis by steps # 1. Detrend the species data Nantong2.H.lm = lm(as.matrix(Nantong2.H.hel) ~ as.matrix(Nantong2.H.xy)) Nantong2.H.resid = residuals(Nantong2.H.lm) # 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that #dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix. # The default option of argument MEM.autocor is to compute only the eigenfunctions modelling #positive spatial correlation. However, other options are available for argument MEM.autocor. Nantong2.H.dbmem = dbmem(Nantong2.H.xy, silent=FALSE) # Note the truncation value Nantong2.H.dbmem # Examine the positive eigenvalues attributes(Nantong2.H.dbmem)$values # Get the dbMEM eigenvectors modelling positive spatial correlation Nantong2.H.mem = as.matrix(Nantong2.H.dbmem) dim(Nantong2.H.mem) # How many are there? # Plot the spatial weighting matrix of this analysis, showing all edges > truncation value adegraphics::s.label(Nantong2.H.xy, nb = attr(Nantong2.H.dbmem, "listw")$neighbours) # Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics: s.value(Nantong2.H.xy, Nantong2.H.dbmem[,1:9]) # This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the #writing of a for loop: par(mfrow=c(3,3)) for(i in 1:9) { ade4::s.value(Nantong2.H.xy, Nantong2.H.mem[ ,i], addaxes=FALSE, include.origin=FALSE, sub=paste("dbMEM #",i), csub=1.5) } # Compute# Compute and test the Moran??£¤s I values associated with the dbMEM eigenfunctions # One can check that the eigenvalues are perfectly proportional to Moran??£¤s I ( test <- moran.randtest(Nantong2.H.dbmem, nrepet = 9999) ) # par(mfrow=c(1,2)) plot(test$obs, attr(Nantong2.H.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues") # Plot the decreasing values of Moran??£¤s I describing the successive dbMEM eigenfunctions. # The red line is the expected value of Moran??£¤s I under H0. plot(test$obs, xlab="MEM rank", ylab="Moran's I") abline(h=-1/(nrow(Nantong2.H.xy) - 1), col="red") # 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM RsquareAdj(rda(Nantong2.H.resid, Nantong2.H.mem)) # $r.squared [1] 0.2286687, $adj.r.squared [1] -0.1796832 # 4. Forward selection: identify the significant dbMEM among those that model positive #spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??¨¤. ( Nantong2.H.sel = forward.sel(Nantong2.H.resid, Nantong2.H.mem, adjR2thresh=0.2286687)) # Procedure stopped (alpha criteria): pvalue for variable 1 is 0.089000 (> 0.050000)