# Load pacakages
library(ade4)
library(adegraphics)
library(adespatial)
library(vegan)
library(MASS)
## read data file "Site3.Nantong2(Div.index).txt"
Nantong2 <- read.table(choose.files(), header=T)
summary(Nantong2)
names(Nantong2)
#
Nantong2.xy <- Nantong2[,c(4,5)]
Nantong2.xy
## model matrix
Nantong2.mm = model.matrix(~ veg.,Nantong2)[,-1]
# DATA
Nantong2.N <- Nantong2[,c(6,8,10,12,14,16,18,20)]
names(Nantong2.N)
summary(Nantong2.N)
colSums(Nantong2.N)
rowSums(Nantong2.N)
#
Nantong2.N.mat <- as.matrix(Nantong2.N)
Nantong2.N.mat
Nantong2.N.hel <- (decostand(Nantong2.N.mat, "hell"))
Nantong2.N.hell <- as.matrix(scale(Nantong2.N.hel, center = TRUE, scale = FALSE))
summary(Nantong2.N.hell)
## pcnm
Nantong2.N.rs <- rowSums(Nantong2.N.mat)/sum(Nantong2.N.mat)
Nantong2.pcn <- pcnm(dist(Nantong2.xy), w = Nantong2.N.rs)
attributes(Nantong2.pcn$vectors)
Nantong2.N.pcnm = as.matrix(Nantong2.pcn$vectors)
## varpart of two explanatory variables; vegetation and euclidean distance
Nantong2.N.var <- varpart(Nantong2.N.hell, Nantong2.mm, Nantong2.N.pcnm)

######## Nantong2 Diversity
Nantong2.H <- Nantong2[,c(9,11,13,15,17)]
names(Nantong2.H)
colSums(Nantong2.H)
rowSums(Nantong2.H)
#
Nantong2.xy <- Nantong2[,c(4,5)]
Nantong2.xy
## model matrix
Nantong2.mm = model.matrix(~ veg.,Nantong2)[,-1]
# DATA matrix
Nantong2.H.mat <- as.matrix(Nantong2.H)
Nantong2.H.mat
Nantong2.H.hel <- (decostand(Nantong2.H.mat, "hell"))
Nantong2.H.hell <- as.matrix(scale(Nantong2.H.hel, center = TRUE, scale = FALSE))
summary(Nantong2.H.hell)
## pcnm
Nantong2.H.rs <- rowSums(Nantong2.H.mat)/sum(Nantong2.H.mat)
Nantong2.pcn <- pcnm(dist(Nantong2.xy), w = Nantong2.H.rs)
attributes(Nantong2.pcn$vectors)
Nantong2.H.pcnm = as.matrix(Nantong2.pcn$vectors)
## varpart of two explanatory variables; vegetation and euclidean distance
Nantong2.H.var <- varpart(Nantong2.H.hell, Nantong2.mm, Nantong2.H.pcnm)
Nantong2.H.var
showvarparts(2)
plot(Nantong2.H.var,bg=1:3)

## rda to show the relative imporatnce between surrouding vegetation and geographical distance
Nantong2.H.rda = rda(Nantong2.H.hell, Nantong2.mm, Nantong2.H.pcnm)
summary(Nantong2.H.rda)
Nantong2.H.rda
# regression coefficients for each explanatory variable
coef(Nantong2.H.rda)
# retrieve R2 from rda results
(R2 <- RsquareAdj(Nantong2.H.rda)$r.squared) # which is "[1] 0.1043813"
# Adjusted R2 retrived from the rda object
(R2adj <- RsquareAdj(Nantong2.H.rda)$adj.r.squared) # which is "[1] -0.03174952"
# Triplot of rda results
plot(Nantong2.H.rda, scaling =2, main = "Triplot RDA for spider fami. Shannon-diversity in Nantong 2 
     ~ surrouding veg.+pcnm",
     xlab = "RDA1(proportion exlpained=12%)", 
     ylab = "RDA2(proportion explained=02%)")
# This plot displays all our entities: sites, species
# explanatory variables as arrows (heads)
# species are in red colour without the arrows
# Let's add arrows to the species without heads to make them different from the output object
Nantong2.H.scor <- scores(Nantong2.H.rda, choices=1:2, scaling=1, display= "sp")
arrows(0, 0, Nantong2.H.scor[,1], Nantong2.H.scor[,2], length=0, lty=1, col="red")
# RDA triplot of the Hellinger-transformed spider shannon diversity data in Nantong 2
# constrained by all environmental variables, scaling 2. 
# The bottom and left-hand scales are for the objects and the
# response variables, the top and right-hand scales 
# are for the explanatory variables
# Let's test the significance of "rda" resluts
anova(Nantong2.H.rda, step=1000) # F=0.8261, Pr(>F)=0.574

#
### HEATMAPS OF BETA DISSIMILARITY IN Nantong2
# HELP WEBSITE
# http://www.molecularecologist.com/2013/08/making-heatmaps-with-r-for-microbiome-analysis/
# 
library(gplots)  # for heatmap.2
library(Heatplus)
# load the vegan package for hierachical clustering if you want to use distance functions not specified in dist.
library(vegan)
# load the RColorBrewer package for better colour options
library(RColorBrewer)
library(pvclust)
library(ComplexHeatmap)
# load Nantong2.Div dataset
row.names <- Nantong2[,c(3)]
Nantong2.N <- Nantong2[, c(6,8,10,12,14,16,18,20)]
names(Nantong2.N)
# colorRampPalette is in the RColorBrewer package.  
#This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours
scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100)
# The most basic of heatmap
heatmap(as.matrix(Nantong2.N), Rowv = NA, Colv = NA, col = scaleyellowred)
#determine the maximum relative abundance for each column
maxab <- apply(Nantong2.N, 2, max)
head(maxab)
# the margins command sets the width of the white space around the plot. 
# The first element is the bottom margin and the second is the right margin
heatmap(as.matrix(Nantong2.N), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2))
# you have to transpose the dataset to get the genera as rows
data.dist.g <- vegdist(t(Nantong2.N), method = "bray")
data.dist.g
# Do complete linkage hierarchical clustering. Other options are 'average' or 'single'. 
# do the complete linkage hierarchical clustering
col.clus <- hclust(data.dist.g, "complete")
col.clus
# make the heatmap with Rowv = NULL
heatmap(as.matrix(Nantong2.N), Rowv = NULL, 
        Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3))

# There are 27 samples in the dataset, 
# so let's assign them randomly to one of two fictional categories
var1 <- round(runif(n = 27, min = 1, max = 2))  # this randomly samples from a uniform distribution and rounds the result to an integer value
# change the 10s and 1s to the names of colours:
var1 <- replace(var1, which(var1 == 1), "deepskyblue")
var1 <- replace(var1, which(var1 == 2), "magenta")
# if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g.
cbind(row.names(Nantong2.N), var1)
heatmap.2(as.matrix(Nantong2.N),Rowv = NULL, Colv = as.dendrogram(col.clus), 
          col = scaleyellowred, RowSideColors = var1, margins = c(10, 3))
# this puts in the annotation for the samples
heatmap.2(as.matrix(Nantong2.N), Rowv = NULL, 
          Colv = as.dendrogram(col.clus), col = scaleyellowred, 
          RowSideColors = var1, margins = c(11, 5), trace = "none", 
          density.info = "none", xlab = "genera", ylab = "Samples",
          main = "Nantong2 Abundance", lhei = c(2, 8))
# pvclust to test the goodness of cluster
result <- pvclust(Nantong2.N, method.dist="correlation", method.hclust="ward.D2", nboot=1000)
result
plot(result)

# Nantong2 Shannon Diversity
Nantong2.Div <- Nantong2[, c(9,11,13,15,17)]
names(Nantong2.Div)
colSums(Nantong2.Div)
# colorRampPalette is in the RColorBrewer package.  
#This creates a colour palette that shades from light yellow to red in RGB space with 100 unique colours
scaleyellowred <- colorRampPalette(c("lightyellow", "red"), space = "rgb")(100)
# The most basic of heatmap
heatmap(as.matrix(Nantong2.Div), Rowv = NA, Colv = NA, col = scaleyellowred)
#determine the maximum relative abundance for each column
maxab <- apply(Nantong2.Div, 2, max)
head(maxab)
# the margins command sets the width of the white space around the plot. 
#The first element is the bottom margin and the second is the right margin
heatmap(as.matrix(Nantong2.Div), Rowv = NA, Colv = NA, col = scaleyellowred, margins = c(10, 2))
# you have to transpose the dataset to get the genera as rows
data.dist.g <- vegdist(t(Nantong2.Div), method = "bray")
data.dist.g
# Do complete linkage hierarchical clustering.
col.clus <- hclust(data.dist.g, "complete")
col.clus
# make the heatmap with Rowv = NULL
heatmap(as.matrix(Nantong2.Div), Rowv = NULL, 
        Colv = as.dendrogram(col.clus), col = scaleyellowred, margins = c(10, 3))

# There are 27 samples in the dataset, 
# so let's assign them randomly to one of two fictional categories
var1 <- round(runif(n = 27, min = 1, max = 2))  # this randomly samples from a uniform distribution and rounds the result to an integer value
# change the 10s and 1s to the names of colours:
var1 <- replace(var1, which(var1 == 1), "deepskyblue")
var1 <- replace(var1, which(var1 == 2), "magenta")
# if this was real data, you would need the order of the values to be in the same order as the sequencing data e.g.
cbind(row.names(Nantong2.Div), var1)
heatmap.2(as.matrix(Nantong2.Div),Rowv = NULL, Colv = as.dendrogram(col.clus), 
          col = scaleyellowred, RowSideColors = var1, margins = c(10, 3))
# this puts in the annotation for the samples
heatmap.2(as.matrix(Nantong2.Div), Rowv = NULL, 
          Colv = as.dendrogram(col.clus), col = scaleyellowred, 
          RowSideColors = var1, margins = c(11, 5), trace = "none", 
          density.info = "none", xlab = "genera", ylab = "Samples",
          main = "Nantong2 Diversity", lhei = c(2, 8))
#
result.d <- pvclust(Nantong2.Div, method.dist="correlation", method.hclust="ward.D2", nboot=1000)
result.d
plot(result.d)

### dbMEM ANALSYSIS FOR Nantong2
#### Load packages
library(lattice)
library(HSAUR)
library(vegan)
library(MASS)
library(graphics)
library(ade4)
library(adegraphics)
library(adespatial)
## Nantong2 XY
Nantong2.N.xy <- Nantong2[,c(4,5)]
Nantong2.N.xy
Nantong2.N <- Nantong2[,c(6,8,10,12,14,16,18,20)]
names(Nantong2.N)
Nantong2.N.N <- as.matrix(Nantong2.N)
Y = as.matrix(Nantong2.N.N)
X = as.matrix(Nantong2.N.xy)
##
Y = scale(Y, center=TRUE, scale= FALSE)
X = scale(X, center= TRUE, scale= FALSE)
# Matrix algebra equation for multivariate regression and calculation of residuals
Nantong2.N.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y)
Nantong2.N.res
##
Nantong2.N.lm = lm(as.matrix(Nantong2.N.N) ~ as.matrix(Nantong2.N.xy))
Nantong2.N.resid = residuals(Nantong2.N.lm)
head(Nantong2.N.resid)
# Check the results
sum(abs(Nantong2.N.res - Nantong2.N.resid)) # The two sets of residuals do not differ
diag(cor(Nantong2.N.res, Nantong2.N.resid)) # The two sets of residuals have correlations of 1
#
Nantong2.N.hel <- decostand(Nantong2.N.N, "hellinger", scale = FALSE, center = TRUE)
# Plot a rough map of the 27 sampling points
plot(Nantong2.N.xy, asp=1)
text(Nantong2.N.xy, labels=rownames(Nantong2.N.xy), pos=3)
#or rotate it
plot(Nantong2.N.xy$y,Nantong2.N.xy$x)
text(Nantong2.N.xy$y,Nantong2.N.xy$x)
# ========
# dbMEM analysis by steps
# 1. Detrend the species data
Nantong2.N.lm = lm(as.matrix(Nantong2.N.hel) ~ as.matrix(Nantong2.N.xy))
Nantong2.N.resid = residuals(Nantong2.N.lm)
Nantong2.N.resid
# 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that
#dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix.
# The default option of argument MEM.autocor is to compute only the eigenfunctions modelling
#positive spatial correlation. However, other options are available for argument MEM.autocor.
Nantong2.N.dbmem = dbmem(Nantong2.N.xy, silent=FALSE) # Note the truncation value
Nantong2.N.dbmem
summary(Nantong2.N.dbmem)
# Examine the positive eigenvalues
attributes(Nantong2.N.dbmem)$values
# Get the dbMEM eigenvectors modelling positive spatial correlation
Nantong2.N.mem = as.matrix(Nantong2.N.dbmem)
dim(Nantong2.N.mem) # How many are there?
# Plot the spatial weighting matrix of this analysis, showing all edges > truncation value
adegraphics::s.label(Nantong2.N.xy, nb = attr(Nantong2.N.dbmem, "listw")$neighbours)
# Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong2.N.xy, Nantong2.N.dbmem[,1:9])

# This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the
#writing of a for loop:
par(mfrow=c(3,3))
for(i in 1:9) {
  ade4::s.value(Nantong2.N.xy, Nantong2.N.mem[ ,i], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #",i), csub=1.5)
}
# Compute# Compute and test the Moran??ㄓs I values associated with the dbMEM eigenfunctions
# One can check that the eigenvalues are perfectly proportional to Moran??ㄓs I
( test <- moran.randtest(Nantong2.N.dbmem, nrepet = 9999) )
par(mfrow=c(1,2))
plot(test$obs, attr(Nantong2.N.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues")

# Plot the decreasing values of Moran??ㄓs I describing the successive dbMEM eigenfunctions.
# The red line is the expected value of Moran??ㄓs I under H0.
plot(test$obs, xlab="MEM rank", ylab="Moran's I")
abline(h=-1/(nrow(Nantong2.N.xy) - 1), col="red")

# 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM
RsquareAdj(rda(Nantong2.N.resid, Nantong2.N.mem)) # $r.squared [1] 0.3349403, $adj.r.squared [1] -0.0171501
# 4. Forward selection: identify the significant dbMEM among those that model positive
#spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??角.
( Nantong2.N.sel = forward.sel(Nantong2.N.resid, Nantong2.N.mem, adjR2thresh=0.3349403))
# Testing variable 1
# Testing variable 2
# Procedure stopped (alpha criteria): pvalue for variable 2 is 0.164000 (> 0.050000)
# variables order         R2      R2Cum  AdjR2Cum        F pval
# 1      MEM7     7 0.08943509 0.08943509 0.0530125 2.455484 0.02
# The selected dbMEM variables are listed in vector "Nantong2.$order"(column 2 of output table)
( Nantong2.N.sel$order )
# Obtain an ordered list of the selected variable numbers:
( Nantong2.N.sel.dbmem = sort(Nantong2.N.sel$order) )
# Look at the adjusted R-square of the mod Look at the adjusted R-square of the model containing the 8 selected dbMEM (line 8 of table):
Nantong2.N.sel[1,]
# 5. Draw maps of the selected eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong2.N.xy, Nantong2.N.mem[ ,Nantong2.N.sel.dbmem])

# This plot can be obtained using function s.value() of ade4 (slower); it involves writing a for loop:
par(mfrow=c(1,1))
for(i in 1:length(Nantong2.N.sel.dbmem)) {
  ade4::s.value(Nantong2.N.xy, Nantong2.N.mem[ ,Nantong2.N.sel.dbmem[i]], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #", Nantong2.N.sel.dbmem[i]), csub=1.5) }
# 6. Canonical analysis (RDA): compute the dbMEM spatial model
Nantong2.N.rda = rda(Nantong2.N.resid ~ ., data=as.data.frame(Nantong2.N.mem[,Nantong2.N.sel.dbmem]))
anova(Nantong2.N.rda) #MODEL F=2.4555, Pr =0.031*
anova(Nantong2.N.rda, by="axis") #RDA1 F= 2.4555 , Pr = 0.025 *

# How many axes of the canonical model are significant at level alpha = 0.05?
anova(Nantong2.N.rda, by="axis", cutoff=0.05) #RDA1 F= 2.4555 , Pr = 0.024 *

# dbMEM for Shannon diveristy in Nantong2
Nantong2.H.xy <- Nantong2[,c(4,5)]
Nantong2.H.xy
Nantong2.H <- Nantong2[,c(9,11,13,15,17)]
names(Nantong2.H)
Nantong2.H.N <- as.matrix(Nantong2.H)
Y = as.matrix(Nantong2.H.N)
X = as.matrix(Nantong2.H.xy)
##
Y = scale(Y, center=TRUE, scale= FALSE)
X = scale(X, center= TRUE, scale= FALSE)
# Matrix algebra equation for multivariate regression and calculation of residuals
Nantong2.H.res = Y - (X %*% solve(t(X)%*%X) %*% t(X) %*% Y)
##
Nantong2.H.lm = lm(as.matrix(Nantong2.H.N) ~ as.matrix(Nantong2.H.xy))
Nantong2.H.resid = residuals(Nantong2.H.lm)
head(Nantong2.H.resid)
# Check the results
sum(abs(Nantong2.H.res - Nantong2.H.resid)) # The two sets of residuals do not differ
diag(cor(Nantong2.H.res, Nantong2.H.resid)) # The two sets of residuals have correlations of 1
#
Nantong2.H.hel <- decostand(Nantong2.H.N, "hellinger", scale = FALSE, center = TRUE)
# Plot a rough map of the 27 sampling points
plot(Nantong2.H.xy, asp=1)
text(Nantong2.H.xy, labels=rownames(Nantong2.H.xy), pos=3)
#or rotate it
plot(Nantong2.H.xy$y,Nantong2.H.xy$x)
text(Nantong2.H.xy$y,Nantong2.H.xy$x)
# ========
# dbMEM analysis by steps
# 1. Detrend the species data
Nantong2.H.lm = lm(as.matrix(Nantong2.H.hel) ~ as.matrix(Nantong2.H.xy))
Nantong2.H.resid = residuals(Nantong2.H.lm)
# 2. Construct the dbMEM eigenfunctions using the dbmem() function. See ?dbmem. Note that
#dbmem() can use the xy coordinates of the sites or a pre-computed geographic distance matrix.
# The default option of argument MEM.autocor is to compute only the eigenfunctions modelling
#positive spatial correlation. However, other options are available for argument MEM.autocor.
Nantong2.H.dbmem = dbmem(Nantong2.H.xy, silent=FALSE) # Note the truncation value
Nantong2.H.dbmem
# Examine the positive eigenvalues
attributes(Nantong2.H.dbmem)$values
# Get the dbMEM eigenvectors modelling positive spatial correlation
Nantong2.H.mem = as.matrix(Nantong2.H.dbmem)
dim(Nantong2.H.mem) # How many are there?
# Plot the spatial weighting matrix of this analysis, showing all edges > truncation value
adegraphics::s.label(Nantong2.H.xy, nb = attr(Nantong2.H.dbmem, "listw")$neighbours)
# Plot maps of the first 9 dbMEM eigenfunctions using the new s.value() function of adegraphics:
s.value(Nantong2.H.xy, Nantong2.H.dbmem[,1:9])
# This plot can also be obtained using function s.value() of ade4 (slower); it involves, however, the
#writing of a for loop:
par(mfrow=c(3,3))
for(i in 1:9) {
  ade4::s.value(Nantong2.H.xy, Nantong2.H.mem[ ,i], addaxes=FALSE, include.origin=FALSE,
                sub=paste("dbMEM #",i), csub=1.5)
}
# Compute# Compute and test the Moran??ㄓs I values associated with the dbMEM eigenfunctions
# One can check that the eigenvalues are perfectly proportional to Moran??ㄓs I
( test <- moran.randtest(Nantong2.H.dbmem, nrepet = 9999) )
#
par(mfrow=c(1,2))
plot(test$obs, attr(Nantong2.H.dbmem, "values"), xlab = "Moran's I", ylab = "Eigenvalues")

# Plot the decreasing values of Moran??ㄓs I describing the successive dbMEM eigenfunctions.
# The red line is the expected value of Moran??ㄓs I under H0.
plot(test$obs, xlab="MEM rank", ylab="Moran's I")
abline(h=-1/(nrow(Nantong2.H.xy) - 1), col="red")

# 3. Compute the R2 and the R2 adjusted for the global model that includes all positive dbMEM
RsquareAdj(rda(Nantong2.H.resid, Nantong2.H.mem)) # $r.squared [1] 0.2286687, $adj.r.squared [1] -0.1796832
# 4. Forward selection: identify the significant dbMEM among those that model positive
#spatial correlation. Use the adjusted R2 above as value for stopping criterion ???adjR2thresh??角.
( Nantong2.H.sel = forward.sel(Nantong2.H.resid, Nantong2.H.mem, adjR2thresh=0.2286687))
# Procedure stopped (alpha criteria): pvalue for variable 1 is 0.089000 (> 0.050000)