---
title: "Analyses_amazon_soil_metabarcoding"
author: "Camila Ritter, Alex Zizka, and Fabian Roger"
date: "28 Jun 2017"
output: word_document
---

##################################################
###################Install Packages###############
##################################################

# install.packages ("dplyr", dependencies = T)
# install.packages ("tidyverse", dependencies = T)
# install.packages ("ecodist", dependencies = T)
# install.packages("ggplot2", dependencies = T)
# install.packages("broom", dependencies = T)
# install.packages ("gridExtra", dependencies = T)
# install.packages ("ggfortify", dependencies = T)
# install.packages ("vegan", dependencies = T)
# install.packages ("geosphere", dependencies = T)
# install.packages ("viridis", dependencies = T)
# install.packages ("Hmisc", dependencies = T)
# install.packages("mice", dependencies = T)
# install.packages("INLA", repos="https://inla.r-inla-download.org/R/stable", dep=TRUE)
# install.packages("gstat", dependencies = T)
# install.packages("lazyeval")
# install.packages("splancs", dependencies = T)
# source("https://bioconductor.org/biocLite.R")
# biocLite("DESeq2")
# install.packages("entropart")
# biocLite("phyloseq")

###########Library packages

#' if some packages (at the bottom of the list) fail to load with 
#' "maximal number of DLLs reached" you have to set a higher value for the 
#' R_MAX_NUM_DLLS=150 variable in the Renviron file. This is because some packages 
#' loaded in this script have a lot of dependencies and the total number of loaded packages 
#' becomes higher than allowed by default
#' 
#' you can find the Renviron file under R.home() /etc/
#' setting R_MAX_NUM_DLLS=200 (pasting this into the Renviron file, saving and restarting R) 
#' solves the issue for me.
#' 
#' R version 3.4.3 (2017-11-30)
#' Platform: x86_64-apple-darwin15.6.0 (64-bit)
#' Running under: macOS High Sierra 10.13.3
#' 

library (dplyr)
library(tidyverse)
library(broom)
library(gridExtra)
library(ggfortify)
library(ggplot2)
library(vegan)
library(geosphere)
library(viridis)
library(Hmisc)
library(mice) 
library(gplots)
library(stringr)
library(stringi)
library(RColorBrewer)
library(INLA)
library(sp)
library(gstat)
library(splancs)
library(broom)
library(ecodist)
library(DESeq2)
library(entropart)
library(phyloseq)


### make directory to store output

dir.create("output")

######################################################
######################Import data#####################
######################################################

####Prokaryote (16S)#######

#Read in metadata
meta <- read.csv("peerj-26029-metadata.csv")

#################Richness 16S##################################

#read in 16S otu table
OTU_16S <- read.csv("peerj-26029-16S_all.csv")
head(OTU_16S)

# transform to matrix
OTU_16S_m <- as.matrix(OTU_16S[,-1])

dimnames(OTU_16S_m) <- list(OTU_16S$OTU.ID, colnames(OTU_16S)[-1])

OTU_16S_mR <-  rrarefy(t(OTU_16S_m), min(colSums(OTU_16S_m)))

rownames(meta) <- meta$id

#Abundance data

PS_16S <- phyloseq(otu_table(OTU_16S_m, taxa_are_rows = TRUE),
                   sample_data(meta))


PS_16S_d2 <- phyloseq_to_deseq2(PS_16S,~ kind)

VST_16S <- varianceStabilizingTransformation(PS_16S_d2, blind = T) %>% assay()

VST_16S[VST_16S < 0] <- 0

VST_16S <- VST_16S[-c(which(rowSums(VST_16S) == 0)), ]
head(VST_16S)


###################Abundance 18S###################################

#read in OTU table
OTU_18S <- read.csv("peerj-26029-18S_all.csv")
head(OTU_18S)

# transform to matrix
OTU_18S_m <- as.matrix(OTU_18S[,-1])

dimnames(OTU_18S_m) <- list(OTU_18S$X.OTU.ID, colnames(OTU_18S)[-1])

OTU_18S_mR <-  rrarefy(t(OTU_18S_m), min(colSums(OTU_18S_m)))

rownames(meta) <- meta$id

#Abundance data

PS_18S <- phyloseq(otu_table(OTU_18S_m, taxa_are_rows = TRUE),
                   sample_data(meta))


PS_18S_d2 <- phyloseq_to_deseq2(PS_18S,~ kind)

VST_18S <- varianceStabilizingTransformation(PS_18S_d2, blind = T) %>% assay()

VST_18S[VST_18S < 0] <- 0

VST_18S <- VST_18S[-c(which(rowSums(VST_18S) == 0)), ]
head(VST_18S)

##############################################################
#######################Making tables##########################
##############################################################



#########################################################################
################Making the abundance matrix#######################
#########################################################################

#transform to presence/absence matrix

#16S
com16s <- VST_16S
com16s <- t(com16s)

#export
write.csv(com16s , "output_abu/otu_16s_abundancecsv")


#18S
com18s  <- VST_18S
com18s <- t(com18s)
#com18s <- as.data.frame(com18s)
#head(com18s)

#export
write.csv(com18s , "output_abu/otu_18s_abundance.csv")



###########Making a single table with OTUs S and metadata#############

#Richness + geographic + edaphic soil data

#add metadata
dat <- meta

#scale all numeric variables 
dat <- mutate_at(dat, .vars = c(8:27), funs(scale(.) %>% as.vector))

#capitalize colnames 
names(dat) <- capitalize(names(dat))


#16S

MC16s <- MetaCommunity(OTU_16S_m)

div_16S <- AlphaDiversity(MC16s, q = 1)$Communities %>% 
  data.frame(effN = .) %>% 
  rownames_to_column(var = "id")

#test the correlation

vegan::renyi(t(OTU_16S_m), hill = TRUE, scales = 1) %>% 
  data.frame(effN_v = .) %>% 
  rownames_to_column(var = "id") %>% 
  left_join(div_16S) %>% 
  ggplot(aes(x = effN, y = effN_v))+
  geom_point()

rrOTU_16S_m <- renyi(t(OTU_16S_m), hill = TRUE, scales = 1)
rrOTU_16S_m <-  as.vector (rrOTU_16S_m)
cor16S <- cbind(div_16S,rrOTU_16S_m)

cor(cor16S$effN, cor16S$rrOTU_16S_m)
  
  OTUS_16SL <- filter(div_16S, substr(id, 1,1) == "L") %>% #filter litter samples
  dplyr::rename(OTUS_16SL = effN) %>% 
  mutate(id_short = substr(id, 2, nchar(id)))  %>% #id withou litter soil prefix
    dplyr::select(-id)

OTUS_16SS <- filter(div_16S, substr(id, 1,1) == "S") %>% #filter soil samples
  dplyr::rename(OTUS_16SS = effN) %>% 
  mutate(id_short = substr(id, 2, nchar(id))) %>% #id withou litter soil prefix
  dplyr::select(-id)
  
  
#18S

MC18s <- MetaCommunity(OTU_18S_m)

div_18S <- AlphaDiversity(MC18s, q = 1)$Communities %>% 
  data.frame(effN = .) %>% 
  rownames_to_column(var = "id") 
 
#test the correlation

vegan::renyi(t(OTU_18S_m), hill = TRUE, scales = 1) %>% 
  data.frame(effN_v = .) %>% 
  rownames_to_column(var = "id") %>% 
  left_join(div_18S) %>% 
  ggplot(aes(x = effN, y = effN_v))+
  geom_point()

rrOTU_18S_m <- renyi(t(OTU_18S_m), hill = TRUE, scales = 1)
rrOTU_18S_m <-  as.vector (rrOTU_18S_m)
cor18S <- cbind(div_18S,rrOTU_18S_m)

cor(cor18S$effN, cor18S$rrOTU_18S_m)

OTUS_18SL <- filter(div_18S, substr(id, 1,1) == "L") %>% #filter litter samples
  dplyr::rename(OTUS_18SL = effN ) %>% 
  mutate(id_short = substr(id, 2, nchar(id))) %>% #id withou litter soil prefix
  dplyr::select(-id)

OTUS_18SS <- filter(div_18S, substr(id, 1,1) == "S") %>% #filter soil samples
  dplyr::rename(OTUS_18SS = effN) %>% 
  mutate(id_short = substr(id, 2, nchar(id)))  %>% #id withou litter soil prefix
  dplyr::select(-id)
head(OTUS_18SL)
nrow(OTUS_18SS)

head(dat)
dat2 <- dat[1:39,]
bio2 <- dat2 %>% 
  mutate_at( .vars=c(1:5), as.character) %>% 
  mutate(id_short = substr(Id, 2, nchar(Id))) %>% 
  dplyr::select(-Id, -Kind)

bio2 <- left_join(bio2, OTUS_18SL, by = "id_short")
bio2 <- left_join(bio2, OTUS_18SS, by = "id_short")
bio2 <- left_join(bio2, OTUS_16SL, by = "id_short")
bio2 <- left_join(bio2, OTUS_16SS, by = "id_short") 

head(bio2)
nrow(bio2)

#write.csv(bio2, "bio2.csv")
#########################################################
#######Hypothesis 1, soil variables effect###############
#########################################################
soil_phy_che <- read.csv("peerj-26029-metadata.csv")
head (soil_phy_che)
ncol(soil_phy_che)

 
###Guess values to missing data
 tempData <- soil_phy_che %>% 
   dplyr::select(8:27) %>% #all the soil columns
   distinct() %>% 
   mice(m=5, maxit=50, meth='pmm')
 
 completedData <- complete(tempData,1) #takes our permutation 1 from the list
 completed.metadata <- as.data.frame(completedData) #makes it a dataframe that you can use further downstream
 head (completed.metadata)
 completed.metadata <- data.frame(scale(completed.metadata))
 
 metadata <-cbind(bio2[1:5], completed.metadata)
 
# metadata <- metadata %>% 
#   mutate(id_short = substr(id, 2, nchar(id))) %>% 
#   left_join(completed.metadata)
# 
# head (metadata)


#################PCAs
head (metadata)

#Physical

Physical <- c("coarse_sand_fraction", "slim_sand_fraction", 
"total_sand_fraction",     "silt",     "clay")

Physical.pca <- prcomp(metadata[,Physical], center = F, scale. = F)

screeplot(Physical.pca)
summary (Physical.pca)
Physical <- Physical.pca$x[,1]


#Chemical

Chemical <- c("p", "k", "na", "ca", "mg",
 "al", "h_al", "sb", "t", "uppercase_t", "v", "m")

Chemical.pca <- prcomp(metadata[,Chemical], center = F, scale. = F)

screeplot(Chemical.pca)
summary (Chemical.pca)
Chemical <- Chemical.pca$x[,1]
 
##########################################
################ Make table with PCA scores
###########################################
data.sem <- cbind (metadata[,c(1:5,11,12)],Physical, Chemical)%>% 
mutate(id_short = paste(Locality,Habitat,Plot, sep = ""))
head (data.sem)

####################################################
#                                                  #
###Community structure and soil characteristics ####
####################################################

# multiple regresion with dissimilarity matrix

#soil
head (com16s)
s16.2 <- com16s[substr(rownames(com16s), 1,1) == "S",]

head(s16.2)
tail(s16.2)

jac.dist <- vegdist((s16.2), method = "jaccard")
latlong <- cbind(data.sem$Long,  data.sem$Lat)
#geo.dist1 <- vegdist((latlong), method = "euclidean")
geo.dist <- distm(x = latlong) / 1000
geo.dist <- geo.dist[lower.tri(geo.dist)]

glm16ss <- MRM(jac.dist ~ as.vector(geo.dist) + dist(Chemical) + dist(Physical) + 
                 dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm16ss

#litter
s16.3 <- com16s[substr(rownames(com16s), 1,1) == "L",]
jac.dist <- vegdist((s16.3), method = "jaccard")


glm16sl <- MRM(jac.dist ~ geo.dist + dist(Chemical) + dist(Physical) +
                 dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm16sl


#18S

#soil

s18.2 <- com18s[substr(rownames(com18s), 1, 1) == "S", ]
jac.dist <- vegdist((s18.2), method = "jaccard")

glm18ss <- MRM(jac.dist ~ geo.dist+ dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm18ss

#litter

s18.3 <- com18s[substr(rownames(com18s), 1,1) == "L",]
jac.dist <- vegdist((s18.3), method = "jaccard")

glm18sl <- MRM(jac.dist ~ geo.dist + dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm18sl

################################################################
##################Variance partitioning#########################
################################################################

#####Prokaryote (16S) 

#soil
S16.jac <- vegdist((s16.2), type = 'Jaccard', binary = TRUE)
mod1 <- varpart(S16.jac,  ~  data.sem$Chemical, ~data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)
mod1


#litter
S16.jac2 <- vegdist((s16.3), type = 'Jaccard', binary = TRUE)
mod2 <- varpart(S16.jac2, ~ data.sem$Chemical, ~data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)
mod2


#####Eukaryote (18S)

#soil
S18.jac <- vegdist((s18.2), type = 'Jaccard', binary = TRUE)
mod3 <- varpart(S18.jac, ~ data.sem$Chemical, ~data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)
mod3


#litter
S18.jac2 <- vegdist((s18.3), type = 'Jaccard', binary = TRUE)
mod4 <- varpart(S18.jac2, ~ data.sem$Chemical, ~data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)
mod4

#############Figure 3################

pdf('output_abu/FigS3A_16S_soil.pdf')
plot(mod1)
dev.off()

pdf('output_abu/FigS3B_16S_litter.pdf')
plot(mod2)
dev.off()

pdf('output_abu/FigS3C_18S_soil.pdf')
plot(mod3)
dev.off()

pdf('output_abu/FigS3D_18S_litter.pdf')
plot(mod4)
dev.off()

###########################################################
#                                                         #
###   GLM soil characteristics vs. richness   #############
#                                                         #
###########################################################


#########################################
###############GLM########################
###########################################

##16S

####soil

head(data.sem)

head(bio2)
ncol(bio2)
richness <- c("id_short", "OTUS_18SL", "OTUS_18SS", "OTUS_16SL", "OTUS_16SS")
richness2 <- bio2[,richness]

data.sem.glm <- merge(data.sem, richness2, by = "id_short")
write.csv(data.sem.glm, "output_abu/data.sem.glm.csv")
spatialm <- inla(OTUS_16SS ~ ph + c + Chemical + Physical , family = "poisson", control.predictor = list(compute = T),data = data.sem.glm)
summary(spatialm)
ExpY <- spatialm$summary.fitted.values[,"mean"]
E1 <- (data.sem.glm$OTUS_16SS - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length (spatialm$names.fixed)
Dispersion <- sum (E1^2) / (N - p)

head(data.sem.glm)
data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")
				
data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame (E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")


plot (data.sem.glm$E1 ~ data.sem.glm$Long)


Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort (D), y = (1:length(D))/length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex =1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern (mesh, alpha = 2)

w.index <- inla.spde.make.index(name = 'w',
n.spde = spde$n.spde, n.group = 1, n.repl = 1)

N <- nrow(data.sem.glm)
X <- data.frame(Intercept = rep(1, N),
ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
 Physical.std = data.sem.glm$Physical)

X <- as.data.frame (X)

stk2 <- inla.stack(tag = "Fit", data = list(y = data.sem.glm$OTUS_16SS), 
			A = list(A2, 1), effects = list(w = w.index, X = X))

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std 

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde) 

I216ss <- inla (spatialm2, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I216ss)

I316ss <- inla (spatialm3, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I316ss)

dic <- c(I216ss$dic$dic,I316ss$dic$dic)
waic <- c(I216ss$waic$waic,I316ss$waic$waic)
Z16ss <- cbind(dic, waic)
rownames(Z16ss) <- c("Poisson GLM", "Poisson GLM + SPDE")

##litter

spatialm <- inla(OTUS_16SL ~ ph + c + Chemical + Physical, family = "poisson", control.predictor = list(compute = T),data = data.sem.glm)
summary(spatialm)
ExpY <- spatialm$summary.fitted.values[,"mean"]
E1 <- (data.sem.glm$OTUS_16SL - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length (spatialm$names.fixed)
Dispersion <- sum (E1^2) / (N - p)

head(data.sem.glm)
data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")
				
data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame (E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")


plot (data.sem.glm$E1 ~ data.sem.glm$Long)


Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort (D), y = (1:length(D))/length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex =1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern (mesh, alpha = 2)

w.index <- inla.spde.make.index(name = 'w',
n.spde = spde$n.spde, n.group = 1, n.repl = 1)

N <- nrow(data.sem.glm)
X <- data.frame(Intercept = rep(1, N),
ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
 Physical.std = data.sem.glm$Physical)

X <- as.data.frame (X)

stk2 <- inla.stack(tag = "Fit", data = list(y = data.sem.glm$OTUS_16SL), 
			A = list(A2, 1), effects = list(w = w.index, X = X))

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std 

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde) 

I216sl <- inla (spatialm2, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I216sl)

I316sl <- inla (spatialm3, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I316sl)

dic <- c(I216sl$dic$dic,I316sl$dic$dic)
waic <- c(I216sl$waic$waic,I316sl$waic$waic)
Z16sl <- cbind(dic, waic)
rownames(Z16sl) <- c("Poisson GLM", "Poisson GLM + SPDE")
##18S

####soil

spatialm <- inla(OTUS_18SS~ ph + c + Chemical + Physical , family = "poisson", control.predictor = list(compute = T),data = data.sem.glm)
summary(spatialm)
ExpY <- spatialm$summary.fitted.values[,"mean"]
E1 <- (data.sem.glm$OTUS_18SS - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length (spatialm$names.fixed)
Dispersion <- sum (E1^2) / (N - p)

head(data.sem.glm)
data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")
				
data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame (E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")


plot (data.sem.glm$E1 ~ data.sem.glm$Long)


Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort (D), y = (1:length(D))/length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex =1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern (mesh, alpha = 2)

w.index <- inla.spde.make.index(name = 'w',
n.spde = spde$n.spde, n.group = 1, n.repl = 1)

N <- nrow(data.sem.glm)
X <- data.frame(Intercept = rep(1, N),
ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
 Physical.std = data.sem.glm$Physical)

X <- as.data.frame (X)

stk2 <- inla.stack(tag = "Fit", data = list(y = data.sem.glm$OTUS_18SS), 
			A = list(A2, 1), effects = list(w = w.index, X = X))

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std 

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde) 

I218ss <- inla (spatialm2, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I218ss)

I318ss <- inla (spatialm3, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I318ss)

dic <- c(I218ss$dic$dic,I318ss$dic$dic)
waic <- c(I218ss$waic$waic,I318ss$waic$waic)
Z18ss <- cbind(dic, waic)
rownames(Z18ss) <- c("Poisson GLM", "Poisson GLM + SPDE")


##litter

spatialm <- inla(OTUS_18SL~ ph + c + Chemical + Physical , family = "poisson", control.predictor = list(compute = T),data = data.sem.glm)
summary(spatialm)
ExpY <- spatialm$summary.fitted.values[,"mean"]
E1 <- (data.sem.glm$OTUS_18SL - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length (spatialm$names.fixed)
Dispersion <- sum (E1^2) / (N - p)

head(data.sem.glm)
data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")
				
data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame (E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")


plot (data.sem.glm$E1 ~ data.sem.glm$Long)

Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort (D), y = (1:length(D))/length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex =1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern (mesh, alpha = 2)

w.index <- inla.spde.make.index(name = 'w',
n.spde = spde$n.spde, n.group = 1, n.repl = 1)

N <- nrow(data.sem.glm)
X <- data.frame(Intercept = rep(1, N),
ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
 Physical.std = data.sem.glm$Physical)

X <- as.data.frame (X)

stk2 <- inla.stack(tag = "Fit", data = list(y = data.sem.glm$OTUS_18SL), 
			A = list(A2, 1), effects = list(w = w.index, X = X))

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std 

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde) 

I218sl <- inla (spatialm2, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I218sl)

I318sl <- inla (spatialm3, family = "poisson", data = inla.stack.data(stk2), 
		control.compute = list (dic = T, waic = T),
		control.predictor = list (A = inla.stack.A(stk2)))

summary(I318sl)

dic <- c(I218sl$dic$dic,I318sl$dic$dic)
waic <- c(I218sl$waic$waic,I318sl$waic$waic)
Z18sl <- cbind(dic, waic)
rownames(Z18sl) <- c("Poisson GLM", "Poisson GLM + SPDE")

#####################################################
#                                                   #
#              END GLM                              #
#                                                   #
##########################################################################################################

############################################################################
##Hypothesis 2#######
##############################################################################

##Test the correlation between litter and soil and create figure 2 and figures S1 and S2.

####16S

# bio_long <-
# bio2 %>%
#   select(starts_with("OTU"), id_short) %>%
#   gather(OTU, S, -id_short) %>%
#   mutate(origin = ifelse(substr(OTU, nchar(OTU), nchar(OTU)) == "L", "L", "S")) %>%
#   mutate(OTU = ifelse(grepl("18S", OTU), "OTU_18S", "OTU_16S")) %>%
#   distinct() %>%
#   spread(OTU, S)

#Rsquared 

rsq <- 
  lm(OTUS_16SS ~ OTUS_16SL, data = bio2) %>% 
  glance() %>%
  select(adj.r.squared, p.value) %>% 
  mutate_all(function(x) signif(x,2)) 

pear <- cor(bio2$OTUS_16SS, bio2$OTUS_16SL) 


############Figure4B

bio2 %>%
  ggplot()+
  geom_point(data = bio2, aes(x = OTUS_16SS, y = OTUS_16SL))+
  geom_smooth(data = bio2, aes(x = OTUS_16SS, y = OTUS_16SL), method=lm)+
  xlab("Number of Soil OTU")+
  ylab("Number of Litter OTU")+
  annotate("text", x = 400, y = 400, hjust = 1, label = paste("adj.r.squared: ", rsq$adj.r.squared))+
  annotate("text", x = 50, y = 50, hjust = 0, label = paste("p.value: ", rsq$p.value))+
  coord_fixed()+
  theme_bw()

ggsave("output_abu/figure4B_16slitersoil_correlation.pdf")


###18S data


#Rsquared

rsq <- 
  summary(lm(OTUS_18SS ~ OTUS_18SL, data = bio2)) %>% 
  glance() %>%
  select(adj.r.squared, p.value) %>% 
  mutate_all(function(x) signif(x,2))
  
pear <- cor(bio2$OTUS_18SS, bio2$OTUS_18SL, use = "complete.obs")

##################Figure4D

bio2 %>%
  ggplot()+
  geom_point(data = bio2, aes(x = OTUS_18SS, y = OTUS_18SL))+
 #geom_smooth(data = bio2, aes(x = OTUS_18SS, y = OTUS_18SL), method=lm, se = F)+
  annotate("text", x = 350, y = 350, hjust = 1, label = paste("p.value: ", rsq$p.value))+
  xlab("Number of Soil OTU")+
  ylab("Number of Litter OTU")+
  coord_fixed()+
  theme_bw()

ggsave("output_abu/figure4D_18slitersoil_correlation.pdf")


##########################################
###            NMDS                   ####
##########################################
# 16S two axis
com16s <- com16s[, colSums(com16s) != 0]

s16.nmds <- metaMDS(com16s, distance = "jaccard", k = 2, autotransform = F)
s16.nmds <- metaMDS(com16s, distance = "jaccard", k = 2, autotransform = F, previou.best = s16.nmds)

sc16 <- data.frame(scores(s16.nmds)) %>%
  rownames_to_column(var = "id_short") %>%
  mutate(Kind = ifelse(substr(id_short, 1, 1) == "L", "Litter", "Soil")) %>%
  mutate(id_short = substr(id_short, 2, nchar(id_short))) %>%
  left_join(bio2)

# 18S two axis
com18s <- com18s[, colSums(com18s) != 0]

s18.nmds <- metaMDS(com18s, distance = "jaccard", k = 2, autotransform = F)
s18.nmds <- metaMDS(com18s, distance = "jaccard", k = 2, autotransform = F, previou.best = s18.nmds)

sc18 <- data.frame(scores(s18.nmds)) %>%
  rownames_to_column(var = "id_short") %>%
  mutate(Kind = ifelse(substr(id_short, 1, 1) == "L", "Litter", "Soil")) %>%
  mutate(id_short = substr(id_short, 2, nchar(id_short))) %>%
  left_join(bio2) %>%
  na.omit()

#################################
# Figure 5 soil versus litter##

cols1 <- c("#1f78b4",
"#b2df8a")
## subfigure C
fig5C <- sc16 %>%
  ggplot() +
  geom_point(aes(x = NMDS1, y = NMDS2, colour = Kind, shape = Habitat), size = 3) +
  ggtitle("A - Prokaryote (16S)") +
  scale_fill_discrete(name = "kind") +
scale_color_manual(values=cols1)+
  xlab("") +
  ylab("") +
  theme_bw() +
  theme(legend.position = "bottom")

## subfigure D
fig5D <- sc18 %>%
  ggplot() +
  geom_point(aes(x = NMDS1, y = NMDS2, colour = Kind, shape = Habitat), size = 3) +
  ggtitle("B - Eukaryote (18S)") +
  xlab("") +
  ylab("") +
  scale_fill_discrete(name = "kind") +
scale_color_manual(values=cols1)+
  theme_bw() +
  theme(legend.position = "bottom")

fig5 <- gridExtra::grid.arrange(fig5C, fig5D, bottom = "NMDS1", left = "NMDS2")
ggsave("output_abu/Figure5_NMDS_Soil_Habitat_and_Locality.pdf", plot = fig5, width = 7, height = 8)

###########################################
###Envift for NMDS axis######################
################################################


##envfit
###16SS
envfit(sc16[,c("NMDS1","NMDS2")] ~ as.factor(sc16$Kind))
envfit(sc16[,c("NMDS1","NMDS2")] ~ as.factor(sc16$Habitat))

###18SS
envfit(sc18[,c("NMDS1","NMDS2")] ~ as.factor(sc18$Kind))
envfit(sc18[,c("NMDS1","NMDS2")] ~ as.factor(sc18$Habitat))

################################################
###PERMANOVA analysis on distance matrix#######
################################################

rownames(meta) <- meta$id

##16S
adonis2(com16s ~ kind + habitat,
        permutations = 9999,
        data = meta,
        method = "jaccard")

##18S
adonis2(com18s ~ kind + habitat, 
        permutations = 9999,
        data = meta[meta$id %in% rownames(com18s), ], 
        method = "jaccard")


###############################################################
############Taxonomic assessment###############################
###############################################################

###############################################################
#16S soil
##############################################################
##load and prepare data
###taxonomy
tax <- read_csv("peerj-26029-16S_tax2.csv")


#search for or sign as a sign of database confilcts
sum(grepl("\\|", tax$taxonomy))

###rarefaction
OTU_16S_rt <- t(OTU_16S_mR)
head(OTU_16S_rt)
rar16s <- as.data.frame(OTU_16S_rt) %>% 
  rownames_to_column(var = "OTU.ID")

head(rar16s)


rar16ss  <-  rar16s  %>%
  select(OTU.ID, starts_with("S")) #soil

head(rar16ss)

##merge taxonomy and abundance
rartaxs <- inner_join(rar16ss, tax)
head(rartaxs)

###Identity and number of OTUs not in the taxonomy file
sum(is.na(rartaxs$taxonomy))
sum(is.na(rartaxs$taxonomy))/nrow(rar16ss)
no.tax <- filter(rartaxs, is.na(rartaxs$taxonomy))
write_csv(no.tax, "output_abu/16S_OTUs_rarefied_without_taxonomy.csv")

##Create taxonomy table 

###rarefied
rar_16Ss <- gather(rartaxs, key = site, value = presence, -OTU.ID, -taxonomy)%>%
  filter(!is.na(presence))%>%
  filter(presence > 0)

#### define the taxonomic level to use for all groups,and add to table
tax.list <- c(
  "Proteobacteria", "Bacteroidetes", "Firmicutes", "Actinobacteria", "Acidobacteria", "Planctomycetes",
  "Verrucomicrobia", "Chloroflexi", "archaeota", "others", "unknown"
)

rar_16Ss <- rar_16Ss%>%
  mutate(tax.class = "others")%>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[9], taxonomy), tax.list[9], tax.class))%>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))


#create site to locality and habitat key

skey <- names(rar16ss)%>%
  as_tibble()%>%
  dplyr::rename(site = value)%>%
  mutate(locality = "BC")%>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality))%>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality))%>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality))%>%
  mutate(habitat = "IG")%>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat))%>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat))%>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

####create output tables
head(rar_16Ss)
out_rar <- rar_16Ss%>%
  left_join(skey, by = "site")

head(out_rar)
tail(out_rar)
write_csv(out_rar, "output_abu/16S_taxonomy_per_site_rarefied.csv")
###############################################################
#16S litter
##############################################################

rar16sl  <- rar16s  %>%
  select(OTU.ID, starts_with("L")) #soil

head(rar16sl)
tail(rar16sl)


##merge taxonomy and abundance
rartaxl <- inner_join(rar16sl, tax)

head(rartaxl)


###Identity and number of OTUs not in the taxonomy file
sum(is.na(rartaxl$taxonomy))
sum(is.na(rartaxl$taxonomy))/nrow(tax)
no.tax <- filter(rartaxl, is.na(rartaxl$taxonomy))
write_csv(no.tax, "output_abu/16S_OTUs_rarefied_without_taxonomy_litter.csv")

##Create taxonomy table 

###rarefied
rar_16Sl <- gather(rartaxl, key = site, value = presence, -OTU.ID, -taxonomy)%>%
  filter(!is.na(presence))%>%
  filter(presence > 0)

####define the taxonomic level to use for all groups,and add to table

rar_16Sl <- rar_16Sl%>%
  mutate(tax.class = "others")%>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[9], taxonomy), tax.list[9], tax.class))%>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

#create site to locality and habitat key
head(rar16sl)
skey <- names(rar16sl)%>%
  as_tibble()%>%
  dplyr::rename(site = value)%>%
  mutate(locality = "BC")%>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality))%>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality))%>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality))%>%
  mutate(habitat = "IG")%>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat))%>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat))%>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))


####create output tables
out_rar <- rar_16Sl%>%
  left_join(skey, by = "site")

head(out_rar)
write_csv(out_rar, "output_abu/16S_taxonomy_per_site_rarefied_litter.csv")


############################################################
#18S soil
############################################################
##load and prepare data
###taxonomy
tax <- read_csv("peerj-26029-18S_tax2.csv")
head(tax)

###rabundance
OTU_18S_rt <- t(OTU_18S_mR)
head(OTU_18S_rt)

rar18s <- as.data.frame(OTU_18S_rt) %>% 
  rownames_to_column(var = "OTU.ID")

head(rar18s)

rar18ss  <-  rar18s  %>%
  select(OTU.ID, starts_with("S")) #soil
head(rar18ss)

##rarefied, by mergin refaction results with taxonomy
rartax18ss <- inner_join(rar18ss, tax)
head(rartax18ss)

###Identity and number of OTUs not in the taxonomy file
sum(is.na(rartax18ss$taxonomy))
sum(is.na(rartax18ss$taxonomy))/nrow(rar18ss)
no.tax <- filter(rartax18ss, is.na(rartax18ss$taxonomy))
write_csv(no.tax, "output_abu/18S_OTUs_rarefied_without_taxonomy_18ss.csv")

###rarefied
rar_18Ss <- gather(rartax18ss, key = site, value = presence, -OTU.ID, -taxonomy)%>%
  filter(!is.na(presence))%>%
  filter(presence > 0)

####define the taxonomic level to use for all groups,and add to table
tax.list <- c(
   "Fungi", "Arthropoda",  "Alveolata", "Chloroplastida", "Nematoda", "Cercozoa", "Amoebozoa",
  "Stramenopiles"
)

rar_18Ss <- rar_18Ss%>%
  mutate(tax.class = "others")%>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class))%>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

#create site to locality and habitat key
head(rar18ss)
skey <- names(rar18ss)%>%
  as_tibble()%>%
  dplyr::rename(site = value)%>%
  mutate(locality = "BC")%>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality))%>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality))%>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality))%>%
  mutate(habitat = "IG")%>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat))%>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat))%>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

####create output tables
out_rar <- rar_18Ss%>%
  left_join(skey, by = "site")

head(out_rar)
write_csv(out_rar, "output_abu/18S_taxonomy_per_site_rarefied_18ss.csv")

############################################################
#18S litter
############################################################
###rarefaction

rar18sl  <-  rar18s  %>%
  select(OTU.ID, starts_with("L")) #litter
head(rar18sl)

##rarefied, by mergin refaction results with taxonomy
rartax18sl <- inner_join(rar18sl, tax)

###Identity and number of OTUs not in the taxonomy file
sum(is.na(rartax18sl$taxonomy))
sum(is.na(rartax18sl$taxonomy))/nrow(rar18sl)
no.tax <- filter(rartax18sl, is.na(rartax18sl$taxonomy))
write_csv(no.tax, "output_abu/18S_OTUs_rarefied_without_taxonomy_18sl.csv")

##Create taxonomy table 

###rarefied
rar_18Sl <- gather(rartax18sl, key = site, value = presence, -OTU.ID, -taxonomy)%>%
  filter(!is.na(presence))%>%
  filter(presence > 0)

####define the taxonomic level to use for all groups,and add to table
rar_18Sl <- rar_18Sl%>%
  mutate(tax.class = "others")%>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class))%>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class))%>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

#create site to locality and habitat key
head(rar18sl)
skey <- names(rar18sl)%>%
  as_tibble()%>%
  dplyr::rename(site = value)%>%
  mutate(locality = "BC")%>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality))%>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality))%>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality))%>%
  mutate(habitat = "IG")%>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat))%>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat))%>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

####create output tables
out_rar <- rar_18Sl%>%
  left_join(skey, by = "site")

head(out_rar)
write_csv(out_rar, "output_abu/18S_taxonomy_per_site_rarefied_18sl.csv")

######################################################################
#####################Plots taxonomic groups############################
######################################################################

##########################################
#rarified
##########################################
S16_rars <- read_csv("output_abu/16S_taxonomy_per_site_rarefied.csv")
tail(S16_rars)
S16_rarl <- read_csv("output_abu/16S_taxonomy_per_site_rarefied_litter.csv")
tail(S16_rarl)
S18_rars <- read_csv("output_abu/18S_taxonomy_per_site_rarefied_18ss.csv")
tail(S18_rars)
S18_rarl <- read_csv("output_abu/18S_taxonomy_per_site_rarefied_18sl.csv")
tail(S18_rarl)

#load 16S
soil_16S <- S16_rars%>%
  distinct(OTU.ID, .keep_all = T)%>%
  group_by( tax.class)%>%
  summarise(OTUs.number = sum(presence)) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3))%>%
  mutate(kind = "Soil")%>%
  filter(!is.na(kind))%>%
  dplyr::rename(value = kind)%>%
  mutate(type = "Kind")


soil_16S$tax.class <-factor(soil_16S$tax.class, levels = unique(soil_16S$tax.class[order(soil_16S$fraction.OTUs)]))

litter_16S <- S16_rarl%>%
  distinct(OTU.ID,  .keep_all = T)%>%
  group_by( tax.class)%>%
  summarise(OTUs.number = sum(presence)) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3))%>%
  mutate(kind = "Litter")%>%
  filter(!is.na(kind))%>%
  dplyr::rename(value = kind)%>%
  mutate(type = "Kind")

litter_16S$tax.class <- factor(litter_16S$tax.class, 
                               levels = unique(litter_16S$tax.class[order(litter_16S$fraction.OTUs)]))

plo_16S <- bind_rows(soil_16S, litter_16S)%>%
  mutate(marker = "16S")

tail(plo_16S)

plo_16S$tax.class <-factor(plo_16S$tax.class, levels = unique(plo_16S$tax.class[order(plo_16S$fraction.OTUs)]))

#load 18S
soil_18S <- S18_rars%>%
  distinct(OTU.ID, .keep_all = T)%>%
  group_by( tax.class)%>%
  summarise(OTUs.number = sum(presence)) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3))%>%
  mutate(kind = "Soil")%>%
  filter(!is.na(kind))%>%
  dplyr::rename(value = kind)%>%
  mutate(type = "Kind")

litter_18S <- S18_rarl%>%
  distinct(OTU.ID,  .keep_all = T)%>%
  group_by( tax.class)%>%
  summarise(OTUs.number = sum(presence)) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3))%>%
  mutate(kind = "Litter")%>%
  filter(!is.na(kind))%>%
  dplyr::rename(value = kind)%>%
  mutate(type = "Kind")

litter_18S$tax.class <- factor(litter_18S$tax.class, 
                               levels = unique(litter_18S$tax.class[order(litter_18S$fraction.OTUs)]))

plo_18S <- bind_rows(soil_18S, litter_18S)%>%
  mutate(marker = "18S")

plo_18S$tax.class <-factor(plo_18S$tax.class, levels = unique(plo_18S$tax.class[order(plo_18S$fraction.OTUs)]))


#number 

S16 <- ggplot(data = plo_16S, aes(x= value, y=OTUs.number, fill=tax.class))+
  geom_bar(stat = "identity", position=position_dodge())+
  scale_fill_brewer(palette = "Set3", name = "Taxa Prokaryote (16S)")+
  ylab("Number of OTUs")+
  xlab("")+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        legend.position = "right")+
  facet_grid(~type, scales = "free_x")


S18 <- ggplot(data = plo_18S, aes(x= value, y=OTUs.number, fill=tax.class))+
  geom_bar(stat = "identity", position=position_dodge())+
  scale_fill_brewer(palette = "Paired", name = "Taxa Eukaryote (18S)")+
  ylab("Number of OTUs")+
  xlab("")+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        legend.position = "right")+
  facet_grid(~type, scales = "free_x")


out <- grid.arrange(S16, S18, nrow = 2)

ggsave("output_abu/Fig6C&D_number_of_OTUs_rarefied.pdf", plot = out)

#fraction
S16 <- ggplot(data = plo_16S, aes(x= value, y=fraction.OTUs, fill=tax.class))+
  geom_bar(stat = "identity", position=position_dodge())+
  scale_fill_brewer(palette = "Set3", name = "Taxa 16S")+
  ylab("Fraction of OTUs")+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        legend.position = "right")+
  facet_grid(~type, scales = "free_x")


S18 <- ggplot(data = plo_18S, aes(x= value, y=fraction.OTUs , fill=tax.class))+
  geom_bar(stat = "identity", position=position_dodge())+
  scale_fill_brewer(palette = "Paired",name = "Taxon 18S")+
  ylab("Fraction of OTUs")+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        legend.position = "right")+
  facet_grid(~type, scales = "free_x")


out <- grid.arrange(S16, S18, nrow = 2)


ggsave("output_abu/Fig6CandD_fraction_of_OTUs_rarefied.pdf", plot = out)