title:"Analyses_amazon_soil_metabarcoding"
author:"Camila Ritter, Alex Zizka, Fabian Roger and Christopher Barnes"

##################################################
################### Install Packages###############
##################################################

# install.packages ("tidyverse", dependencies = T)
# install.packages("ggplot2", dependencies = T)
# install.packages ("gridExtra", dependencies = T)
# install.packages ("ggfortify", dependencies = T)
# install.packages ("vegan", dependencies = T)
# install.packages ("geosphere", dependencies = T)
# install.packages ("viridis", dependencies = T)
# install.packages ("Hmisc", dependencies = T)
# install.packages("mice", dependencies = T)
# install.packages("INLA", repos="https://inla.r-inla-download.org/R/stable", dep=TRUE)
# install.packages("gstat", dependencies = T)
# install.packages("lazyeval")
# install.packages("broom")
# install.packages("ecodist")
# install.packages("splancs", dependencies = T)
# source("https://bioconductor.org/biocLite.R")
# biocLite("DESeq2")
# install.packages("entropart")
# biocLite("phyloseq")

########### Library packages

#' if some packages (at the bottom of the list) fail to load with
#' "maximal number of DLLs reached" you have to set a higher value for the
#' R_MAX_NUM_DLLS=150 variable in the Renviron file. This is because some packages
#' loaded in this script have a lot of dependencies and the total number of loaded packages
#' becomes higher than allowed by default
#'
#' you can find the Renviron file under R.home() /etc/
#' setting R_MAX_NUM_DLLS=200 (pasting this into the Renviron file, saving and restarting R)
#' solves the issue for me.
#'
#' R version 3.4.3 (2017-11-30)
#' Platform: x86_64-apple-darwin15.6.0 (64-bit)
#' Running under: macOS High Sierra 10.13.3
#'

library(tidyverse)
library(gridExtra)
library(ggfortify)
library(ggplot2)
library(vegan)
library(geosphere)
library(viridis)
library(Hmisc)
library(mice)
library(stringr)
library(stringi)
library(ecodist)
library(RColorBrewer)
library(INLA)
library(sp)
library(gstat)
library(splancs)
library(broom)
library (ecodist)


### make directory to store output
dir.create("output_p_a")

######################################################
###################### Import data#####################
######################################################
#### Prokaryote (16S)#######

# Read in metadata
meta <- read.csv("peerj-26029-metadata.csv")

################# Richness 16S##################################
# read in 16S otu table
OTU_16S <- read.csv("peerj-26029-16S_all.csv")

# transform to matrix
OTU_M16S <- as.matrix(OTU_16S[, -1])
dimnames(OTU_M16S) <- list(OTU_16S[, 1], colnames(OTU_16S[, -1]))

# read depth per sample
plot(log(sort(colSums(OTU_M16S), decreasing = T), base = 10),
  main = "seqeuncing depth 18S",
  ylab = "log_10(depth)",
  xlab = "sample by rank",
  ylim = c(0, 5)
)


# calculate rarefied richness (based on smallest sample)
rar_Richness_16S <- rarefy(t(OTU_M16S), sample = min(colSums(OTU_M16S)), se = F)

# make richness to dataframe
rar_Richness_16S <-
  rar_Richness_16S %>%
  data.frame(S = .) %>%
  rownames_to_column(var = "id")

# export richness for 16S
write.csv(rar_Richness_16S, "output_p_a/rich16S.csv")

############ Figure S1A

pdf("output_p_a/FigS1A_16S_raref.pdf")
rarecurve(t(OTU_M16S), step = 1000, main = "Prokaryotes - 16S", ylab = "OTU", label = FALSE)
abline(v = min(colSums(OTU_M16S)), col = "red", lty = 2)
abline()
dev.off()


################### Richness 18S###################################

# read in OTU table
OTU_18S <- read.csv("peerj-26029-18S_all.csv")

# transform to matrix
OTU_M18S <- as.matrix(OTU_18S[, -1])

dimnames(OTU_M18S) <- list(OTU_18S[, 1], colnames(OTU_18S[, -1]))

# plot sampling depth
plot(log(sort(colSums(OTU_M18S), decreasing = T), base = 10),
  main = "seqeuncing depth 18S",
  ylab = "log_10(depth)",
  xlab = "sample by rank",
  ylim = c(0, 6)
)

#' The lowest sample has 1000x fewer reads than the remaining samples.
#' We exclude it from further analysis
#' the sample is:
meta[meta$id == names(which(colSums(OTU_M18S) == min(colSums(OTU_M18S)))), 1:8]

# exclude sample
OTU_M18S <- OTU_M18S[, which(colnames(OTU_M18S) != "SJAUTFP1")]

# calculate rarefied richness based on (now) smallest sample
rar_Richness_18S <- rarefy(t(OTU_M18S), sample = min(colSums(OTU_M18S)), se = F)

rar_Richness_18S <-
  rar_Richness_18S %>%
  data.frame(S = .) %>%
  rownames_to_column(var = "id")

# export rarefied richness for 18S
write.csv(rar_Richness_18S, "output_p_a/rich18S.csv")

############### FigureS1B

pdf("output_p_a/FigS1B_18S_raref.pdf")
rarecurve(t(OTU_M18S),
  step = 1000,
  main = "Eukaryotes - 18S",
  label = FALSE, ylab = "OTU"
)
abline(v = min(colSums(OTU_M18S)), col = "red", lty = 2)
dev.off()

##############################################################
####################### Making tables##########################
##############################################################
#########################################################################
################ Making the presence/absence matrix#######################
#########################################################################

# rarefy down to lowest sample
com16s <- rrarefy(t(OTU_M16S), sample = min(colSums(OTU_M16S)))

# transform to presence/absence matrix
com16s[com16s > 1] <- 1

# export
write.csv(com16s, "output_p_a/otu_16s_presence_absence.csv")

# rarefy
com18s <- rrarefy(t(OTU_M18S), sample = min(colSums(OTU_M18S)))

# presence-absence
com18s [com18s > 1] <- 1

# export
write.csv(com18s, "output_p_a/otu_18s_presence_absence.csv")

########### Making a single table with OTUs rarefied numbers and metadata#############
# Richness + geographic + edaphic soil data

# add metadata
dat <- meta

# scale all numeric variables
dat <- mutate_at(dat, .vars = c(8:27), funs(scale(.) %>% as.vector()))

# capitalize colnames
names(dat) <- capitalize(names(dat))


OTUS_18SL <- filter(rar_Richness_18S, substr(id, 1, 1) == "L") %>% # filter litter samples
  dplyr::rename(OTUS_18SL = S) %>%
  mutate(id_short = substr(id, 2, nchar(id))) %>% # id withou litter soil prefix
  select(-id)

OTUS_18SS <- filter(rar_Richness_18S, substr(id, 1, 1) == "S") %>% # filter litter samples
  dplyr::rename(OTUS_18SS = S) %>%
  mutate(id_short = substr(id, 2, nchar(id))) %>% # id withou litter soil prefix
  select(-id)

OTUS_16SL <- filter(rar_Richness_16S, substr(id, 1, 1) == "L") %>% # filter litter samples
  dplyr::rename(OTUS_16SL = S) %>%
  mutate(id_short = substr(id, 2, nchar(id))) %>% # id withou litter soil prefix
  select(-id)

OTUS_16SS <- filter(rar_Richness_16S, substr(id, 1, 1) == "S") %>% # filter litter samples
  dplyr::rename(OTUS_16SS = S) %>%
  mutate(id_short = substr(id, 2, nchar(id))) %>% # id withou litter soil prefix
  select(-id)

dat2 <- dat[1:39, ]
bio2 <- dat2 %>%
  mutate_at(.vars = c(1:5), as.character) %>%
  mutate(id_short = substr(Id, 2, nchar(Id))) %>%
  select(-Id, -Kind)

bio2 <- left_join(bio2, OTUS_18SL, by = "id_short")
bio2 <- left_join(bio2, OTUS_18SS, by = "id_short")
bio2 <- left_join(bio2, OTUS_16SL, by = "id_short")
bio2 <- left_join(bio2, OTUS_16SS, by = "id_short")

# write.csv(bio2, "output_p_a/bio2.csv")
#########################################################
####### Hypothesis 1, soil variables effect###############
#########################################################
soil_phy_che <- read.csv("peerj-26029-metadata.csv")

# Impute missing data
tempData <- soil_phy_che %>%
  select(8:27) %>% # all the soil columns
  distinct() %>%
  mice(m = 5, maxit = 50, meth = "pmm")

completedData <- complete(tempData, 1) # takes our permutation 1 from the list
completed.metadata <- as.data.frame(completedData) # makes it a dataframe that you can use further downstream
head(completed.metadata)
completed.metadata <- data.frame(scale(completed.metadata))

metadata <- cbind(bio2[1:5], completed.metadata)

# Principle component analyses
## Physical
Physical <- c(
  "coarse_sand_fraction", "slim_sand_fraction",
  "total_sand_fraction", "silt", "clay"
)

Physical.pca <- prcomp(metadata[, Physical], center = F, scale. = F)

screeplot(Physical.pca)
summary(Physical.pca)
Physical <- Physical.pca$x[, 1]

## Chemical
Chemical <- c(
  "p", "k", "na", "ca", "mg",
  "al", "h_al", "sb", "t", "uppercase_t", "v", "m"
)

Chemical.pca <- prcomp(metadata[, Chemical], center = F, scale. = F)

screeplot(Chemical.pca)
summary(Chemical.pca)
Chemical <- Chemical.pca$x[, 1]

##################################
########################## Figure2
##################################

Fig2A <- autoplot(Physical.pca,
  data = metadata, colour = "Habitat", shape = "Habitat", size = 3, asp=1,
  loadings = T, frame = T, loadings.colour = "blue", plot.background = element_blank(),
  loadings.label = T, loadings.label.size = 5
) +
  theme(
    plot.background = element_blank(),
    panel.background = element_rect(fill = "transparent", color = "black", size = 1),
    legend.text = element_text(),
    legend.key = element_blank()
  )

ggsave("output_p_a/Fig2A.pdf", plot = Fig2A, width = 7, height = 8)

Fig2B <- autoplot(Chemical.pca,
  data = metadata, colour = "Habitat", shape = "Habitat", size = 3,asp=1,
  loadings = T, frame = T, loadings.colour = "blue", plot.background = element_blank(),
  loadings.label = T, loadings.label.size = 3
) +
  theme(
    plot.background = element_blank(),
    panel.background = element_rect(fill = "transparent", color = "black", size = 1),
    legend.text = element_text(),
    legend.key = element_blank()
  )

ggsave("output_p_a/Fig2B.pdf", plot = Fig2B, width = 7, height = 8)

##########################################
################ Make table with PCA scores
###########################################
data.sem <- cbind(metadata[, c(1:5, 11, 12)], Physical, Chemical) %>%
  mutate(id_short = paste(Locality, Habitat, Plot, sep = ""))

####################################################
### Community structure and soil characteristics ####
####################################################


# multiple regresioon with dissimilarity matrix

#16S

#soil
head (com16s)
s16.2 <- com16s[substr(rownames(com16s), 1,1) == "S",]

head(s16.2)
tail(s16.2)

jac.dist <- vegdist((s16.2), method = "jaccard")
latlong <- cbind(data.sem$Long,  data.sem$Lat)
geo.dist <- vegdist((latlong), method = "euclidean")

glm16ss <- MRM(jac.dist ~ geo.dist + dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm16ss

#litter
s16.3 <- com16s[substr(rownames(com16s), 1,1) == "L",]
jac.dist <- vegdist((s16.3), method = "jaccard")


glm16sl <- MRM(jac.dist ~ geo.dist + dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm16sl

#18S

#soil

s18.2 <- com18s[substr(rownames(com18s), 1, 1) == "S", ]
jac.dist <- vegdist((s18.2), method = "jaccard")

data.sem.18s <- data.sem[!(rownames(data.sem) == "JAUTFP1"), ]
data.sem.18s <- data.sem[data.sem$id_short != "JAUTFP1", ]
latlong.18s <- cbind(data.sem.18s$Long,  data.sem.18s$Lat)
geo.dist.18s <- vegdist((latlong.18s), method = "euclidean")


glm18ss <- MRM(jac.dist ~ geo.dist.18s+ dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem.18s, nperm=10000, mrank = T)
glm18ss

#litter

s18.3 <- com18s[substr(rownames(com18s), 1,1) == "L",]
jac.dist <- vegdist((s18.3), method = "jaccard")

glm18sl <- MRM(jac.dist ~ geo.dist + dist(Chemical) + dist(Physical) + dist(ph)+ dist(c),data=data.sem, nperm=10000, mrank = T)
glm18sl

################################################################
################## Variance partitioning#########################
################################################################

# Prokaryote (16S)
## soil
S16.jac <- vegdist((s16.2), type = "Jaccard", binary = TRUE)
mod1 <- varpart(S16.jac, ~ data.sem$Chemical, ~ data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)

## litter
S16.jac2 <- vegdist((s16.3), type = "Jaccard", binary = TRUE)
mod2 <- varpart(S16.jac2, ~ data.sem$Chemical, ~ data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)

# Eukaryote (18S)
## soil
S18.jac <- vegdist((s18.2), type = "Jaccard", binary = TRUE)
mod3 <- varpart(S18.jac, ~ data.sem18s$Chemical, ~ data.sem18s$Physical, ~ data.sem18s$ph, ~ data.sem18s$c)

## litter
S18.jac2 <- vegdist((s18.3), type = "Jaccard", binary = TRUE)
mod4 <- varpart(S18.jac2, ~ data.sem$Chemical, ~ data.sem$Physical, ~ data.sem$ph, ~ data.sem$c)

############# Figure 3################

pdf("output_p_a/Fig3A_16S_soil.pdf")
plot(mod1)
dev.off()

pdf("output_p_a/Fig3B_16S_litter.pdf")
plot(mod2)
dev.off()

pdf("output_p_a/Fig3C_18S_soil.pdf")
plot(mod3)
dev.off()

pdf("output_p_a/Fig3D_18S_litter.pdf")
plot(mod4)
dev.off()


###########################################################
#                                                         #
###   GLM soil characteristics vs. richness   #############
#                                                         #
###########################################################
# GLM
## 16S
### soil
richness <- c("id_short", "OTUS_18SL", "OTUS_18SS", "OTUS_16SL", "OTUS_16SS")
richness2 <- bio2[, richness]

data.sem.glm <- merge(data.sem, richness2, by = "id_short")

spatialm <- inla(OTUS_16SS ~ ph + c + Chemical + Physical,
  family = "poisson",
  control.predictor = list(compute = T), data = data.sem.glm
)

ExpY <- spatialm$summary.fitted.values[, "mean"]
E1 <- (data.sem.glm$OTUS_16SS - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length(spatialm$names.fixed)
Dispersion <- sum(E1^2) / (N - p)

data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")

data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame(E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")

plot(data.sem.glm$E1 ~ data.sem.glm$Long)

Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort(D), y = (1:length(D)) / length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex = 1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern(mesh, alpha = 2)

w.index <- inla.spde.make.index(
  name = "w",
  n.spde = spde$n.spde, n.group = 1, n.repl = 1
)

N <- nrow(data.sem.glm)
X <- data.frame(
  Intercept = rep(1, N),
  ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
  Physical.std = data.sem.glm$Physical
)

X <- as.data.frame(X)

stk2 <- inla.stack(
  tag = "Fit", data = list(y = data.sem.glm$OTUS_16SS),
  A = list(A2, 1), effects = list(w = w.index, X = X)
)

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde)

I2s16s <- inla(spatialm2,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

I3s16s <- inla(spatialm3,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

dic <- c(I2s16s$dic$dic, I3s16s$dic$dic)
waic <- c(I2s16s$waic$waic, I3s16s$waic$waic)
Zs16s <- cbind(dic, waic)
rownames(Zs16s) <- c("Poisson GLM", "Poisson GLM + SPDE")

### litter
spatialm <- inla(OTUS_16SL ~ ph + c + Chemical + Physical, family = "poisson", control.predictor = list(compute = T), data = data.sem.glm)

ExpY <- spatialm$summary.fitted.values[, "mean"]
E1 <- (data.sem.glm$OTUS_16SL - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length(spatialm$names.fixed)
Dispersion <- sum(E1^2) / (N - p)

data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")

data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame(E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")

plot(data.sem.glm$E1 ~ data.sem.glm$Long)

Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort(D), y = (1:length(D)) / length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex = 1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)

spde <- inla.spde2.matern(mesh, alpha = 2)

w.index <- inla.spde.make.index(
  name = "w",
  n.spde = spde$n.spde, n.group = 1, n.repl = 1
)

N <- nrow(data.sem.glm)
X <- data.frame(
  Intercept = rep(1, N),
  ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
  Physical.std = data.sem.glm$Physical
)

X <- as.data.frame(X)

stk2 <- inla.stack(
  tag = "Fit", data = list(y = data.sem.glm$OTUS_16SL),
  A = list(A2, 1), effects = list(w = w.index, X = X)
)

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde)

I2l16s <- inla(spatialm2,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

I3l16s <- inla(spatialm3,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

dic <- c(I2l16s$dic$dic, I3l16s$dic$dic)
waic <- c(I2l16s$waic$waic, I3l16s$waic$waic)
Zl16s <- cbind(dic, waic)
rownames(Zl16s) <- c("Poisson GLM", "Poisson GLM + SPDE")

## 18S
### soil
spatialm <- inla(OTUS_18SS ~ ph + c + Chemical + Physical, family = "poisson", control.predictor = list(compute = T), data = data.sem.glm)
summary(spatialm)
ExpY <- spatialm$summary.fitted.values[, "mean"]
E1 <- (data.sem.glm$OTUS_18SS - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length(spatialm$names.fixed)
Dispersion <- sum(E1^2) / (N - p)

data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")

data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame(E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")

plot(data.sem.glm$E1 ~ data.sem.glm$Long)

Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort(D), y = (1:length(D)) / length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex = 1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern(mesh, alpha = 2)

w.index <- inla.spde.make.index(
  name = "w",
  n.spde = spde$n.spde, n.group = 1, n.repl = 1
)

N <- nrow(data.sem.glm)
X <- data.frame(
  Intercept = rep(1, N),
  ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
  Physical.std = data.sem.glm$Physical
)

X <- as.data.frame(X)

stk2 <- inla.stack(
  tag = "Fit", data = list(y = data.sem.glm$OTUS_18SS),
  A = list(A2, 1), effects = list(w = w.index, X = X)
)

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde)

I2s18s <- inla(spatialm2,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

I3s18s <- inla(spatialm3,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

dic <- c(I2s18s$dic$dic, I3s18s$dic$dic)
waic <- c(I2s18s$waic$waic, I3s18s$waic$waic)
Zs18s <- cbind(dic, waic)
rownames(Zs18s) <- c("Poisson GLM", "Poisson GLM + SPDE")


### litter
spatialm <- inla(OTUS_18SL ~ ph + c + Chemical + Physical, family = "poisson", control.predictor = list(compute = T), data = data.sem.glm)

ExpY <- spatialm$summary.fitted.values[, "mean"]
E1 <- (data.sem.glm$OTUS_18SL - ExpY) / sqrt(ExpY)
N <- nrow(data.sem.glm)
p <- length(spatialm$names.fixed)
Dispersion <- sum(E1^2) / (N - p)

head(data.sem.glm)
data.sem.glm$E1 <- E1
MyVar <- c("ph", "c", "Chemical", "Physical")

data.sem.glm$Xkm <- data.sem.glm$Long
data.sem.glm$Ykm <- data.sem.glm$Lat
mydata <- data.frame(E1, data.sem.glm$Xkm, data.sem.glm$Ykm)
coordinates(mydata) <- c("data.sem.glm.Xkm", "data.sem.glm.Ykm")

plot(data.sem.glm$E1 ~ data.sem.glm$Long)

Loc <- cbind(data.sem.glm$Xkm, data.sem.glm$Ykm)
D <- dist(Loc)
hist(D, freq = T, main = "")

plot(x = sort(D), y = (1:length(D)) / length(D), type = "l")

convHull <- inla.nonconvex.hull(Loc)
mesh1 <- inla.mesh.2d(boundary = convHull, max.edge = c(100.5))
plot(mesh1)
points(Loc, coil = 1, pch = 16, cex = 1)

mesh <- mesh1
A2 <- inla.spde.make.A(mesh1, loc = Loc)
dim(A2)

spde <- inla.spde2.matern(mesh, alpha = 2)

w.index <- inla.spde.make.index(
  name = "w",
  n.spde = spde$n.spde, n.group = 1, n.repl = 1
)

N <- nrow(data.sem.glm)
X <- data.frame(
  Intercept = rep(1, N),
  ph.std = data.sem.glm$ph, C.std = data.sem.glm$c, Chemical.std = data.sem.glm$Chemical,
  Physical.std = data.sem.glm$Physical
)

X <- as.data.frame(X)

stk2 <- inla.stack(
  tag = "Fit", data = list(y = data.sem.glm$OTUS_18SL),
  A = list(A2, 1), effects = list(w = w.index, X = X)
)

spatialm2 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std

spatialm3 <- y ~ -1 + Intercept + ph.std + C.std + Chemical.std + Physical.std + f(w, model = spde)

I2l18s <- inla(spatialm2,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

I3l18s <- inla(spatialm3,
  family = "poisson", data = inla.stack.data(stk2),
  control.compute = list(dic = T, waic = T),
  control.predictor = list(A = inla.stack.A(stk2))
)

dic <- c(I2l18s$dic$dic, I3l18s$dic$dic)
waic <- c(I2l18s$waic$waic, I3l18s$waic$waic)
Zl18s <- cbind(dic, waic)
rownames(Zl18s) <- c("Poisson GLM", "Poisson GLM + SPDE")
#####################################################
#                                                   #
#              END GLM                              #
#                                                   #
#####################################################
############################################################################
## Hypothesis 2#######
##############################################################################

## Test the correlation between litter and soil and create figure 2 and figures S1 and S2.
#### 16S
# Rsquared
rsq <-
  lm(OTUS_16SS ~ OTUS_16SL, data = bio2) %>%
  glance() %>%
  select(adj.r.squared, p.value) %>%
  mutate_all(function(x) signif(x, 2))

pear <- cor(bio2$OTUS_16SS, bio2$OTUS_16SL)


############ Figure4
bio2 %>%
  ggplot() +
  geom_point(data = bio2, aes(x = OTUS_16SS, y = OTUS_16SL)) +
  geom_smooth(data = bio2, aes(x = OTUS_16SS, y = OTUS_16SL), method = lm) +
  xlab("Number of Soil OTU") +
  ylab("Number of Litter OTU") +
  annotate("text", x = 500, y = 1600, hjust = 0, label = paste("adj.r.squared: ", rsq$adj.r.squared)) +
  annotate("text", x = 500, y = 1500, hjust = 0, label = paste("p.value: ", rsq$p.value)) +
  coord_fixed() +
  theme_bw()

ggsave("output_p_a/figure4A_16slitersoil_correlation.pdf")


## 18S data
# Rsquared
rsq <-
  summary(lm(OTUS_18SS ~ OTUS_18SL, data = bio2)) %>%
  glance() %>%
  select(adj.r.squared, p.value) %>%
  mutate_all(function(x) signif(x, 2))

pear <- cor(bio2$OTUS_18SS, bio2$OTUS_18SL, use = "complete.obs")

################## Figure4C
bio2 %>%
  ggplot() +
  geom_point(data = bio2, aes(x = OTUS_18SS, y = OTUS_18SL)) +
  # geom_smooth(data = bio2, aes(x = OTUS_18SS, y = OTUS_18SL), method=lm, se = F)+
  annotate("text", x = 1100, y = 1500, hjust = 0, label = paste("p.value: ", rsq$p.value)) +
  xlab("Number of Soil OTU") +
  ylab("Number of Liter OTU") +
  coord_fixed() +
  theme_bw()

ggsave("output_p_a/figure4C_18slitersoil_correlation.pdf")

##################################
############ Supplementary material
##################################

### Correlation without outlier
bio3 <- filter(bio2, OTUS_16SS != min(OTUS_16SS))

#### 16S
# Rsquared
rsq <-
  summary(lm(OTUS_16SS ~ OTUS_16SL, data = bio3)) %>%
  glance() %>%
  select(adj.r.squared, p.value) %>%
  mutate_all(function(x) signif(x, 2))

pear <- cor(bio3$OTUS_16SS, bio3$OTUS_16SL, use = "complete.obs")

######### FigureS3A
bio3 %>%
  ggplot() +
  geom_point(data = bio3, aes(x = OTUS_16SS, y = OTUS_16SL)) +
  geom_smooth(data = bio3, aes(x = OTUS_16SS, y = OTUS_16SL), method = lm) +
  annotate("text", x = 800, y = 1600, hjust = 0, label = paste("adj.r.squared: ", rsq$adj.r.squared)) +
  annotate("text", x = 800, y = 1500, hjust = 0, label = paste("p.value: ", rsq$p.value)) +
  xlab("Number of Soil OTU") +
  ylab("Number of Litter OTU") +
  coord_fixed() +
  theme_bw()

ggsave("output_p_a/figureS3A_16slitersoil_correlation.pdf")


### 18S data
# Rsquared
rsq <-
  summary(lm(OTUS_18SS ~ OTUS_18SL, data = bio3)) %>%
  glance() %>%
  select(adj.r.squared, p.value) %>%
  mutate_all(function(x) signif(x, 2))

pear <- cor(bio3$OTUS_18SS, bio3$OTUS_18SL, use = "complete.obs")

################ FiguresS3C
bio3 %>%
  ggplot() +
  geom_point(data = bio3, aes(x = OTUS_18SL, y = OTUS_18SS)) +
  # geom_smooth(data = bio3, aes(x = OTUS_18SL, y = OTUS_18SS), method=lm)+
  annotate("text", x = 1100, y = 1500, hjust = 0, label = paste("p.value: ", rsq$p.value)) +
  xlab("Number of Soil OTU") +
  ylab("Number of Liter OTU") +
  coord_fixed() +
  theme_bw()

ggsave("output_p_a/figureS3C_18slitersoil_correlation.pdf")

##########################################
###            NMDS                   ####
##########################################
# 16S two axis
com16s <- com16s[, colSums(com16s) != 0]

s16.nmds <- metaMDS(com16s, distance = "jaccard", k = 2, autotransform = F)
s16.nmds <- metaMDS(com16s, distance = "jaccard", k = 2, autotransform = F, previou.best = s16.nmds)

sc16 <- data.frame(scores(s16.nmds)) %>%
  rownames_to_column(var = "id_short") %>%
  mutate(Kind = ifelse(substr(id_short, 1, 1) == "L", "Litter", "Soil")) %>%
  mutate(id_short = substr(id_short, 2, nchar(id_short))) %>%
  left_join(bio2)

# 18S two axis
com18s <- com18s[, colSums(com18s) != 0]

s18.nmds <- metaMDS(com18s, distance = "jaccard", k = 2, autotransform = F)
s18.nmds <- metaMDS(com18s, distance = "jaccard", k = 2, autotransform = F, previou.best = s18.nmds)

sc18 <- data.frame(scores(s18.nmds)) %>%
  rownames_to_column(var = "id_short") %>%
  mutate(Kind = ifelse(substr(id_short, 1, 1) == "L", "Litter", "Soil")) %>%
  mutate(id_short = substr(id_short, 2, nchar(id_short))) %>%
  left_join(bio2) %>%
  na.omit()

#################################
# Figure 5 soil versus litter##

cols1 <- c("#1f78b4",
"#b2df8a")
## subfigure A
fig5A <- sc16 %>%
  ggplot() +
  geom_point(aes(x = NMDS1, y = NMDS2, colour = Kind, shape = Habitat), size = 3) +
  ggtitle("A - Prokaryote (16S)") +
  scale_fill_discrete(name = "kind") +
scale_color_manual(values=cols1)+
  xlab("") +
  ylab("") +
  theme_bw() +
  theme(legend.position = "bottom")

## subfigure B
fig5B <- sc18 %>%
  ggplot() +
  geom_point(aes(x = NMDS1, y = NMDS2, colour = Kind, shape = Habitat), size = 3) +
  ggtitle("B - Eukaryote (18S)") +
  xlab("") +
  ylab("") +
  scale_fill_discrete(name = "kind") +
scale_color_manual(values=cols1)+
  theme_bw() +
  theme(legend.position = "bottom")

fig5 <- gridExtra::grid.arrange(fig5A, fig5B, bottom = "NMDS1", left = "NMDS2")
ggsave("output_p_a/Figure5_NMDS_Soil_Habitat_and_Locality.pdf", plot = fig5, width = 7, height = 8)

###########################################
### Envift for NMDS axis######################
################################################
## envfit
### 16SS
envfit(sc16[, c("NMDS1", "NMDS2")] ~ as.factor(sc16$Kind))
envfit(sc16[, c("NMDS1", "NMDS2")] ~ as.factor(sc16$Habitat))

### 18SS
envfit(sc18[, c("NMDS1", "NMDS2")] ~ as.factor(sc18$Kind))
envfit(sc18[, c("NMDS1", "NMDS2")] ~ as.factor(sc18$Habitat))

################################################
### PERMANOVA analysis on distance matrix#######
################################################

rownames(meta) <- meta$id

## 16S
adonis2(com16s ~ kind + habitat,
  permutations = 9999,
  data = meta,
  method = "jaccard"
)

## 18S
adonis2(com18s ~ kind + habitat,
  permutations = 9999,
  data = meta[meta$id %in% rownames(com18s), ],
  method = "jaccard"
)

###############################################################
############ Taxonomic assessment###############################
###############################################################
###############################################################
# 16S soil
##############################################################
## load and prepare data
### taxonomy
tax <- read_csv("peerj-26029-16S_tax2.csv")

# search for or sign as a sign of database confilcts
sum(grepl("\\|", tax$taxonomy))

### rarefaction
rar16s <- as.data.frame(t(com16s)) %>%
  rownames_to_column(var = "OTU.ID")

rar16ss <- rar16s %>%
  select(OTU.ID, starts_with("S")) # soil

## merge taxonomy and abundance
rartaxs <- inner_join(rar16ss, tax)

### Identity and number of OTUs not in the taxonomy file
no.tax <- filter(rartaxs, is.na(rartaxs$taxonomy))
write_csv(no.tax, "output_p_a/16S_OTUs_rarefied_without_taxonomy.csv")

## Create taxonomy table
### rarefied
rar_16Ss <- gather(rartaxs, key = site, value = presence, -OTU.ID, -taxonomy) %>%
  filter(!is.na(presence)) %>%
  filter(presence > 0)

#### define the taxonomic level to use for all groups,and add to table
tax.list <- c(
  "Proteobacteria", "Bacteroidetes", "Firmicutes", "Actinobacteria", "Acidobacteria", "Planctomycetes",
  "Verrucomicrobia", "Chloroflexi", "archaeota", "others", "unknown"
)

rar_16Ss <- rar_16Ss %>%
  mutate(tax.class = "others") %>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[9], taxonomy), tax.list[9], tax.class)) %>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

# create site to locality and habitat key

skey <- names(rar16ss) %>%
  as_tibble() %>%
  dplyr::rename(site = value) %>%
  mutate(locality = "BC") %>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality)) %>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality)) %>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality)) %>%
  mutate(habitat = "IG") %>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat)) %>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat)) %>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

#### create output tables
out_rar <- rar_16Ss %>%
  left_join(skey, by = "site")

write_csv(out_rar, "output_p_a/16S_taxonomy_per_site_rarefied.csv")
###############################################################
# 16S litter
##############################################################
rar16sl <- rar16s %>%
  select(OTU.ID, starts_with("L")) # soil

## merge taxonomy and abundance
rartaxl <- inner_join(rar16sl, tax)

### Identity and number of OTUs not in the taxonomy file
sum(is.na(rartaxl$taxonomy))
sum(is.na(rartaxl$taxonomy)) / nrow(tax)
no.tax <- filter(rartaxl, is.na(rartaxl$taxonomy))
write_csv(no.tax, "output_p_a/16S_OTUs_rarefied_without_taxonomy_litter.csv")

## Create taxonomy table
### rarefied
rar_16Sl <- gather(rartaxl, key = site, value = presence, -OTU.ID, -taxonomy) %>%
  filter(!is.na(presence)) %>%
  filter(presence > 0)

#### define the taxonomic level to use for all groups,and add to table
rar_16Sl <- rar_16Sl %>%
  mutate(tax.class = "others") %>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[9], taxonomy), tax.list[9], tax.class)) %>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

# create site to locality and habitat key
skey <- names(rar16sl) %>%
  as_tibble() %>%
  dplyr::rename(site = value) %>%
  mutate(locality = "BC") %>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality)) %>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality)) %>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality)) %>%
  mutate(habitat = "IG") %>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat)) %>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat)) %>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))


#### create output tables
out_rar <- rar_16Sl %>%
  left_join(skey, by = "site")

write_csv(out_rar, "output_p_a/16S_taxonomy_per_site_rarefied_litter.csv")

############################################################
# 18S soil
############################################################
## load and prepare data
### taxonomy
tax <- read_csv("peerj-26029-18S_tax2.csv")

### rarefaction
rar18s <- as.data.frame(t(com18s)) %>%
  rownames_to_column(var = "OTU.ID")

rar18ss <- rar18s %>%
  select(OTU.ID, starts_with("S")) # soil

## rarefied, by mergin refaction results with taxonomy
rartax18ss <- inner_join(rar18ss, tax)

### Identity and number of OTUs not in the taxonomy file
sum(is.na(rartax18ss$taxonomy))
sum(is.na(rartax18ss$taxonomy)) / nrow(rar18ss)
no.tax <- filter(rartax18ss, is.na(rartax18ss$taxonomy))
write_csv(no.tax, "output_p_a/18S_OTUs_rarefied_without_taxonomy_18ss.csv")

### rarefied
rar_18Ss <- gather(rartax18ss, key = site, value = presence, -OTU.ID, -taxonomy) %>%
  filter(!is.na(presence)) %>%
  filter(presence > 0)

#### define the taxonomic level to use for all groups,and add to table
tax.list <- c(
 "Fungi", "Arthropoda", "Alveolata", "Chloroplastida", "Nematoda", "Cercozoa", "Amoebozoa",
  "Stramenopiles"
)

rar_18Ss <- rar_18Ss %>%
  mutate(tax.class = "others") %>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class)) %>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

# create site to locality and habitat key
skey <- names(rar18ss) %>%
  as_tibble() %>%
  dplyr::rename(site = value) %>%
  mutate(locality = "BC") %>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality)) %>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality)) %>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality)) %>%
  mutate(habitat = "IG") %>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat)) %>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat)) %>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

#### create output tables
out_rar <- rar_18Ss %>%
  left_join(skey, by = "site")

write_csv(out_rar, "output_p_a/18S_taxonomy_per_site_rarefied_18ss.csv")

############################################################
# 18S litter
############################################################
### rarefaction
rar18sl <- rar18s %>%
  select(OTU.ID, starts_with("L")) # litter

## rarefied, by mergin refaction results with taxonomy
rartax18sl <- inner_join(rar18sl, tax)

### Identity and number of OTUs not in the taxonomy file
sum(is.na(rartax18sl$taxonomy))
sum(is.na(rartax18sl$taxonomy)) / nrow(rar18sl)
no.tax <- filter(rartax18sl, is.na(rartax18sl$taxonomy))
write_csv(no.tax, "output_p_a/18S_OTUs_rarefied_without_taxonomy_18sl.csv")

## Create taxonomy table

### rarefied
rar_18Sl <- gather(rartax18sl, key = site, value = presence, -OTU.ID, -taxonomy) %>%
  filter(!is.na(presence)) %>%
  filter(presence > 0)

#### define the taxonomic level to use for all groups,and add to table
rar_18Sl <- rar_18Sl %>%
  mutate(tax.class = "others") %>%
  mutate(tax.class = ifelse(grepl(tax.list[1], taxonomy), tax.list[1], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[2], taxonomy), tax.list[2], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[3], taxonomy), tax.list[3], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[4], taxonomy), tax.list[4], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[5], taxonomy), tax.list[5], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[6], taxonomy), tax.list[6], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[7], taxonomy), tax.list[7], tax.class)) %>%
  mutate(tax.class = ifelse(grepl(tax.list[8], taxonomy), tax.list[8], tax.class)) %>%
  mutate(tax.class = ifelse(is.na(taxonomy), "unknown", tax.class))

# create site to locality and habitat key
skey <- names(rar18sl) %>%
  as_tibble() %>%
  dplyr::rename(site = value) %>%
  mutate(locality = "BC") %>%
  mutate(locality = ifelse(grepl("CUI", site), "CUI", locality)) %>%
  mutate(locality = ifelse(grepl("CXN", site), "CXN", locality)) %>%
  mutate(locality = ifelse(grepl("JAU", site), "JAU", locality)) %>%
  mutate(habitat = "IG") %>%
  mutate(habitat = ifelse(grepl("TF", site), "TF", habitat)) %>%
  mutate(habitat = ifelse(grepl("VZ", site), "VZ", habitat)) %>%
  mutate(habitat = ifelse(grepl("CAM", site), "CAM", habitat))

#### create output tables
out_rar <- rar_18Sl %>%
  left_join(skey, by = "site")

write_csv(out_rar, "output_p_a/18S_taxonomy_per_site_rarefied_18sl.csv")

######################################################################
##################### Plots taxonomic groups############################
######################################################################

##########################################
# rarified
##########################################
S16_rars <- read_csv("output_p_a/16S_taxonomy_per_site_rarefied.csv")
S16_rarl <- read_csv("output_p_a/16S_taxonomy_per_site_rarefied_litter.csv")
S18_rars <- read_csv("output_p_a/18S_taxonomy_per_site_rarefied_18ss.csv")
S18_rarl <- read_csv("output_p_a/18S_taxonomy_per_site_rarefied_18sl.csv")

# load 16S
soil_16S <- S16_rars %>%
  distinct(OTU.ID, .keep_all = T) %>%
  group_by(tax.class) %>%
  summarise(OTUs.number = n()) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3)) %>%
  mutate(kind = "Soil") %>%
  filter(!is.na(kind)) %>%
  dplyr::rename(value = kind) %>%
  mutate(type = "Kind")


soil_16S$tax.class <- factor(soil_16S$tax.class, levels = unique(soil_16S$tax.class[order(soil_16S$fraction.OTUs)]))

litter_16S <- S16_rarl %>%
  distinct(OTU.ID, .keep_all = T) %>%
  group_by(tax.class) %>%
  summarise(OTUs.number = n()) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3)) %>%
  mutate(kind = "Litter") %>%
  filter(!is.na(kind)) %>%
  dplyr::rename(value = kind) %>%
  mutate(type = "Kind")

litter_16S$tax.class <- factor(litter_16S$tax.class,
  levels = unique(litter_16S$tax.class[order(litter_16S$fraction.OTUs)])
)

plo_16S <- bind_rows(soil_16S, litter_16S) %>%
  mutate(marker = "16S")

plo_16S$tax.class <- factor(plo_16S$tax.class, levels = unique(plo_16S$tax.class[order(plo_16S$fraction.OTUs)]))

# load 18S
soil_18S <- S18_rars %>%
  distinct(OTU.ID, .keep_all = T) %>%
  group_by(tax.class) %>%
  summarise(OTUs.number = n()) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3)) %>%
  mutate(kind = "Soil") %>%
  filter(!is.na(kind)) %>%
  dplyr::rename(value = kind) %>%
  mutate(type = "Kind")

litter_18S <- S18_rarl %>%
  distinct(OTU.ID, .keep_all = T) %>%
  group_by(tax.class) %>%
  summarise(OTUs.number = n()) %>%
  mutate(fraction.OTUs = round(OTUs.number / sum(OTUs.number), 3)) %>%
  mutate(kind = "Litter") %>%
  filter(!is.na(kind)) %>%
  dplyr::rename(value = kind) %>%
  mutate(type = "Kind")

litter_18S$tax.class <- factor(litter_18S$tax.class,
  levels = unique(litter_18S$tax.class[order(litter_18S$fraction.OTUs)])
)

plo_18S <- bind_rows(soil_18S, litter_18S) %>%
  mutate(marker = "18S")

plo_18S$tax.class <- factor(plo_18S$tax.class, levels = unique(plo_18S$tax.class[order(plo_18S$fraction.OTUs)]))

# number
S16 <- ggplot(data = plo_16S, aes(x = value, y = OTUs.number, fill = tax.class)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Set3", name = "Taxa Prokaryote (16S)") +
  ylab("Number of OTUs") +
  xlab("") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    legend.position = "right"
  ) +
  facet_grid(~ type, scales = "free_x")

S18 <- ggplot(data = plo_18S, aes(x = value, y = OTUs.number, fill = tax.class)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Paired", name = "Taxa Eukaryote (18S)") +
  ylab("Number of OTUs") +
  xlab("") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    legend.position = "right"
  ) +
  facet_grid(~ type, scales = "free_x")

out <- grid.arrange(S16, S18, nrow = 2)

ggsave("output_p_a/FiS4B_number_of_OTUs_rarefied.pdf", plot = out)

# fraction
S16 <- ggplot(data = plo_16S, aes(x = value, y = fraction.OTUs, fill = tax.class)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Set3", name = "Taxa 16S") +
  ylab("Fraction of unique OTUs") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    legend.position = "right"
  ) +
  facet_grid(~ type, scales = "free_x")


S18 <- ggplot(data = plo_18S, aes(x = value, y = fraction.OTUs, fill = tax.class)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_brewer(palette = "Paired", name = "Taxon 18S") +
  ylab("Fraction of unique OTUs") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 1),
    legend.position = "right"
  ) +
  facet_grid(~ type, scales = "free_x")


out <- grid.arrange(S16, S18, nrow = 2)

ggsave("output_p_a/Fig6AandC_fraction_of_OTUs_rarefied.pdf", plot = out)