##### To make Figure S2, boxplots of Shannon's Index of Diversity and read counts

# Generate table of Shannon's Diversity
# Vegan version 2.3-2
library("vegan")
d<-read.table("GP_otu.txt", header=T, sep="\t", stringsAsFactors=F, quote = "", check.names=F, row.names=1, comment.char="")

#sammples must be row-wise
shan<-diversity(t(d), index = "shannon", MARGIN = 1, base = exp(1))
write.table(shan, file="Supplemental_Data_S3.txt", sep="\t")

# read in Shannon's Index table
# read in table generated from colSums of OTU table (d) = sums.txt
#colSums(d, na.rm = FALSE, dims = 1)

sh <- read.table("Supplemental_Data_S3.txt", sep="\t", header=T)
sums <- read.table("sums.txt", sep="\t", header=T)

# make a new factor that reflects the desired order, as opposed to default which is alphabetical
order<-as.factor(c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4))

# column bind order to existing Shannon's Index data
# column bind order to existing sums data
shan<-cbind(sh,order)
sums<-cbind(sums,order)

# rename the factor to correct groups
levels(shan$order)<-c("CDNBW","CDLBW","WDNBW","WDLBW")
levels(sums$order)<-c("CDNBW","CDLBW","WDNBW","WDLBW")


# make the plot
pdf(file="Supplemental_Fig_S2.pdf")
par(mfrow=c(1,2))
boxplot(data=shan,shannon~order, xlab="Groups", ylab="Shannon's Index")
boxplot(data=sums,sum~order, xlab="Groups", ylab="Read Counts")
dev.off()


#####To make Figure 1, a coloured compositional biplot of the samples
library(compositions)
library(zCompositions)

# read otu table 
otu <- read.table("S1_Table_OTUs.txt", header=T, row.names=1, check.names=F, comment.char="", sep="\t", skip=1)

# remove taxonomy column
tax <- otu$taxonomy
otu$taxonomy <- NULL
otu<-t(otu)

# replace 0 values with Jeffrey's Prior of 0.5
# samples must be in rows
otu.0 <- (otu + 0.5)

# apply the centered log-ratio transformation
# R rotates data when apply with the row margin is used
# so we transpose it back
otu.clr <- t(apply(otu.0, 1, function(x){log(x) - mean(log(x))}))

# make a dataframe of conditions for colouring the plot
# based on diet groups 
conds <- data.frame(c(rep(1,length(grep("CD", rownames(otu.clr)))),
    rep(2, length(grep("WD", rownames(otu.clr))))))

# can also be done by diet AND birth weight (4 groups), as per Supplementary Figure 1
# when colouring into 4 groups, the birth weight groups entirely overlap
# conds <- data.frame(c(rep(1,length(grep("CD_NBW", rownames(otu.clr)))),
#   rep(2, length(grep("CD_LBW", rownames(otu.clr)))),
#   rep(3, length(grep("WD_NBW", rownames(otu.clr)))),
#   rep(4, length(grep("WD_LBW", rownames(otu.clr))))))

colnames(conds) <- "cond"

palette=palette(c("darkcyan", "magenta"))

# or for the 4 groups, remove comment from below (supplementary Figure 1)
# palette=palette(c("darkcyan", "magenta", "darkblue", "orange"))

# get the metric variance of the dataset, from compositions package
mvar.clr <- mvar(otu.clr)

# this uses the prcomp function for SVD which does not check to see if there are more variables than samples
# base R\
pc.clr <- prcomp(otu.clr)

# make the plot
png(file="Fig2.png", pointsize=24, height=960, width=960)

# make the biplot, function is from compositions package
coloredBiplot(pc.clr,col=rgb(0,0,0,0.2),cex=c(0.6,0.5),
 xlab=paste("PC1: ", round(sum(pc.clr$sdev[1]^2)/mvar.clr, 3), sep=""),
 ylab=paste("PC2: ", round(sum(pc.clr$sdev[2]^2)/mvar.clr, 3), sep=""),
 xlabs.col=conds$cond, arrow.len=0.05, var.axes=F, expand=0.9,  scale=0)
dev.off()

# make supplementary Figure S4 (remove comments from below)

# png(file="S4_Fig_Biplot.png", pointsize=24, height=960, width=960)
# coloredBiplot(pc.clr,col=rgb(0,0,0,0.2),cex=c(0.6,0.5),
# xlab=paste("PC1: ", round(sum(pc.clr$sdev[1]^2)/mvar.clr, 3), sep=""),
# ylab=paste("PC2: ", round(sum(pc.clr$sdev[2]^2)/mvar.clr, 3), sep=""),
# xlabs.col=conds$cond, arrow.len=0.05, var.axes=F, expand=0.9,  scale=0)
# dev.off()

########### To make Figure 2, strip chart of differential OTUs between diet groups

draw.grey.boxes <- function(at) {
   xlim <- par('usr')[1:2] # gets the limit of the bounding box
   for ( a in 1:length(at) ) {
       if ( a %% 2 == 1 ) next
       rect( xlim[1], a - 0.5, xlim[2], a + 0.5, border=NA, col=rgb(0,0,0,0.1) )
   }

}

codaSeq.stripchart <- function(
    aldex.out=NULL, group.table=NULL, group.label="genus", method="wi.eBH",
    x.axis="effect", cex=0.8, effect.cutoff=1, p.cutoff=1,
    main=NULL, mar=c(2,12,4,0.5), do.ylab=TRUE)
  {
  # aldex.out is the data frame of observations to be plotted
  # group.table taxon information. Each column is a taxon name at a specific level
  # group.label is the column name from the taxon file
  # x.axis should be either diff.btw (difference) or effect
  # cex controls plot size
  # p.cutoff is the BH corrected wilcoxon rank test statistic
  # effect.cutoff is the BH corrected wilcoxon rank test statistic
  # do.ylab controls whether to add y axis label to plot for pseudo-time series display
  # copy the aldex output data frame

  if(is.null(aldex.out)) stop("please supply an appropriate input from ALDEx2")
  if(is.null(group.table)) stop("please supply an appropriate taxon table")

  aldex.out <- data.frame(aldex.out, taxon[rownames(aldex.out), group.label])
  colnames(aldex.out)[ncol(aldex.out)] <- group.label
  aldex.out <- droplevels(aldex.out)

  # get a vector of significant and non-significant rows
  non.sig <- abs(aldex.out$effect) < effect.cutoff
  sig.pos <- aldex.out$effect >= effect.cutoff
  sig.neg <- aldex.out$effect <= effect.cutoff * -1

  # get a vector of significant and non-significant rows
  p.sig <- aldex.out[,method]) < p.cutoff & abs(aldex.out$effect) < effect.cutoff

  # make a list of unique groups in the dataset
  # there may be empty groups.set, ignore these for now
  groups.set <- unique(aldex.out[[group.label]])

  # generate a y axis plotting limit a bit larger than needed
  ylim<-c(length(groups.set) - (length(groups.set)+0.5), length(groups.set) + 0.5)

  x.axis <- x.axis # we find the effect or diff.btw columns to be most useful
  xlim = c(min(-1 * max(abs(aldex.out[,x.axis]))), max(aldex.out[,x.axis]))

  # basically can call the different significant groups.set within strip chart
  par(mar=mar, las=1, cex=cex)
if(do.ylab == TRUE) { stripchart(aldex.out[non.sig,x.axis] ~ aldex.out[non.sig,group.label],
    col=c(rgb(0,0,0,0.3),rgb(0,0,0,0.3)), method="jitter", pch=19, xlim=xlim, xlab=x.axis, main=main)
    }
if(do.ylab == FALSE) {stripchart(aldex.out[non.sig,x.axis] ~ aldex.out[non.sig,group.label],
    col=rgb(0,0,0,0.3), method="jitter", pch=19, xlim=xlim, xlab=x.axis, main=main, yaxt="n")
    }

draw.grey.boxes(as.vector(groups.set))

  stripchart(aldex.out[p.sig,x.axis] ~ aldex.out[p.sig,group.label],
    col=rgb(0,0,1,0.7),method="jitter", pch=19, add=T, cex=cex+0.2)

  stripchart(aldex.out[sig.pos,x.axis] ~ aldex.out[sig.pos,group.label],
    col=rgb(1,0,0,1),method="jitter", pch=19, add=T, cex=cex+0.2)
  stripchart(aldex.out[sig.neg,x.axis] ~ aldex.out[sig.neg,group.label],
    col=rgb(1,0,0,1),method="jitter", pch=19, add=T, cex=cex+0.2)

  abline(v=0, lty=2, col=rgb(0,0,0,0.2),lwd=2)
  abline(v=1, lty=3, col=rgb(0,0,0,0.2),lwd=2)
  abline(v= -1, lty=3, col=rgb(0,0,0,0.2),lwd=2)

}


codaSeq.stripchart(aldex.out=d, group.table=t, group.label="genus", p.cutoff=0.1)

#####To make Figure S6, heatmap of OTUs of different abundance between diet groups

# r script for plotting otu or higher level taxa based on the AS heatmap script, have option to log transform as well as sort
# strip out headerline and the number sign on the column names first

library(gplots)

rm(list=ls(all=TRUE)) #clear all previous variables


absolute_abundances <- read.table("S1_Table_OTUs.txt", header=T, sep="\t", skip=1)

# leave below in to order alphabetically by taxonomies
# absolute_abundances<- absolute_abundances[order(absolute_abundances$taxonomy),] 

#otu_num <- absolute_abundances$Taxon
taxa_names <- absolute_abundances$taxonomy

rownames(absolute_abundances)<-absolute_abundances$OTU.ID
absolute_abundances_notaxa <- absolute_abundances[,2:(ncol(absolute_abundances)-1)] #strip the taxonomy column (assuming it is the last
relative_abundances <- apply(absolute_abundances_notaxa[1:nrow(absolute_abundances_notaxa),], 2, function(y) { 100*(y / sum(y)) } )

#labelforrows=paste("OTU_",otu_num,taxa_names,sep="\t")
labelforrows=taxa_names

#table_check <- apply( relative_abundances, 2, function(x) { sum(x,na.rm=TRUE) } ) # should be all equal to one
#if ( any(abs(table_check-100) > (.Machine$double.eps*16) ) ) stop("error")

#do a log transform, watch the effect of the pseudo count

relative_abundances<-relative_abundances + 0.9 #add pseudo count to transform, already in percent form, so add <1% as this was the cut off
relative_abundances<-log2(relative_abundances) #to log transform


#only plot sig. different otus between diet, without changing relative abundance
write.table (relative_abundances, "relative_abundances_heatmap.txt", sep="\t", quote=FALSE)

# keep all rows for OTUs significant between diet, as determined by ALDEx2 test
# delete the rest!!
# keep these 
#("2","6","10","11","13","16","17","21","22","26","31","34","35","36","44","49","50","51","55","56","58","66","71","73","76","77","78","87","110","111","112","124","129","131","137","143","151","158","160","165","169","173","174","190","193","208","221","234","237","269","307","328","334","337","375","436","494","598","693","739","996","1117","1244","1303","1451","2064","2127","2206","2230","2957","2987","3534","4068","4830","5426","5945","6160","6315","6323","6748","7249","7694","7875","8218","8344")


abundances <- read.table("relative_abundances_heatmap.txt", sep="\t")
pdf('Fig3.pdf', height=8, width=9)

#heatmap(relative_abundances, Rowv = NA, Colv = NA,  col=cm.colors(256), scale="row", margins=c(5,5), revC, xlab="Sample", ylab="OTU", labRow=otu_num)

abundances<-as.matrix(abundances)
heatmap(abundances, Rowv=NA, Colv=NA, col=colorRampPalette(c("aliceblue","blue","green","orange","red"))(100), scale="none", margins=c(5,5), xlab="Sample", ylab="OTU", revC=T)
dev.off() 

##### ALDEx2 tests for significantly different OTUs
 
at<-read.table("S1_Table_OTUs.txt", sep="\t", quote="", check.names=F, header=T, row.names=1, skip=1)

# first, compare all samples based on diet
CD<-c("CD_NBW_193642_","CD_NBW_193648_","CD_NBW_193649_","CD_NBW_211558_","CD_NBW_193647_","CD_NBW_199726_","CD_NBW_199731_","CD_NBW_205421_","CD_NBW_205431_","CD_LBW_193641_","CD_LBW_199724_","CD_LBW_199744_","CD_LBW_205420_","CD_LBW_211554_","CD_LBW_199730_","CD_LBW_205406_","CD_LBW_205419_","CD_LBW_205424_","CD_LBW_205430_")
WD<-c("WD_NBW_205422_","WD_NBW_205423_","WD_NBW_185551_","WD_NBW_193635_","WD_NBW_193639_","WD_NBW_199718_","WD_LBW_185545_","WD_LBW_193634_","WD_LBW_199719_","WD_LBW_199720_","WD_LBW_199734_","WD_LBW_199721_","WD_LBW_199735_","WD_LBW_205429_")

aldex.in<-at[,c(CD,WD)]
conds<-c(rep("CD", length (CD)), rep("WD", length (WD)))
x <- aldex(aldex.in, conds, mc.samples=128, test="t", effect=TRUE, include.sample.summary=FALSE, verbose=FALSE)
x<-x[order(x$we.eBH),]
write.table(x, file="aldex_diet", sep="\t", quote=FALSE, col.names=NA)


# Could also test based on birth weight, or diet+birth weight combined
# But only groups that are separable on the PCA are diet groups
NBW<-c("CD_NBW_193642_","CD_NBW_193647_","CD_NBW_193648_","CD_NBW_193649_","CD_NBW_199731_","CD_NBW_205421_","CD_NBW_205431_","CD_NBW_211558_","CD_NBW_199726_","WD_NBW_185551_","WD_NBW_193635_","WD_NBW_193639_","WD_NBW_199718_","WD_NBW_205422_","WD_NBW_205423_")
LBW<-c("CD_LBW_193641_","CD_LBW_199724_","CD_LBW_199730_","CD_LBW_199744_","CD_LBW_205406_","CD_LBW_205419_","CD_LBW_205420_","CD_LBW_205424_","CD_LBW_205430_","CD_LBW_211554_","WD_LBW_185545_","WD_LBW_193634_","WD_LBW_199719_","WD_LBW_199720_","WD_LBW_199721_","WD_LBW_199734_","WD_LBW_199735_","WD_LBW_205429_")

aldex.in<-at[,c(NBW,LBW)]
conds<-c(rep("NBW", length (NBW)), rep("LBW", length (LBW)))
x <- aldex(aldex.in, conds, mc.samples=128, test="t", effect=TRUE, include.sample.summary=FALSE, verbose=FALSE)
x<-x[order(x$we.eBH),]
write.table(x, file=“aldex_bw”, sep="\t", quote=FALSE, col.names=NA)