##### To make Figure S2, boxplots of Shannon's Index of Diversity and read counts # Generate table of Shannon's Diversity # Vegan version 2.3-2 library("vegan") d<-read.table("GP_otu.txt", header=T, sep="\t", stringsAsFactors=F, quote = "", check.names=F, row.names=1, comment.char="") #sammples must be row-wise shan<-diversity(t(d), index = "shannon", MARGIN = 1, base = exp(1)) write.table(shan, file="Supplemental_Data_S3.txt", sep="\t") # read in Shannon's Index table # read in table generated from colSums of OTU table (d) = sums.txt #colSums(d, na.rm = FALSE, dims = 1) sh <- read.table("Supplemental_Data_S3.txt", sep="\t", header=T) sums <- read.table("sums.txt", sep="\t", header=T) # make a new factor that reflects the desired order, as opposed to default which is alphabetical order<-as.factor(c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4)) # column bind order to existing Shannon's Index data # column bind order to existing sums data shan<-cbind(sh,order) sums<-cbind(sums,order) # rename the factor to correct groups levels(shan$order)<-c("CDNBW","CDLBW","WDNBW","WDLBW") levels(sums$order)<-c("CDNBW","CDLBW","WDNBW","WDLBW") # make the plot pdf(file="Supplemental_Fig_S2.pdf") par(mfrow=c(1,2)) boxplot(data=shan,shannon~order, xlab="Groups", ylab="Shannon's Index") boxplot(data=sums,sum~order, xlab="Groups", ylab="Read Counts") dev.off() #####To make Figure 1, a coloured compositional biplot of the samples library(compositions) library(zCompositions) # read otu table otu <- read.table("S1_Table_OTUs.txt", header=T, row.names=1, check.names=F, comment.char="", sep="\t", skip=1) # remove taxonomy column tax <- otu$taxonomy otu$taxonomy <- NULL otu<-t(otu) # replace 0 values with Jeffrey's Prior of 0.5 # samples must be in rows otu.0 <- (otu + 0.5) # apply the centered log-ratio transformation # R rotates data when apply with the row margin is used # so we transpose it back otu.clr <- t(apply(otu.0, 1, function(x){log(x) - mean(log(x))})) # make a dataframe of conditions for colouring the plot # based on diet groups conds <- data.frame(c(rep(1,length(grep("CD", rownames(otu.clr)))), rep(2, length(grep("WD", rownames(otu.clr)))))) # can also be done by diet AND birth weight (4 groups), as per Supplementary Figure 1 # when colouring into 4 groups, the birth weight groups entirely overlap # conds <- data.frame(c(rep(1,length(grep("CD_NBW", rownames(otu.clr)))), # rep(2, length(grep("CD_LBW", rownames(otu.clr)))), # rep(3, length(grep("WD_NBW", rownames(otu.clr)))), # rep(4, length(grep("WD_LBW", rownames(otu.clr)))))) colnames(conds) <- "cond" palette=palette(c("darkcyan", "magenta")) # or for the 4 groups, remove comment from below (supplementary Figure 1) # palette=palette(c("darkcyan", "magenta", "darkblue", "orange")) # get the metric variance of the dataset, from compositions package mvar.clr <- mvar(otu.clr) # this uses the prcomp function for SVD which does not check to see if there are more variables than samples # base R\ pc.clr <- prcomp(otu.clr) # make the plot png(file="Fig2.png", pointsize=24, height=960, width=960) # make the biplot, function is from compositions package coloredBiplot(pc.clr,col=rgb(0,0,0,0.2),cex=c(0.6,0.5), xlab=paste("PC1: ", round(sum(pc.clr$sdev[1]^2)/mvar.clr, 3), sep=""), ylab=paste("PC2: ", round(sum(pc.clr$sdev[2]^2)/mvar.clr, 3), sep=""), xlabs.col=conds$cond, arrow.len=0.05, var.axes=F, expand=0.9, scale=0) dev.off() # make supplementary Figure S4 (remove comments from below) # png(file="S4_Fig_Biplot.png", pointsize=24, height=960, width=960) # coloredBiplot(pc.clr,col=rgb(0,0,0,0.2),cex=c(0.6,0.5), # xlab=paste("PC1: ", round(sum(pc.clr$sdev[1]^2)/mvar.clr, 3), sep=""), # ylab=paste("PC2: ", round(sum(pc.clr$sdev[2]^2)/mvar.clr, 3), sep=""), # xlabs.col=conds$cond, arrow.len=0.05, var.axes=F, expand=0.9, scale=0) # dev.off() ########### To make Figure 2, strip chart of differential OTUs between diet groups draw.grey.boxes <- function(at) { xlim <- par('usr')[1:2] # gets the limit of the bounding box for ( a in 1:length(at) ) { if ( a %% 2 == 1 ) next rect( xlim[1], a - 0.5, xlim[2], a + 0.5, border=NA, col=rgb(0,0,0,0.1) ) } } codaSeq.stripchart <- function( aldex.out=NULL, group.table=NULL, group.label="genus", method="wi.eBH", x.axis="effect", cex=0.8, effect.cutoff=1, p.cutoff=1, main=NULL, mar=c(2,12,4,0.5), do.ylab=TRUE) { # aldex.out is the data frame of observations to be plotted # group.table taxon information. Each column is a taxon name at a specific level # group.label is the column name from the taxon file # x.axis should be either diff.btw (difference) or effect # cex controls plot size # p.cutoff is the BH corrected wilcoxon rank test statistic # effect.cutoff is the BH corrected wilcoxon rank test statistic # do.ylab controls whether to add y axis label to plot for pseudo-time series display # copy the aldex output data frame if(is.null(aldex.out)) stop("please supply an appropriate input from ALDEx2") if(is.null(group.table)) stop("please supply an appropriate taxon table") aldex.out <- data.frame(aldex.out, taxon[rownames(aldex.out), group.label]) colnames(aldex.out)[ncol(aldex.out)] <- group.label aldex.out <- droplevels(aldex.out) # get a vector of significant and non-significant rows non.sig <- abs(aldex.out$effect) < effect.cutoff sig.pos <- aldex.out$effect >= effect.cutoff sig.neg <- aldex.out$effect <= effect.cutoff * -1 # get a vector of significant and non-significant rows p.sig <- aldex.out[,method]) < p.cutoff & abs(aldex.out$effect) < effect.cutoff # make a list of unique groups in the dataset # there may be empty groups.set, ignore these for now groups.set <- unique(aldex.out[[group.label]]) # generate a y axis plotting limit a bit larger than needed ylim<-c(length(groups.set) - (length(groups.set)+0.5), length(groups.set) + 0.5) x.axis <- x.axis # we find the effect or diff.btw columns to be most useful xlim = c(min(-1 * max(abs(aldex.out[,x.axis]))), max(aldex.out[,x.axis])) # basically can call the different significant groups.set within strip chart par(mar=mar, las=1, cex=cex) if(do.ylab == TRUE) { stripchart(aldex.out[non.sig,x.axis] ~ aldex.out[non.sig,group.label], col=c(rgb(0,0,0,0.3),rgb(0,0,0,0.3)), method="jitter", pch=19, xlim=xlim, xlab=x.axis, main=main) } if(do.ylab == FALSE) {stripchart(aldex.out[non.sig,x.axis] ~ aldex.out[non.sig,group.label], col=rgb(0,0,0,0.3), method="jitter", pch=19, xlim=xlim, xlab=x.axis, main=main, yaxt="n") } draw.grey.boxes(as.vector(groups.set)) stripchart(aldex.out[p.sig,x.axis] ~ aldex.out[p.sig,group.label], col=rgb(0,0,1,0.7),method="jitter", pch=19, add=T, cex=cex+0.2) stripchart(aldex.out[sig.pos,x.axis] ~ aldex.out[sig.pos,group.label], col=rgb(1,0,0,1),method="jitter", pch=19, add=T, cex=cex+0.2) stripchart(aldex.out[sig.neg,x.axis] ~ aldex.out[sig.neg,group.label], col=rgb(1,0,0,1),method="jitter", pch=19, add=T, cex=cex+0.2) abline(v=0, lty=2, col=rgb(0,0,0,0.2),lwd=2) abline(v=1, lty=3, col=rgb(0,0,0,0.2),lwd=2) abline(v= -1, lty=3, col=rgb(0,0,0,0.2),lwd=2) } codaSeq.stripchart(aldex.out=d, group.table=t, group.label="genus", p.cutoff=0.1) #####To make Figure S6, heatmap of OTUs of different abundance between diet groups # r script for plotting otu or higher level taxa based on the AS heatmap script, have option to log transform as well as sort # strip out headerline and the number sign on the column names first library(gplots) rm(list=ls(all=TRUE)) #clear all previous variables absolute_abundances <- read.table("S1_Table_OTUs.txt", header=T, sep="\t", skip=1) # leave below in to order alphabetically by taxonomies # absolute_abundances<- absolute_abundances[order(absolute_abundances$taxonomy),] #otu_num <- absolute_abundances$Taxon taxa_names <- absolute_abundances$taxonomy rownames(absolute_abundances)<-absolute_abundances$OTU.ID absolute_abundances_notaxa <- absolute_abundances[,2:(ncol(absolute_abundances)-1)] #strip the taxonomy column (assuming it is the last relative_abundances <- apply(absolute_abundances_notaxa[1:nrow(absolute_abundances_notaxa),], 2, function(y) { 100*(y / sum(y)) } ) #labelforrows=paste("OTU_",otu_num,taxa_names,sep="\t") labelforrows=taxa_names #table_check <- apply( relative_abundances, 2, function(x) { sum(x,na.rm=TRUE) } ) # should be all equal to one #if ( any(abs(table_check-100) > (.Machine$double.eps*16) ) ) stop("error") #do a log transform, watch the effect of the pseudo count relative_abundances<-relative_abundances + 0.9 #add pseudo count to transform, already in percent form, so add <1% as this was the cut off relative_abundances<-log2(relative_abundances) #to log transform #only plot sig. different otus between diet, without changing relative abundance write.table (relative_abundances, "relative_abundances_heatmap.txt", sep="\t", quote=FALSE) # keep all rows for OTUs significant between diet, as determined by ALDEx2 test # delete the rest!! # keep these #("2","6","10","11","13","16","17","21","22","26","31","34","35","36","44","49","50","51","55","56","58","66","71","73","76","77","78","87","110","111","112","124","129","131","137","143","151","158","160","165","169","173","174","190","193","208","221","234","237","269","307","328","334","337","375","436","494","598","693","739","996","1117","1244","1303","1451","2064","2127","2206","2230","2957","2987","3534","4068","4830","5426","5945","6160","6315","6323","6748","7249","7694","7875","8218","8344") abundances <- read.table("relative_abundances_heatmap.txt", sep="\t") pdf('Fig3.pdf', height=8, width=9) #heatmap(relative_abundances, Rowv = NA, Colv = NA, col=cm.colors(256), scale="row", margins=c(5,5), revC, xlab="Sample", ylab="OTU", labRow=otu_num) abundances<-as.matrix(abundances) heatmap(abundances, Rowv=NA, Colv=NA, col=colorRampPalette(c("aliceblue","blue","green","orange","red"))(100), scale="none", margins=c(5,5), xlab="Sample", ylab="OTU", revC=T) dev.off() ##### ALDEx2 tests for significantly different OTUs at<-read.table("S1_Table_OTUs.txt", sep="\t", quote="", check.names=F, header=T, row.names=1, skip=1) # first, compare all samples based on diet CD<-c("CD_NBW_193642_","CD_NBW_193648_","CD_NBW_193649_","CD_NBW_211558_","CD_NBW_193647_","CD_NBW_199726_","CD_NBW_199731_","CD_NBW_205421_","CD_NBW_205431_","CD_LBW_193641_","CD_LBW_199724_","CD_LBW_199744_","CD_LBW_205420_","CD_LBW_211554_","CD_LBW_199730_","CD_LBW_205406_","CD_LBW_205419_","CD_LBW_205424_","CD_LBW_205430_") WD<-c("WD_NBW_205422_","WD_NBW_205423_","WD_NBW_185551_","WD_NBW_193635_","WD_NBW_193639_","WD_NBW_199718_","WD_LBW_185545_","WD_LBW_193634_","WD_LBW_199719_","WD_LBW_199720_","WD_LBW_199734_","WD_LBW_199721_","WD_LBW_199735_","WD_LBW_205429_") aldex.in<-at[,c(CD,WD)] conds<-c(rep("CD", length (CD)), rep("WD", length (WD))) x <- aldex(aldex.in, conds, mc.samples=128, test="t", effect=TRUE, include.sample.summary=FALSE, verbose=FALSE) x<-x[order(x$we.eBH),] write.table(x, file="aldex_diet", sep="\t", quote=FALSE, col.names=NA) # Could also test based on birth weight, or diet+birth weight combined # But only groups that are separable on the PCA are diet groups NBW<-c("CD_NBW_193642_","CD_NBW_193647_","CD_NBW_193648_","CD_NBW_193649_","CD_NBW_199731_","CD_NBW_205421_","CD_NBW_205431_","CD_NBW_211558_","CD_NBW_199726_","WD_NBW_185551_","WD_NBW_193635_","WD_NBW_193639_","WD_NBW_199718_","WD_NBW_205422_","WD_NBW_205423_") LBW<-c("CD_LBW_193641_","CD_LBW_199724_","CD_LBW_199730_","CD_LBW_199744_","CD_LBW_205406_","CD_LBW_205419_","CD_LBW_205420_","CD_LBW_205424_","CD_LBW_205430_","CD_LBW_211554_","WD_LBW_185545_","WD_LBW_193634_","WD_LBW_199719_","WD_LBW_199720_","WD_LBW_199721_","WD_LBW_199734_","WD_LBW_199735_","WD_LBW_205429_") aldex.in<-at[,c(NBW,LBW)] conds<-c(rep("NBW", length (NBW)), rep("LBW", length (LBW))) x <- aldex(aldex.in, conds, mc.samples=128, test="t", effect=TRUE, include.sample.summary=FALSE, verbose=FALSE) x<-x[order(x$we.eBH),] write.table(x, file=“aldex_bw”, sep="\t", quote=FALSE, col.names=NA)