###################################################################################################### # # # META-ANALYSIS OF MARINE BIODIVERSITY & ECOSYSTEM FUNCTIONING EXPERIMENTS # # # ####################################################################################################### #Author: Jon Lefcheck, Jarrett Byrnes, Lars Gamfeldt #Last updated: 16 January 2014 #Please contact Lars Gamfeldt (lars.gamfeldt@marecol.gu.se), Jon Lefcheck (jslefche@vims.edu), or #Jarrett Byrnes (jarrett.byrnes@umb.edu) before utilizing any part of this script or accompanying data ####################################################################################################### # TABLE OF CONTENTS # # Line 23: Required libraries # # Line 32: Importing and formatting the data # # Line 118: Exploring the data # # Line 170: Calculating summary log ratios # # Line 428: Functional form fitting # # # ####################################################################################################### library(car) #Calls: recode library(ggplot2) #Calls: ggplot library(gridExtra) #Calls: grid.arrange library(MuMIn) #Calls: AICc library(nlme) #Calls: lme library(plyr) #Calls: ddply library(reshape2) #Calls: melt, dcast ####################################################################################################### # IMPORTING AND FORMATTING THE DATA # ####################################################################################################### #Read in marine data set marine=read.table("./gamfeldt Appendix 2.txt") #Remove rows where Ycat is not consumption/production/biogeo_flux marine=droplevels(subset(marine,Ycat=="Consumption" | Ycat=="Production" | Ycat=="Biogeo_flux")) #Remove all rows where response is not the final point in a time series marine=droplevels(subset(marine,FinalT=="Y")) #Extract additive studies (for inclusion later) addmarine=droplevels(subset(marine,Des3!="S")) marine=droplevels(subset(marine,Des3=="S")) #Convert N, Y, and SD columns to numeric #First, extract names of columns for response means, N, and SD Y.colnames=colnames(marine)[ grep("Y",colnames(marine))[grep("Y",colnames(marine))>=78] ] N.colnames=colnames(marine)[ grep("N",colnames(marine))[grep("N",colnames(marine))>=78] ] SD.colnames=colnames(marine)[ grep("SD",colnames(marine))[grep("SD",colnames(marine))>=78] ] #Convert response values to numeric marine[,c(Y.colnames,N.colnames,SD.colnames)]=apply(marine[,c(Y.colnames,N.colnames,SD.colnames)],2, function(x) as.numeric(as.character(x)) ) #Shift points so all values are positive (in the event any responses are negative, multiply by -1 and add max value) marine[,Y.colnames]=t(apply(marine[,Y.colnames],1,function(x) { if(any(x<0,na.rm=TRUE)) -x+max(abs(x)) else x } )) #Convert column "Direction" into numbers marine=transform(marine,Direction=recode(Direction,"'Pos'='1';'Neg'='-1'")) marine$Direction=as.numeric(levels(marine$Direction)[marine$Direction]) #Recalculate log response ratios for LR1 = ln(avg mono/poly), LR2 = ln(max mono/poly), LR3 = ln(pred mono/poly) #First, extract vector of poly column names polyY.colnames=colnames(marine)[ grep("Y",colnames(marine))[grep("Y",colnames(marine))>109] ] polySD.colnames=colnames(marine)[ grep("SD",colnames(marine))[grep("SD",colnames(marine))>123] ] polyN.colnames=colnames(marine)[ grep("N",colnames(marine))[grep("N",colnames(marine))>79] ] #Now calculate log ratios marine=cbind(marine, LR1=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$Y1 ), VLR1=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2 apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N ( ( marine[,"SD1"]^2 )/( (marine[,"Y1"]^2)*marine[,"N1"] ) ), LR2=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$YEmonoLR2 ), VLR2=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2 apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N ( ( marine[,"SDEmonoLR2"]^2 )/( (marine[,"YEmonoLR2"]^2)*marine[,"NEmono"] ) ), LR3=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$YEmonoLR3 ), VLR3=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2 apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N ( ( marine[,"SDEmonoLR3"]^2 )/( (marine[,"YEmonoLR3"]^2)*marine[,"NEmono"] ) ) ) #Multiply sign by hypothesized direction #First, determine whether all responses are negative marine$Reverse=apply(marine[,Y.colnames],1,function(x) sum(na.omit(x))>0) #If Reverse=="TRUE" AND Direction=="-1" then multiply LR1 and LR3 by -1, otherwise do nothing marine[,c("LR1","LR3")]=lapply(c("LR1","LR3"),function(i) ifelse(marine$Reverse=="TRUE" & marine$Direction=="-1",marine[,i]*-1,marine[,i]) ) #Write output to a .csv #write.csv(marine,"Marine-meta LR1 LR2 LR3 with var--11-13-2013.csv") #Transform some variables for more intuitive plotting marine=transform(marine,Sys3=recode(Sys3,"'hard'='Hard Sub';'soft'='Soft Sub';'seagrass'='Seagrass'; 'pelagic'='Pelagic';'saltmarsh'='Salt Marsh'")) marine$Sys3=factor(marine$Sys3,levels=rev(c("Salt Marsh","Pelagic","Seagrass","Soft Sub","Hard Sub"))) marine=transform(marine,FTG=recode(FTG,"'C'='Carnivore';'D'='Detritivore';'H'='Herbivore'; 'M'='Mixed';'P'='Primary Prod.'")) marine$FTG=factor(marine$FTG,levels=rev(c("Mixed","Carnivore","Herbivore","Primary Prod."))) marine=transform(marine,Ycat=recode(Ycat, "'Production'='Production';'Consumption'='Consumption';'Biogeo_flux'='Biogeochemical\nFluxes'")) #Remove several entries because the expected direction of the diversity effect is not clear from #theory: #Bruno & O'Connor Entry 148 #Blake & Duffy 2011 Entries re: Zostera and sessile inverts marine=subset(marine,!Entry %in% c(148,202:203,207:208,212:213,217:218)) #write.csv(marine,"Marine meta final dataset.csv") ####################################################################################################### # EXPLORING THE DATA # ####################################################################################################### length(unique(marine$Study.id.marine)) #Number of studies length(marine$Expt.id.marine) #Number of experiments median(marine$Smax) #Median number of species number ddply(marine,"Ycat",summarize,median(Smax)) range(marine$Smax) #Range of species number ddply(marine,"Ycat",summarize,range(Smax)) #Look at distribution of studies and experiments by Ycat, FTG, etc. #Create frequency tables for Ycat and FTG of experiments and studies bar_FTG=count(marine,c("Study.id.marine","Ycat","FTG")) bar_FTG=count(bar_FTG,c("Ycat","FTG")); names(bar_FTG)[3]="expt.freq" study_bar_FTG=ddply(marine,c("Study.id.marine","Ycat","FTG"),nrow) bar_FTG$study.freq=count(study_bar_FTG,c("Ycat","FTG"))$freq ftg.plot=ggplot(bar_FTG,aes(x=FTG,y=study.freq,fill=Ycat,label=expt.freq))+geom_bar()+ geom_text(position=position_stack(width=1),col="white",vjust=1)+labs(x="\nFocal Trophic Group",y="Number of studies\n")+ theme_bw(base_size=18)+ theme(legend.position="none",panel.grid.major=element_blank(),panel.grid.minor=element_blank(), panel.background=element_rect(fill="white")) #Create frequency tables for Ycat and Sys3 of experiments and studies bar_Sys3=count(marine,c("Study.id.marine","Ycat","Sys3")) bar_Sys3=count(bar_Sys3,c("Ycat","Sys3")); names(bar_Sys3)[3]="expt.freq" study_bar_Sys3=ddply(marine,c("Study.id.marine","Ycat","Sys3"),nrow) bar_Sys3$study.freq=count(study_bar_Sys3,c("Ycat","Sys3"))$freq sys3.plot=ggplot(bar_Sys3,aes(x=Sys3,y=study.freq,fill=Ycat,label=expt.freq))+geom_bar()+ geom_text(position=position_stack(width=1),col="white",vjust=1)+labs(x="\nStudy System",y="")+theme_bw(base_size=18)+ theme(panel.grid.major=element_blank(),panel.grid.minor=element_blank(), panel.background=element_rect(fill="white"),legend.position=c(0.6,0.8),legend.background=element_rect(fill=NA),legend.title=element_blank()) grid.arrange(ftg.plot,sys3.plot,nrow=1) #11" x 6" #Map of studies and experiments coord=ddply(marine,c("Study.id.marine","Expt.id.marine","Lat","Lon"),length) coord=count(coord,c("Lat","Lon")) coord=coord[order(coord$freq),] #Plot map (7" x 4.5") ggplot()+labs(x="",y="")+ geom_polygon(data=map_data("world"),aes(x=long,y=lat,group=group),fill="grey75",col="grey75")+ geom_point(data=coord,aes(x=Lon,y=Lat,fill=freq),shape=24,size=4.5,col="black",width=1.8,alpha=0.9)+ scale_fill_gradient(high="dodgerblue4",low="dodgerblue",name="No.\nexpts")+ theme(panel.grid.major=element_blank(),panel.grid.minor=element_blank(), panel.background=element_rect(fill="white"), legend.position=c(0.1,0.35)) ####################################################################################################### # CALCULATING SUMMARY LOG RATIOS # ####################################################################################################### #Calculate raw log ratios based on various factors rawLR.list=lapply(list("Ycat", #By category c("Ycat","FTG"), #By category and focal trophic group c("Ycat","Sys3"), #By category and ecosystem type c("Ycat","across_trophic","trophic_dir"), #By category and trophic direction c("Ycat","FTG","Sys3")), #By category and focal trophic group and ecosystem type function(j) { ddply(marine,j,function(x) { LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3") data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])), ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])), ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } ) LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df) return(LR.df) } ) } ) names(rawLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & Trophic Direction","Ycat & FTG & Sys3") #Rename LR1 and LR3 for(i in seq_along(rawLR.list)) rawLR.list[[i]]=transform(rawLR.list[[i]],Log.ratio=recode(Log.ratio,"'LR1'='LRnet';'LR3'='LRext'")) #Set levels for plotting for(i in seq_along(rawLR.list)) rawLR.list[[i]][,"Log.ratio"]=factor(rawLR.list[[i]][,"Log.ratio"],levels=c("LRnet","LRext")) #lapply(seq_along(rawLR.list),function(i) write.csv(rawLR.list[[i]],paste("Raw LRs by ",names(rawLR.list)[i],".csv",sep="")) ) #Plot raw LRs by category plot.list=lapply(rawLR.list,function(i) { #First change levels of factors to expression for subscripts on LRs levels(i$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext]))) if("FTG" %in% names(i) & "Sys3" %in% names(i)) { levels(i$FTG)=c(expression(paste("Primary Prod.")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed"))) levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh"))) } else if("across_trophic" %in% names(i) & !"Sys3" %in% names(i)) { levels(i$across_trophic)=c(expression(paste("Between")),expression(paste("Within"))) levels(i$trophic_dir)=c(expression(paste("Bottom-up")),expression(paste("Neither")),expression(paste("Top-down"))) } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) { levels(i$FTG)=c(expression(paste("Primary Prod")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed"))) } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) { levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh"))) } else { } #Next create initial plot p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+ geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+ geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+ geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+ scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+ scale_color_manual(values=c("red","blue","green3"))+ scale_fill_manual(values=c("red","blue","green3"))+ scale_shape_manual(values=c(21,24,22))+ coord_flip()+theme_bw(base_size=24) #Now build out facets depending on which factors are in the dataframe if("FTG" %in% names(i) & "Sys3" %in% names(i)) { p=p+facet_grid(Sys3+FTG~Log.ratio,labeller=label_parsed) } else if("across_trophic" %in% names(i)) { p=p+facet_grid(across_trophic+trophic_dir~Log.ratio,labeller=label_parsed) } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) { p=p+facet_grid(FTG~Log.ratio,labeller=label_parsed) } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) { p=p+facet_grid(Sys3~Log.ratio,labeller=label_parsed) } else { p=p+facet_grid(~Log.ratio,labeller=label_parsed) } p=p+geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+ theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(), panel.grid.minor=element_blank()) gt=ggplot_gtable(ggplot_build(p)) gt$layout$clip[grep("panel",gt$layout$name)]="off" return(gt) } ) #This overwrites any object in the plotting space so need to save after each one! grid.draw(plot.list[[1]]) #6" x 5" grid.draw(plot.list[[2]]) #8" x 10.5" grid.draw(plot.list[[3]]) #8" x 12" grid.draw(plot.list[[4]]) #8" x 8" grid.draw(plot.list[[5]]) #Make separate plots for Ygen=="RD" and real_flux=="Y" marine.Ygen=marine levels(marine.Ygen$Ygen)=c("RD","SST/SSR","SST/SSR-1","SST/SSR") rawLR.Ygen=ddply(marine.Ygen,c("Ycat","Ygen"),function(x) { LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3") data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])), ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])), ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } ) LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df) return(LR.df) } ) rawLR.real_flux=ddply(marine,c("Ycat","real_flux"),function(x) { LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3") data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])), ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])), ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } ) LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df) return(LR.df) } ) #Rename levels for plotting levels(rawLR.Ygen$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext]))) levels(rawLR.Ygen$Ygen)=c(expression(paste("Rates")),expression(paste("States")),expression(paste("SST/SSR-1"))) levels(rawLR.real_flux$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext]))) levels(rawLR.real_flux$real_flux)=c(expression(paste("States")),expression(paste("Rates"))) #Plot LRs for categories: 9" x 6" ggplot(rawLR.Ygen,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+ geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+ geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+ geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+ scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+ scale_color_manual(values=c("red","blue","green3"))+ scale_fill_manual(values=c("red","blue","green3"))+ scale_shape_manual(values=c(21,24,22))+ coord_flip()+theme_bw(base_size=24)+ facet_grid(Ygen~Log.ratio,labeller=label_parsed)+ geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+ theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(), panel.grid.minor=element_blank()) #Plot LRs for stocks vs real fluxes: 8" x 6" ggplot(rawLR.real_flux,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+ geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+ geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+ geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+ scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+ scale_color_manual(values=c("red","blue","green3"))+ scale_fill_manual(values=c("red","blue","green3"))+ scale_shape_manual(values=c(21,24,22))+ coord_flip()+theme_bw(base_size=24)+ facet_grid(real_flux~Log.ratio,labeller=label_parsed)+ geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+ theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(), panel.grid.minor=element_blank()) # #Fit model from Hedges et al. to obtain variance-weighted estimates of log ratios by category # estLR.list=lapply(list("Ycat",c("Ycat","FTG"),c("Ycat","Sys3"),c("Ycat","FTG","Sys3")),function(j) { # ddply(marine,j,function(x) { # LR.list=lapply(list("LR1","LR2","LR3"),function(i) { # data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] # if(length(unique(data$Study.id.marine))>1) { # mod=rrma(formula(paste(i,"~1")),data=data,study_id=data$Study.id.marine,var_eff_size=data[,paste("V",i,sep="")],rho=0.2) # data.frame(Value=coef(mod)[1],n.expt=mod$n,n.studies=mod$k,ci.lb=mod$est[6],ci.ub=mod$est[7]) # } else { # mod=rma(data[,i],data[,paste("V",i,sep="")]) # data.frame(Value=coef(mod)[1],n.expt=mod$k,n.studies=1,ci.lb=mod$ci.lb,ci.ub=mod$ci.ub) } # } ) # LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR2","LR3"),LR.df) # return(LR.df) # } ) # } ) # names(estLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & FTG & Sys3") # #lapply(seq_along(estLR.list),function(i) write.csv(estLR.list[[i]],paste("Model LRs by ",names(estLR.list)[i],".csv",sep="")) ) # # #Plot estimated LRs by category # lapply(estLR.list,function(i) { # p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+geom_point(size=9,alpha=0.8)+geom_linerange(lwd=1.2)+ # geom_hline(xintercept=0,lwd=1.3,lty=2)+labs(x="",y="")+coord_flip()+theme_bw(base_size=24) # if("FTG" %in% names(i) & "Sys3" %in% names(i)) { # p+facet_grid(Sys3+FTG~Log.ratio) # } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) { # p+facet_grid(FTG~Log.ratio) # } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) { # p+facet_grid(Sys3~Log.ratio) # } else { p+facet_wrap(~Log.ratio) } # } ) # # #Calculate variance-weighted log ratios partitioned by category # varLR.list=lapply(list("Ycat",c("Ycat","FTG"),c("Ycat","Sys3"),c("Ycat","FTG","Sys3")),function(j) { # ddply(marine,j,function(x) { # LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3") # data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] # w=1/data[,paste("V",i,sep="")] # Q=sum(w*data[,i]^2)-(sum(w*data[,i])^2)/sum(w) # varhat=(Q-(length(data[,i])-1))/ # ( sum(w)-(sum(w^2)/sum(w)) ) # Wstar=1/(data[,i]+varhat) # data.frame(Value=sum(Wstar*data[,i])/sum(Wstar), # n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])), # ci.lb=sum(Wstar*data[,i])/sum(Wstar)-qt(0.975,length(data[,i])-1)*sqrt(abs(1/sum(Wstar))), # ci.ub=sum(Wstar*data[,i])/sum(Wstar)+qt(0.975,length(data[,i])-1)*sqrt(abs(1/sum(Wstar)))) } ) # LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df) # return(LR.df) # } ) } ) # names(varLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & FTG & Sys3") # #Rename LR1 and LR3 # for(i in seq_along(varLR.list)) varLR.list[[i]]=transform(varLR.list[[i]],Log.ratio=recode(Log.ratio,"'LR1'='LRnet';'LR3'='LRext'")) # #Set levels for plotting # for(i in seq_along(varLR.list)) varLR.list[[i]][,"Log.ratio"]=factor(varLR.list[[i]][,"Log.ratio"],levels=c("LRnet","LRext")) # #lapply(seq_along(varLR.list),function(i) write.csv(varLR.list[[i]],paste("Raw LRs by ",names(varLR.list)[i],".csv",sep="")) ) # # #Plot raw LRs by category # plot.list=lapply(varLR.list[1:3],function(i) { # #First change levels of factors to expression for subscripts on LRs # levels(i$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext]))) # if("FTG" %in% names(i) & "Sys3" %in% names(i)) { # levels(i$FTG)=c(expression(paste("Primary Prod.")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed"))) # levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh"))) # } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) { # levels(i$FTG)=c(expression(paste("Primary Prod")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed"))) # } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) { # levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh"))) # } else { } # #Next create initial plot # p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+geom_hline(xintercept=0,lwd=0.8,lty=2)+ # geom_point(size=9,alpha=0.6)+geom_linerange(lwd=1.2)+labs(x="",y="")+scale_y_continuous(limits=c(-6,7.05))+ # scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+ # scale_color_manual(values=c("red","blue","green3"))+ # coord_flip()+theme_bw(base_size=24) # #Now build out facets depending on which factors are in the dataframe # if("FTG" %in% names(i) & "Sys3" %in% names(i)) { # p=p+facet_grid(Sys3+FTG~Log.ratio,labeller=label_parsed) # } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) { # p=p+facet_grid(FTG~Log.ratio,labeller=label_parsed) # } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) { # p=p+facet_grid(Sys3~Log.ratio,labeller=label_parsed) # } else { p=p+facet_grid(~Log.ratio,labeller=label_parsed) } # p=p+geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+ # theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(), # panel.grid.minor=element_blank()) # gt=ggplot_gtable(ggplot_build(p)) # gt$layout$clip[grep("panel",gt$layout$name)]="off" # return(gt) # } ) # # #This overwrites any object in the plotting space so need to save after each one! # grid.draw(plot.list[[1]]) #9" x 4.5" # grid.draw(plot.list[[2]]) #10" x 12" # grid.draw(plot.list[[3]]) #10" x 14" # grid.draw(plot.list[[4]]) #Look at effect of studies with low 'observer degrees of freedom' marine.sensitivity=droplevels(subset(marine,marine$Ycat=="Consumption" & FTG=="Primary Prod.")) #Get LR1 and LR3 after dropping each study jackknife.df=ldply(levels(marine.sensitivity$Study),function(d) { x=subset(marine.sensitivity,Study!=d) LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3") data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] data.frame(Study=unique(marine.sensitivity[marine.sensitivity$Study==d,"Ref"]), Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])), ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])), ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } ) LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df) return(LR.df) } ) #Relabel for plotting levels(jackknife.df$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext]))) levels(jackknife.df$Study)[1]=c("Bracken & Stachowicz,\n Ecology 2006") #Plot LR confidence intervals: 9" x 6" ggplot(data=jackknife.df, aes(x=Study, y=Value, ymin=ci.lb, ymax=ci.ub, color=Study))+ geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+ geom_point(size=9,alpha=0.7)+ geom_errorbar(col="black",lwd=1,width=0.1)+ facet_grid(~Log.ratio,labeller=label_parsed)+labs(x="",y="")+ coord_flip()+theme_bw(base_size=24)+ theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(), panel.grid.minor=element_blank()) ####################################################################################################### # FUNCTIONAL FORM OF BEF RELATIONSHIP # ####################################################################################################### #Subset data and create response as proportional change in functioning with each increase in richness marine.reduced=ddply(subset(marine,Entry!=17),1,function(x) { #Exclude Gamfeldt et al 2005 outlier y=melt(x,id.vars=c(4:6,9,13:14,16,24),measure.vars=c(109:120)) #If any values are <0, then add the absolute value of lowest value + 1 to avoid negative ratios if(any(na.omit(y$value)<0)) y$value=y$value+abs(min(y$value,na.rm=T))+1 else(y$value=y$value) #If direction is negative, replace with inverse of values if(unique(y$Direction)==1) y$value=y$value else(y$value=1/y$value) z=cbind(y[,1:8],richness=as.numeric(gsub("\\D","",y$variable)),value=y$value/y[y$variable=="Y1","value"]) z=z[!is.na(z[,"value"]),] } ) #Remove all studies with <2 levels of richness marine.reduced=ddply(marine.reduced,1:9,function(x) if(length(x$value)<3) NULL else x) #Visually inspect the raw line plots (10" x 30") # do.call(grid.arrange,lapply(list("Production","Consumption","Biogeochemical Fluxes"),function(i) { # ggplot(subset(marine.reduced,Ycat==i),aes(x=richness,y=value))+geom_line()+geom_point()+facet_wrap(~Entry,scales="free")+ # geom_text(aes(x=1,y=Inf,label=Ydesc),hjust=0,vjust=1,size=4)+ # labs(title=paste(i))+theme_bw(base_size=18)+theme(strip.background=element_blank()) # } ) ) #Write function to fit models corresponding to different functional forms modFit=function(df) { df=df[!is.na(df$value) & !is.infinite(df$value),] df=groupedData(value~richness|Study.id.marine,data=df) #Fit different models Null=nlme(value~a,fixed=a~1,random=~a~1,start=c(a=0.2),data=df) Linear=nlme(value~a+b*richness,fixed=a+b~1,random=~a+b~1,start=c(a=1.5,b=1),data=df) Logarithmic=nlme(value~a+b*log(richness),fixed=a+b~1,random=~a+b~1,start=c(a=1,b=1),data=df) Power=nlme(value~a*richness^b,fixed=a+b~1,random=~a+b~1,start=c(a=0.2,b=2),data=df) #Exponential=nlme(value~exp(a+b*richness),fixed=a+b~1,random=~a+b~1,start=c(a=1,b=1),data=df) Saturating=nlme(value~(max(value)*richness)/(k+richness),fixed=k~1,random=k~1,start=c(k=1),data=df) #Return models in list return(list(Null=Null, Linear=Linear, Logarithmic=Logarithmic, Power=Power, #Exponential=Exponential, Saturating=Saturating) ) } #Write function to extract AIC values and weights from the model list obtained from function modFit getAICtab=function(modList) { mixedAICs=data.frame(AIC=round(sapply(modList,AICc),2)) mixedAICs=within(mixedAICs, { deltaAIC=round(AIC-min(AIC),1) modLik=exp(-0.5*deltaAIC) AICweight=round(modLik/sum(modLik),3) } ) } #Run for marine.reduced dataset and subset by Ycat mods=dlply(marine.reduced,"Ycat",modFit) lapply(mods,getAICtab) #Get parameter estimates for best models coefs=lapply(seq_along(mods),function(i) { df=dlply(marine.reduced,"Ycat")[[i]] df=df[!is.na(df$value) & !is.infinite(df$value),] df=groupedData(value~richness|Study.id.marine,data=df) bestmod.name=names(which.min(lapply(mods[[i]],AIC))) best.mod=mods[[i]][[bestmod.name]] #update(best.mod,data=df,start=c(a=fixef(best.mod)[1],b=fixef(best.mod)[2]),method="REML",verbose=T, # control=nlmeControl(maxIter=1000)) summary(best.mod)$tTable } ) names(coefs)=names(mods); coefs #Reorder objects for plotting mods=mods[c("Production","Consumption","Biogeochemical Fluxes")] do.call(grid.arrange,lapply(1:2,function(i) { #Do not plot Biogeochemical fluxes #First, extract the best model bestmod.name=names(which.min(lapply(mods[[i]],AIC))) bestmod=mods[[i]][[bestmod.name]] #Get the data for that model df=dlply(marine.reduced,"Ycat")[[names(mods)[i]]] #Create dataframe of predicted values for richness in increments of 0.1 to max level of richness for each experiment pred=do.call(rbind,lapply(unique(df$Study.id.marine),function(j) { data.frame(richness=seq(0.01,max(df[df$Study.id.marine==j,"richness"]),0.01),Study.id.marine=j) } ) ) #Grab coefficients from model for a and b a=coef(bestmod)[match(pred$Study.id.marine,rownames(coef(bestmod))),1] b=coef(bestmod)[match(pred$Study.id.marine,rownames(coef(bestmod))),2] #Grab fixed coefficients for overall trend and fitted values for individual studies using a and b above if(bestmod.name=="Linear") { pred$fixed=fixef(bestmod)[1]+fixef(bestmod)[2]*pred$richness pred$fitted=a+pred$richness*b } else if(bestmod.name=="Logarithmic") { pred$fixed=fixef(bestmod)[1]+fixef(bestmod)[2]*log(pred$richness) pred$fitted=a+b*log(pred$richness) } else if(bestmod.name=="Power") { pred$fixed=fixef(bestmod)[1]*pred$richness^fixef(bestmod)[2] pred$fitted=a*pred$richness^b } else if(bestmod.name=="Exponential") { pred$fixed=exp(fixef(bestmod)[1]+fixef(bestmod)[2]*pred$richness) pred$fitted=exp(a+b*pred$richness) } else { pred$fixed=(max(df$value)*pred$richness)/(fixef(bestmod)[1]+pred$richness) pred$fitted=(max(df$value)*pred$richness)/(a+pred$richness) } #Bootstrap 95% Confidence Intervals #Set number of bootstraps B=999 #Create dataframe to store predicted values boot.mat=matrix(NA,nrow=nrow(pred),ncol=B) #Get bootstrapped values and store in a matrix (include progress bar) pb=txtProgressBar(min=0,max=B,style=3) for(j in 1:B) { df2=df[sample(nrow(df),replace=T),] df2=df2[order(df2[,"richness"]),] df2=groupedData(value~richness|Study.id.marine,data=df2) bestmod2=try(update(bestmod,data=df2),TRUE) ifelse(isTRUE(class(bestmod2)=="try-error"),next,bestmod2) if(bestmod.name=="Linear") { boot.mat[,j]=fixef(bestmod2)[1]+fixef(bestmod2)[2]*pred$richness } else if(bestmod.name=="Logarithmic") { boot.mat[,j]=fixef(bestmod2)[1]+fixef(bestmod2)[2]*log(pred$richness) } else if(bestmod.name=="Power") { boot.mat[,j]=fixef(bestmod2)[1]*pred$richness^fixef(bestmod2)[2] } else if(bestmod.name=="Exponential") { boot.mat[,j]=exp(fixef(bestmod2)[1]+fixef(bestmod2)[2]*pred$richness) } else { boot.mat[,j]=(max(df$value)*pred$richness)/(fixef(bestmod2)[1]+pred$richness) } setTxtProgressBar(pb,j) } close(pb) #Extract 95% CIs on bootstrapped values CIboot.mat=adply(boot.mat,1,function(x) quantile(x,prob=c(0.025,0.5,0.975),na.rm=T) ) colnames(CIboot.mat)=c("iter","LL","M","UL") CIboot.mat$richness=pred[,1] #Prep dataframe for plotting boot.mat2=melt(boot.mat) colnames(boot.mat2)=c("index","B","value") boot.mat2$richness=pred$richness #Get rid of richness values that are not represented in the dataset (speed up plotting) pred=subset(pred,max(df$richness)>richness & richness>=1) boot.mat2=subset(boot.mat2,max(df$richness)>richness & richness >=1) CIboot.mat=subset(CIboot.mat,max(df$richness)>richness & richness >=1) #Vertical cross section density estimates for smoothing min_value=min(min(boot.mat2$value,na.rm=T),min(df$value,na.rm=T)) max_value=max(max(boot.mat2$value,na.rm=T),max(df$value,na.rm=T)) ylim=c(min_value,max_value) #Plot the results ggplot()+geom_point(data=df,aes_string(x='richness',y='value'),col="grey50",alpha=0.5)+ geom_line(data=pred,aes_string(x='richness',y='fitted',group='Study.id.marine'),col="grey70")+ geom_ribbon(data=CIboot.mat,aes_string(x='richness',ymax='UL',ymin='LL'),fill="red",alpha=0.2)+ geom_line(data=pred,aes_string(x='richness',y='fixed'),col="black",alpha=0.9,lwd=1.2)+ geom_line(data=CIboot.mat,aes_string(x='richness',y='UL'),color="red",lwd=1,lty=2)+ geom_line(data=CIboot.mat,aes_string(x='richness',y='LL'),color="red",lwd=1,lty=2)+ scale_x_continuous(breaks=seq(from=1,to=max(df$richness),by=2))+ labs(x="\nRichness",y=expression(paste(Y[s]," / ",Y[mono],"\n")),title=paste(LETTERS[i],") ",unique(df$Ycat),sep=""))+ theme_bw(base_size=18)+theme(legend.position="none",plot.title=element_text(size=18,hjust=0,vjust=1), panel.grid.major=element_blank(),panel.grid.minor=element_blank()) } ) ) #6" x 12"