######################################################################################################
#                                                                                                     #
#             META-ANALYSIS OF MARINE BIODIVERSITY & ECOSYSTEM FUNCTIONING EXPERIMENTS                #
#                                                                                                     #
#######################################################################################################

#Author: Jon Lefcheck, Jarrett Byrnes, Lars Gamfeldt
#Last updated: 16 January 2014

#Please contact Lars Gamfeldt (lars.gamfeldt@marecol.gu.se), Jon Lefcheck (jslefche@vims.edu), or 
#Jarrett Byrnes (jarrett.byrnes@umb.edu) before utilizing any part of this script or accompanying data

#######################################################################################################
#                                        TABLE OF CONTENTS                                            #
#   Line  23: Required libraries                                                                      #
#   Line  32: Importing and formatting the data                                                       #
#   Line 118: Exploring the data                                                                      #
#   Line 170: Calculating summary log ratios                                                          #
#   Line 428: Functional form fitting                                                                 #
#                                                                                                     #
#######################################################################################################

library(car) #Calls: recode
library(ggplot2) #Calls: ggplot
library(gridExtra) #Calls: grid.arrange
library(MuMIn) #Calls: AICc
library(nlme) #Calls: lme
library(plyr) #Calls: ddply
library(reshape2) #Calls: melt, dcast

#######################################################################################################
#                                IMPORTING AND FORMATTING THE DATA                                    #
#######################################################################################################

#Read in marine data set
marine=read.table("./gamfeldt Appendix 2.txt")
#Remove rows where Ycat is not consumption/production/biogeo_flux
marine=droplevels(subset(marine,Ycat=="Consumption" | Ycat=="Production" | Ycat=="Biogeo_flux"))
#Remove all rows where response is not the final point in a time series
marine=droplevels(subset(marine,FinalT=="Y"))
#Extract additive studies (for inclusion later)
addmarine=droplevels(subset(marine,Des3!="S"))
marine=droplevels(subset(marine,Des3=="S"))

#Convert N, Y, and SD columns to numeric
#First, extract names of columns for response means, N, and SD
Y.colnames=colnames(marine)[ grep("Y",colnames(marine))[grep("Y",colnames(marine))>=78] ]
N.colnames=colnames(marine)[ grep("N",colnames(marine))[grep("N",colnames(marine))>=78] ]
SD.colnames=colnames(marine)[ grep("SD",colnames(marine))[grep("SD",colnames(marine))>=78] ]
#Convert response values to numeric
marine[,c(Y.colnames,N.colnames,SD.colnames)]=apply(marine[,c(Y.colnames,N.colnames,SD.colnames)],2,
                                                    function(x) as.numeric(as.character(x)) )

#Shift points so all values are positive (in the event any responses are negative, multiply by -1 and add max value)
marine[,Y.colnames]=t(apply(marine[,Y.colnames],1,function(x) { if(any(x<0,na.rm=TRUE)) -x+max(abs(x)) else x } ))

#Convert column "Direction" into numbers
marine=transform(marine,Direction=recode(Direction,"'Pos'='1';'Neg'='-1'"))
marine$Direction=as.numeric(levels(marine$Direction)[marine$Direction])

#Recalculate log response ratios for LR1 = ln(avg mono/poly), LR2 = ln(max mono/poly), LR3 = ln(pred mono/poly)
#First, extract vector of poly column names
polyY.colnames=colnames(marine)[ grep("Y",colnames(marine))[grep("Y",colnames(marine))>109] ]
polySD.colnames=colnames(marine)[ grep("SD",colnames(marine))[grep("SD",colnames(marine))>123] ]
polyN.colnames=colnames(marine)[ grep("N",colnames(marine))[grep("N",colnames(marine))>79] ]
#Now calculate log ratios
marine=cbind(marine,
             LR1=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$Y1 ), 
             VLR1=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD
                    ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2
                        apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N
                    ( ( marine[,"SD1"]^2 )/( (marine[,"Y1"]^2)*marine[,"N1"] ) ),
             LR2=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$YEmonoLR2 ),
             VLR2=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD
                      ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2
                          apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N
                    ( ( marine[,"SDEmonoLR2"]^2 )/( (marine[,"YEmonoLR2"]^2)*marine[,"NEmono"] ) ),
             LR3=log( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])/marine$YEmonoLR3 ),
             VLR3=( ( apply(marine[,polySD.colnames],1,function(x) rev(x[is.finite(x)])[1])^2 )/ #Poly SD
                      ( ( apply(marine[,polyY.colnames],1,function(x) rev(x[is.finite(x)])[1])^2)* #Poly mean^2
                          apply(marine[,polyN.colnames],1,function(x) rev(x[is.finite(x)])[1]) ) )+ #Poly N
               ( ( marine[,"SDEmonoLR3"]^2 )/( (marine[,"YEmonoLR3"]^2)*marine[,"NEmono"] ) ) )
  
#Multiply sign by hypothesized direction
#First, determine whether all responses are negative
marine$Reverse=apply(marine[,Y.colnames],1,function(x) sum(na.omit(x))>0)
#If Reverse=="TRUE" AND Direction=="-1" then multiply LR1 and LR3 by -1, otherwise do nothing
marine[,c("LR1","LR3")]=lapply(c("LR1","LR3"),function(i) ifelse(marine$Reverse=="TRUE" & marine$Direction=="-1",marine[,i]*-1,marine[,i]) )

#Write output to a .csv
#write.csv(marine,"Marine-meta LR1 LR2 LR3 with var--11-13-2013.csv")

#Transform some variables for more intuitive plotting
marine=transform(marine,Sys3=recode(Sys3,"'hard'='Hard Sub';'soft'='Soft Sub';'seagrass'='Seagrass';
                                   'pelagic'='Pelagic';'saltmarsh'='Salt Marsh'"))
marine$Sys3=factor(marine$Sys3,levels=rev(c("Salt Marsh","Pelagic","Seagrass","Soft Sub","Hard Sub")))

marine=transform(marine,FTG=recode(FTG,"'C'='Carnivore';'D'='Detritivore';'H'='Herbivore';
                                   'M'='Mixed';'P'='Primary Prod.'"))
marine$FTG=factor(marine$FTG,levels=rev(c("Mixed","Carnivore","Herbivore","Primary Prod.")))

marine=transform(marine,Ycat=recode(Ycat,
      "'Production'='Production';'Consumption'='Consumption';'Biogeo_flux'='Biogeochemical\nFluxes'"))

#Remove several entries because the expected direction of the diversity effect is not clear from
  #theory:
    #Bruno & O'Connor Entry 148
    #Blake & Duffy 2011 Entries re: Zostera and sessile inverts

marine=subset(marine,!Entry %in% c(148,202:203,207:208,212:213,217:218))

#write.csv(marine,"Marine meta final dataset.csv")

#######################################################################################################
#                                        EXPLORING THE DATA                                           #
#######################################################################################################

length(unique(marine$Study.id.marine)) #Number of studies
length(marine$Expt.id.marine) #Number of experiments
median(marine$Smax) #Median number of species number
ddply(marine,"Ycat",summarize,median(Smax))
range(marine$Smax) #Range of species number
ddply(marine,"Ycat",summarize,range(Smax))

#Look at distribution of studies and experiments by Ycat, FTG, etc.
#Create frequency tables for Ycat and FTG of experiments and studies
bar_FTG=count(marine,c("Study.id.marine","Ycat","FTG"))
bar_FTG=count(bar_FTG,c("Ycat","FTG")); names(bar_FTG)[3]="expt.freq"
study_bar_FTG=ddply(marine,c("Study.id.marine","Ycat","FTG"),nrow)
bar_FTG$study.freq=count(study_bar_FTG,c("Ycat","FTG"))$freq

ftg.plot=ggplot(bar_FTG,aes(x=FTG,y=study.freq,fill=Ycat,label=expt.freq))+geom_bar()+
  geom_text(position=position_stack(width=1),col="white",vjust=1)+labs(x="\nFocal Trophic Group",y="Number of studies\n")+
  theme_bw(base_size=18)+
  theme(legend.position="none",panel.grid.major=element_blank(),panel.grid.minor=element_blank(),
        panel.background=element_rect(fill="white"))

#Create frequency tables for Ycat and Sys3 of experiments and studies
bar_Sys3=count(marine,c("Study.id.marine","Ycat","Sys3"))
bar_Sys3=count(bar_Sys3,c("Ycat","Sys3")); names(bar_Sys3)[3]="expt.freq"
study_bar_Sys3=ddply(marine,c("Study.id.marine","Ycat","Sys3"),nrow)
bar_Sys3$study.freq=count(study_bar_Sys3,c("Ycat","Sys3"))$freq

sys3.plot=ggplot(bar_Sys3,aes(x=Sys3,y=study.freq,fill=Ycat,label=expt.freq))+geom_bar()+
  geom_text(position=position_stack(width=1),col="white",vjust=1)+labs(x="\nStudy System",y="")+theme_bw(base_size=18)+
  theme(panel.grid.major=element_blank(),panel.grid.minor=element_blank(),
        panel.background=element_rect(fill="white"),legend.position=c(0.6,0.8),legend.background=element_rect(fill=NA),legend.title=element_blank())

grid.arrange(ftg.plot,sys3.plot,nrow=1) #11" x 6"

#Map of studies and experiments
coord=ddply(marine,c("Study.id.marine","Expt.id.marine","Lat","Lon"),length)
coord=count(coord,c("Lat","Lon"))

coord=coord[order(coord$freq),]

#Plot map (7" x 4.5")
ggplot()+labs(x="",y="")+
  geom_polygon(data=map_data("world"),aes(x=long,y=lat,group=group),fill="grey75",col="grey75")+
  geom_point(data=coord,aes(x=Lon,y=Lat,fill=freq),shape=24,size=4.5,col="black",width=1.8,alpha=0.9)+
  scale_fill_gradient(high="dodgerblue4",low="dodgerblue",name="No.\nexpts")+
  theme(panel.grid.major=element_blank(),panel.grid.minor=element_blank(),
        panel.background=element_rect(fill="white"),
        legend.position=c(0.1,0.35))

#######################################################################################################
#                                   CALCULATING SUMMARY LOG RATIOS                                    #
#######################################################################################################

#Calculate raw log ratios based on various factors
rawLR.list=lapply(list("Ycat", #By category
                       c("Ycat","FTG"), #By category and focal trophic group
                       c("Ycat","Sys3"), #By category and ecosystem type
                       c("Ycat","across_trophic","trophic_dir"), #By category and trophic direction
                       c("Ycat","FTG","Sys3")), #By category and focal trophic group and ecosystem type
                       function(j) { 
  ddply(marine,j,function(x) {
    LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3")
      data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] 
      data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])),
                 ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])),
                 ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } )
    LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df)
    return(LR.df)
} ) } )
names(rawLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & Trophic Direction","Ycat & FTG & Sys3")
#Rename LR1 and LR3
for(i in seq_along(rawLR.list)) rawLR.list[[i]]=transform(rawLR.list[[i]],Log.ratio=recode(Log.ratio,"'LR1'='LRnet';'LR3'='LRext'")) 
#Set levels for plotting
for(i in seq_along(rawLR.list)) rawLR.list[[i]][,"Log.ratio"]=factor(rawLR.list[[i]][,"Log.ratio"],levels=c("LRnet","LRext"))
#lapply(seq_along(rawLR.list),function(i) write.csv(rawLR.list[[i]],paste("Raw LRs by ",names(rawLR.list)[i],".csv",sep="")) )

#Plot raw LRs by category
plot.list=lapply(rawLR.list,function(i) {
  #First change levels of factors to expression for subscripts on LRs
  levels(i$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext])))
  if("FTG" %in% names(i) & "Sys3" %in% names(i)) {
    levels(i$FTG)=c(expression(paste("Primary Prod.")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed")))
    levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh")))
  } else if("across_trophic" %in% names(i) & !"Sys3" %in% names(i)) {
    levels(i$across_trophic)=c(expression(paste("Between")),expression(paste("Within")))
    levels(i$trophic_dir)=c(expression(paste("Bottom-up")),expression(paste("Neither")),expression(paste("Top-down")))
    } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) {
    levels(i$FTG)=c(expression(paste("Primary Prod")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed")))
  } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) {
    levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh")))
  } else { }
  #Next create initial plot
  p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+
    geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+
    geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+
    geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+
    scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+
    scale_color_manual(values=c("red","blue","green3"))+
    scale_fill_manual(values=c("red","blue","green3"))+
    scale_shape_manual(values=c(21,24,22))+
    coord_flip()+theme_bw(base_size=24)
  #Now build out facets depending on which factors are in the dataframe
  if("FTG" %in% names(i) & "Sys3" %in% names(i)) {
    p=p+facet_grid(Sys3+FTG~Log.ratio,labeller=label_parsed)
  } else if("across_trophic" %in% names(i)) {
    p=p+facet_grid(across_trophic+trophic_dir~Log.ratio,labeller=label_parsed)
  } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) {
    p=p+facet_grid(FTG~Log.ratio,labeller=label_parsed)
  } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) {
    p=p+facet_grid(Sys3~Log.ratio,labeller=label_parsed)
  } else { p=p+facet_grid(~Log.ratio,labeller=label_parsed) }
  p=p+geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+
    theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(),
      panel.grid.minor=element_blank())
  gt=ggplot_gtable(ggplot_build(p))
  gt$layout$clip[grep("panel",gt$layout$name)]="off"
  return(gt)
} )

#This overwrites any object in the plotting space so need to save after each one!
grid.draw(plot.list[[1]]) #6" x 5"
grid.draw(plot.list[[2]]) #8" x 10.5"
grid.draw(plot.list[[3]]) #8" x 12"
grid.draw(plot.list[[4]]) #8" x 8"
grid.draw(plot.list[[5]])

#Make separate plots for Ygen=="RD" and real_flux=="Y"
marine.Ygen=marine
levels(marine.Ygen$Ygen)=c("RD","SST/SSR","SST/SSR-1","SST/SSR")

rawLR.Ygen=ddply(marine.Ygen,c("Ycat","Ygen"),function(x) {
  LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3")
    data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] 
    data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])),
               ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])),
               ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } )
  LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df)
  return(LR.df) } )

rawLR.real_flux=ddply(marine,c("Ycat","real_flux"),function(x) {
  LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3")
    data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] 
    data.frame(Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])),
               ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])),
               ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } )
  LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df)
  return(LR.df) } )

#Rename levels for plotting
levels(rawLR.Ygen$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext])))
levels(rawLR.Ygen$Ygen)=c(expression(paste("Rates")),expression(paste("States")),expression(paste("SST/SSR-1"))) 

levels(rawLR.real_flux$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext])))
levels(rawLR.real_flux$real_flux)=c(expression(paste("States")),expression(paste("Rates")))

#Plot LRs for categories: 9" x 6"
ggplot(rawLR.Ygen,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+
  geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+
  geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+
  geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+
  scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+
  scale_color_manual(values=c("red","blue","green3"))+
  scale_fill_manual(values=c("red","blue","green3"))+
  scale_shape_manual(values=c(21,24,22))+
  coord_flip()+theme_bw(base_size=24)+
  facet_grid(Ygen~Log.ratio,labeller=label_parsed)+
  geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+
  theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

#Plot LRs for stocks vs real fluxes: 8" x 6"
ggplot(rawLR.real_flux,aes(y=Value,x=Ycat,col=Ycat,fill=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+
  geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+
  geom_point(size=9,alpha=0.7)+#geom_point(size=9,width=2,col="black",fill=NA)+
  geom_errorbar(col="black",lwd=1,width=0.1)+labs(x="",y="")+scale_y_continuous(limits=c(-1.2,2.15))+
  scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+
  scale_color_manual(values=c("red","blue","green3"))+
  scale_fill_manual(values=c("red","blue","green3"))+
  scale_shape_manual(values=c(21,24,22))+
  coord_flip()+theme_bw(base_size=24)+
  facet_grid(real_flux~Log.ratio,labeller=label_parsed)+
  geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+
  theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

# #Fit model from Hedges et al. to obtain variance-weighted estimates of log ratios by category
# estLR.list=lapply(list("Ycat",c("Ycat","FTG"),c("Ycat","Sys3"),c("Ycat","FTG","Sys3")),function(j) {
#   ddply(marine,j,function(x) {
#     LR.list=lapply(list("LR1","LR2","LR3"),function(i) {   
#         data=x[!is.na(x[,i]) & !is.infinite(x[,i]),]  
#         if(length(unique(data$Study.id.marine))>1) {
#           mod=rrma(formula(paste(i,"~1")),data=data,study_id=data$Study.id.marine,var_eff_size=data[,paste("V",i,sep="")],rho=0.2)
#           data.frame(Value=coef(mod)[1],n.expt=mod$n,n.studies=mod$k,ci.lb=mod$est[6],ci.ub=mod$est[7]) 
#         } else {
#           mod=rma(data[,i],data[,paste("V",i,sep="")])
#           data.frame(Value=coef(mod)[1],n.expt=mod$k,n.studies=1,ci.lb=mod$ci.lb,ci.ub=mod$ci.ub) } 
#         } )
#     LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR2","LR3"),LR.df)
#     return(LR.df) 
#   } ) 
# } )
# names(estLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & FTG & Sys3")
# #lapply(seq_along(estLR.list),function(i) write.csv(estLR.list[[i]],paste("Model LRs by ",names(estLR.list)[i],".csv",sep="")) )
# 
# #Plot estimated LRs by category
# lapply(estLR.list,function(i) {
#   p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+geom_point(size=9,alpha=0.8)+geom_linerange(lwd=1.2)+
#     geom_hline(xintercept=0,lwd=1.3,lty=2)+labs(x="",y="")+coord_flip()+theme_bw(base_size=24)
#   if("FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     p+facet_grid(Sys3+FTG~Log.ratio)
#   } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) {
#     p+facet_grid(FTG~Log.ratio)
#   } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     p+facet_grid(Sys3~Log.ratio)
#   } else { p+facet_wrap(~Log.ratio) }
# } )
# 
# #Calculate variance-weighted log ratios partitioned by category
# varLR.list=lapply(list("Ycat",c("Ycat","FTG"),c("Ycat","Sys3"),c("Ycat","FTG","Sys3")),function(j) { 
#   ddply(marine,j,function(x) {
#     LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3")
#       data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] 
#       w=1/data[,paste("V",i,sep="")]
#       Q=sum(w*data[,i]^2)-(sum(w*data[,i])^2)/sum(w)
#       varhat=(Q-(length(data[,i])-1))/
#         ( sum(w)-(sum(w^2)/sum(w)) )
#       Wstar=1/(data[,i]+varhat)
#       data.frame(Value=sum(Wstar*data[,i])/sum(Wstar),
#                  n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])),
#                  ci.lb=sum(Wstar*data[,i])/sum(Wstar)-qt(0.975,length(data[,i])-1)*sqrt(abs(1/sum(Wstar))),
#                  ci.ub=sum(Wstar*data[,i])/sum(Wstar)+qt(0.975,length(data[,i])-1)*sqrt(abs(1/sum(Wstar)))) } )
#     LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df)
#     return(LR.df)
#   } ) } )
# names(varLR.list)=c("Ycat","Ycat & FTG","Ycat & Sys3","Ycat & FTG & Sys3")
# #Rename LR1 and LR3
# for(i in seq_along(varLR.list)) varLR.list[[i]]=transform(varLR.list[[i]],Log.ratio=recode(Log.ratio,"'LR1'='LRnet';'LR3'='LRext'")) 
# #Set levels for plotting
# for(i in seq_along(varLR.list)) varLR.list[[i]][,"Log.ratio"]=factor(varLR.list[[i]][,"Log.ratio"],levels=c("LRnet","LRext"))
# #lapply(seq_along(varLR.list),function(i) write.csv(varLR.list[[i]],paste("Raw LRs by ",names(varLR.list)[i],".csv",sep="")) )
# 
# #Plot raw LRs by category
# plot.list=lapply(varLR.list[1:3],function(i) {
#   #First change levels of factors to expression for subscripts on LRs
#   levels(i$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext])))
#   if("FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     levels(i$FTG)=c(expression(paste("Primary Prod.")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed")))
#     levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh")))
#   } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) {
#     levels(i$FTG)=c(expression(paste("Primary Prod")),expression(paste("Herbivore")),expression(paste("Carnivore")),expression(paste("Mixed")))
#   } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     levels(i$Sys3)=c(expression(paste("Hard Sub")),expression(paste("Soft Sub")),expression(paste("Seagrass")),expression(paste("Pelagic")),expression(paste("Salt Marsh")))
#   } else { }
#   #Next create initial plot
#   p=ggplot(i,aes(y=Value,x=Ycat,col=Ycat,shape=Ycat,ymin=ci.lb,ymax=ci.ub))+geom_hline(xintercept=0,lwd=0.8,lty=2)+
#     geom_point(size=9,alpha=0.6)+geom_linerange(lwd=1.2)+labs(x="",y="")+scale_y_continuous(limits=c(-6,7.05))+
#     scale_x_discrete(labels=c("Biogeochemical\nfluxes","Consumption","Production"))+
#     scale_color_manual(values=c("red","blue","green3"))+
#     coord_flip()+theme_bw(base_size=24)
#   #Now build out facets depending on which factors are in the dataframe
#   if("FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     p=p+facet_grid(Sys3+FTG~Log.ratio,labeller=label_parsed)
#   } else if("FTG" %in% names(i) & !"Sys3" %in% names(i)) {
#     p=p+facet_grid(FTG~Log.ratio,labeller=label_parsed)
#   } else if(!"FTG" %in% names(i) & "Sys3" %in% names(i)) {
#     p=p+facet_grid(Sys3~Log.ratio,labeller=label_parsed)
#   } else { p=p+facet_grid(~Log.ratio,labeller=label_parsed) }
#   p=p+geom_text(aes(x=Ycat,y=Value,label=paste(n.studies," (",n.expt,")",sep="")),hjust=0.45,vjust=2.3)+
#     theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(),
#           panel.grid.minor=element_blank())
#   gt=ggplot_gtable(ggplot_build(p))
#   gt$layout$clip[grep("panel",gt$layout$name)]="off"
#   return(gt)
# } )
# 
# #This overwrites any object in the plotting space so need to save after each one!
# grid.draw(plot.list[[1]]) #9" x 4.5"
# grid.draw(plot.list[[2]]) #10" x 12"
# grid.draw(plot.list[[3]]) #10" x 14"
# grid.draw(plot.list[[4]])  

#Look at effect of studies with low 'observer degrees of freedom'
marine.sensitivity=droplevels(subset(marine,marine$Ycat=="Consumption" & FTG=="Primary Prod."))
#Get LR1 and LR3 after dropping each study
jackknife.df=ldply(levels(marine.sensitivity$Study),function(d) {
  x=subset(marine.sensitivity,Study!=d)
  LR.list=lapply(list("LR1","LR3"),function(i) { #list("LR1","LR2","LR3")
    data=x[!is.na(x[,i]) & !is.infinite(x[,i]),] 
    data.frame(Study=unique(marine.sensitivity[marine.sensitivity$Study==d,"Ref"]),
               Value=mean(data[,i]),n.expt=length(data[,i]),n.studies=length(unique(data[,"Study.id.marine"])),
               ci.lb=mean(data[,i])-qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])),
               ci.ub=mean(data[,i])+qt(0.975,length(data[,i]))*sd(data[,i])/sqrt(length(data[,i])) ) } )
  LR.df=do.call(rbind,LR.list); LR.df=cbind(Log.ratio=c("LR1","LR3"),LR.df)
  return(LR.df) } ) 
#Relabel for plotting
levels(jackknife.df$Log.ratio)=c(expression(paste(LR[net])),expression(paste(LR[ext])))
levels(jackknife.df$Study)[1]=c("Bracken & Stachowicz,\n Ecology 2006")
#Plot LR confidence intervals: 9" x 6"
ggplot(data=jackknife.df, aes(x=Study, y=Value, ymin=ci.lb, ymax=ci.ub, color=Study))+
  geom_hline(xintercept=0,lwd=0.8,lty=1,col="grey90")+
  geom_point(size=9,alpha=0.7)+
  geom_errorbar(col="black",lwd=1,width=0.1)+
  facet_grid(~Log.ratio,labeller=label_parsed)+labs(x="",y="")+
  coord_flip()+theme_bw(base_size=24)+
  theme(panel.margin=unit(1,"lines"),legend.position="none",panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())
 
#######################################################################################################
#                                 FUNCTIONAL FORM OF BEF RELATIONSHIP                                 #
#######################################################################################################

#Subset data and create response as proportional change in functioning with each increase in richness
marine.reduced=ddply(subset(marine,Entry!=17),1,function(x) {  #Exclude Gamfeldt et al 2005 outlier
    y=melt(x,id.vars=c(4:6,9,13:14,16,24),measure.vars=c(109:120)) 
    #If any values are <0, then add the absolute value of lowest value + 1 to avoid negative ratios
    if(any(na.omit(y$value)<0)) y$value=y$value+abs(min(y$value,na.rm=T))+1 else(y$value=y$value)
    #If direction is negative, replace with inverse of values
    if(unique(y$Direction)==1) y$value=y$value else(y$value=1/y$value)
    z=cbind(y[,1:8],richness=as.numeric(gsub("\\D","",y$variable)),value=y$value/y[y$variable=="Y1","value"]) 
    z=z[!is.na(z[,"value"]),] } ) 
#Remove all studies with <2 levels of richness
marine.reduced=ddply(marine.reduced,1:9,function(x) if(length(x$value)<3) NULL else x)

#Visually inspect the raw line plots (10" x 30")
# do.call(grid.arrange,lapply(list("Production","Consumption","Biogeochemical Fluxes"),function(i) {
#   ggplot(subset(marine.reduced,Ycat==i),aes(x=richness,y=value))+geom_line()+geom_point()+facet_wrap(~Entry,scales="free")+
#     geom_text(aes(x=1,y=Inf,label=Ydesc),hjust=0,vjust=1,size=4)+
#     labs(title=paste(i))+theme_bw(base_size=18)+theme(strip.background=element_blank()) 
# } ) )

#Write function to fit models corresponding to different functional forms
modFit=function(df) {
  df=df[!is.na(df$value) & !is.infinite(df$value),]
  df=groupedData(value~richness|Study.id.marine,data=df)
  #Fit different models
  Null=nlme(value~a,fixed=a~1,random=~a~1,start=c(a=0.2),data=df)
  Linear=nlme(value~a+b*richness,fixed=a+b~1,random=~a+b~1,start=c(a=1.5,b=1),data=df)
  Logarithmic=nlme(value~a+b*log(richness),fixed=a+b~1,random=~a+b~1,start=c(a=1,b=1),data=df)
  Power=nlme(value~a*richness^b,fixed=a+b~1,random=~a+b~1,start=c(a=0.2,b=2),data=df)
  #Exponential=nlme(value~exp(a+b*richness),fixed=a+b~1,random=~a+b~1,start=c(a=1,b=1),data=df)
  Saturating=nlme(value~(max(value)*richness)/(k+richness),fixed=k~1,random=k~1,start=c(k=1),data=df)
  #Return models in list
  return(list(Null=Null,
              Linear=Linear, 
              Logarithmic=Logarithmic, 
              Power=Power, 
              #Exponential=Exponential, 
              Saturating=Saturating) ) }

#Write function to extract AIC values and weights from the model list obtained from function modFit
getAICtab=function(modList) {
  mixedAICs=data.frame(AIC=round(sapply(modList,AICc),2))
  mixedAICs=within(mixedAICs, {
    deltaAIC=round(AIC-min(AIC),1)
    modLik=exp(-0.5*deltaAIC)
    AICweight=round(modLik/sum(modLik),3) } ) } 

#Run for marine.reduced dataset and subset by Ycat
mods=dlply(marine.reduced,"Ycat",modFit)
lapply(mods,getAICtab)
#Get parameter estimates for best models
coefs=lapply(seq_along(mods),function(i) {
  df=dlply(marine.reduced,"Ycat")[[i]]
  df=df[!is.na(df$value) & !is.infinite(df$value),]
  df=groupedData(value~richness|Study.id.marine,data=df)
  bestmod.name=names(which.min(lapply(mods[[i]],AIC)))
  best.mod=mods[[i]][[bestmod.name]]
  #update(best.mod,data=df,start=c(a=fixef(best.mod)[1],b=fixef(best.mod)[2]),method="REML",verbose=T,
  #       control=nlmeControl(maxIter=1000))
  summary(best.mod)$tTable
  } )
names(coefs)=names(mods); coefs

#Reorder objects for plotting
mods=mods[c("Production","Consumption","Biogeochemical Fluxes")]
do.call(grid.arrange,lapply(1:2,function(i) { #Do not plot Biogeochemical fluxes
  #First, extract the best model
  bestmod.name=names(which.min(lapply(mods[[i]],AIC)))
  bestmod=mods[[i]][[bestmod.name]]
  #Get the data for that model
  df=dlply(marine.reduced,"Ycat")[[names(mods)[i]]]
  #Create dataframe of predicted values for richness in increments of 0.1 to max level of richness for each experiment
  pred=do.call(rbind,lapply(unique(df$Study.id.marine),function(j) {
    data.frame(richness=seq(0.01,max(df[df$Study.id.marine==j,"richness"]),0.01),Study.id.marine=j) } ) )
  #Grab coefficients from model for a and b
  a=coef(bestmod)[match(pred$Study.id.marine,rownames(coef(bestmod))),1]
  b=coef(bestmod)[match(pred$Study.id.marine,rownames(coef(bestmod))),2]
  #Grab fixed coefficients for overall trend and fitted values for individual studies using a and b above
  if(bestmod.name=="Linear") {
    pred$fixed=fixef(bestmod)[1]+fixef(bestmod)[2]*pred$richness 
    pred$fitted=a+pred$richness*b
  } else if(bestmod.name=="Logarithmic") {
    pred$fixed=fixef(bestmod)[1]+fixef(bestmod)[2]*log(pred$richness)
    pred$fitted=a+b*log(pred$richness)
  } else if(bestmod.name=="Power") {
    pred$fixed=fixef(bestmod)[1]*pred$richness^fixef(bestmod)[2]
    pred$fitted=a*pred$richness^b   
  } else if(bestmod.name=="Exponential") {
    pred$fixed=exp(fixef(bestmod)[1]+fixef(bestmod)[2]*pred$richness)
    pred$fitted=exp(a+b*pred$richness)
  } else {
    pred$fixed=(max(df$value)*pred$richness)/(fixef(bestmod)[1]+pred$richness)
    pred$fitted=(max(df$value)*pred$richness)/(a+pred$richness)
  }
  #Bootstrap 95% Confidence Intervals
  #Set number of bootstraps
  B=999
  #Create dataframe to store predicted values
  boot.mat=matrix(NA,nrow=nrow(pred),ncol=B)
  #Get bootstrapped values and store in a matrix (include progress bar)
  pb=txtProgressBar(min=0,max=B,style=3)
  for(j in 1:B) {
    df2=df[sample(nrow(df),replace=T),]
    df2=df2[order(df2[,"richness"]),]
    df2=groupedData(value~richness|Study.id.marine,data=df2)
    bestmod2=try(update(bestmod,data=df2),TRUE)
    ifelse(isTRUE(class(bestmod2)=="try-error"),next,bestmod2)
    if(bestmod.name=="Linear") {
      boot.mat[,j]=fixef(bestmod2)[1]+fixef(bestmod2)[2]*pred$richness 
    } else if(bestmod.name=="Logarithmic") {
      boot.mat[,j]=fixef(bestmod2)[1]+fixef(bestmod2)[2]*log(pred$richness)
    } else if(bestmod.name=="Power") {
      boot.mat[,j]=fixef(bestmod2)[1]*pred$richness^fixef(bestmod2)[2]
    } else if(bestmod.name=="Exponential") {
      boot.mat[,j]=exp(fixef(bestmod2)[1]+fixef(bestmod2)[2]*pred$richness)
    } else {
      boot.mat[,j]=(max(df$value)*pred$richness)/(fixef(bestmod2)[1]+pred$richness)
    } 
    setTxtProgressBar(pb,j) }
  close(pb)
  #Extract 95% CIs on bootstrapped values
  CIboot.mat=adply(boot.mat,1,function(x) quantile(x,prob=c(0.025,0.5,0.975),na.rm=T) )
  colnames(CIboot.mat)=c("iter","LL","M","UL")
  CIboot.mat$richness=pred[,1]
  #Prep dataframe for plotting
  boot.mat2=melt(boot.mat)
  colnames(boot.mat2)=c("index","B","value")
  boot.mat2$richness=pred$richness
  #Get rid of richness values that are not represented in the dataset (speed up plotting)
  pred=subset(pred,max(df$richness)>richness & richness>=1)
  boot.mat2=subset(boot.mat2,max(df$richness)>richness & richness >=1)
  CIboot.mat=subset(CIboot.mat,max(df$richness)>richness & richness >=1)
  #Vertical cross section density estimates for smoothing
  min_value=min(min(boot.mat2$value,na.rm=T),min(df$value,na.rm=T))
  max_value=max(max(boot.mat2$value,na.rm=T),max(df$value,na.rm=T))
  ylim=c(min_value,max_value)
  #Plot the results
  ggplot()+geom_point(data=df,aes_string(x='richness',y='value'),col="grey50",alpha=0.5)+    
    geom_line(data=pred,aes_string(x='richness',y='fitted',group='Study.id.marine'),col="grey70")+
    geom_ribbon(data=CIboot.mat,aes_string(x='richness',ymax='UL',ymin='LL'),fill="red",alpha=0.2)+
    geom_line(data=pred,aes_string(x='richness',y='fixed'),col="black",alpha=0.9,lwd=1.2)+
    geom_line(data=CIboot.mat,aes_string(x='richness',y='UL'),color="red",lwd=1,lty=2)+
    geom_line(data=CIboot.mat,aes_string(x='richness',y='LL'),color="red",lwd=1,lty=2)+
    scale_x_continuous(breaks=seq(from=1,to=max(df$richness),by=2))+
    labs(x="\nRichness",y=expression(paste(Y[s]," / ",Y[mono],"\n")),title=paste(LETTERS[i],") ",unique(df$Ycat),sep=""))+
    theme_bw(base_size=18)+theme(legend.position="none",plot.title=element_text(size=18,hjust=0,vjust=1),
                                 panel.grid.major=element_blank(),panel.grid.minor=element_blank())
} ) ) #6" x 12"