### SETUP (run this first) ############################################################## rm(list=ls()) # clear R environment cat("\014") # clear R console dev.off() # clear R plots getwd() # display the working directory setwd("/Users/robertholmberg/Desktop/R") # set the working directory library(gvlma) # load gvlma pkg for regression diagnostics library(car) # load car pkg for Box-Tidwell library(BaylorEdPsych) # load BaylorEdPsych pkg for glm pseudo R2 library(faraway) # load faraway pkg for half-normal plots library(psych) # load psych pkg for PCA clarksmeans <- read.csv("clarksmeans.csv") quadr <- (subset(clarksmeans, OTIE == "NONE", select = pCO2))^2 # create pCO2 squared variable ### MORTALITY ########################################################################### MORTframe <- data.frame(subset(clarksmeans, OTIE == "NONE", select = c(pCO2, MORT, REMAINING, STOCK))) # create dataframe for mortality data # GLM # plot(MORT/STOCK ~ pCO2, data = MORTframe) # display scatterplot mortprop <- cbind(MORTframe$MORT, MORTframe$REMAINING) # create proportion (successes, failures) boxTidwell(mortprop ~ pCO2, data = MORTframe) # perform Box-Tidwell; tests for nonlinearity of independent variable vs. logit transformation of dependent variable (log odds) MORTglm <- glm(mortprop ~ pCO2, data = MORTframe, family = binomial) # fit binomial logistic regression as generalized linear model halfnorm(residuals(MORTglm)) # create half-normal plot to check for outliers summary(MORTglm) # display summary statistics MORTglm$deviance/MORTglm$df.residual # appreciably >1 indicates overdispersion pchisq(sum(residuals(MORTglm, type = "pearson")^2), MORTglm$df.residual, lower = F) # hypothesis test for overdispersion MORTglmqb <- glm(mortprop ~ pCO2, data = MORTframe, family = quasibinomial) # fit quasibinomial model summary(MORTglmqb) # display summary statistics anova(MORTglmqb, test = "F") # p-value for quasibinomial model MORTglmqbq <- update(MORTglmqb, ~ . + quadr) # fit polynomial (quadratic) quasibinomial model anova(MORTglmqb, MORTglmqbq, test = "F") # compare linear and polynomial models ### SETTLEMENT ########################################################################## SETframe <- data.frame(subset(clarksmeans, OTIE == "NONE", select = c(pCO2, SETTLED, UNSETTLED, REMAINING))) # create dataframe for settlement data # GLM # plot(SETTLED/REMAINING ~ pCO2, data = SETframe) # display scatterplot setprop <- cbind(SETframe$SETTLED, SETframe$UNSETTLED) # create proportion (successes, failures) boxTidwell(setprop ~ pCO2, data = SETframe) # perform Box-Tidwell; tests for nonlinearity of independent variable vs. logit transformation of dependent variable (log odds) SETglm <- glm(setprop ~ pCO2, data = SETframe, family = binomial) # fit binomial logistic regression as generalized linear model halfnorm(residuals(SETglm)) # create half-normal plot to check for outliers summary(SETglm) # display summary statistics SETglm$deviance/SETglm$df.residual # appreciably >1 indicates overdispersion pchisq(sum(residuals(SETglm, type = "pearson")^2), SETglm$df.residual, lower = F) # hypothesis test for overdispersion SETglmq <- update(SETglm, ~ . + quadr) # fit polynomial model anova(SETglm, SETglmq, test = "Chisq") # compare linear and polynomial models anova(SETglm, test = "Chisq") # p-value for binomial logistic regression PseudoR2(SETglm) # Nagelkerke's R2 exp(SETglm$coef[2])-1 # beta (exp(SETglm$coef[2])-1)*100 # beta*100 SETglmCI <- confint(SETglm) # 95% confidence intervals exp(SETglmCI[2, 1])-1 # lwr CI exp(SETglmCI[2, 2])-1 # upr CI (exp(SETglmCI[2, 1])-1)*100 # lwr CI*100 (exp(SETglmCI[2, 2])-1)*100 # upr CI*100 ### STANDARD LENGTH ##################################################################### SLframe <- data.frame(subset(clarksmeans, OTIE == "NONE", select = c(pCO2, SL))) # create dataframe for standard length data # LM # plot(SL ~ pCO2, data = SLframe) # display scatterplot SLlm <- lm(SL ~ pCO2, data = SLframe) # fit linear model SLqm <- update(SLlm, ~ . + quadr) # fit polynomial model anova(SLlm, SLqm) # compare linear and polynomial models gvlma(SLlm) # linear model diagnostics summary(SLlm) # display summary statistics 100*SLlm$coef[2] # beta*100 confint(SLlm, level = 0.95) # 95% confidence intervals 100*confint(SLlm, level = 0.95) # 95% confidence intervals*100 ### OTOLITH MORPHOLOGY ################################################################## ### LS ################################################################################## LSframe <- data.frame(subset(clarksmeans, OTIE == "LS", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for left sagittae data ### PCA ### LSframe2 <- data.frame(subset(LSframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix LScor <- cor(LSframe2) # create correlation matrix LSpca <- principal(LScor, nfactors = length(which(eigen(LScor)$values > 1)), rotate = "varimax") # principal component analysis print(LSpca$loadings, cutoff = 0.001) # print component loadings LSscores <- factor.scores(LSframe2, LSpca)$scores # extract component scores ### LM ### # RC1 # plot(LSscores[,1] ~ pCO2, data = LSframe) # display scatterplot LSlm1 <- lm(LSscores[,1] ~ pCO2, data = LSframe) # fit linear model LSqm1 <- update(LSlm1, ~ . + quadr) # fit polynomial model anova(LSlm1, LSqm1) # compare linear and polynomial models gvlma(LSlm1) # linear model diagnostics summary(LSlm1) # display summary statistics 100*LSlm1$coefficients[2] # beta*100 confint(LSlm1, level = 0.95) # 95% confidence intervals 100*confint(LSlm1, level = 0.95) # 95% confidence intervals*100 # RC2 # plot(LSscores[,2] ~ pCO2, data = LSframe) # display scatterplot LSlm2 <- lm(LSscores[,2] ~ pCO2, data = LSframe) # fit linear model LSqm2 <- update(LSlm2, ~ . + quadr) # fit polynomial model anova(LSlm2, LSqm2) # compare linear and polynomial models gvlma(LSlm2) # linear model diagnostics summary(LSlm2) # display summary statistics ### RS ################################################################################## RSframe <- data.frame(subset(clarksmeans, OTIE == "RS", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for right sagittae data ### PCA ### RSframe2 <- data.frame(subset(RSframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix RScor <- cor(RSframe2) # create correlation matrix RSpca <- principal(RScor, nfactors = length(which(eigen(RScor)$values > 1)), rotate = "varimax") # principal component analysis print(RSpca$loadings, cutoff = 0.001) # print component loadings RSscores <- factor.scores(RSframe2, RSpca)$scores # extract component scores ### LM ### # RC1 # plot(RSscores[,1] ~ pCO2, data = RSframe) # display scatterplot RSlm1 <- lm(RSscores[,1] ~ pCO2, data = RSframe) # fit linear model RSqm1 <- update(RSlm1, ~ . + quadr) # fit polynomial model anova(RSlm1, RSqm1) # compare linear and polynomial models gvlma(RSqm1) # linear model diagnostics summary(RSqm1) # display summary statistics confint(RSqm1, level = 0.95) # 95% confidence intervals # RC2 # plot(RSscores[,2] ~ pCO2, data = RSframe) # display scatterplot RSlm2 <- lm(RSscores[,2] ~ pCO2, data = RSframe) # fit linear model RSqm2 <- update(RSlm2, ~ . + quadr) # fit polynomial model anova(RSlm2, RSqm2) # compare linear and polynomial models gvlma(RSlm2) # linear model diagnostics summary(RSlm2) # display summary statistics ### LL ################################################################################## LLframe <- data.frame(subset(clarksmeans, OTIE == "LL", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for left lapilli data ### PCA ### LLframe2 <- data.frame(subset(LLframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix LLcor <- cor(LLframe2) # create correlation matrix LLpca <- principal(LLcor, nfactors = length(which(eigen(LLcor)$values > 1)), rotate = "varimax") # principal component analysis print(LLpca$loadings, cutoff = 0.001) # print component loadings LLscores <- factor.scores(LLframe2, LLpca)$scores # extract component scores ### LM ### # RC1 # plot(LLscores[,1] ~ pCO2, data = LLframe) # display scatterplot LLlm1 <- lm(LLscores[,1] ~ pCO2, data = LLframe) # fit linear model LLqm1 <- update(LLlm1, ~ . + quadr) # fit polynomial model anova(LLlm1, LLqm1) # compare linear and polynomial models gvlma(LLlm1) # linear model diagnostics summary(LLlm1) # display summary statistics # RC2 # plot(LLscores[,2] ~ pCO2, data = LLframe) # display scatterplot LLlm2 <- lm(LLscores[,2] ~ pCO2, data = LLframe) # fit linear model LLqm2 <- update(LLlm2, ~ . + quadr) # fit polynomial model anova(LLlm2, LLqm2) # compare linear and polynomial models gvlma(LLqm2) # linear model diagnostics summary(LLqm2) # display summary statistics confint(LLqm2, level = 0.95) # 95% confidence intervals ### RL ################################################################################## RLframe <- data.frame(subset(clarksmeans, OTIE == "RL", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for right lapilli data ### PCA ### RLframe2 <- data.frame(subset(RLframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix RLcor <- cor(RLframe2) # create correlation matrix RLpca <- principal(RLcor, nfactors = length(which(eigen(RLcor)$values > 1)), rotate = "varimax") # principal component analysis print(RLpca$loadings, cutoff = 0.001) # print component loadings RLscores <- factor.scores(RLframe2, RLpca)$scores # extract component scores ### LM ### # RC1 # plot(RLscores[,1] ~ pCO2, data = RLframe) # display scatterplot RLlm1 <- lm(RLscores[,1] ~ pCO2, data = RLframe) # fit linear model RLqm1 <- update(RLlm1, ~ . + quadr) # fit polynomial model anova(RLlm1, RLqm1) # compare linear and polynomial models gvlma(RLlm1) # linear model diagnostics summary(RLlm1) # display summary statistics # RC2 # plot(RLscores[,2] ~ pCO2, data = RLframe) # display scatterplot RLlm2 <- lm(RLscores[,2] ~ pCO2, data = RLframe) # fit linear model RLqm2 <- update(RLlm2, ~ . + quadr) # fit polynomial model anova(RLlm2, RLqm2) # compare linear and polynomial models gvlma(RLlm2) # linear model diagnostics summary(RLlm2) # display summary statistics 100*RLlm2$coefficients[2] # beta*100 confint(RLlm2, level = 0.95) # 95% confidence intervals 100*confint(RLlm2, level = 0.95) # 95% confidence intervals*100 ### LA ################################################################################## LAframe <- data.frame(subset(clarksmeans, OTIE == "LA", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for left asterisci data ### PCA ### LAframe2 <- data.frame(subset(LAframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix LAcor <- cor(LAframe2, use = "complete.obs") # create correlation matrix LApca <- principal(LAcor, nfactors = length(which(eigen(LAcor)$values > 1)), rotate = "varimax") # principal component analysis print(LApca$loadings, cutoff = 0.001) # print component loadings LAscores <- factor.scores(na.omit(LAframe2), LApca)$scores # extract component scores LAscores <- rbind("49" = LAscores[1,], "50" = c(NA, NA), LAscores[2:11,]) # re-append NAs ### LM ### # RC1 # plot(LAscores[,1] ~ pCO2, data = LAframe) # display scatterplot LAlm1 <- lm(LAscores[,1] ~ pCO2, data = LAframe) # fit linear model LAqm1 <- update(LAlm1, ~ . + quadr) # fit polynomial model anova(LAlm1, LAqm1) # compare linear and polynomial models gvlma(LAlm1) # linear model diagnostics summary(LAlm1) # display summary statistics # RC2 # plot(LAscores[,2] ~ pCO2, data = LAframe) # display scatterplot LAlm2 <- lm(LAscores[,2] ~ pCO2, data = LAframe) # fit linear model LAqm2 <- update(LAlm2, ~ . + quadr) # fit polynomial model anova(LAlm2, LAqm2) # compare linear and polynomial models gvlma(LAlm2) # linear model diagnostics summary(LAlm2) # display summary statistics 100*LAlm2$coefficients[2] # beta*100 confint(LAlm2, level = 0.95) # 95% confidence intervals 100*confint(LAlm2, level = 0.95) # 95% confidence intervals*100 ### RA ################################################################################## RAframe <- data.frame(subset(clarksmeans, OTIE == "RA", select = c(pCO2, AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for right asterisci data ### PCA ### RAframe2 <- data.frame(subset(RAframe, select = c(AREASL, PERIMSL, CIRC, LATD, VISC))) # create dataframe for correlation matrix RAcor <- cor(RAframe2) # create correlation matrix RApca <- principal(RAcor, nfactors = length(which(eigen(RAcor)$values > 1)), rotate = "varimax") # principal component analysis print(RApca$loadings, cutoff = 0.001) # print component loadings RAscores <- factor.scores(RAframe2, RApca)$scores # extract component scores ### LM ### # RC1 # plot(RAscores[,1] ~ pCO2, data = RAframe) # display scatterplot RAlm1 <- lm(RAscores[,1] ~ pCO2, data = RAframe) # fit linear model RAqm1 <- update(RAlm1, ~ . + quadr) # fit polynomial model anova(RAlm1, RAqm1) # compare linear and polynomial models gvlma(RAlm1) # linear model diagnostics summary(RAlm1) # display summary statistics # RC2 # plot(RAscores[,2] ~ pCO2, data = RAframe) # display scatterplot RAlm2 <- lm(RAscores[,2] ~ pCO2, data = RAframe) # fit linear model RAqm2 <- update(RAlm2, ~ . + quadr) # fit polynomial model anova(RAlm2, RAqm2) # compare linear and polynomial models gvlma(RAlm2) # linear model diagnostics summary(RAlm2) # display summary statistics