---
title: 'Mandarin Training Study: High versus Low Speaker Variability'
output:
  html_notebook:
    toc: yes
    toc_depth: 4
  word_document:
    toc: yes
    toc_depth: '4'
  html_document:
    df_print: paged
    toc: yes
    toc_depth: '4'
date: "July, 2018"
---
# Load Packages and Helper Functions

## Packages 

```{r}
rm(list=ls())

suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(lmerTest))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(Matrix))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(fmsb))
suppressPackageStartupMessages(library(irr))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(theme_set(theme_bw()))
opts_chunk$set(fig.width=8, fig.height=5, echo=TRUE, warning=FALSE, message=FALSE, cache=TRUE)
```

## Helper Functions

### logodds

This function takes a percentage and returns the log odds. 

```{r}
logodds <- function(p){log(p/(100-p))}  
```

### SummarySE

This function can be found on the website "Cookbook for R".

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper

It summarizes data, giving count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).

- data: a data frame
- measurevar: the name of a column that contains the variable to be summarized
- groupvars: a vector containing names of columns that contain grouping variables
- na.rm: a boolean that indicates whether to ignore NA's
- conf.interval: the percent range of the confidence interval (default is 95%)

```{r}
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = FALSE, conf.interval = .95, .drop = TRUE) {require(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm = FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop = .drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm = na.rm),
          mean = mean   (xx[[col]], na.rm = na.rm),
          sd   = sd     (xx[[col]], na.rm = na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column    
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval: 
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
```

### SummarySEwithin

This function can be found on the  website "Cookbook for R".

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper 

From that website:

Summarizes data, handling within-subject variables by removing inter-subject variability. It will still work if there are no within-subject variables. Gives count, un-normed mean, normed mean (with same between-group mean), standard deviation, standard error of the mean, and confidence interval. If there are within-subject variables, it calculates adjusted values using the method from Morey (2008).

- data: a data frame
- measurevar: the name of a column that contains the variable to be summarized
- betweenvars: a vector containing names of columns that are between-subjects variables
- withinvars: a vector containing names of columns that are within-subjects variables
- idvar: the name of a column that identifies each subject (or matched subjects)
- na.rm: a boolean that indicates whether to ignore NA's
- conf.interval: the percent range of the confidence interval (default is 95%)

```{r}
summarySEwithin <- function(data = NULL, measurevar, betweenvars = NULL, withinvars = NULL,
                            idvar = NULL, na.rm = FALSE, conf.interval = .95, .drop = TRUE) {

  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop = FALSE],
    FUN = is.factor, FUN.VALUE = logical(1))

  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }

  # Get the means from the un-normed data
  datac <- summarySE(data, measurevar, groupvars = c(betweenvars, withinvars),
                     na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)

  # Drop all the unused columns (these will be calculated with normed data)
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL

  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop = .drop)

  # This is the name of the new column
  measurevar_n <- paste(measurevar, "_norm", sep ="")

  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars = c(betweenvars, withinvars),
                      na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)

  # Apply correction from Morey (2008) to the standard error and confidence interval
  # Get the product of the number of conditions of within-S variables
  nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop = FALSE], FUN = nlevels,
                           FUN.VALUE = numeric(1)))
  correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )

  # Apply the correction factor
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor

  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}
```

### normDataWithin

This function is used by the SummarySEWithin function above. It can be found on the website "Cookbook for R".

http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper

From that website:

Norms the data within specified groups in a data frame; it normalizes each subject (identified by idvar) so that they have the same mean, within each group specified by betweenvars.

- data: a data frame
- idvar: the name of a column that identifies each subject (or matched subjects)
- measurevar: the name of a column that contains the variable to be summarized
- betweenvars: a vector containing names of columns that are between-subjects variables
- na.rm: a boolean that indicates whether to ignore NAs

```{r}
normDataWithin <- function(data = NULL, idvar, measurevar, betweenvars = NULL,
                           na.rm = FALSE, .drop = TRUE) {
    #library(plyr)

    # Measure var on left, idvar + between vars on right of formula.
    data.subjMean <- ddply(data, c(idvar, betweenvars), .drop = .drop,
     .fun = function(xx, col, na.rm) {
        c(subjMean = mean(xx[,col], na.rm = na.rm))
      },
      measurevar,
      na.rm
    )

    # Put the subject means with original data
    data <- merge(data, data.subjMean)

    # Get the normalized data in a new column
    measureNormedVar <- paste(measurevar, "_norm", sep = "")
    data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
                               mean(data[,measurevar], na.rm = na.rm)

    # Remove this subject mean column
    data$subjMean <- NULL

    return(data)
}
```

### myCenter

This function outputs the centered values of a variable, which can be a numeric variable, a factor, or a data frame. It was taken from
Florian Jaeger's blog - https://hlplab.wordpress.com/2009/04/27/centering-several-variables/.

From his blog:

- If the input is a numeric variable, the output is the centered variable

- If the input is a factor, the output is a numeric variable with centered factor level values. That is, the factor's levels are converted into numerical values in their inherent order (if not specified otherwise,  R defaults to alphanumerical order). More specifically, this centers any binary factor so that the value below 0 will be the 1st level of the original factor, and the value above 0 will be the 2nd level.

- If the input is a data frame or matrix, the output is a new matrix of the same dimension and with the centered values and column names that correspond to the colnames() of the input preceded by "c" (e.g. "Variable1" will be "cVariable1").

```{r}
myCenter = function(x) {
  if (is.numeric(x)) { return(x - mean(x, na.rm = T)) }
	if (is.factor(x)) {
		x= as.numeric(x)
		return(x - mean(x, na.rm = T))
	}
	if (is.data.frame(x) || is.matrix(x)) {
		m= matrix(nrow = nrow(x), ncol = ncol(x))
		colnames(m)= paste("c", colnames(x), sep = "")
	
		for (i in 1:ncol(x)) {
		
			m[,i] = myCenter(x[,i])
		}
		return(as.data.frame(m))
	}
}
```

### lizCenter

This function provides a wrapper around myCenter allowing you to center a specific list of variables from a data frame. 

- x: data frame
- listfname: a list of the variables to be centered (e.g. list(variable1, variable2))

The output is a copy of the data frame with a column (always a numeric variable) added for each of the centered variables. These columns are labelled with the each column's previous name, but with ".ct" appended (e.g., "variable1" will become "variable1.ct").


```{r}
lizCenter = function(x, listfname) 
{
	for (i in 1:length(listfname)) 
	{
		fname = as.character(listfname[i])
		x[paste(fname,".ct", sep="")] = myCenter(x[fname])
	}
		
	return(x)
}
```	

### lizContrasts

This function can be used to create two centered dummy variables which stand in place of a three-way factor (condition). This allows us to inspect each contrast separately, as well as their interactions with other factors. Other fixed effects in the model can be evaluated as the average effects across all levels of the factor.

The function takes a data frame (d), a factor from that database (condition), which must have three levels, and the name of the level of the factor which is to be used as the baseline for the contrasts (base level).

For example, if d is a data frame with a factor "condition" with three levels "("lex_skew", "lex_noskew", and  "mixed") then lizContrasts(d, d$condition, "lex_no_skew") returns a data frame with two (numeric) columns added labelled  "lex_noskew_VERSUS_lex_mixed" and "lex_noskew_VERSUS_lex_skew". 

Wherever you would normally use "condition"in a formula in an LME, it can be replaced by (lex_noskew_VERSUS_lex_mixed + "lex_noskew_VERSUS_lex_skew) e.g. ~ (a * condition) becomes ~ (a *  (lex_noskew_VERSUS_lex_mixed + lex_noskew_VERSUS_lex_skew)).

```{r}
lizContrasts = function(d, condition, baselevel) 
{
  
	condition = factor(condition)
  condition = relevel(condition, baselevel)

	a = (contrasts(condition)-apply(contrasts(condition),2,mean))
	d$dummy1[condition == rownames(a)[1]] <- a[1] 
	d$dummy1[condition == rownames(a)[2]] <- a[2] 
	d$dummy1[condition == rownames(a)[3]] <- a[3] 
	d$dummy2[condition == rownames(a)[1]] <- a[4] 
	d$dummy2[condition == rownames(a)[2]] <- a[5] 
	d$dummy2[condition == rownames(a)[3]] <- a[6] 

	name1 = paste(baselevel, rownames(a)[2],sep ="_VERSUS_")
	name2 = paste(baselevel, rownames(a)[3],sep ="_VERSUS_")

	d[name1] = d$dummy1	
	d[name2] = d$dummy2	

	d$dummy1 <-NULL	
	d$dummy2 <-NULL	
	
	return(d)
}
```  

### filter2

This is a function which filters a column of data removing values which a certain number of standard deviations above/below the mean for that participant, possibly in some condition/sub-condition.

- im: the input matrix (a data frame)
- svn: a list of the names of factors to be group by (subject name + one or more conditions)
- fn: the name of the column containing the data to be filtered
- lim: how many standard deviations above/below the mean to filter

The function returns an input matrix identical to the original input matrix but with additional columns giving the group means and the "filtered" data

```{r}
filter2 = function(im, svn, fn, lim)
{
  ## work out means for each subject for each word

x = list()
y = ""

for(n in svn) x = append(im[n],x)
for(n in svn) y = paste(y,n,sep = "_")

means = aggregate(im[fn], by = x, mean, na.rm = T)
head(means)
nocols = dim(means)[2]
colnames(means)[nocols] = "means"

sds = aggregate(im[fn], by = x, sd, na.rm = T)
head(sds)
nocols = dim(sds)[2]
colnames(sds)[nocols] = "sds"

gs = merge(means,sds)

## because if there is just one value it does not have a standard deviation and we do not want to just disregard all of these
gs$sds[is.na(gs$sds)] = 0 

gs$max = gs$means + lim*gs$sds
gs$min = gs$means - lim*gs$sds

im2 = merge(im, gs, sort = F)

im2[paste(fn, "filt", sep = "")] = im2[fn]
cn = dim(im2)[2] ## get colnumber (last one added)

im2[,cn][im2[,fn] > im2$max] = ""

im2[,cn][im2[,fn] < im2$min] = ""

im2[,cn] = as.numeric(im2[,cn])

names(im2)[names(im2) == "means"] = paste("mean", y, sep = "_") 
names(im2)[names(im2) =="sds"] = paste("sd", y, sep = "_") 
names(im2)[names(im2) =="max"] = paste("max", y, sep = "_") 
names(im2)[names(im2) =="min"] = paste("min", y, sep = "_") 

return(im2)
}
```  

### get_coeffs

This function allows us to inspect particular coefficients from the output of an LME model by putting them in table.

- x: the output returned when running lmer or glmer (i.e. an object of type lmerMod or glmerMod)
- list: a list of the names of the coefficients to be extracted (e.g. c("variable1", "variable1:variable2")) 

```{r}
get_coeffs <- function(x,list){(as.data.frame(summary(x)$coefficients)[list,])}
``` 

# Load Datasets

```{r}
setwd("datafiles")
pcpt <- read.csv("PCPT.csv")
cstc <-read.csv("CSTC-curve.csv", sep = ",", stringsAsFactors = F) 
dis <- read.csv("Discrimination.csv")
train <- read.csv("Training.csv")
PI <- read.csv("Picture Identification.csv")
PRO <- read.csv("Production_all.csv")
setwd("..")
```
# Individual Aptitude Task 1: Pitch Contour Perception Test (Section 3.2.1)

## Figure (Figure 3)

```{r}
pcpt$condition = factor(pcpt$condition, levels = c("0", "1", "2"), labels = c("LV", "HV", "HVB"))
pcpt$session = factor(pcpt$session, levels = c("1", "2"), labels = c("Pre-test", "Post-test"))
means = with(pcpt, aggregate(accuracy ~ subject + session + condition, FUN = mean))

x <- summarySEwithin(means, measurevar = "accuracy", betweenvars = c("condition"), withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

```{r}
p1 = ggplot (means, aes(x = condition, y = accuracy, fill = session)) 
p1 = p1 + geom_violin(colour = "black", scale = "count")
p1 = p1 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
p1 = p1 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
p1 = p1 + geom_errorbar(aes(ymin = accuracy-ci, ymax = accuracy+ci), width = .4, position = position_dodge(.9), data = x)
p1 = p1 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
p1 = p1 +theme_bw()+ labs(x = "Variability Condition", y = "Mean Accuracy")
print(p1)
```

## Statistical Analysis (Section 3.2.1)


```{r}
pcpt = lizContrasts(pcpt, pcpt$condition, "LV")

pcpt_glmer1 = glmer(accuracy ~ 
              session * (LV_VERSUS_HV + LV_VERSUS_HVB)
              + (session|subject),
              family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = pcpt)
```


```{r}
kable (round(summary(pcpt_glmer1)$coefficients, 6), digits = 3)
```


```{r}
datapcpt <- summarySEwithin(means, measurevar = "accuracy", withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
kable(datapcpt, digits = 3)
```

# Individual Aptitude Task 2: Categorisation of Synthesized Tonal Continua (Section 3.2.2)

We analysed data from the CSTC task adapted from Sadakata and McQueen (2014). We first attempted to follow the procedures in their paper. Specifically, to quantify performance in this task, each subjects' categorization curve was fitted to a logistic function using the Logistic Curve Fit function in SPSS and a slope coefficient was calculated (Joanisse, Manis, Keating, & Seidenberg, 2000) which was taken to indicate the participants ability to categorize the two tones, with smaller slopes indicating better performance (perfect slope = 0.042). Sadakata and McQueen (2014), report that they removed any participant with a slope measure greater than 1.2 from the analysis, suggesting that slopes above this threshold were considered to be poorly fit. However following this process with our participants, the majority were above this threshold (43 out of 60). Given this, we attempted an alternative method of deriving slope coefficients for each participant using a logistic mixed effect model (Schultz, Llanos, & Francis, 2003) with the predicted variable being which of two tones the participants chose on each trial and the predictor being the tone step presented on each trial (varying from 1-6 where 1 is most like Tone 2, and 6 is most like Tone 3). Random intercepts and slopes for tone step were fit by participant and the individual slope coefficients for each participant were extracted from the by-participant random slopes fit in the model. Because the random slopes represent adjustments to the fixed effect slope, more positive slopes represent sharper categorization responses, i.e. more sensitivity to differences in tone step, while more negative slopes represent flatter categorization responses, or in extreme cases reversed responses, and slopes close to 0 reflect responses close to the mean. These slopes could thus be taken to be indicators of individual differences. When we compared these slopes to the ones derived from SPSS we found that they were very similar. The same participants who failed the criteria of Sadakata and McQueen also had very shallow slopes using the logistic regression method. The analyses conducted in this script were done with the individual difference measure extracted from the logistic regression.

This figure shows the individual curves for each group. It also shows the raw data that the slopes are based on.

```{r}
cstc$session = factor(cstc$session, levels = c("1","2"), labels = c("Pre-test", "Post-test"))
cstc$condition = factor(cstc$condition, levels = c("0", "1", "2"), labels = c("LV", "HV", "HVB"))

# Two participants showed the completely reversed pattern. Here we reverse the coding of these 2 participants
cstc$category[cstc$subject == "5"|cstc$subject == "36"] <- abs(cstc$category[cstc$subject == "5"|cstc$subject == "36"] - 1)

ggplot(cstc, aes(x = step, y = category)) + 
  stat_smooth(aes(group = subject), method = "glm", method.args = list(family = "binomial"), size = 0.5, se = FALSE, show.legend = FALSE, color = "#cce2ef") + # individual regression lines
  stat_smooth(method = "glm", method.args = list(family = "binomial"), size = 2, se = FALSE, show.legend = FALSE, color = "#B01326") + # group mean regression line
  ylim(0, 1) + scale_x_continuous(breaks = 1:7) + facet_grid(~condition) +
  xlab("tone step") + ylab("Proportion Tone X responses") + theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
```

## Extract slope coffcient for each participant and descriptives

```{r}
cstc$session <- as.numeric(cstc$session)
cstc$session.std <- scale(cstc$session)
cstc$step.std <- scale(cstc$step)
f1 <-  factor(c("HV", "LV"))
df1 <- data.frame(f1, subject = 1:40)
f1 <-  factor("HVB")
df2 <- data.frame(f1, subject = 41:60)
df3 <- rbind.fill(df1, df2)

lmfits_pre <- glmer(category ~ step.std + (step.std|subject), data = cstc[cstc$session == 1,], family = "binomial", control = glmerControl(optimizer = "bobyqa"))
pre = ranef(lmfits_pre)$subject
pre = cbind (pre, df3)
pre$session = "Pre-test"

lmfits_post <- glmer(category ~ step.std + (step.std|subject), data = cstc[cstc$session == 2,], family = "binomial", control = glmerControl(optimizer = "bobyqa"))
post = ranef(lmfits_post)$subject
post = cbind (post, df3)
post$session = "Post-test"
cstc2 <- rbind(pre,post)

names(cstc2)[names(cstc2) == "step.std"] <- "slope"
names(cstc2)[names(cstc2) == "(Intercept)"] <- "intercept"
names(cstc2)[names(cstc2) == "f1"] <- "condition"

# reorder the levels so that they match previous data set
cstc2$condition = factor (cstc2$condition, levels =  c("LV","HV","HVB") )
cstc2$session = factor (cstc2$session, levels =  c("Pre-test","Post-test") )

means = with(cstc2, aggregate(slope ~ subject + session + condition, FUN = mean))
x <- summarySEwithin(cstc2, measurevar = "slope", betweenvars = c("condition"), withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)

kable(x, digits = 2)
```

## Graph

```{r}

p2 = ggplot (means, aes(x = condition, y = slope, fill = session)) 
p2 = p2 + geom_violin(colour = "black", scale = "count")
p2 = p2 + scale_fill_manual(name = "Test session", values = c("Pre-test" = "grey", "Post-test" = "white"))
p2 = p2 + scale_x_discrete(labels = c("Low Variability", "High Variability", "High Varaibility Blocking"))
p2 = p2 + geom_errorbar(aes(ymin = slope-ci, ymax = slope+ci), width = .4, position = position_dodge(.9), data = x)
p2 = p2 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
p2 = p2 + theme_bw()
print(p2)

rm(means)
rm(x)
```


## Statistical Analysis

```{r}
cstc2 = lizContrasts(cstc2, cstc2$condition, "LV")
cstc_lm = lm(slope ~ session * (LV_VERSUS_HV + LV_VERSUS_HVB) , data = cstc2)
```

Look at differences between conditions at pre-test:

```{r}
round (get_coeffs(cstc_lm,c("LV_VERSUS_HV","LV_VERSUS_HVB")),3)
```



# Look at the relationship between PCPT and CSTC and set up ID_pre table

```{r}
ID_pre = merge(
  
  with(subset(pcpt, session %in% "Pre-test"), aggregate(accuracy ~ subject + session + condition, FUN = mean)),
  
  with(subset(cstc2, session %in% "Pre-test"), aggregate(slope ~ subject + session + condition, FUN = mean)), by = "subject")
ID_pre$aptitude_pcpt = ID_pre$accuracy *10
ID_pre$session = NULL
cor.test(ID_pre$slope, ID_pre$accuracy)
```


## Correlation Figure

```{r}
ggplot(ID_pre, aes(x = accuracy, y = slope, color = condition.x)) +
geom_point(size = 5) 
```


## Create a median split (this is used later in plotting, but not in analyses)

```{r}
ID_pre$aptitude2 = 1
ID_pre$aptitude2[ID_pre$accuracy < median(ID_pre$accuracy)] = 0

ID_pre$aptitude3 = 1
ID_pre$aptitude3[ID_pre$slope < median(ID_pre$slope)] = 0

ID_pre$aptitude2 = factor(ID_pre$aptitude2, levels = c("1", "0"), labels = c ("HA", "LA"))
ID_pre$aptitude3 = factor(ID_pre$aptitude3, levels = c("0", "1"), labels = c ("HA", "LA"))
```

# Production Reliability Analysis (Section 3.5.1.1)

## Checking the accuracy of ratings of production data by native speakers

```{r}
#Remove participant 48 
PRO = PRO[!(PRO$subject=="48"),]

PRO$session = factor(PRO$session, levels = c("pretest", "posttest", "nativespeaker", "picturenaming"), labels = c("Pre-test", "Post-test","Native-Speaker", "Picture-Naming"))
PC = subset(PRO, session %in% "Native-Speaker")

means = mean(PC$tone_score)
means2 = mean(PC$pinyin_score)
means3 = mean(PC$rating)
means4 = mean(PC$tone_score_rater1)
means5 = mean(PC$pinyin_score_rater1)
means6 = mean(PC$rating_rater1)
Rater2 = c(means, means2, means3)
Rater1 = c(means4, means5, means6)
Task = factor (c("Tone accuracy","Pinyin accuracy","Tone rating"))
Table = data.frame ("Task" = Task, "Rater 1" = Rater1, "Rater 2" = Rater2)
Table
rm(means, means2, means3, means4, means5, means6, Rater2, Rater1, Task)
```

## Checking the correlation between Rater1 and Rater2

These correlations are reported for ratings of the experimental data

```{r}
CWR = subset(PRO, session %in% c("Pre-test", "Post-test"))
CPN = subset(PRO, session %in% "Picture-Naming")
cor1 = Kappa.test(CWR$tone_score, CWR$tone_score_rater1, conf.level = 0.95) 
cor2 = Kappa.test(CWR$pinyin_score, CWR$pinyin_score_rater1, conf.level = 0.95) 
cor3 = icc(cbind(CWR$rating, CWR$rating_rater1), "twoway", "consistency")
cor4 = Kappa.test(CPN$tone_score, CPN$tone_score_rater1, conf.level = 0.95)
cor5 = Kappa.test(CPN$pinyin_score, CPN$pinyin_score_rater1, conf.level = 0.95)
cor6 = icc(cbind(CPN$rating, CPN$rating_rater1), "twoway", "consistency")
```

### Rater accuracy of tone identification in Word Repetition

```{r}
cor1
```
### Rater accuracy of pinyin identification in Word Repetition

```{r}
cor2
```

### Rater accuracy of tone rating in Word Repetition

```{r}
cor3
```

### Rater accuracy of tone identification in Picture Naming

```{r}
cor4
```

### Rater accuracy of pinyin identification in Picture Naming

```{r}
cor5
```

### Rater accuracy of tone rating in Picture Naming

```{r}
cor6
```

# Word Repetition (Section 3.5.1)

## Word Repetition - Tone accuracy (Section 3.5.1.2)

### Label Parameters and remove unidentifiable trials

```{r}
NWR = CWR[CWR$tone != 0, ]
NWR$condition = factor(NWR$condition, levels = c("0", "1","2"), labels = c("LV", "HV", "HVB"))
NWR$wordtype = factor(NWR$wordtype, levels = c("trained", "untrained"), labels = c("Trained Item", "Untrained Item"))

NWR2 = merge(NWR, ID_pre)
```

### Figures 

```{r}
means = with(NWR2, aggregate(tone_score ~ subject + session + condition + wordtype, FUN = mean))
meansb = with(NWR2, aggregate(tone_score ~ subject + session + condition + wordtype + aptitude2, FUN = mean))
means2b = with(NWR2, aggregate(tone_score ~ subject + session + condition + wordtype + aptitude3, FUN = mean))

xb <- summarySEwithin(means, measurevar = "tone_score", betweenvars = c("condition"), withinvars = c("wordtype", "session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdatab <- summarySEwithin(meansb, measurevar = "tone_score", betweenvars = c("condition", "aptitude2"), withinvars = c("session", "wordtype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdata2b <- summarySEwithin(means2b, measurevar = "tone_score", betweenvars = c("condition", "aptitude3"), withinvars = c("session", "wordtype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

#### Main Graph (Figure 7)

```{r}
WRp1 = ggplot (means, aes(x = condition, y = tone_score, fill = session)) 
WRp1 = WRp1 + facet_wrap(~wordtype, ncol = 2)
WRp1 = WRp1 + geom_violin(colour = "black", scale = "count")
WRp1 = WRp1 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp1 = WRp1 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp1 = WRp1 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = xb)
WRp1 = WRp1 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
WRp1 = WRp1 +theme_bw()+ ggtitle('Word Repetition: Tone accuracy')
WRp1 = WRp1 + labs(x = "Variability Condition", y = "Mean Accuracy")

print(WRp1)
```

#### Graph (Figure 12, Left panel)

```{r}
WRp2 = ggplot (meansb, aes(x = condition, y = tone_score, fill = session)) 
WRp2 = WRp2 + facet_wrap(~wordtype + aptitude2, ncol = 2)
WRp2 = WRp2 + geom_violin(colour = "black", scale = "count")
WRp2 = WRp2 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp2 = WRp2 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp2 = WRp2 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = graphdatab)
WRp2 = WRp2 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
WRp2 = WRp2 + theme_bw()+ ggtitle('Word Repetition: Tone accuracy')
WRp2 = WRp2 + labs(x = "Variability Condition", y = "Mean Accuracy")
WRp2.m = WRp2 + theme(legend.position = "NULL")

print(WRp2)
```

#### CSTC Graph (Not used)

```{r}
WRp3 = ggplot (means2b, aes(x = condition, y = tone_score, fill = session)) 
WRp3 = WRp3 + facet_wrap(~wordtype + aptitude3, ncol = 2)
WRp3 = WRp3 + geom_violin(colour = "black", scale = "count")
WRp3 = WRp3 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp3 = WRp3 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp3 = WRp3 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = graphdata2b)
WRp3 = WRp3 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
WRp3 = WRp3 + theme_bw()+ ggtitle('Word Repetition: Tone accuracy')
WRp3 = WRp3 + labs(x = "Variability Condition", y = "Mean Accuracy")

print(WRp3)
```

### Statistical Analysis

#### Main Model (without aptitude predictors; Section 3.5.1.2)

```{r}
NWR2 = lizCenter(NWR2, list("wordtype", "condition"))
NWR2 = lizContrasts(NWR2, NWR2$condition, "LV")

nw_glmer = glmer(tone_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session
              + (wordtype.ct * session||subject),
              family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NWR2)
```

##### Print out effects in pre-test

```{r}
kable(get_coeffs(nw_glmer, c("wordtype.ct", "LV_VERSUS_HV", "LV_VERSUS_HVB" )), digits = 3)
```

##### Print out key effects and relevant descriptive statistics of test-session and interactions with test-session (i.e. looking effects of of training)

```{r}
kable(get_coeffs(nw_glmer, c("sessionPost-test",
                            "wordtype.ct:sessionPost-test",
                            "LV_VERSUS_HV:sessionPost-test",
                            "LV_VERSUS_HVB:sessionPost-test",
                            "wordtype.ct:LV_VERSUS_HV:sessionPost-test",
                            "wordtype.ct:LV_VERSUS_HVB:sessionPost-test")), digits = 3)
```

```{r}
dataWRtone<- summarySEwithin(means, measurevar = "tone_score", withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
kable (dataWRtone, digits = 3)
```

#### Analysis with PCPT accuracy (Section 3.7)

```{r}
NWR2 = lizCenter(NWR2, list("session", "aptitude_pcpt", "slope", "wordtype"))

nw_glmer_pcpt = glmer(tone_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session.ct 
              + aptitude_pcpt.ct
              + aptitude_pcpt.ct : session.ct
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct 
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct : wordtype.ct
              + (wordtype.ct * session.ct||subject)
              , family="binomial" , 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NWR2)

```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model 

```{r}
kable(get_coeffs(nw_glmer_pcpt, c("session.ct",
                            "wordtype.ct:session.ct",
                            "LV_VERSUS_HV:session.ct",
                            "LV_VERSUS_HVB:session.ct",
                            "wordtype.ct:LV_VERSUS_HV:session.ct",
                            "wordtype.ct:LV_VERSUS_HVB:session.ct" )), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(nw_glmer_pcpt, c("aptitude_pcpt.ct",
                             "session.ct:aptitude_pcpt.ct",
                             "LV_VERSUS_HV:session.ct:aptitude_pcpt.ct",
                             "LV_VERSUS_HVB:session.ct:aptitude_pcpt.ct",
                             "wordtype.ct:LV_VERSUS_HV:session.ct:aptitude_pcpt.ct",
                             "wordtype.ct:LV_VERSUS_HVB:session.ct:aptitude_pcpt.ct")), digits = 3)
```

#### Analysis with CSTC slope (Section 3.7)

```{r}
#Model did not converge so remove slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct : wordtype.ct
nw_glmer_cstc = glmer(tone_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session.ct 
              + slope.ct
              + slope.ct : session.ct
              + slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct 
             + (wordtype.ct * session.ct|subject)
              , family = "binomial" , 
              control = glmerControl(optimizer = "bobyqa", optCtrl=list(maxfun=1e6)), 
              data = NWR2)
```

##### Check that get the same pattern of results for experimentally manipulated variables as in previous model 

```{r}
kable(get_coeffs(nw_glmer_cstc, c("session.ct", "wordtype.ct:session.ct", "wordtype.ct:LV_VERSUS_HV:session.ct", "wordtype.ct:LV_VERSUS_HVB:session.ct")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(nw_glmer_cstc, c("slope.ct", "session.ct:slope.ct", "LV_VERSUS_HV:session.ct:slope.ct","LV_VERSUS_HVB:session.ct:slope.ct")), digits = 3)
```

## Word Repetition - Pinyin Accuracy (Section 3.5.1.3)

### Figures 

```{r}
means = with(NWR2, aggregate(pinyin_score ~ subject + session + condition + wordtype, FUN = mean))
meansb = with(NWR2, aggregate(pinyin_score ~ subject + session + condition + wordtype + aptitude2, FUN = mean))
means2b = with(NWR2, aggregate(pinyin_score ~ subject + session + condition + wordtype + aptitude3, FUN = mean))

xb <- summarySEwithin(means, measurevar = "pinyin_score", betweenvars = c("condition"), withinvars = c("wordtype", "session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdatab <- summarySEwithin(meansb, measurevar = "pinyin_score", betweenvars = c("condition", "aptitude2"), withinvars = c("session", "wordtype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdata2b <- summarySEwithin(means2b, measurevar = "pinyin_score", betweenvars = c("condition", "aptitude3"), withinvars = c("session", "wordtype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

#### Main Graph (Figure 8)

```{r}
WRp9 = ggplot (means, aes(x = condition, y = pinyin_score, fill = session)) 
WRp9 = WRp9 + facet_wrap(~wordtype, ncol = 2)
WRp9 = WRp9 + geom_violin(colour = "black", scale = "count")
WRp9 = WRp9 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp9 = WRp9 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp9 = WRp9 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = xb)
WRp9 = WRp9 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
WRp9 = WRp9 + theme_bw() + ggtitle('Word Repetition: Pinyin accuracy')
WRp9 = WRp9 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(WRp9)
```

#### PCPT Graph (Figure 12, Right panel)

```{r}
WRp10 = ggplot (meansb, aes(x = condition, y = pinyin_score, fill = session)) 
WRp10 = WRp10 + facet_wrap(~wordtype + aptitude2, ncol = 2)
WRp10 = WRp10 + geom_violin(colour = "black", scale = "count")
WRp10 = WRp10 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp10 = WRp10 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp10 = WRp10 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = graphdatab)
WRp10 = WRp10 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
WRp10 = WRp10 + theme_bw()+ ggtitle('Word Repetition: Pinyin accuracy')
WRp10 = WRp10 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(WRp10)
```

#### CSTC Graph (Not used)

```{r}
WRp11 = ggplot (means2b, aes(x = condition, y = pinyin_score, fill = session)) 
WRp11 = WRp11 + facet_wrap(~wordtype + aptitude3, ncol = 2)
WRp11 = WRp11 + geom_violin(colour = "black", scale = "count")
WRp11 = WRp11 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
WRp11 = WRp11 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
WRp11 = WRp11 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = graphdata2b)
WRp11 = WRp11 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
WRp11 = WRp11 + theme_bw()+ ggtitle('Word Repetition: Pinyin accuracy')
WRp11 = WRp11 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(WRp11)
```

### Statistical Analysis

#### Main Model (without aptitude predictors; Section 3.5.1.3)

```{r}
NWR2 = lizCenter(NWR2, list("wordtype"))
NWR2 = lizContrasts(NWR2, NWR2$condition, "LV")

nw_glmer = glmer(pinyin_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session
              + (wordtype.ct * session||subject)
              , family="binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NWR2)
```

##### Print out effects at pre-test

```{r}
## possibly relevant at pre test 
kable(get_coeffs(nw_glmer, c("wordtype.ct", "LV_VERSUS_HV", "LV_VERSUS_HVB")), digits = 3)
```

##### Print out key effects of relevant descriptive statistics  test-session and interactions with test-session (i.e. looking effects of  training)"

```{r}
kable(get_coeffs(nw_glmer, c("sessionPost-test", "wordtype.ct:sessionPost-test", "LV_VERSUS_HV:sessionPost-test", "LV_VERSUS_HVB:sessionPost-test", "wordtype.ct:LV_VERSUS_HV:sessionPost-test", "wordtype.ct:LV_VERSUS_HVB:sessionPost-test")), digits = 3)
```

```{r}
dataWRpinyin <- summarySEwithin(means, measurevar = "pinyin_score", withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
kable (dataWRpinyin, digits = 3)
```

#### Analysis with PCPT accuracy (Section 3.7)

```{r}
NWR2 = lizCenter(NWR2, list("session", "accuracy", "slope"))

nw_glmer_pcpt = glmer(pinyin_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session.ct 
              + aptitude_pcpt.ct
              + aptitude_pcpt.ct : session.ct
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct 
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct : wordtype.ct
              + (wordtype.ct * session.ct||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NWR2)
```

##### Check that get the same pattern of results for experimentally manipulated variables as in previous model

```{r}
kable(get_coeffs(nw_glmer_pcpt, c("session.ct",
                            "wordtype.ct:session.ct",
                            "LV_VERSUS_HV:session.ct",
                            "LV_VERSUS_HVB:session.ct",
                            "wordtype.ct:LV_VERSUS_HV:session.ct",
                            "wordtype.ct:LV_VERSUS_HVB:session.ct" )), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(nw_glmer_pcpt, c("aptitude_pcpt.ct", "session.ct:aptitude_pcpt.ct", "LV_VERSUS_HV:session.ct:aptitude_pcpt.ct", "LV_VERSUS_HVB:session.ct:aptitude_pcpt.ct", "wordtype.ct:LV_VERSUS_HV:session.ct:aptitude_pcpt.ct", "wordtype.ct:LV_VERSUS_HVB:session.ct:aptitude_pcpt.ct")), digits = 3)
```

#### Analysis with CSTC slope 

```{r}
nw_glmer_cstc = glmer(pinyin_score ~ wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * session.ct 
              + slope.ct
              + slope.ct: aptitude_pcpt.ct
              + slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct 
              + slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct : wordtype.ct
              + (wordtype.ct * session.ct|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NWR2)
```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(nw_glmer_cstc, c("session.ct", "wordtype.ct:session.ct", "wordtype.ct:LV_VERSUS_HV:session.ct", "wordtype.ct:LV_VERSUS_HVB:session.ct")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(nw_glmer_cstc, c("slope.ct", "slope.ct:aptitude_pcpt.ct", "LV_VERSUS_HV:session.ct:slope.ct", "LV_VERSUS_HVB:session.ct:slope.ct", "wordtype.ct:LV_VERSUS_HV:session.ct:slope.ct", "wordtype.ct:LV_VERSUS_HVB:session.ct:slope.ct")), digits = 3)
```

# Three-interval Oddity Task (Section 3.4.1)

## Label Parameters

```{r}
dis$condition = factor(dis$condition, levels = c("0", "1", "2"), labels = c("LV", "HV", "HVB"))
dis$session = factor(dis$session, levels = c("1", "2"), labels = c("Pre-test", "Post-test"))
dis$trialtype = dis$voicetype = factor(dis$voicetype, levels = c("fff", "ffm", "fmf"), labels = c("Neutral", "Easy", "Hard"))
dis$wordtype = factor(dis$wordtype, levels = c("oldword", "newword"), labels = c("Trained Item", "Untrained Item"))
dis2 = merge(ID_pre, dis)
```

## Figures

```{r}
means = with(dis2, aggregate(score ~ subject + condition + voicetype + wordtype + session, FUN = mean))
meansb = with(dis2, aggregate(score ~ subject + condition + voicetype + wordtype + session + aptitude2+accuracy, FUN = mean))
means2b = with(dis2, aggregate(score ~ subject + condition + voicetype + wordtype + session + aptitude3+slope, FUN = mean))

graphdata <- summarySEwithin(meansb, measurevar = "score", betweenvars = c("condition", "aptitude2"), withinvars = c("wordtype", "session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)

graphdata2 <- summarySEwithin(means2b, measurevar = "score", betweenvars = c("condition", "aptitude3"), withinvars = c("wordtype","session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)

x2 <- summarySEwithin(means, measurevar = "score", betweenvars = c("condition"), withinvars = c("wordtype", "session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

### Main Graph (Figure 5)

```{r}
Dp1 = ggplot (means, aes(x = condition, y = score, fill = session)) 
Dp1 = Dp1 + facet_wrap(~ wordtype, ncol = 2)
Dp1 = Dp1 + geom_violin(colour = "black", scale = "count")
Dp1 = Dp1 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
Dp1 = Dp1 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
Dp1 = Dp1 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = x2)
Dp1 = Dp1 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
Dp1 = Dp1 + theme_bw() + ggtitle('Three-interval Oddity')
Dp1 = Dp1 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(Dp1)
```

### PCPT graph (Figure 10, top panel)

```{r}
Dp2 = ggplot (meansb, aes(x = condition, y = score, fill = session))
Dp2 = Dp2 + facet_wrap(~ wordtype + aptitude2, ncol = 2)
Dp2 = Dp2 + geom_violin(colour = "black", scale = "count")
Dp2 = Dp2 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
Dp2 = Dp2 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
Dp2 = Dp2 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = graphdata)
Dp2 = Dp2 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
Dp2 = Dp2 + theme_bw()+ ggtitle('Three-interval Oddity')
Dp2 = Dp2 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(Dp2)
```

### CSTC graph (Not included)

```{r}
Dp3 = ggplot (means2b, aes(x = condition, y = score, fill = session)) 
Dp3 = Dp3 + facet_wrap(~ wordtype + aptitude3, ncol = 2)
Dp3 = Dp3 + geom_violin(colour = "black", scale = "count")
Dp3 = Dp3 + scale_fill_manual(name = "Test Session", values = c("Pre-test" = "grey", "Post-test" = "white"), labels = c("Pre-test", "Post-test"))
Dp3 = Dp3 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
Dp3 = Dp3 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = graphdata2)
Dp3 = Dp3 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
Dp3 = Dp3 + theme_bw() + ggtitle('Three-interval Oddity')
Dp3 = Dp3 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(Dp3)
```

### PCPT Graph (Dot plot, not currently used)

```{r}
means.pre <- subset(meansb, session == "Pre-test")
means.pre = rename(means.pre, c(score = "prescore"))
means.pre$session = NULL
means.post <- subset(meansb, session == "Post-test")
means.post = rename(means.post, c(score ="postscore"))
means.post$session = NULL

means.x = merge(means.pre, means.post)

means.x$diff = means.x$postscore-means.x$prescore

Dp4 = ggplot(means.x, aes(x = accuracy, y = diff))
Dp4=Dp4 + geom_point(aes(shape = factor(condition)))
Dp4 = Dp4 + ggtitle('Three-interval Oddity')

print(Dp4)

rm(means.pre)
rm(means.post)
rm(means.x)
```

### CSTC Graph (Dot plot, not currently used)

```{r}
means2.pre <- subset(means2b, session == "Pre-test")
means2.pre = rename(means2.pre, c(score = "prescore"))
means2.pre$session = NULL
means2.post <- subset(means2b, session == "Post-test")
means2.post = rename(means2.post, c(score = "postscore"))
means2.post$session = NULL

means2.x = merge(means2.pre, means2.post)

means2.x$diff = means2.x$postscore-means2.x$prescore

Dp5 = ggplot(means2.x, aes(x = slope, y = diff))
Dp5=Dp5 + geom_point(aes(shape = factor(condition)))
Dp5 = Dp5 + ggtitle('Three-interval Oddity')
Dp5

rm(means2.pre)
rm(means2.post)
rm(means2.x)
```

## Statistical Analysis 

### Main model (without aptitude predictors; Section 3.4.1)

```{r}
dis2 = lizCenter(dis2, list("session", "wordtype", "aptitude_pcpt"))
dis2 = lizContrasts(dis2, dis2$condition, "LV")
dis2 = lizContrasts(dis2, dis2$voicetype, "Neutral")

d_glmer = glmer(score ~ session * wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) 
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard): session
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard) 
              + (session * wordtype.ct||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = dis2)
```

#### Print out key effects at pre-test session

```{r}
kable(get_coeffs(d_glmer, c("(Intercept)", "wordtype.ct", "LV_VERSUS_HV", "LV_VERSUS_HVB", "Neutral_VERSUS_Easy", "Neutral_VERSUS_Hard")), digits = 3)
```



#### Print out key effects relevant descriptive statistics of test-session and interactions with test-session (i.e. looking effects of  training)

```{r}
kable(get_coeffs(d_glmer, c("sessionPost-test", "sessionPost-test:wordtype.ct", "sessionPost-test:LV_VERSUS_HV",  "sessionPost-test:LV_VERSUS_HVB", "sessionPost-test:wordtype.ct:LV_VERSUS_HV", "sessionPost-test:wordtype.ct:LV_VERSUS_HVB")), digits = 3)
```

```{r}
dataD <- summarySEwithin(means, measurevar = "score", withinvars =  c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
kable (dataD, digits = 3)
```

#### Examine if learning is different for the easier/harder trial types and print out relevant descriptive statistics 

```{r}
round(get_coeffs(d_glmer, c("sessionPost-test:Neutral_VERSUS_Easy","sessionPost-test:Neutral_VERSUS_Hard")), 3)
```
```{r}
dataD2 <- summarySEwithin(means, measurevar = "score", withinvars =  c("session", "voicetype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
kable (dataD2, digits = 3)
```
### Analyses with PCPT accuracy (Section 3.7)

```{r}
dis2 = lizCenter(dis2, list("session.ct"))

d_glmer_pcpt = glmer(score ~ session.ct * wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) 
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard): session.ct 
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard) 
              + aptitude_pcpt.ct
              + aptitude_pcpt.ct: session.ct
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct
              + aptitude_pcpt.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct : wordtype.ct
              + (session.ct * wordtype.ct||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = dis2)
``` 

#### Print out key effects of test-session and interactions with test-session (i.e. looking effects of  training)

```{r}
kable(get_coeffs(d_glmer_pcpt, c("(Intercept)", "session.ct", "session.ct:wordtype.ct", "session.ct:LV_VERSUS_HV", "session.ct:LV_VERSUS_HVB", "session.ct:wordtype.ct:LV_VERSUS_HV",  "session.ct:wordtype.ct:LV_VERSUS_HVB")), digits = 3)
``` 

#### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
get_coeffs(d_glmer_pcpt, c("aptitude_pcpt.ct", "session.ct:aptitude_pcpt.ct", "session.ct:LV_VERSUS_HV:aptitude_pcpt.ct", "session.ct:LV_VERSUS_HVB:aptitude_pcpt.ct", "session.ct:wordtype.ct:LV_VERSUS_HV:aptitude_pcpt.ct", "session.ct:wordtype.ct:LV_VERSUS_HVB:aptitude_pcpt.ct"))
``` 

### Analyses-with CSTC slope

```{r}
dis2 = lizCenter(dis2, c("slope"))

d_glmer_cstc = glmer(score ~ session.ct * wordtype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) 
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard): session.ct 
              + (Neutral_VERSUS_Easy + Neutral_VERSUS_Hard) 
              + slope.ct
              + slope.ct: session.ct
              + slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct
              + slope.ct : (LV_VERSUS_HV + LV_VERSUS_HVB) : session.ct: wordtype.ct
              + (session.ct * wordtype.ct||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = dis2)
```

#### Print out key effects of test-session and interactions with test-session (i.e. looking effects of  training)

```{r}
kable(get_coeffs(d_glmer_cstc, c("(Intercept)", "session.ct", "session.ct:wordtype.ct", "session.ct:LV_VERSUS_HV", "session.ct:LV_VERSUS_HVB", "session.ct:wordtype.ct:LV_VERSUS_HV", "session.ct:wordtype.ct:LV_VERSUS_HVB")), digits = 3)
``` 

#### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
get_coeffs(d_glmer_cstc,  c("slope.ct", "session.ct:slope.ct", "session.ct:LV_VERSUS_HV:slope.ct", "session.ct:LV_VERSUS_HVB:slope.ct",   "session.ct:wordtype.ct:LV_VERSUS_HV:slope.ct", "session.ct:wordtype.ct:LV_VERSUS_HVB:slope.ct"))
``` 

# Training (Section 3.3)

## Parameters with labels

```{r}
## Note: the ID_train data frame already has aptitude2 and aptitude3 in 

train$condition = factor(train$condition, levels = c("0","2","1"), labels = c("LV", "HVB", "HV"))

train2 = merge(ID_pre, train)

means = with(train2, aggregate(score ~ subject + session + condition + aptitude2, FUN = mean))
means2 = with(train2, aggregate(score ~ subject + session + condition + aptitude3, FUN = mean))
```

## Figures

```{r}
x <- summarySEwithin(means, measurevar = "score", betweenvars = c("condition"), withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdata <- summarySEwithin(means, measurevar = "score", betweenvars = c("condition", "aptitude2"), withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval =.95)
graphdata2 <- summarySEwithin(means2, measurevar = "score", betweenvars = c("condition", "aptitude3"), withinvars = c("session"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

### Main Graph (Figure 4)

```{r}
Tp1 = ggplot(x, aes(x = session, y = score, group = condition, colour = condition)) + geom_line(aes(linetype = condition), size = 0.8) + geom_point(aes(shape = condition), size = 2.5)
Tp1 = Tp1 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .2)
Tp1 = Tp1 + scale_linetype_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
Tp1 = Tp1 + coord_cartesian(ylim = c(0.5, 1.0))
Tp1 = Tp1 + theme_bw()
Tp1 = Tp1 + ggtitle('Training')
Tp1 = Tp1 + theme(panel.background = element_rect(fill = 'white'), panel.grid.major = element_line(colour = "white"))
Tp1 = Tp1 + labs(x = "Training Session", y = "Mean Accuracy")
print(Tp1)
```

### PCPT Graph (Figure 10, Bottom panel)

```{r}
Tp2 = ggplot(graphdata, aes(x = session, y = score, group = condition, colour = condition)) + geom_line(aes(linetype = condition),size = 0.8) + geom_point(aes(shape = condition), size = 2.5)
Tp2 = Tp2 + facet_wrap(~aptitude2, ncol = 2)
Tp2 = Tp2 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .2)
Tp2 = Tp2 + theme_bw()
Tp2 = Tp2 + scale_linetype_discrete(name = "Variablity Condtion", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = "Variablity Condtion", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))+ scale_colour_grey(name = "Variablity Condtion", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
Tp2 = Tp2 + coord_cartesian(ylim = c(0.5, 1.0))
Tp2 = Tp2 + ggtitle('Training')
Tp2 = Tp2 + theme(panel.background = element_rect(fill = 'white'), panel.grid.major = element_line(colour = "white"))
Tp2 = Tp2 + labs(x = "Training Session", y = "Mean Accuracy")
print(Tp2)
```

### CSTC graph (Not used)

```{r}
Tp3 = ggplot(graphdata2, aes(x = session, y = score, group = condition, colour = condition)) + geom_line(aes(linetype = condition),size = 0.8) + geom_point(aes(shape = condition), size = 2.5)
Tp3 = Tp3 + facet_wrap(~ aptitude3, ncol = 2)
Tp3 = Tp3 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .2)
Tp3 = Tp3 + theme_bw()
Tp3 = Tp3 + scale_linetype_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
Tp3 = Tp3 + coord_cartesian(ylim = c(0.5, 1.0))
Tp3 = Tp3 + ggtitle('Training')
Tp3 = Tp3 + theme(panel.background = element_rect(fill = 'white'), panel.grid.major = element_line(colour = "white"))
Tp3 = Tp3 + labs(x = "Training Session", y = "Mean Accuracy")
print(Tp3)
```

## Statistical Analysis

### Main Model (without aptitude predictors; Section 3.3)

```{r}
## Note that a full model with all three conditions in didn't converge so model is split into two (converging) models
train2 = lizContrasts(train2, train2$condition, "LV")
train2$aptitude2 = relevel(train2$aptitude2, ref="LA")
train2 = lizCenter(train2, list("session","aptitude_pcpt", "aptitude2"))
train2.noHVB = lizCenter(subset(train2, condition != "HVB"), list("session", "condition", "aptitude_pcpt", "slope", "aptitude2"))
train2.noHV = lizCenter(subset(train2, condition != "HV"), list("session", "condition", "aptitude_pcpt", "slope","aptitude2"))

t_glmer_LVvHV = glmer(score ~ session.ct * condition.ct
              + (session||subject)
              , family="binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2.noHVB)

t_glmer_LVvHVB = glmer(score ~ session.ct * condition.ct
              + (session||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2.noHV)
```

#### Print key cofficients

```{r}
kable(summary(t_glmer_LVvHV)$coefficients, digits = 6)
kable(summary(t_glmer_LVvHVB)$coefficients, digits = 3)
```

### Analysis with PCPT accuracy (Section 3.7)
```{r}
# Note: models separate HV & HVB won't converge with both aptitude and session as predictors. The original model with all conditions but without session is used. 


t_glmer_pcpt = glmer(score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * aptitude_pcpt.ct 
                + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2)

```



#### Print key cofficients

```{r}
kable (round(summary(t_glmer_pcpt)$coefficients, 6), digits = 3)


```

### Extra analysis requested by reviewers, with categorical aptitude measure from PCPT accuracy

```{r}
# Note: models separate HV & HVB won't converge with both aptitude and session as predictors. The original model with all conditions but without session is used. 


t_glmer_pcpt2 = glmer(score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * aptitude2.ct 
                + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2)

```

#### Print key cofficients

```{r}
kable (round(summary(t_glmer_pcpt2)$coefficients, 6), digits = 3)
```

### Analysis with CSTC slope

```{r}
# Note: full model didn't converge, so removed slope.ct:condition.ct:session.ct

t_glmer_LVvHV_cstc = glmer(score ~ session.ct * condition.ct + slope.ct + (slope.ct : condition.ct)
              + (session||subject)
              , family = "binomial" , 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2.noHVB)

kable (round(summary(t_glmer_LVvHV_cstc)$coefficients, 6), digits = 3)

t_glmer_LVvHVB_cstc = glmer(score ~ session.ct * condition.ct + slope.ct + (slope.ct : condition.ct)
              + (session||subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = train2.noHV)

kable (round(summary(t_glmer_LVvHVB_cstc)$coefficients, 6), digits = 3)
```

# Picture Identification (Section 3.4.2)

## Label Parameters

```{r}
PI$condition = factor(PI$condition, levels = c("0", "1", "2"), labels = c("LV", "HV", "HVB"))
PI$voicetype = factor(PI$voicetype, levels = c("nv1", "tv1"), labels = c("Untrained Voice", "Trained voice"))

PI2 = merge(ID_pre, PI)
```

## Figures

```{r}
means = with(PI2, aggregate(score ~ subject + condition + voicetype, FUN = mean))
IDmeans = with (PI2, aggregate(aptitude_pcpt ~ subject + condition + voicetype, FUN = mean))
means = merge (means, IDmeans)
meansb = with(PI2, aggregate(score ~ subject + condition + voicetype + aptitude2, FUN = mean))
means2b = with(PI2, aggregate(score ~ subject + condition + voicetype + aptitude3, FUN = mean))

x <- summarySEwithin(means, measurevar = "score", betweenvars = c("condition"), withinvars = c("voicetype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdata <- summarySEwithin(meansb, measurevar = "score", betweenvars = c("condition", "aptitude2"), withinvars = c("voicetype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
graphdata2 <- summarySEwithin(means2b, measurevar = "score", betweenvars = c("condition", "aptitude3"), withinvars = c("voicetype"), idvar = "subject", na.rm = FALSE, conf.interval = .95)
```

### Main Graph (Figure 6)

```{r}
PIp1 = ggplot (means, aes(x = condition, y = score, fill = voicetype)) 
PIp1 = PIp1 + geom_violin(colour = "black", scale = "count")
PIp1 = PIp1 + scale_fill_manual(name = "Voice type", values = c("Untrained Voice" = "grey", "Trained voice" = "white"), labels = c("Untrained Voice", "Trained voice"))
PIp1 = PIp1 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PIp1 = PIp1 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = x)
PIp1 = PIp1 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PIp1 = PIp1 + ggtitle('Picture Identification')
PIp1 = PIp1 + labs(x = "Variability Condition", y = "Mean Accuracy")
PIp1 = PIp1 + theme_bw()
print(PIp1)
```

### Main Graph 2 (Scatter Plot, Figure 11, Bottom panel, right)

```{r}
PIp0.1m = ggplot(means, aes(aptitude_pcpt, score, shape = condition, colour = condition)) 
PIp0.1m = PIp0.1m + geom_point(aes (shape = condition), size = 2) + geom_smooth(se = FALSE, method = lm, aes(linetype = condition))
PIp0.1m = PIp0.1m + scale_linetype_discrete(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = " ", breaks = c("LV", "HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
PIp0.1m = PIp0.1m + labs(x = "PCPT Score", y = "Mean Accuracy") +  ggtitle(' ')
print(PIp0.1m)
```

### PCPT Graph (Figure 11 Bottom panel, left)

```{r}
PIp2 = ggplot (meansb, aes(x = condition, y = score, fill = voicetype)) 
PIp2 = PIp2 + facet_wrap(~aptitude2, ncol = 2)
PIp2 = PIp2 + geom_violin(colour = "black", scale = "count")
PIp2 = PIp2 + scale_fill_manual(name = "Voice type", values = c("Untrained Voice" = "grey", "Trained voice" = "white"), labels = c("Untrained Voice", "Trained voice"))
PIp2 = PIp2 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PIp2 = PIp2 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = graphdata)
PIp2 = PIp2 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PIp2 = PIp2 + ggtitle('Picture Identification')
PIp2 = PIp2 + labs(x = "Variability Condition", y = "Mean Accuracy")
PIp2 = PIp2 + theme_bw()
PIp2.m = PIp2 + theme(legend.position = c(0.8, 0.1), legend.key.size = unit(0.3,"cm"), legend.title = element_text(size=10), legend.text = element_text(size = 8))
print(PIp2)
```

### CSTC graph (Not used)

```{r}
PIp3 = ggplot (means2b, aes(x = condition, y = score, fill = voicetype)) 
PIp3 = PIp3 + facet_wrap(~aptitude3, ncol = 2)
PIp3 = PIp3 + geom_violin(colour = "black", scale = "count")
PIp3 = PIp3 + scale_fill_manual(name = "Voice type", values = c("Untrained Voice" = "grey", "Trained voice" = "white"), labels = c("Untrained Voice", "Trained voice"))
PIp3 = PIp3 + scale_x_discrete(labels = c("Low Variability", "High Variability", "High Varaibility Blocking"))
PIp3 = PIp3 + geom_errorbar(aes(ymin = score-ci, ymax = score+ci), width = .4, position = position_dodge(.9), data = graphdata2)
PIp3 = PIp3 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PIp3 = PIp3 + ggtitle('Picture Identification')
PIp3 = PIp3 + labs(x = "Variability Condition", y = "Mean Accuracy")
PIp3 = PIp3 + theme_bw()
print(PIp3)
```

## Statistical Analysis

### Main Model (without individual difference measures; Section 3.4.2)

```{r}
PI2 = lizCenter(PI2, list("voicetype", "aptitude_pcpt", "slope"))
PI2 = lizContrasts(PI2, PI2$condition, "LV")

p_glmer = glmer(score ~ voicetype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB)
              + (voicetype.ct|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = PI2)
```

#### Print out key effects

```{r}
kable(summary(p_glmer)$coefficients, digits = 3)
```

#### Re-fit the model with a separate slope for voicetyp.ct for each condition"

```{r}
PI2 = lizCenter(PI2, list("voicetype", "aptitude_pcpt", "slope"))
PI2 = lizContrasts(PI2, PI2$condition, "LV")

p_glmerb = glmer(score ~ 
                   voicetype.ct :  condition
                 +(LV_VERSUS_HV + LV_VERSUS_HVB)
                 + (voicetype.ct|subject)
                 , family = "binomial",
                 control = glmerControl(optimizer = "bobyqa"), 
                 data = PI2)
```

#### To check it is the same model

```{r}
anova(p_glmer, p_glmerb)
```

#### Print out effect of voice-type for each condition

```{r}
round(get_coeffs(p_glmerb, c("voicetype.ct:conditionLV", "voicetype.ct:conditionHV", "voicetype.ct:conditionHVB")), 3)
```

#### Re-fit the model with a separate slope for the contrast between conditions for trained and untrained nouns

```{r}
PI2 = lizCenter(PI2, list("voicetype", "aptitude_pcpt", "slope"))
PI2 = lizContrasts(PI2, PI2$condition, "LV")

p_glmerc = glmer(score ~ 
                voicetype : LV_VERSUS_HV 
               + voicetype : LV_VERSUS_HVB
               + voicetype.ct 
               + (voicetype.ct|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = PI2)
```

#### To check it is the same model

```{r}
anova(p_glmer, p_glmerc)
```

#### Print out the contrast between conditions for trained and untrained nouns

```{r}
kable(summary(p_glmerc)$coeff, digits = 3)
```

### Analysis with PCPT accuracy (Section 3.7)

```{r}
p_glmer_pcpt = glmer(score ~ voicetype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * aptitude_pcpt.ct
                + (voicetype.ct|subject)
                , family = "binomial", 
                control = glmerControl(optimizer = "bobyqa"), 
                data = PI2)
```

#### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(p_glmer_pcpt, c("voicetype.ct", "LV_VERSUS_HV", "LV_VERSUS_HVB", "voicetype.ct:LV_VERSUS_HV", "voicetype.ct:LV_VERSUS_HVB")), digits = 3)
```

#### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(p_glmer_pcpt, c("aptitude_pcpt.ct", "voicetype.ct:aptitude_pcpt.ct", "LV_VERSUS_HV:aptitude_pcpt.ct", "LV_VERSUS_HVB:aptitude_pcpt.ct", "voicetype.ct:LV_VERSUS_HV:aptitude_pcpt.ct", "voicetype.ct:LV_VERSUS_HVB:aptitude_pcpt.ct")), digits = 3)
```

### Analysis with CSTC slope

```{r}
p_glmer_cstc = glmer(score ~ voicetype.ct * (LV_VERSUS_HV + LV_VERSUS_HVB) * slope.ct
                + (voicetype.ct|subject)
                , family = "binomial", 
                control = glmerControl(optimizer = "bobyqa"), 
                data = PI2)
```

#### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(p_glmer_cstc, c("voicetype.ct", "LV_VERSUS_HV",  "LV_VERSUS_HVB", "voicetype.ct:LV_VERSUS_HV", "voicetype.ct:LV_VERSUS_HVB")), digits = 3)
```

#### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(p_glmer_cstc, c("slope.ct", "voicetype.ct:slope.ct", "LV_VERSUS_HV:slope.ct", "LV_VERSUS_HVB:slope.ct", "voicetype.ct:LV_VERSUS_HV:slope.ct", "voicetype.ct:LV_VERSUS_HVB:slope.ct")), digits = 3)
```

# Picture Naming (Section 3.5.2)

## Picture Naming - Tone accuracy

### Label parameters and remove unidentifiable trials

```{r}
NPN = CPN[CPN$tone != 0, ]
NPN$condition = factor(NPN$condition, levels = c("0", "1", "2"), labels = c("LV", "HV", "HVB"))
NPN2 = merge(ID_pre, NPN)
```

### Figures

```{r}
means = with(NPN2, aggregate(tone_score ~ subject + condition, FUN = mean))
IDmeans = with (NPN2, aggregate(aptitude_pcpt ~ subject + condition, FUN = mean))
means = merge (means, IDmeans)
meansb = with(NPN2, aggregate(tone_score ~ subject + condition + aptitude2, FUN = mean))
means2b = with(NPN2, aggregate(tone_score ~ subject + condition + aptitude3, FUN = mean))

xb <- summarySE(meansb, measurevar = "tone_score", groupvars = c("condition"), na.rm = FALSE, conf.interval = .95)
graphdatab <- summarySE(meansb, measurevar = "tone_score", groupvars = c("condition", "aptitude2"), na.rm = FALSE, conf.interval = .95)
graphdata2b <- summarySE(means2b, measurevar = "tone_score", groupvars = c("condition", "aptitude3"), na.rm = FALSE, conf.interval = .95)
```

#### Main Figure (Figure 9, left)

```{r}
PNp1 = ggplot (means, aes(x = condition, y = tone_score)) 
PNp1 = PNp1 + geom_violin(colour = "black", scale = "count")
PNp1 = PNp1 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp1 = PNp1 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = xb)
PNp1 = PNp1 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PNp1 = PNp1 +theme_bw() + ggtitle('Picture Naming - Tone accuracy')
PNp1 = PNp1 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp1)
```

#### Main Figure (Scatter plot, Figure 11, Top panel, right)

```{r}
PNp0.1 = ggplot(means, aes(aptitude_pcpt, tone_score,shape = condition, colour = condition)) 
PNp0.1 = PNp0.1 + geom_point(aes (shape = condition), size = 2) + geom_smooth(se = FALSE, method = lm, aes(linetype = condition))
PNp0.1 = PNp0.1 + scale_linetype_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
PNp0.1 = PNp0.1 + labs(x = "PCPT Score",y = NULL) + ggtitle(' ')

PNp0.1.m = PNp0.1 + scale_linetype_discrete(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
PNp0.1.m = PNp0.1.m + labs(x = "PCPT Score", y = "Mean Accuracy") + ggtitle(' ')

print(PNp0.1)
```

#### PCPT Graph (Figure 11 Top panel,left)

```{r}
PNp2 = ggplot (meansb, aes(x = condition, y = tone_score)) 
PNp2 = PNp2 + geom_violin(colour = "black", scale = "count")
PNp2 = PNp2 + facet_wrap(~aptitude2, ncol = 2)
PNp2 = PNp2 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp2 = PNp2 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = graphdatab)
PNp2 = PNp2 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
PNp2 = PNp2 + theme_bw()+ ggtitle('Picture Naming: Tone accuracy')
PNp2 = PNp2 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp2)
```

#### CSTC graph (Not used)

```{r}
PNp3 = ggplot (means2b, aes(x = condition,y = tone_score, fill = aptitude3)) 
PNp3 = PNp3 + geom_violin(colour = "black", scale = "count")
PNp3 = PNp3 + scale_fill_manual(name = "Aptitude", values = c("LA" = "grey", "HA" = "white"), labels = c("LA", "HA"))
PNp3 = PNp3 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp3 = PNp3 + geom_errorbar(aes(ymin = tone_score-ci, ymax = tone_score+ci), width = .4, position = position_dodge(.9), data = graphdata2b)
PNp3 = PNp3 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PNp3 = PNp3 + theme_bw()+ ggtitle('Picture Naming: Tone accuracy')
PNp3 = PNp3 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp3)
```

### Statistical Analysis

#### Main model (without aptitude predictors; Section 3.5.2)

```{r}
NPN2 = lizCenter(NPN2, list("aptitude_pcpt", "slope"))
NPN2 = lizContrasts(NPN2, NPN2$condition, "LV")

NPN_glmer = glmer(tone_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) 
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NPN2)
```

##### Print out effects

```{r}
kable(summary(NPN_glmer)$coefficients, digits = 3)
```

#### Analysis with PCPT accuracy (Section 3.7)

```{r}

NPN_glmer_pcpt = glmer(tone_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * aptitude_pcpt.ct
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NPN2)
```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(NPN_glmer_pcpt, c("(Intercept)", "LV_VERSUS_HV", "LV_VERSUS_HVB")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
summary(NPN_glmer_pcpt)$coeff
kable(get_coeffs(NPN_glmer_pcpt, c("aptitude_pcpt.ct", "LV_VERSUS_HV:aptitude_pcpt.ct", "LV_VERSUS_HVB:aptitude_pcpt.ct")), digits = 3)
```

#### Analysis with CSTC slope

```{r}
NPN_glmer_cstc = glmer(tone_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * slope.ct
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NPN2)
```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(NPN_glmer_cstc, c("(Intercept)", "LV_VERSUS_HV", "LV_VERSUS_HVB")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(NPN_glmer_cstc, c("slope.ct", "LV_VERSUS_HV:slope.ct", "LV_VERSUS_HVB:slope.ct")), digits = 3)
```

## Picture Naming - Pinyin Accuracy

### Figures

```{r}
means = with(NPN2, aggregate(pinyin_score ~ subject + condition, FUN = mean))
IDmeans = with (NPN2, aggregate(aptitude_pcpt ~ subject + condition, FUN = mean))
means = merge (means, IDmeans)
meansb = with(NPN2, aggregate(pinyin_score ~ subject + condition + aptitude2, FUN = mean))
means2b = with(NPN2, aggregate(pinyin_score ~ subject + condition + aptitude3, FUN = mean))

xb<- summarySE(meansb, measurevar = "pinyin_score", groupvars = c("condition"), na.rm = FALSE, conf.interval = .95)
graphdatab <- summarySE(meansb, measurevar = "pinyin_score", groupvars = c("condition", "aptitude2"), na.rm = FALSE, conf.interval = .95)
graphdata2b <- summarySE(means2b, measurevar = "pinyin_score", groupvars = c("condition", "aptitude3"), na.rm = FALSE, conf.interval = .95)
```

#### Main Graph (Figure 9, right)

```{r}
PNp7 = ggplot (means, aes(x = condition, y=pinyin_score)) 
PNp7 = PNp7 + geom_violin(colour = "black", scale = "count")
PNp7 = PNp7 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp7 = PNp7 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = xb)
PNp7 = PNp7 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PNp7 = PNp7 + theme_bw() + ggtitle('Picture Naming: Pinyin Accuracy')
PNp7 = PNp7 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp7)
```

#### Main Graph (Scatter plot, Figure 11, Middle panel, right)

```{r}
PNp0.2 = ggplot(means, aes(aptitude_pcpt, pinyin_score, colour = condition, shape = condition)) 
PNp0.2 = PNp0.2 + geom_point(aes (shape = condition), size = 2) + geom_smooth(se = FALSE, method = lm, aes(linetype = condition))
PNp0.2 = PNp0.2 + scale_linetype_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))+ scale_shape_discrete(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = "Variability Condition", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
PNp0.2 = PNp0.2 + labs(x = "PCPT Score", y = NULL) +  ggtitle('Scatter Plot')

PNp0.2.m = PNp0.2 + scale_linetype_discrete(name = " ", breaks = c("LV", "HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_shape_discrete(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB")) + scale_colour_grey(name = " ", breaks = c("LV","HV", "HVB"), labels = c("LV", "HV", "HVB"))
PNp0.2.m = PNp0.2.m + labs(x = "PCPT Score", y = "Mean Accuracy") + ggtitle(' ')

print(PNp0.2)
```

#### PCPT Graph (Figure 11, Middle panel,left)

```{r}
PNp8 = ggplot (meansb, aes(x = condition,y = pinyin_score)) 
PNp8 = PNp8 + geom_violin(colour = "black", scale = "count")
PNp8 = PNp8 + facet_wrap(~aptitude2, ncol = 2)
PNp8 = PNp8 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp8 = PNp8 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = graphdatab)
PNp8 = PNp8 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2,position = position_dodge(.9))
PNp8 = PNp8 +theme_bw()+ ggtitle('Picture Naming: Pinyin Accuracy')
PNp8 = PNp8 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp8)
```

#### CSTC Graph (Not used)

```{r}
PNp9 = ggplot (means2b, aes(x = condition, y = pinyin_score, fill = aptitude3)) 
PNp9 = PNp9 + geom_violin(colour = "black", scale = "count")
PNp9 = PNp9 + scale_fill_manual(name = "aptitude3", values = c("LA" = "grey", "HA" = "white"), labels = c("LA", "HA"))
PNp9 = PNp9 + scale_x_discrete(labels = c("LV", "HV", "HVB"))
PNp9 = PNp9 + geom_errorbar(aes(ymin = pinyin_score-ci, ymax = pinyin_score+ci), width = .4, position = position_dodge(.9), data = graphdata2b)
PNp9 = PNp9 + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, position = position_dodge(.9))
PNp9 = PNp9 + theme_bw()+ ggtitle('Picture Naming: Pinyin Accuracy')
PNp9 = PNp9 + labs(x = "Variability Condition", y = "Mean Accuracy")
print(PNp9)
```

### Statistical Analysis

#### Main Model (without individual aptitude measures; section 3.5.2)

```{r}
NPN2 = lizCenter(NPN2, list("aptitude_pcpt", "slope"))
NPN2 = lizContrasts(NPN2, NPN2$condition, "LV")

NPN_glmer = glmer(pinyin_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) 
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NPN2)
```

#### Print out key effects

```{r}
kable(summary(NPN_glmer)$coefficients, digits = 3)
```

#### Analysis with PCPT accuracy (Section 3.7)

```{r}
NPN_glmer_pcpt = glmer(pinyin_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * aptitude_pcpt.ct
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"), 
              data = NPN2)
```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(NPN_glmer_pcpt, c("(Intercept)","LV_VERSUS_HV", "LV_VERSUS_HVB")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
summary(NPN_glmer_pcpt)$coeff
kable(get_coeffs(NPN_glmer_pcpt, c("aptitude_pcpt.ct", "LV_VERSUS_HV:aptitude_pcpt.ct", "LV_VERSUS_HVB:aptitude_pcpt.ct")), digits = 3)
```

#### Analysis with CSTC slope

```{r}
NPN_glmer_cstc = glmer(pinyin_score ~ (LV_VERSUS_HV + LV_VERSUS_HVB) * slope.ct
              + (1|subject)
              , family = "binomial", 
              control = glmerControl(optimizer = "bobyqa"),  
              data = NPN2)
```

##### Check that we get the same pattern of results for the experimentally manipulated variables as in the previous model

```{r}
kable(get_coeffs(NPN_glmer_cstc, c("(Intercept)","LV_VERSUS_HV", "LV_VERSUS_HVB")), digits = 3)
```

##### Print out the effect of individual aptitude and key interactions with aptitude

```{r}
kable(get_coeffs(NPN_glmer_cstc, c("slope.ct", "LV_VERSUS_HV:slope.ct", "LV_VERSUS_HVB:slope.ct")), digits = 3)
```