# R version: 4.2.0 library(tidyverse) library(lme4) library(lmerTest) library(ez) library(gridExtra) load(paste0("CF0_VOT_correlation.Rdata")) ## get number of included data points per position with(corr, table(var_age, c.type, position)) ## remove 9 outliers corr <- corr %>% filter(scaled.f0.mean>=-3.5) %>% filter(scaled.f0.mean<=4.2) %>% filter(VOT.rel<=0.55) %>% filter(!bundle == "WB0002_w_a0006MS_FoKoIn_lo_Teckel_2") ## group-level correlation for scaled f0 & normalized VOT (Pearson’s r) corr %>% group_by(var_age, position) %>% summarise(corr = cor(VOT.rel, scaled.f0.mean)) %>% ungroup() ## model correlation differences at individual level ## initial fortis stops ini = corr %>% subset(position == "initial") # fit model i.vot = lmer(scaled.f0.mean ~ VOT.rel + (1 + VOT.rel|session) + (1 + VOT.rel|word), data = ini) # extract speaker-specific slopes from model z.vot = coef(i.vot)[[1]] # create columns for speaker group and session labs.vot = str_trunc(rownames(z.vot), 4, "center", ellipsis = ".") labs.vot[labs.vot=="SD.a"]="SD" labs.vot[labs.vot=="SD.j"]="SD" labs.vot[labs.vot=="SD"]=0 labs.vot[labs.vot=="WB.a"]=1 labs.vot[labs.vot=="WB.j"]=2 labs.vot = as.factor(labs.vot) levels(labs.vot) = c("Standard","Bavarian_a","Bavarian_j") vpn = factor(rownames(z.vot)) # write slopes, speaker group and session into df Initial.df = data.frame(CF0 = z.vot[,2], session = vpn, var_age = labs.vot) ## medial fortis stops med = corr %>% subset(position == "medial") # fit model m.vot = lmer(scaled.f0.mean ~ VOT.rel + (1 + VOT.rel|session) + (1 + VOT.rel|word), data = med) # remove outliers based on model residuals to address convergence failure # homoscedasticity plot(m.vot) # normal distribution of residuals qqnorm(resid(m.vot)) qqline(resid(m.vot)) # add column for residuals med$resid = resid(m.vot) # define sd = 2.5 sd = 2.5*sd(resid(m.vot)) # add column for outliers med$outs = ifelse(abs(med$resid)>sd, 1, 0) # plot outliers plot(med$resid, col = med$outs+1, pch = 16, ylim = c(-4, 4)) # remove 7 outliers med_clean = med[!med$outs, ] # plot again plot(med_clean$resid, col = med_clean$outs+1, pch = 16, ylim = c(-4, 4)) # model again m.vot_clean = lmer(scaled.f0.mean ~ VOT.rel + (1 + VOT.rel|session) + (1 + VOT.rel|word), data = med_clean) # extract speaker-specific slopes from model z.vot = coef(m.vot_clean)[[1]] # create columns for speaker group and session labs.vot = str_trunc(rownames(z.vot), 4, "center", ellipsis = ".") labs.vot[labs.vot=="SD.a"]="SD" labs.vot[labs.vot=="SD.j"]="SD" labs.vot[labs.vot=="SD"]=0 labs.vot[labs.vot=="WB.a"]=1 labs.vot[labs.vot=="WB.j"]=2 labs.vot = as.factor(labs.vot) levels(labs.vot) = c("Standard","Bavarian_a","Bavarian_j") vpn = factor(rownames(z.vot)) # write slopes, speaker group and session into df Medial.df = data.frame(CF0 = z.vot[,2], session = vpn, var_age = labs.vot) ### # Figure 3: Correlation plot ### ## group-level correlation fig1 = ggplot(corr) + aes(y = scaled.f0.mean, x = VOT.rel) + geom_point(aes(color = var_age, shape = var_age)) + geom_smooth(aes(linetype = var_age, color = var_age), method = "lm", se = F) + scale_shape_manual(values = c(16, 2, 3), name = "Speaker group", breaks = c("Bavarian_a", "Bavarian_j", "Standard"), labels = c("Bavarian, o.", "Bavarian, y.", "Standard")) + scale_color_manual(values=c("slategrey", "slategrey", "black"), name = "Speaker group", breaks = c("Bavarian_a", "Bavarian_j", "Standard"), labels = c("Bavarian, o.", "Bavarian, y.", "Standard")) + scale_linetype_manual(values = c("solid", "dashed", "longdash"), name = "Speaker group", breaks = c("Bavarian_a", "Bavarian_j", "Standard"), labels = c("Bavarian, o.", "Bavarian, y.", "Standard")) + facet_grid(~position) + labs(y = "Mean scaled f0", x = "Normalized VOT") + theme_linedraw() + theme(legend.position = "top", legend.title = element_blank(), legend.text = element_text(size = 15), axis.title = element_text(size = 17), axis.text = element_text(size = 14), strip.text = element_text(color = "black", face = "bold", size = 17), strip.background = element_rect(fill = "white")) ## speaker-specific slopes # combine dataframes for plotting Initial.df$position = rep("initial", nrow(Initial.df)) Medial.df$position = rep("medial", nrow(Medial.df)) Plot.df = rbind(Initial.df, Medial.df) fig2 = ggplot(data = Plot.df) + geom_boxplot() + aes(x = var_age, y = CF0) + scale_x_discrete(limits = c("Bavarian_a", "Bavarian_j", "Standard"), labels=c("Bavarian_a" = "Bavarian, o.", "Bavarian_j" = "Bavarian, y.", "Standard" = "Standard")) + geom_hline(aes(yintercept=0)) + facet_grid(~position) + labs(y = "Slope") + theme_linedraw() + theme(axis.title.x = element_blank(), axis.title.y = element_text(size = 17), axis.text = element_text(size = 14), strip.text = element_blank(), strip.background = element_blank()) ## save combined correlation plot with ggsave() as PNG, height = 7, width = 8 fig = grid.arrange(fig1, fig2, nrow = 2, heights = c(1, 0.8)) ### ## Correlation statistics at individual level ### ## ANOVA with slope ("CF0") as DV, speaker group ("var_age") as IV and speaker ("session") as random factor ## Pairwise comparisons for initial fortis stops # WB age groups temp = Initial.df$var_age == "Standard" ini_wb.age <- ezANOVA(Initial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA ini_wb.age[,4:5] = round(ini_wb.age[,4:5],3) ini_wb.age # SG vs. WB younger temp = Initial.df$var_age == "Bavarian_a" ini_wb.y_sd <- ezANOVA(Initial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA ini_wb.y_sd[,4:5] = round(ini_wb.y_sd[,4:5],3) ini_wb.y_sd # SG vs. WB older temp = Initial.df$var_age == "Bavarian_j" ini_wb.o_sd <- ezANOVA(Initial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA ini_wb.o_sd[,4:5] = round(ini_wb.o_sd[,4:5],3) ini_wb.o_sd ## Pairwise comparisons for medial fortis stops # WB age groups temp = Medial.df$var_age == "Standard" med_wb.age <- ezANOVA(Medial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA med_wb.age[,4:5] = round(med_wb.age[,4:5],3) med_wb.age # SG vs. WB younger temp = Medial.df$var_age == "Bavarian_a" med_wb.y_sd <- ezANOVA(Medial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA med_wb.y_sd[,4:5] = round(med_wb.y_sd[,4:5],3) med_wb.y_sd # SG vs. WB older temp = Medial.df$var_age == "Bavarian_j" med_wb.o_sd <- ezANOVA(Medial.df[!temp,], .(CF0), .(session), between = .(var_age))$ANOVA med_wb.o_sd[,4:5] = round(med_wb.o_sd[,4:5],3) med_wb.o_sd