# R version: 4.2.0 library(tidyverse) library(lme4) library(lmerTest) library(emmeans) td = read.table("CF0_trackdata.txt") ## get number of included data points per position td %>% distinct(var_age, position, bundle) %>% group_by(var_age, position) %>% count() ### ## Figure 1: CF0 plot ### # new facet label names for var_age labs <- c("Bavarian, o.", "Bavarian, y.", "Standard") names(labs) <- c("Bavarian_a", "Bavarian_j", "Standard") # save this plot with ggsave() as PNG, height = 7, width = 10.5 f0.plot <- ggplot(td, aes( x = times_norm, y = scaledF0, colour = c.type, linetype = c.type ) ) + geom_smooth(method = "loess") + scale_colour_grey(start = 0.6, end = 0.1) + labs(x = "Normalized time", y = "Scaled f0") + facet_grid(position~var_age, labeller = labeller(var_age = labs)) + theme_linedraw() + theme(legend.position = "top", legend.title = element_blank(), legend.text = element_text(size = 18), axis.title = element_text(size = 20), axis.text = element_text(size = 18), strip.text = element_text(color = "black", face = "bold", size = 20), strip.background = element_rect(fill = "white")) ## get f0 means over first 25% of vowel td.25 <- td %>% filter(times_norm <= 0.25) %>% group_by(bundle, session, position, POA, c.type, repetition, freq, var_age, word) %>% summarise(scaled.f0.mean = mean(scaledF0, na.rm = TRUE)) %>% ungroup() ### ## Initial position: CF0 model ### ## p-values in decimel options(scipen = 999) ## check structure of factors for modelling # SESSION table(td.25$c.type, td.25$session) # consonant type is distributed within session # WORD table(td.25$var_age, td.25$word) # speaker group is distributed within word ## include only stops in initial position td.25_ini = td.25 %>% subset(position == "initial") ## full model scaled f0 scaledf0.25_ini <- lmer( scaled.f0.mean ~ c.type * var_age + (1 + c.type | session) + (1 + var_age | word), data = td.25_ini) ## check basic model assumptions # homoscedasticity plot(scaledf0.25_ini) # normal distribution of residuals qqnorm(resid(scaledf0.25_ini)) qqline(resid(scaledf0.25_ini)) # add column for residuals td.25_ini$resid = resid(scaledf0.25_ini) # define sd = 2.5 sd = 2.5*sd(resid(scaledf0.25_ini)) # add column for outliers td.25_ini$outs = ifelse(abs(td.25_ini$resid)>sd, 1, 0) # plot outliers plot(td.25_ini$resid, col = td.25_ini$outs+1, pch = 16, ylim = c(-5, 7)) # remove 21 outliers td.25_ini_clean = td.25_ini[!td.25_ini$outs, ] # plot again plot(td.25_ini_clean$resid, col = td.25_ini_clean$outs+1, pch = 16, ylim = c(-5, 7)) ## model again scaledf0.25_ini_clean <- lmer( scaled.f0.mean ~ c.type * var_age + (1 + c.type | session) + (1 + var_age | word), data = td.25_ini_clean) ## use step to address convergence failure step(scaledf0.25_ini_clean) scaledf0.25_ini_clean_best <- lmer( scaled.f0.mean ~ c.type + var_age + (1 + c.type | session) + (1 | word) + c.type:var_age, data = td.25_ini_clean) ## recheck model assumptions # homoscedasticity plot(scaledf0.25_ini_clean_best) # normal distribution of residuals qqnorm(resid(scaledf0.25_ini_clean_best)) qqline(resid(scaledf0.25_ini_clean_best)) ## get final model outcomes anova(scaledf0.25_ini_clean_best) ## Table 1 # difference in consonant type by speaker group scaledf0_ini_clean_best.emm <- emmeans(scaledf0.25_ini_clean_best, ~c.type|var_age, lmer.df = "satterthwaite") pairs(scaledf0_ini_clean_best.emm) ### ## Medial position: CF0 model ### ## include only stops in medial position td.25_med = td.25 %>% subset(position == "medial") ## full model scaled f0 scaledf0.25_med <- lmer( scaled.f0.mean ~ c.type * var_age + (1 + c.type | session) + (1 + var_age | word), data = td.25_med) ## use step to address convergence failure anova(get_model(step(scaledf0.25_med)))