# R version: 4.2.0 library(tidyverse) library(lme4) library(lmerTest) library(emmeans) lm = read.table("VOT_landmarks.txt") ## get number of included data points per position with(lm, table(var_age, position)) ## compute VOT lm$VOT <- lm$VOT.end - lm$clos.end ## compute duration of word, closure phase & stop lm$word.dur <- lm$word.end - lm$word.start lm$clos.dur <- lm$clos.end - lm$clos.start lm$stop.dur <- lm$clos.dur + lm$VOT ## normalization of VOT lm$normbasis <- lm$word.dur - lm$stop.dur lm$VOT.rel <- lm$VOT/lm$normbasis ### ## Figure 2: VOT plot ### # save this plot with ggsave() as PNG, height = 3.5, width = 7 vot.plot <- ggplot (data = lm) + aes (y = VOT.rel, x = var_age, color = c.type) + scale_x_discrete(limits = c("Bavarian_a", "Bavarian_j", "Standard"), labels=c("Bavarian_a" = "Bavarian, o.", "Bavarian_j" = "Bavarian, y.", "Standard" = "Standard")) + geom_boxplot () + facet_wrap(~position) + scale_fill_grey() + scale_colour_grey(start = 0.6, end = 0.2) + labs(y = "Normalized VOT") + theme_linedraw() + theme(legend.position = "top", legend.title = element_blank(), legend.text = element_text(size = 13), axis.title.x = element_blank(), axis.title.y = element_text(size = 14), axis.text = element_text(size = 12), strip.text = element_text(color = "black", face = "bold", size = 14), strip.background = element_rect(fill = "white")) ### ## VOT model ### ## p-values in decimel options(scipen = 999) ## check structure of factors for modelling # SESSION table(lm$c.type, lm$session) # consonant type is distributed within session table(lm$position, lm$session) # position is distributed within session # WORD table(lm$var_age, lm$word) # speaker grooup is distributed within word ## full model all stops VOT <- lmer( VOT.rel ~ c.type * position * var_age + (1 + c.type + position | session) + (1 + var_age | word), data = lm) #step(VOT) returns full model as most parsimonious ## check basic model assumptions # homoscedasticity plot(VOT) # normal distribution of residuals qqnorm(resid(VOT)) qqline(resid(VOT)) # add column for residuals lm$resid = resid(VOT) # define sd = 2.5 sd = 2.5*sd(resid(VOT)) # add column for outliers lm$outs = ifelse(abs(lm$resid)>sd, 1, 0) # plot outliers plot(lm$resid, col = lm$outs+1, pch = 16, ylim = c(-0.3, 0.3)) # remove 52 outliers lm_clean = lm[!lm$outs, ] # plot again plot(lm_clean$resid, col = lm_clean$outs+1, pch = 16, ylim = c(-0.3, 0.3)) ## model again VOT_clean <- lmer( VOT.rel ~ c.type * position * var_age + (1 + c.type + position | session) + (1 + var_age | word), data = lm_clean) ## recheck model assumptions # homoscedasticity plot(VOT_clean) # normal distribution of residuals qqnorm(resid(VOT_clean)) qqline(resid(VOT_clean)) ## get final model outcomes anova(VOT_clean) ## Table 2 (selected/rearranged values) # difference in consonant type by position and/or speaker group VOT_clean.emm <- emmeans(VOT_clean, ~c.type:var_age|position, lmer.df = "satterthwaite") pairs(VOT_clean.emm) pairs(emmeans(VOT_clean, ~c.type|position + var_age, lmer.df = "satterthwaite"))