# R version: 4.2.0 library(tidyverse) library(ggpubr) library(car) ################ ### DATA ## load final data sets for sonorant and obstruent tokens data_son = read.table("Son_V1C2_trackdata_sd_ws_zh_younger.txt") land_son = read.table("Son_V1C2_landmarks_sd_ws_zh_younger.txt") data_obs = read.table("Obs_V1_trackdata_sd_ws_zh_younger.txt") land_obs = read.table("Obs_V1_landmarks_sd_ws_zh_younger.txt") ## combine sonorant and obstruent data data = rbind(data_obs, data_son) %>% arrange(bundle) ## Table 1 # get number of included productions per category (i.e., data set and variety) data %>% distinct(C2.type, variety, bundle) %>% group_by(C2.type, variety) %>% # for each level of Category count() %>% ungroup() # repeat counting, this time additionally split by syllable structure data %>% distinct(C2.type, variety, V1.quantity, bundle) %>% group_by(C2.type, variety, V1.quantity) %>% # for each level of Category count() %>% ungroup() ################ ### PREANALYSIS ## calculate time-normalized position of V1C2-segment boundary for sonorant data # use original time stamps of times_norm == 0 and == 1 as f0-start/-end vowelend.norm = (land_son$V1.end - land_son$times_orig_zero)/(land_son$times_orig_one - land_son$times_orig_zero) # write into table v.df.norm = tibble(v.end.norm = vowelend.norm, bundle = land_son$bundle, variety = land_son$variety, V1.quantity = land_son$V1.quantity) ## Table 2 # get mean proportional vowel end per category (i.e., variety and syllable structure) vmean.end.norm = v.df.norm %>% group_by(variety, V1.quantity) %>% summarise(mean = mean(v.end.norm)) %>% mutate(C2.type = "Sonorant") %>% ungroup() ## Figure 3 # create character vectors for labels vowel.quant <- c( 'long' = "open/long", 'short' = "closed/short" ) cons.type <- c( 'Sonorant' = "Vowel+sonorant-sequences", 'Obstruent' = "Vowels preceding obstruents" ) # save this plot with ggsave() as PNG, height = 7, width = 10 e <- ggplot(data, aes(t.norm, scaledF0)) e + geom_smooth(aes(linetype = variety), method = "loess", span = 0.75, color = "black") + geom_hline(aes(yintercept = 0), linetype = 3, color = "azure4") + geom_vline(data = vmean.end.norm, aes(xintercept = mean, linetype = variety)) + facet_grid(V1.quantity ~ C2.type, labeller = labeller(V1.quantity = as_labeller(vowel.quant), C2.type = as_labeller(cons.type))) + scale_linetype_manual(values = c("solid", "dotdash", "dashed"), name = "Variety", breaks = c("German_Standard", "Austrian_Standard", "Swiss_Standard"), labels = c("Munich Standard German", "Vienna Standard German", "Zurich Standard German")) + labs(x = "Normalized time", y = "Scaled f0") + theme_linedraw() + guides(linetype = guide_legend(override.aes = list(fill = NA))) + theme(legend.justification = c(0, 0), legend.position = c(0.01, 0.01), legend.text = element_text(size = 17), legend.title = element_blank(), axis.text = element_text(size = 16), axis.title = element_text(size = 20), strip.text = element_text(color = "black", size = 20), strip.background = element_rect(fill = "white")) ################ ### CORRELATION ## add columns for normalization of V1C2 duration (in case of sonorants) and V1 duration (in case of obstruents) land_son$V1C2dur = land_son$C2.end - land_son$V1.start land_son$targdur = land_son$targ.end - land_son$targ.start land_son$V1C2dur.norm = land_son$V1C2dur/land_son$targdur land_obs$V1dur = land_obs$V1.end - land_obs$V1.start land_obs$targdur = land_obs$targ.end - land_obs$targ.start land_obs$V1dur.norm = land_obs$V1dur/land_obs$targdur ## count & remove time points == 0 or == 1 for correlation plots and stats of both data sets with(land_son, table(variety, times_norm_secmaxSmooth == 0)) with(land_son, table(variety, times_norm_secmaxSmooth == 1)) land_son_mid <- land_son %>% subset(times_norm_secmaxSmooth != 0 & times_norm_secmaxSmooth != 1) with(land_obs, table(variety, times_norm_secmaxSmooth == 0)) with(land_obs, table(variety, times_norm_secmaxSmooth == 1)) land_obs_mid <- land_obs %>% subset(times_norm_secmaxSmooth != 0 & times_norm_secmaxSmooth != 1) ## further exclude 4 outliers from sonorant data and 8 outliers from obstruent data land_son_mid_clean = land_son_mid %>% filter(!bundle=="WS_0008_Hoehle_normal_03") %>% filter(!bundle=="SD_0009_Dinner_normal_04") %>% filter(!bundle=="SH_ZH_1030_Hoehle_normal_05") %>% filter(!bundle=="SH_ZH_0026_Hoelle_normal_03") land_obs_mid_clean = land_obs_mid %>% filter(!bundle=="SH_ZH_0025_wieder_normal_02") %>% filter(!bundle=="SH_ZH_0026_Kater_normal_03") %>% filter(!bundle=="SH_ZH_0026_wieder_normal_03") %>% filter(!bundle=="SH_ZH_1029_Bieter_normal_04") %>% filter(!bundle=="SH_ZH_0024_Pute_normal_04") %>% filter(!bundle=="SH_ZH_0023_bitter_normal_02") %>% filter(!bundle=="SH_ZH_0025_bitter_normal_05") %>% filter(!bundle=="SH_ZH_1030_Pudding_normal_04") ## Figure 10 # save this plot with ggsave() as PNG, height = 5, width = 10 # plot sonorant data first e_son <- ggplot(land_son_mid_clean, aes(times_norm_secmaxSmooth, V1C2dur.norm)) corr_son <- e_son + geom_point(aes(color = variety, shape = V1.quantity)) + geom_smooth(aes(color = variety, linetype = V1.quantity), method = lm, se = F) + theme_linedraw() + scale_color_manual(values = c("gray20", "gray60", "gray78"), name = "Variety", breaks = c("German_Standard", "Austrian_Standard", "Swiss_Standard"), labels = c("Munich Standard German", "Vienna Standard German", "Zurich Standard German")) + scale_shape_manual(values = c(16, 17), name = "Syllable structure", breaks = c("long", "short"), labels = c("open/long", "closed/short")) + scale_linetype_manual(values = c("solid", "dashed"), name = "Syllable structure", breaks = c("long", "short"), labels = c("open/long", "closed/short")) + guides(linetype = guide_legend(override.aes= list(color = "black"))) + labs(x = "Norm. time points of smooth maxima", y = "Norm. duration of V+S-sequence") + theme(axis.title = element_text(size = 14), legend.title = element_text(size = 11, face = "italic"), legend.text = element_text(size = 10)) # repeat plotting for obstruent data e_obs <- ggplot(land_obs_mid_clean, aes(times_norm_secmaxSmooth, V1dur.norm)) corr_obs <- e_obs + geom_point(aes(color = variety, shape = V1.quantity)) + geom_smooth(aes(color = variety, linetype = V1.quantity), method = lm, se = F) + theme_linedraw() + scale_color_manual(values = c("gray20", "gray60", "gray78"), name = "Variety", breaks = c("German_Standard", "Austrian_Standard", "Swiss_Standard"), labels = c("Munich Standard German", "Vienna Standard German", "Zurich Standard German")) + scale_shape_manual(values = c(16, 17), name = "Syllable structure", breaks = c("long", "short"), labels = c("open/long", "closed/short")) + scale_linetype_manual(values = c("solid", "dashed"), name = "Syllable structure", breaks = c("long", "short"), labels = c("open/long", "closed/short")) + guides(linetype = guide_legend(override.aes= list(color = "black"))) + labs(x = "Norm. time points of smooth maxima", y = "Norm. duration of VbO") + theme(axis.title = element_text(size = 14), legend.title = element_text(size = 11, face = "italic"), legend.text = element_text(size = 10)) # combine both correlation plots into one figure corr <- ggarrange(corr_son + rremove("xlab"), corr_obs + rremove("xlab"), ncol = 2, nrow = 1, labels = c("A", "B"), common.legend = TRUE, legend="top") annotate_figure(corr, bottom = text_grob("Norm. time points of smooth maxima", size = 14)) ## Table 4 # get Pearson's r for both data sets (per category; i.e., split by variety and syllable structure) land_son_mid_clean %>% group_by(variety, V1.quantity) %>% summarise(corr = cor(times_norm_secmaxSmooth, V1C2dur.norm)) %>% ungroup() land_obs_mid_clean %>% group_by(variety, V1.quantity) %>% summarise(corr = cor(times_norm_secmaxSmooth, V1dur.norm)) %>% ungroup() ## subset both data sets into speaker groups and by underlying syllable structure for correlation tests # sonorant data son_vsg_long = land_son_mid_clean %>% subset(variety == "Austrian_Standard" & V1.quantity == "long") son_vsg_short = land_son_mid_clean %>% subset(variety == "Austrian_Standard" & V1.quantity == "short") son_msg_long = land_son_mid_clean %>% subset(variety == "German_Standard" & V1.quantity == "long") son_msg_short = land_son_mid_clean %>% subset(variety == "German_Standard" & V1.quantity == "short") son_zsg_long = land_son_mid_clean %>% subset(variety == "Swiss_Standard" & V1.quantity == "long") son_zsg_short = land_son_mid_clean %>% subset(variety == "Swiss_Standard" & V1.quantity == "short") # obstruent data obs_vsg_long = land_obs_mid_clean %>% subset(variety == "Austrian_Standard" & V1.quantity == "long") obs_vsg_short = land_obs_mid_clean %>% subset(variety == "Austrian_Standard" & V1.quantity == "short") obs_msg_long = land_obs_mid_clean %>% subset(variety == "German_Standard" & V1.quantity == "long") obs_msg_short = land_obs_mid_clean %>% subset(variety == "German_Standard" & V1.quantity == "short") obs_zsg_long = land_obs_mid_clean %>% subset(variety == "Swiss_Standard" & V1.quantity == "long") obs_zsg_short = land_obs_mid_clean %>% subset(variety == "Swiss_Standard" & V1.quantity == "short") ## check residual (error) variance # sonorant data son_vsg_long.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_vsg_long) ncvTest(son_vsg_long.m) #p-value > 0.05 —> homoscedasticity son_vsg_short.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_vsg_short) ncvTest(son_vsg_short.m) #p-value > 0.05 —> homoscedasticity son_msg_long.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_msg_long) ncvTest(son_msg_long.m) #p-value < 0.05 —> heteroscedasticity son_msg_short.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_msg_short) ncvTest(son_msg_short.m) #p-value > 0.05 —> homoscedasticity son_zsg_long.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_zsg_long) ncvTest(son_zsg_long.m) #p-value > 0.05 —> homoscedasticity son_zsg_short.m <- lm(V1C2dur.norm ~ times_norm_secmaxSmooth, data = son_zsg_short) ncvTest(son_zsg_short.m) #p-value < 0.05 —> heteroscedasticity # obstruent data obs_vsg_long.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_vsg_long) ncvTest(obs_vsg_long.m) #p-value > 0.05 —> homoscedasticity obs_vsg_short.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_vsg_short) ncvTest(obs_vsg_short.m) #p-value > 0.05 —> heteroscedasticity obs_msg_long.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_msg_long) ncvTest(obs_msg_long.m) #p-value < 0.05 —> homoscedasticity obs_msg_short.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_msg_short) ncvTest(obs_msg_short.m) #p-value > 0.05 —> homoscedasticity obs_zsg_long.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_zsg_long) ncvTest(obs_zsg_long.m) #p-value > 0.05 —> homoscedasticity obs_zsg_short.m <- lm(V1dur.norm ~ times_norm_secmaxSmooth, data = obs_zsg_short) ncvTest(obs_zsg_short.m) #p-value < 0.05 —> homoscedasticity ## test significance of correlation # use non-parametric correlation statistics for both data sets due to heteroscedasticity in some subsets (see above) # sonorant data: negative correlation expected, therefore alternative = "less" with(son_vsg_long, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) with(son_vsg_short, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) with(son_msg_long, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) with(son_msg_short, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) with(son_zsg_long, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) with(son_zsg_short, cor.test(times_norm_secmaxSmooth, V1C2dur.norm, method = "spearman", alternative = "less")) # obstruent data: positive correlation expected, therefore alternative = "greater" with(obs_vsg_long, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater")) with(obs_vsg_short, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater")) with(obs_msg_long, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater")) with(obs_msg_short, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater")) with(obs_zsg_long, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater")) with(obs_zsg_short, cor.test(times_norm_secmaxSmooth, V1dur.norm, method = "spearman", alternative = "greater"))