1 Underlying data set

1.1 Speaker groups

# Speakers per variety-by-age cell: reduce the data to one row per speaker
# within each cell, then count those rows.
df %>%
  count(variety, age, speaker) %>%
  count(variety, age)

1.2 Tokens per speech rate

# Number of tokens recorded at each speech rate.
count(df, rate)

1.3 Tokens per target word

# Tokens per target word. Rows are ordered by word group and, within a group,
# by descending token count; the grouping column is dropped after sorting.
df %>%
  count(word_group, target_word) %>%
  arrange(word_group, desc(n)) %>%
  select(-word_group)

1.4 Matrix of target words

# Matrix of target words: one row per word group, one column per quantity
# category, each cell holding the target word for that combination.
df %>%
  # count() is used only to obtain the distinct, sorted combinations;
  # the count column itself is discarded.
  count(word_group, category, target_word) %>%
  select(-n) %>%
  # pivot_wider() replaces the superseded spread(). names_sort = TRUE keeps
  # the category columns in sorted order (as spread() produced) instead of
  # order of first appearance.
  # NOTE(review): if a word group ever had more than one target word in a
  # category, pivot_wider() warns and returns list-columns where spread()
  # raised an error.
  pivot_wider(
    names_from = category,
    values_from = target_word,
    names_sort = TRUE
  )

1.5 Types and tokens per quantity category

# Types: number of distinct target words in each quantity category.
df %>%
  count(category, target_word) %>%
  count(category)

# Tokens: number of observations in each quantity category.
count(df, category)

2 Closure-norm (word-normalized stop closure duration)

2.1 Plot (Figure 4.1)

# Figure 4.1: box plots of closure duration relative to word duration
# (dur_c / dur_word) for each quantity category. Panels are arranged with
# speaker groups in columns and speech rates in rows; boxes are filled by
# `legal`.
ggplot(df, aes(x = category, y = dur_c / dur_word)) +
  geom_boxplot(aes(fill = legal)) +
  facet_grid(
    rows = vars(rate),
    cols = vars(speaker_group_label)
  ) +
  # Two-line tick labels: category name plus an example word.
  scale_x_discrete(
    labels = c(
      "V:C:" = "V:C:\ne.g. Bieter",
      "V:C"  = "V:C\ne.g. wieder",
      "VC"   = "VC\ne.g. Widder",
      "VC:"  = "VC:\ne.g. bitter"
    )
  ) +
  scale_fill_manual(
    name = "Phonotactically",
    values = c("red", "green")
  ) +
  xlab("Quantity category") +
  ylab(bquote("closure"["norm"])) +
  # Rotate the tick labels so the two-line labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

2.2 Model: Normalized closure duration

# Raise both emmeans degrees-of-freedom size limits to the number of rows in
# `df`, so that the adjustments are not skipped because of data size.
emm_options(
  pbkrtest.limit = nrow(df),
  lmerTest.limit = nrow(df)
)

# Linear mixed model of word-normalized closure duration (dur_c / dur_word).
#   Fixed effects:  full factorial of speaker_group, category and rate.
#   Random effects: by-speaker intercepts with slopes for category and rate;
#                   by-target-word intercepts with slopes for speaker_group
#                   and rate.
# NOTE(review): the fit is reported as singular (see the message printed
# below), so the random-effects structure may be richer than the data
# support.
general_closure.lmer <- df %>%
  # Fix the level order explicitly. The factor is declared as ordered, which
  # under R's default contrast settings gives polynomial contrasts.
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  mutate(
    speaker_group = factor(
      speaker_group,
      levels = c("Y_SD", "Y_WB", "O_WB"),
      ordered = TRUE
    )
  ) %>%
  lmer(
    data = .,
    dur_c / dur_word ~ speaker_group * category * rate +
      (category + rate | speaker) +
      (speaker_group + rate | target_word)
  )
## boundary (singular) fit: see help('isSingular')
# ANOVA table for the fixed effects of the closure model, stored for reuse.
general_closure.anova <- anova(general_closure.lmer)

# Show the table.
print(general_closure.anova)
## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq  Mean Sq NumDF  DenDF F value    Pr(>F)
## speaker_group               0.043382 0.021691     2   42.1 12.2464 6.433e-05
## category                    0.116567 0.038856     3   22.9 21.9373 6.272e-07
## rate                        0.008194 0.008194     1   36.8  4.6263  0.038116
## speaker_group:category      0.048138 0.008023     6   32.3  4.5297  0.001943
## speaker_group:rate          0.023508 0.011754     2  110.5  6.6361  0.001898
## category:rate               0.008341 0.002780     3   36.4  1.5697  0.213366
## speaker_group:category:rate 0.011324 0.001887     6 7117.4  1.0656  0.380713
##                                
## speaker_group               ***
## category                    ***
## rate                        *  
## speaker_group:category      ** 
## speaker_group:rate          ** 
## category:rate                  
## speaker_group:category:rate    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2.1 Pairwise comparisons for Section 4.3.1 (Closure duration)

# Estimated marginal means and pairwise speaker-group contrasts, computed
# separately within each quantity category.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ speaker_group | category,
  lmer.df = "satterthwaite"
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## category = V:C:
##  speaker_group emmean     SE   df lower.CL upper.CL
##  Y_SD           0.140 0.0180 24.4   0.1023    0.177
##  Y_WB           0.114 0.0188 24.1   0.0752    0.153
##  O_WB           0.146 0.0198 23.8   0.1052    0.187
## 
## category = V:C::
##  speaker_group emmean     SE   df lower.CL upper.CL
##  Y_SD           0.210 0.0209 31.1   0.1671    0.252
##  Y_WB           0.255 0.0217 30.3   0.2112    0.300
##  O_WB           0.265 0.0227 29.3   0.2185    0.311
## 
## category = VC:
##  speaker_group emmean     SE   df lower.CL upper.CL
##  Y_SD           0.148 0.0246 27.2   0.0972    0.198
##  Y_WB           0.156 0.0255 26.7   0.1034    0.208
##  O_WB           0.248 0.0268 26.1   0.1927    0.303
## 
## category = VC::
##  speaker_group emmean     SE   df lower.CL upper.CL
##  Y_SD           0.271 0.0189 35.2   0.2325    0.309
##  Y_WB           0.335 0.0196 34.2   0.2952    0.375
##  O_WB           0.342 0.0204 32.9   0.3004    0.383
## 
## Results are averaged over the levels of: rate 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## 
## $contrasts
## category = V:C:
##  contrast    estimate     SE   df t.ratio p.value
##  Y_SD - Y_WB  0.02562 0.0164 29.8   1.561  0.2778
##  Y_SD - O_WB -0.00649 0.0181 28.0  -0.357  0.9322
##  Y_WB - O_WB -0.03211 0.0113 39.9  -2.836  0.0191
## 
## category = V:C::
##  contrast    estimate     SE   df t.ratio p.value
##  Y_SD - Y_WB -0.04563 0.0207 42.6  -2.200  0.0827
##  Y_SD - O_WB -0.05506 0.0224 39.7  -2.461  0.0471
##  Y_WB - O_WB -0.00942 0.0163 44.3  -0.578  0.8323
## 
## category = VC:
##  contrast    estimate     SE   df t.ratio p.value
##  Y_SD - Y_WB -0.00820 0.0233 36.3  -0.352  0.9342
##  Y_SD - O_WB -0.10019 0.0255 33.5  -3.933  0.0011
##  Y_WB - O_WB -0.09199 0.0172 45.1  -5.345  <.0001
## 
## category = VC::
##  contrast    estimate     SE   df t.ratio p.value
##  Y_SD - Y_WB -0.06393 0.0196 46.2  -3.265  0.0057
##  Y_SD - O_WB -0.07098 0.0209 44.4  -3.399  0.0040
##  Y_WB - O_WB -0.00705 0.0161 41.4  -0.437  0.9003
## 
## Results are averaged over the levels of: rate 
## Degrees-of-freedom method: satterthwaite 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Estimated marginal means and the rate contrast, computed separately within
# each speaker group.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ rate | speaker_group,
  lmer.df = "satterthwaite"
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## speaker_group = Y_SD:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.188 0.0108 36.3    0.166    0.210
##  fast    0.196 0.0117 36.3    0.172    0.220
## 
## speaker_group = Y_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.216 0.0113 34.8    0.193    0.239
##  fast    0.214 0.0119 35.7    0.190    0.238
## 
## speaker_group = O_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.249 0.0119 33.4    0.225    0.273
##  fast    0.251 0.0123 34.5    0.226    0.276
## 
## Results are averaged over the levels of: category 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## 
## $contrasts
## speaker_group = Y_SD:
##  contrast      estimate      SE   df t.ratio p.value
##  normal - fast -0.00777 0.00202 91.2  -3.850  0.0002
## 
## speaker_group = Y_WB:
##  contrast      estimate      SE   df t.ratio p.value
##  normal - fast  0.00191 0.00203 92.5   0.945  0.3469
## 
## speaker_group = O_WB:
##  contrast      estimate      SE   df t.ratio p.value
##  normal - fast -0.00263 0.00203 92.5  -1.299  0.1971
## 
## Results are averaged over the levels of: category 
## Degrees-of-freedom method: satterthwaite

2.2.2 Pairwise comparisons for Section 4.3.3 (Subsection: Lenition in fast speech)

# Estimated marginal means and the rate contrast within every combination of
# quantity category and speaker group.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ rate | category | speaker_group,
  lmer.df = "satterthwaite"
)
## $emmeans
## category = V:C, speaker_group = Y_SD:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.135 0.0175 24.9   0.0988    0.171
##  fast    0.144 0.0187 24.3   0.1056    0.183
## 
## category = V:C:, speaker_group = Y_SD:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.207 0.0203 31.7   0.1657    0.248
##  fast    0.213 0.0217 30.9   0.1683    0.257
## 
## category = VC, speaker_group = Y_SD:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.145 0.0238 27.6   0.0957    0.193
##  fast    0.151 0.0256 27.3   0.0983    0.203
## 
## category = VC:, speaker_group = Y_SD:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.266 0.0184 35.8   0.2285    0.303
##  fast    0.276 0.0196 35.0   0.2362    0.316
## 
## category = V:C, speaker_group = Y_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.112 0.0185 24.5   0.0742    0.151
##  fast    0.115 0.0192 24.2   0.0758    0.155
## 
## category = V:C:, speaker_group = Y_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.256 0.0213 30.5   0.2120    0.299
##  fast    0.255 0.0222 30.4   0.2101    0.301
## 
## category = VC, speaker_group = Y_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.159 0.0251 26.9   0.1076    0.211
##  fast    0.153 0.0262 27.0   0.0988    0.206
## 
## category = VC:, speaker_group = Y_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.337 0.0192 34.4   0.2978    0.376
##  fast    0.333 0.0200 34.4   0.2923    0.374
## 
## category = V:C, speaker_group = O_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.143 0.0196 24.1   0.1020    0.183
##  fast    0.149 0.0201 23.9   0.1081    0.191
## 
## category = V:C:, speaker_group = O_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.261 0.0225 29.4   0.2150    0.307
##  fast    0.269 0.0230 29.6   0.2217    0.316
## 
## category = VC, speaker_group = O_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.249 0.0266 26.1   0.1942    0.303
##  fast    0.247 0.0273 26.4   0.1908    0.303
## 
## category = VC:, speaker_group = O_WB:
##  rate   emmean     SE   df lower.CL upper.CL
##  normal  0.343 0.0202 32.9   0.3020    0.384
##  fast    0.341 0.0207 33.3   0.2986    0.383
## 
## Degrees-of-freedom method: satterthwaite 
## Confidence level used: 0.95 
## 
## $contrasts
## category = V:C, speaker_group = Y_SD:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.009278 0.00356 180  -2.604  0.0100
## 
## category = V:C:, speaker_group = Y_SD:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.005457 0.00383 183  -1.424  0.1560
## 
## category = VC, speaker_group = Y_SD:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.006156 0.00463 184  -1.329  0.1855
## 
## category = VC:, speaker_group = Y_SD:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.010189 0.00332 171  -3.071  0.0025
## 
## category = V:C, speaker_group = Y_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.003083 0.00364 196  -0.848  0.3977
## 
## category = V:C:, speaker_group = Y_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast  0.000174 0.00383 182   0.045  0.9638
## 
## category = VC, speaker_group = Y_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast  0.006659 0.00463 184   1.438  0.1522
## 
## category = VC:, speaker_group = Y_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast  0.003909 0.00331 170   1.179  0.2398
## 
## category = V:C, speaker_group = O_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.006968 0.00359 187  -1.939  0.0541
## 
## category = V:C:, speaker_group = O_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast -0.007850 0.00383 184  -2.047  0.0420
## 
## category = VC, speaker_group = O_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast  0.001968 0.00465 187   0.423  0.6727
## 
## category = VC:, speaker_group = O_WB:
##  contrast       estimate      SE  df t.ratio p.value
##  normal - fast  0.002322 0.00333 173   0.698  0.4862
## 
## Degrees-of-freedom method: satterthwaite

3 VOT-norm (word-normalized voice onset time)

3.1 Plot (Figure 4.2)

# Figure 4.2: box plots of aspiration duration relative to word duration
# (dur_aspiration / dur_word) for each quantity category. Panels are arranged
# with speaker groups in columns and speech rates in rows; boxes are filled
# by `legal`.
ggplot(df, aes(x = category, y = dur_aspiration / dur_word)) +
  geom_boxplot(aes(fill = legal)) +
  facet_grid(
    rows = vars(rate),
    cols = vars(speaker_group_label)
  ) +
  # Two-line tick labels: category name plus an example word.
  scale_x_discrete(
    labels = c(
      "V:C:" = "V:C:\ne.g. Bieter",
      "V:C"  = "V:C\ne.g. wieder",
      "VC"   = "VC\ne.g. Widder",
      "VC:"  = "VC:\ne.g. bitter"
    )
  ) +
  scale_fill_manual(
    name = "Phonotactically",
    values = c("red", "green")
  ) +
  xlab("Quantity category") +
  ylab(bquote("VOT"["norm"])) +
  # Rotate the tick labels so the two-line labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))