# Descriptive overview of the data set. Each expression is auto-printed.

# Rows per variety x age cell after collapsing to one row per speaker,
# i.e. how many speakers fall into each cell.
df %>%
  count(variety, age, speaker) %>%
  count(variety, age)

# Number of tokens per speech rate.
df %>%
  count(rate)

# Token count per target word, ordered by word group and then by descending
# frequency; the word-group column is dropped once the rows are sorted.
df %>%
  count(word_group, target_word) %>%
  arrange(word_group, desc(n)) %>%
  select(-word_group)

# Target words spread into one column per quantity category, with rows keyed
# by word group (the counts themselves are discarded).
df %>%
  count(word_group, category, target_word) %>%
  select(-n) %>%
  spread(category, target_word)

# Number of distinct target words per category.
df %>%
  count(category, target_word) %>%
  count(category)

# Number of tokens per category.
df %>%
  count(category)
# Box plots of closure duration relative to word duration (dur_c / dur_word)
# per quantity category. Columns of the facet grid are speaker groups, rows
# are speech rates; boxes are filled by the `legal` column.
ggplot(df, aes(x = category, y = dur_c / dur_word)) +
  geom_boxplot(aes(fill = legal)) +
  facet_grid(rows = vars(rate),
             cols = vars(speaker_group_label)) +
  labs(x = "Quantity category",
       y = bquote("closure"["norm"])) +
  # Each category label gets an example word on a second line.
  scale_x_discrete(labels = c("V:C:" = "V:C:\ne.g. Bieter",
                              "V:C" = "V:C\ne.g. wieder",
                              "VC" = "VC\ne.g. Widder",
                              "VC:" = "VC:\ne.g. bitter")) +
  # Unnamed colours are assigned to the fill values in scale order.
  scale_fill_manual(name = "Phonotactically",
                    values = c("red", "green")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Raise the emmeans row limits to the size of the data set so that the
# Kenward-Roger / Satterthwaite degrees-of-freedom computations are not
# skipped for being "too large".
emm_options(pbkrtest.limit = nrow(df),
            lmerTest.limit = nrow(df))

# Mixed model of normalised closure duration (dur_c / dur_word).
#   Fixed effects : speaker_group x category x rate (full factorial)
#   Random effects: by-speaker intercept plus slopes for category and rate;
#                   by-target-word intercept plus slopes for speaker_group
#                   and rate.
# speaker_group is recoded as an ordered factor (Y_SD < Y_WB < O_WB).
# NOTE(review): under R's default `contrasts` option an ordered factor is
# coded with polynomial contrasts -- confirm that this is intended.
general_closure.lmer <- df %>%
  mutate(speaker_group = factor(speaker_group,
                                ordered = TRUE, # `T` can be reassigned; `TRUE` cannot
                                levels = c("Y_SD", "Y_WB", "O_WB"))) %>%
  lmer(data = .,
       dur_c / dur_word
       ~ speaker_group * category * rate +
         (category + rate | speaker) +
         (speaker_group + rate | target_word))
## boundary (singular) fit: see help('isSingular')
# ANOVA table for the fixed effects of the closure model; stored for later
# use and printed explicitly.
general_closure.anova <- anova(general_closure.lmer)
print(general_closure.anova)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## speaker_group 0.043382 0.021691 2 42.1 12.2464 6.433e-05
## category 0.116567 0.038856 3 22.9 21.9373 6.272e-07
## rate 0.008194 0.008194 1 36.8 4.6263 0.038116
## speaker_group:category 0.048138 0.008023 6 32.3 4.5297 0.001943
## speaker_group:rate 0.023508 0.011754 2 110.5 6.6361 0.001898
## category:rate 0.008341 0.002780 3 36.4 1.5697 0.213366
## speaker_group:category:rate 0.011324 0.001887 6 7117.4 1.0656 0.380713
##
## speaker_group ***
## category ***
## rate *
## speaker_group:category **
## speaker_group:rate **
## category:rate
## speaker_group:category:rate
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimated marginal means of the closure model and all pairwise
# speaker-group contrasts, computed separately within each category.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ speaker_group | category,
  lmer.df = "satterthwaite"
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## category = V:C:
## speaker_group emmean SE df lower.CL upper.CL
## Y_SD 0.140 0.0180 24.4 0.1023 0.177
## Y_WB 0.114 0.0188 24.1 0.0752 0.153
## O_WB 0.146 0.0198 23.8 0.1052 0.187
##
## category = V:C::
## speaker_group emmean SE df lower.CL upper.CL
## Y_SD 0.210 0.0209 31.1 0.1671 0.252
## Y_WB 0.255 0.0217 30.3 0.2112 0.300
## O_WB 0.265 0.0227 29.3 0.2185 0.311
##
## category = VC:
## speaker_group emmean SE df lower.CL upper.CL
## Y_SD 0.148 0.0246 27.2 0.0972 0.198
## Y_WB 0.156 0.0255 26.7 0.1034 0.208
## O_WB 0.248 0.0268 26.1 0.1927 0.303
##
## category = VC::
## speaker_group emmean SE df lower.CL upper.CL
## Y_SD 0.271 0.0189 35.2 0.2325 0.309
## Y_WB 0.335 0.0196 34.2 0.2952 0.375
## O_WB 0.342 0.0204 32.9 0.3004 0.383
##
## Results are averaged over the levels of: rate
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
##
## $contrasts
## category = V:C:
## contrast estimate SE df t.ratio p.value
## Y_SD - Y_WB 0.02562 0.0164 29.8 1.561 0.2778
## Y_SD - O_WB -0.00649 0.0181 28.0 -0.357 0.9322
## Y_WB - O_WB -0.03211 0.0113 39.9 -2.836 0.0191
##
## category = V:C::
## contrast estimate SE df t.ratio p.value
## Y_SD - Y_WB -0.04563 0.0207 42.6 -2.200 0.0827
## Y_SD - O_WB -0.05506 0.0224 39.7 -2.461 0.0471
## Y_WB - O_WB -0.00942 0.0163 44.3 -0.578 0.8323
##
## category = VC:
## contrast estimate SE df t.ratio p.value
## Y_SD - Y_WB -0.00820 0.0233 36.3 -0.352 0.9342
## Y_SD - O_WB -0.10019 0.0255 33.5 -3.933 0.0011
## Y_WB - O_WB -0.09199 0.0172 45.1 -5.345 <.0001
##
## category = VC::
## contrast estimate SE df t.ratio p.value
## Y_SD - Y_WB -0.06393 0.0196 46.2 -3.265 0.0057
## Y_SD - O_WB -0.07098 0.0209 44.4 -3.399 0.0040
## Y_WB - O_WB -0.00705 0.0161 41.4 -0.437 0.9003
##
## Results are averaged over the levels of: rate
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 3 estimates
# Estimated marginal means of the closure model and the normal-vs-fast
# contrast, computed separately within each speaker group.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ rate | speaker_group,
  lmer.df = "satterthwaite"
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## speaker_group = Y_SD:
## rate emmean SE df lower.CL upper.CL
## normal 0.188 0.0108 36.3 0.166 0.210
## fast 0.196 0.0117 36.3 0.172 0.220
##
## speaker_group = Y_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.216 0.0113 34.8 0.193 0.239
## fast 0.214 0.0119 35.7 0.190 0.238
##
## speaker_group = O_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.249 0.0119 33.4 0.225 0.273
## fast 0.251 0.0123 34.5 0.226 0.276
##
## Results are averaged over the levels of: category
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
##
## $contrasts
## speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.00777 0.00202 91.2 -3.850 0.0002
##
## speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.00191 0.00203 92.5 0.945 0.3469
##
## speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.00263 0.00203 92.5 -1.299 0.1971
##
## Results are averaged over the levels of: category
## Degrees-of-freedom method: satterthwaite
# Normal-vs-fast contrast of the closure model within every combination of
# category and speaker group.
emmeans(
  object = general_closure.lmer,
  specs = pairwise ~ rate | category | speaker_group,
  lmer.df = "satterthwaite"
)
## $emmeans
## category = V:C, speaker_group = Y_SD:
## rate emmean SE df lower.CL upper.CL
## normal 0.135 0.0175 24.9 0.0988 0.171
## fast 0.144 0.0187 24.3 0.1056 0.183
##
## category = V:C:, speaker_group = Y_SD:
## rate emmean SE df lower.CL upper.CL
## normal 0.207 0.0203 31.7 0.1657 0.248
## fast 0.213 0.0217 30.9 0.1683 0.257
##
## category = VC, speaker_group = Y_SD:
## rate emmean SE df lower.CL upper.CL
## normal 0.145 0.0238 27.6 0.0957 0.193
## fast 0.151 0.0256 27.3 0.0983 0.203
##
## category = VC:, speaker_group = Y_SD:
## rate emmean SE df lower.CL upper.CL
## normal 0.266 0.0184 35.8 0.2285 0.303
## fast 0.276 0.0196 35.0 0.2362 0.316
##
## category = V:C, speaker_group = Y_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.112 0.0185 24.5 0.0742 0.151
## fast 0.115 0.0192 24.2 0.0758 0.155
##
## category = V:C:, speaker_group = Y_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.256 0.0213 30.5 0.2120 0.299
## fast 0.255 0.0222 30.4 0.2101 0.301
##
## category = VC, speaker_group = Y_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.159 0.0251 26.9 0.1076 0.211
## fast 0.153 0.0262 27.0 0.0988 0.206
##
## category = VC:, speaker_group = Y_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.337 0.0192 34.4 0.2978 0.376
## fast 0.333 0.0200 34.4 0.2923 0.374
##
## category = V:C, speaker_group = O_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.143 0.0196 24.1 0.1020 0.183
## fast 0.149 0.0201 23.9 0.1081 0.191
##
## category = V:C:, speaker_group = O_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.261 0.0225 29.4 0.2150 0.307
## fast 0.269 0.0230 29.6 0.2217 0.316
##
## category = VC, speaker_group = O_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.249 0.0266 26.1 0.1942 0.303
## fast 0.247 0.0273 26.4 0.1908 0.303
##
## category = VC:, speaker_group = O_WB:
## rate emmean SE df lower.CL upper.CL
## normal 0.343 0.0202 32.9 0.3020 0.384
## fast 0.341 0.0207 33.3 0.2986 0.383
##
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
##
## $contrasts
## category = V:C, speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.009278 0.00356 180 -2.604 0.0100
##
## category = V:C:, speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.005457 0.00383 183 -1.424 0.1560
##
## category = VC, speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.006156 0.00463 184 -1.329 0.1855
##
## category = VC:, speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.010189 0.00332 171 -3.071 0.0025
##
## category = V:C, speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.003083 0.00364 196 -0.848 0.3977
##
## category = V:C:, speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.000174 0.00383 182 0.045 0.9638
##
## category = VC, speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.006659 0.00463 184 1.438 0.1522
##
## category = VC:, speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.003909 0.00331 170 1.179 0.2398
##
## category = V:C, speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.006968 0.00359 187 -1.939 0.0541
##
## category = V:C:, speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast -0.007850 0.00383 184 -2.047 0.0420
##
## category = VC, speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.001968 0.00465 187 0.423 0.6727
##
## category = VC:, speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## normal - fast 0.002322 0.00333 173 0.698 0.4862
##
## Degrees-of-freedom method: satterthwaite
# Box plots of aspiration duration relative to word duration
# (dur_aspiration / dur_word) per quantity category. Columns of the facet
# grid are speaker groups, rows are speech rates; boxes are filled by the
# `legal` column.
ggplot(df, aes(x = category, y = dur_aspiration / dur_word)) +
  geom_boxplot(aes(fill = legal)) +
  facet_grid(rows = vars(rate),
             cols = vars(speaker_group_label)) +
  labs(x = "Quantity category",
       y = bquote("VOT"["norm"])) +
  # Each category label gets an example word on a second line.
  scale_x_discrete(labels = c("V:C:" = "V:C:\ne.g. Bieter",
                              "V:C" = "V:C\ne.g. wieder",
                              "VC" = "VC\ne.g. Widder",
                              "VC:" = "VC:\ne.g. bitter")) +
  # Unnamed colours are assigned to the fill values in scale order.
  scale_fill_manual(name = "Phonotactically",
                    values = c("red", "green")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Raise the emmeans row limits to the size of the data set so that the
# Kenward-Roger / Satterthwaite degrees-of-freedom computations are not
# skipped for being "too large".
emm_options(pbkrtest.limit = nrow(df),
            lmerTest.limit = nrow(df))

# Mixed model of normalised VOT (dur_aspiration / dur_word), with the same
# fixed- and random-effects structure as the closure model:
#   Fixed effects : speaker_group x category x rate (full factorial)
#   Random effects: by-speaker intercept plus slopes for category and rate;
#                   by-target-word intercept plus slopes for speaker_group
#                   and rate.
# speaker_group is recoded as an ordered factor (Y_SD < Y_WB < O_WB).
# NOTE(review): the recorded run of this fit reported a convergence warning
# (max|grad| = 0.014 against tol = 0.002); the model specification is left
# unchanged here.
normalized_vot.lmer <- df %>%
  mutate(speaker_group = factor(speaker_group,
                                ordered = TRUE, # `T` can be reassigned; `TRUE` cannot
                                levels = c("Y_SD", "Y_WB", "O_WB"))) %>%
  lmer(data = .,
       dur_aspiration / dur_word
       ~ speaker_group * category * rate +
         (category + rate | speaker) +
         (speaker_group + rate | target_word))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.014128 (tol = 0.002, component 1)
# ANOVA table for the fixed effects of the VOT model; stored for later use
# and printed explicitly.
normalized_vot.anova <- anova(normalized_vot.lmer)
print(normalized_vot.anova)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## speaker_group 0.001735 0.0008674 2 32.6 0.7842 0.46488
## category 0.035573 0.0118577 3 26.5 10.7204 8.644e-05
## rate 0.000610 0.0006097 1 106.8 0.5513 0.45943
## speaker_group:category 0.046282 0.0077137 6 34.6 6.9739 6.417e-05
## speaker_group:rate 0.004183 0.0020914 2 192.2 1.8908 0.15374
## category:rate 0.008637 0.0028789 3 197.2 2.6028 0.05318
## speaker_group:category:rate 0.004970 0.0008283 6 7117.0 0.7489 0.61026
##
## speaker_group
## category ***
## rate
## speaker_group:category ***
## speaker_group:rate
## category:rate .
## speaker_group:category:rate
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimated marginal means of the VOT model and all pairwise category
# contrasts, computed separately within each speaker group.
emmeans(
  object = normalized_vot.lmer,
  specs = pairwise ~ category | speaker_group,
  lmer.df = "satterthwaite"
)
## NOTE: Results may be misleading due to involvement in interactions
## $emmeans
## speaker_group = Y_SD:
## category emmean SE df lower.CL upper.CL
## V:C 0.0319 0.00780 45.4 0.0162 0.0476
## V:C: 0.0966 0.00946 44.9 0.0775 0.1156
## VC 0.0424 0.00998 45.0 0.0224 0.0625
## VC: 0.0925 0.00805 45.2 0.0763 0.1088
##
## speaker_group = Y_WB:
## category emmean SE df lower.CL upper.CL
## V:C 0.0358 0.00755 45.5 0.0206 0.0510
## V:C: 0.0637 0.00921 43.7 0.0451 0.0822
## VC 0.0542 0.00961 45.2 0.0348 0.0735
## VC: 0.0666 0.00783 44.1 0.0508 0.0824
##
## speaker_group = O_WB:
## category emmean SE df lower.CL upper.CL
## V:C 0.0352 0.00956 40.1 0.0159 0.0546
## V:C: 0.0585 0.01119 44.6 0.0359 0.0810
## VC 0.0761 0.01237 38.5 0.0510 0.1011
## VC: 0.0702 0.00957 44.1 0.0509 0.0895
##
## Results are averaged over the levels of: rate
## Degrees-of-freedom method: satterthwaite
## Confidence level used: 0.95
##
## $contrasts
## speaker_group = Y_SD:
## contrast estimate SE df t.ratio p.value
## V:C - V:C: -0.06469 0.00987 40.0 -6.554 <.0001
## V:C - VC -0.01058 0.01054 36.2 -1.003 0.7484
## V:C - VC: -0.06067 0.00932 40.9 -6.513 <.0001
## V:C: - VC 0.05411 0.01107 37.6 4.889 0.0001
## V:C: - VC: 0.00402 0.00795 24.7 0.506 0.9570
## VC - VC: -0.05009 0.01007 34.4 -4.972 0.0001
##
## speaker_group = Y_WB:
## contrast estimate SE df t.ratio p.value
## V:C - V:C: -0.02787 0.00942 41.6 -2.960 0.0251
## V:C - VC -0.01840 0.01000 37.9 -1.841 0.2707
## V:C - VC: -0.03081 0.00890 42.4 -3.461 0.0066
## V:C: - VC 0.00947 0.01051 39.3 0.901 0.8045
## V:C: - VC: -0.00294 0.00740 25.2 -0.397 0.9784
## VC - VC: -0.01240 0.00952 36.0 -1.303 0.5673
##
## speaker_group = O_WB:
## contrast estimate SE df t.ratio p.value
## V:C - V:C: -0.02325 0.01279 32.2 -1.818 0.2837
## V:C - VC -0.04085 0.01397 29.3 -2.924 0.0317
## V:C - VC: -0.03500 0.01200 33.0 -2.916 0.0306
## V:C: - VC -0.01760 0.01454 30.3 -1.210 0.6256
## V:C: - VC: -0.01175 0.01120 22.8 -1.049 0.7231
## VC - VC: 0.00585 0.01348 28.1 0.434 0.9721
##
## Results are averaged over the levels of: rate
## Degrees-of-freedom method: satterthwaite
## P value adjustment: tukey method for comparing a family of 4 estimates
# Candidate fortis/lenis category boundaries, compared against dur_c.
possible_boundaries <- seq(0, 280, by = 1)

# For every speaker in `data` and every candidate boundary, compute the share
# of that speaker's tokens at `target_rate` that the boundary classifies
# correctly. A token counts as correct when it is Fortis with
# dur_c >= boundary, or Lenis with dur_c < boundary.
#
# Tokens whose consonant_strength is neither "Fortis" nor "Lenis", or whose
# dur_c is missing, are ignored. A speaker without any usable token at
# `target_rate` gets NA for every boundary.
#
# Counting directly (instead of spreading "correct"/"incorrect" labels into
# columns) keeps the computation working when a boundary classifies all of a
# speaker's tokens the same way, where one of the two columns would not exist.
#
# Returns a tibble with columns speaker, boundary and classified_correctly,
# one row per speaker x boundary combination.
boundary_accuracy_per_speaker <- function(data, target_rate, boundaries) {
  usable <- which(
    data$rate == target_rate &
      data$consonant_strength %in% c("Fortis", "Lenis") &
      !is.na(data$dur_c)
  )

  # Split the relevant columns by speaker once, rather than re-filtering the
  # full data set for every speaker x boundary combination.
  token_speaker <- as.character(data$speaker[usable])
  dur_c_by_speaker <- split(data$dur_c[usable], token_speaker)
  is_fortis_by_speaker <- split(
    data$consonant_strength[usable] == "Fortis",
    token_speaker
  )

  accuracy <- tidyr::crossing(speaker = data$speaker, boundary = boundaries)
  grid_speaker <- as.character(accuracy$speaker)
  grid_boundary <- accuracy$boundary

  accuracy$classified_correctly <- vapply(
    seq_len(nrow(accuracy)),
    function(i) {
      durations <- dur_c_by_speaker[[grid_speaker[i]]]
      if (is.null(durations)) {
        return(NA_real_)
      }
      predicted_fortis <- durations >= grid_boundary[i]
      n_correct <- sum(predicted_fortis == is_fortis_by_speaker[[grid_speaker[i]]])
      n_correct / length(durations)
    },
    numeric(1)
  )

  accuracy
}

# Reduce the speaker x boundary table to one row per speaker: the lowest and
# highest boundary that reach the speaker's maximum accuracy, their midpoint
# (reported as `boundary`), and that accuracy. `rate_label` is stored in a
# `rate` column. top_n() keeps ties, which is what makes the min/max range
# meaningful.
best_boundary_per_speaker <- function(boundary_data, rate_label) {
  boundary_data %>%
    group_by(speaker) %>%
    top_n(1, classified_correctly) %>%
    summarise(min_boundary = min(boundary),
              max_boundary = max(boundary),
              boundary = mean(c(min(boundary),
                                max(boundary))),
              classified_correctly = first(classified_correctly)) %>%
    mutate(rate = rate_label)
}

#### Normal speech
all_boundary_data_normal <- boundary_accuracy_per_speaker(
  df, "normal", possible_boundaries
)
best_boundary_data_normal <- best_boundary_per_speaker(
  all_boundary_data_normal, "normal"
)

#### The same again for fast speech
all_boundary_data_fast <- boundary_accuracy_per_speaker(
  df, "fast", possible_boundaries
)
best_boundary_data_fast <- best_boundary_per_speaker(
  all_boundary_data_fast, "fast"
)
# Stack the per-speaker optimal boundaries of both speech rates.
best_boundary_data_all <- rbind(best_boundary_data_normal,
                                best_boundary_data_fast)

# Attach speaker metadata and plot each speaker's optimal boundary at both
# rates, connected by a line, with one panel per speaker group.
# The metadata is reduced to its distinct rows *before* joining: `df` holds
# one row per token, so joining against it directly would multiply every
# boundary row by the speaker's token count only to de-duplicate afterwards.
best_boundary_data_all %>%
  left_join(distinct(select(df, speaker, age, variety, speaker_group_label)),
            by = "speaker") %>%
  distinct() %>%
  ggplot() +
  facet_wrap(vars(speaker_group_label)) +
  # reorder() with desc() puts the rates in reverse sort order on the x axis.
  aes(x = reorder(rate, desc(rate)), y = boundary, group = speaker) +
  geom_point() +
  geom_line() +
  ylab("Optimal category boundary [ms]") +
  xlab("Speech rate")
# Change in dispersion between speech rates, per speaker AND target word.
# dur_c_norm_word is brought into long format (columns `measure`/`value`),
# its coefficient of variation (sd / mean) is computed per cell and rate,
# the rates are spread into columns, and the difference fast - normal is
# stored as `change_in_dispersion`.
df_dispersion_difference_with_target_word <- df %>%
  gather(key = "measure", value = "value", dur_c_norm_word) %>%
  group_by(
    measure, variety, age, speaker_group, speaker_group_label,
    category, legal, rate, speaker, target_word
  ) %>%
  summarise(
    coefficient_of_variation =
      sd(value, na.rm = TRUE) / mean(value, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  spread(rate, coefficient_of_variation) %>%
  mutate(change_in_dispersion = fast - normal)
## `summarise()` has grouped output by 'measure', 'variety', 'age',
## 'speaker_group', 'speaker_group_label', 'category', 'legal', 'rate', 'speaker'.
## You can override using the `.groups` argument.
# Change in dispersion between speech rates, per speaker (pooled over target
# words). dur_c_norm_word is brought into long format (columns
# `measure`/`value`), its coefficient of variation (sd / mean) is computed
# per cell and rate, the rates are spread into columns, and the difference
# fast - normal is stored as `change_in_dispersion`.
df_dispersion_difference_without_target_word <- df %>%
  gather(key = "measure", value = "value", dur_c_norm_word) %>%
  group_by(
    measure, variety, age, speaker_group, speaker_group_label,
    category, legal, rate, speaker
  ) %>%
  summarise(
    coefficient_of_variation =
      sd(value, na.rm = TRUE) / mean(value, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  spread(rate, coefficient_of_variation) %>%
  mutate(change_in_dispersion = fast - normal)
## `summarise()` has grouped output by 'measure', 'variety', 'age',
## 'speaker_group', 'speaker_group_label', 'category', 'legal', 'rate'. You can
## override using the `.groups` argument.
# Per-speaker change in dispersion (fast - normal) for each quantity
# category, one panel per speaker group. Individual values are drawn as
# points underneath semi-transparent boxes; whiskers are suppressed
# (coef = 0) and outliers hidden because the raw points are already shown.
# No layer maps a fill aesthetic, so the plot carries no fill scale.
df_dispersion_difference_without_target_word %>%
  filter(measure == "dur_c_norm_word") %>%
  mutate(speaker_group = factor(speaker_group,
                                levels = c("Y_SD", "Y_WB", "O_WB"),
                                labels = c("Standard German",
                                           "Dialect, younger",
                                           "Dialect, older"))) %>%
  ggplot() +
  aes(x = category, y = change_in_dispersion) +
  facet_grid(cols = vars(speaker_group),
             scales = "free_y") +
  geom_point() +
  geom_boxplot(alpha = 0.8,
               outlier.shape = NA,
               coef = 0) +
  ylab("Category expansion") +
  xlab("") +
  # Each category label gets an example word on a second line.
  scale_x_discrete(labels = c("V:C:" = "V:C:\ne.g. Bieter",
                              "V:C" = "V:C\ne.g. wieder",
                              "VC" = "VC\ne.g. Widder",
                              "VC:" = "VC:\ne.g. bitter")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  # Zooms the y axis without discarding data outside the window.
  coord_cartesian(ylim = c(-0.15, 0.3)) +
  # Reference line: no change in dispersion between the two rates.
  geom_hline(aes(yintercept = 0))
# Raise the emmeans row limits to the size of the per-word dispersion table.
emm_options(
  pbkrtest.limit = nrow(df_dispersion_difference_with_target_word),
  lmerTest.limit = nrow(df_dispersion_difference_with_target_word)
)

# Mixed model of the change in dispersion (fast - normal) per speaker and
# target word.
#   Fixed effects : speaker_group x category
#   Random effects: by-speaker intercept plus slopes for category;
#                   by-target-word intercept plus slopes for speaker_group.
# speaker_group is recoded as an ordered factor (Y_SD < Y_WB < O_WB).
dispersion_difference.lmer <- df_dispersion_difference_with_target_word %>%
  filter(measure == "dur_c_norm_word") %>%
  mutate(speaker_group = factor(speaker_group,
                                ordered = TRUE, # `T` can be reassigned; `TRUE` cannot
                                levels = c("Y_SD", "Y_WB", "O_WB"))) %>%
  lmer(data = .,
       change_in_dispersion
       ~ speaker_group * category +
         (category | speaker) +
         (speaker_group | target_word))
## boundary (singular) fit: see help('isSingular')
# ANOVA table for the fixed effects of the dispersion model; stored for later
# use and printed explicitly.
dispersion_difference.anova <- anova(dispersion_difference.lmer)
print(dispersion_difference.anova)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## speaker_group 0.032333 0.0161667 2 21.801 1.8137 0.1868
## category 0.003040 0.0010133 3 23.487 0.1137 0.9512
## speaker_group:category 0.078655 0.0131092 6 25.183 1.4707 0.2283
# Lenis baseline at normal rate, used when judging fortis target words:
# the third quartile of normalised closure duration (dur_c / dur_word) of
# the lenis tokens, per speaker and place of articulation.
typical_lenis_upper_bound_normal <- df_normal %>%
  filter(consonant_strength == "Lenis") %>%
  group_by(speaker, place_of_articulation) %>%
  summarise(
    typical_lenis_upper_bound = quantile(dur_c / dur_word, probs = 0.75)
  )
## `summarise()` has grouped output by 'speaker'. You can override using the
## `.groups` argument.
# Lenis baseline at fast rate, used when judging fortis target words:
# the third quartile of normalised closure duration (dur_c / dur_word) of
# the lenis tokens, per speaker and place of articulation.
typical_lenis_upper_bound_fast <- df_fast %>%
  filter(consonant_strength == "Lenis") %>%
  group_by(speaker, place_of_articulation) %>%
  summarise(
    typical_lenis_upper_bound = quantile(dur_c / dur_word, probs = 0.75)
  )
## `summarise()` has grouped output by 'speaker'. You can override using the
## `.groups` argument.
# Fortis baseline at normal rate, used when judging lenis target words:
# the first quartile of normalised closure duration (dur_c / dur_word) of
# the fortis tokens, per speaker and place of articulation.
typical_fortis_lower_bound_normal <- df_normal %>%
  filter(consonant_strength == "Fortis") %>%
  group_by(speaker, place_of_articulation) %>%
  summarise(
    typical_fortis_lower_bound = quantile(dur_c / dur_word, probs = 0.25)
  )
## `summarise()` has grouped output by 'speaker'. You can override using the
## `.groups` argument.
# Fortis baseline at fast rate, used when judging lenis target words:
# the first quartile of normalised closure duration (dur_c / dur_word) of
# the fortis tokens, per speaker and place of articulation.
typical_fortis_lower_bound_fast <- df_fast %>%
  filter(consonant_strength == "Fortis") %>%
  group_by(speaker, place_of_articulation) %>%
  summarise(
    typical_fortis_lower_bound = quantile(dur_c / dur_word, probs = 0.25)
  )
## `summarise()` has grouped output by 'speaker'. You can override using the
## `.groups` argument.
# Fortis-lenis overlap (FLO) per speaker and place of articulation, normal
# rate. Every token is labelled "inside" or "outside" the typical lenis
# region (normalised closure at or below / above the speaker's lenis upper
# bound). The labels are turned into relative frequencies within each
# speaker x place x consonant-strength cell and spread into the columns
# `inside` / `outside` (0 where a label does not occur). Only the fortis
# rows are kept.
flo_per_place_of_articulation_normal_rate_fortis_as_target <- df_normal %>%
  left_join(typical_lenis_upper_bound_normal,
            by = c("speaker", "place_of_articulation")) %>%
  mutate(
    inside_typical_lenis_region = case_when(
      dur_c/dur_word <= typical_lenis_upper_bound ~ "inside",
      dur_c/dur_word > typical_lenis_upper_bound ~ "outside"
    )
  ) %>%
  # Denominator: all tokens in the cell, regardless of label.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength
  ) %>%
  mutate(number_of_tokens_in_group = n()) %>%
  # Numerator: tokens per label within the cell.
  group_by(
    variety, age, speaker, speaker_group_label,
    place_of_articulation, consonant_strength,
    number_of_tokens_in_group, inside_typical_lenis_region
  ) %>%
  summarise(absolute_frequency = n()) %>%
  mutate(
    relative_frequency = absolute_frequency / number_of_tokens_in_group
  ) %>%
  select(-absolute_frequency) %>%
  spread(inside_typical_lenis_region, relative_frequency, fill = 0) %>%
  filter(consonant_strength == "Fortis") %>%
  ungroup() %>%
  mutate(speech_rate = "normal")
## `summarise()` has grouped output by 'variety', 'age', 'speaker',
## 'speaker_group_label', 'place_of_articulation', 'consonant_strength',
## 'number_of_tokens_in_group'. You can override using the `.groups` argument.
# Fortis-lenis overlap (FLO) per speaker and place of articulation, fast
# rate. Every token is labelled "inside" or "outside" the typical lenis
# region (normalised closure at or below / above the speaker's lenis upper
# bound). The labels are turned into relative frequencies within each
# speaker x place x consonant-strength cell and spread into the columns
# `inside` / `outside` (0 where a label does not occur). Only the fortis
# rows are kept.
flo_per_place_of_articulation_fast_rate_fortis_as_target <- df_fast %>%
  left_join(typical_lenis_upper_bound_fast,
            by = c("speaker", "place_of_articulation")) %>%
  mutate(
    inside_typical_lenis_region = case_when(
      dur_c/dur_word <= typical_lenis_upper_bound ~ "inside",
      dur_c/dur_word > typical_lenis_upper_bound ~ "outside"
    )
  ) %>%
  # Denominator: all tokens in the cell, regardless of label.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength
  ) %>%
  mutate(number_of_tokens_in_group = n()) %>%
  # Numerator: tokens per label within the cell.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength,
    number_of_tokens_in_group, inside_typical_lenis_region
  ) %>%
  summarise(absolute_frequency = n()) %>%
  mutate(
    relative_frequency = absolute_frequency / number_of_tokens_in_group
  ) %>%
  select(-absolute_frequency) %>%
  spread(inside_typical_lenis_region, relative_frequency, fill = 0) %>%
  filter(consonant_strength == "Fortis") %>%
  ungroup() %>%
  mutate(speech_rate = "fast")
## `summarise()` has grouped output by 'variety', 'age', 'speaker_group_label',
## 'speaker', 'place_of_articulation', 'consonant_strength',
## 'number_of_tokens_in_group'. You can override using the `.groups` argument.
# Per-place overlap tables of both speech rates stacked into one table
# (normal-rate rows first).
flo_per_place_of_articulation_all_rates_fortis_as_target <- rbind(
  flo_per_place_of_articulation_normal_rate_fortis_as_target,
  flo_per_place_of_articulation_fast_rate_fortis_as_target
)
# Fortis-lenis overlap (FLO) per speaker and target word, normal rate.
# Every token is labelled "inside" or "outside" the typical lenis region
# (normalised closure at or below / above the speaker's lenis upper bound
# for that place of articulation). The labels are turned into relative
# frequencies within each speaker x place x consonant-strength x target-word
# cell and spread into the columns `inside` / `outside` (0 where a label
# does not occur). Only the fortis rows are kept.
flo_per_word_normal_rate_fortis_as_target <- df_normal %>%
  left_join(typical_lenis_upper_bound_normal,
            by = c("speaker", "place_of_articulation")) %>%
  mutate(
    inside_typical_lenis_region = case_when(
      dur_c/dur_word <= typical_lenis_upper_bound ~ "inside",
      dur_c/dur_word > typical_lenis_upper_bound ~ "outside"
    )
  ) %>%
  # Denominator: all tokens in the cell, regardless of label.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength, target_word
  ) %>%
  mutate(number_of_tokens_in_group = n()) %>%
  # Numerator: tokens per label within the cell.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength, target_word,
    number_of_tokens_in_group, inside_typical_lenis_region
  ) %>%
  summarise(absolute_frequency = n()) %>%
  mutate(
    relative_frequency = absolute_frequency / number_of_tokens_in_group
  ) %>%
  select(-absolute_frequency) %>%
  spread(inside_typical_lenis_region, relative_frequency, fill = 0) %>%
  filter(consonant_strength == "Fortis") %>%
  mutate(speech_rate = "normal") %>%
  ungroup()
## `summarise()` has grouped output by 'variety', 'age', 'speaker_group_label',
## 'speaker', 'place_of_articulation', 'consonant_strength', 'target_word',
## 'number_of_tokens_in_group'. You can override using the `.groups` argument.
# Fortis-lenis overlap (FLO) per speaker and target word, fast rate.
# Every token is labelled "inside" or "outside" the typical lenis region
# (normalised closure at or below / above the speaker's lenis upper bound
# for that place of articulation). The labels are turned into relative
# frequencies within each speaker x place x consonant-strength x target-word
# cell and spread into the columns `inside` / `outside` (0 where a label
# does not occur). Only the fortis rows are kept.
flo_per_word_fast_rate_fortis_as_target <- df_fast %>%
  left_join(typical_lenis_upper_bound_fast,
            by = c("speaker", "place_of_articulation")) %>%
  mutate(
    inside_typical_lenis_region = case_when(
      dur_c/dur_word <= typical_lenis_upper_bound ~ "inside",
      dur_c/dur_word > typical_lenis_upper_bound ~ "outside"
    )
  ) %>%
  # Denominator: all tokens in the cell, regardless of label.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength, target_word
  ) %>%
  mutate(number_of_tokens_in_group = n()) %>%
  # Numerator: tokens per label within the cell.
  group_by(
    variety, age, speaker_group_label, speaker,
    place_of_articulation, consonant_strength, target_word,
    number_of_tokens_in_group, inside_typical_lenis_region
  ) %>%
  summarise(absolute_frequency = n()) %>%
  mutate(
    relative_frequency = absolute_frequency / number_of_tokens_in_group
  ) %>%
  select(-absolute_frequency) %>%
  spread(inside_typical_lenis_region, relative_frequency, fill = 0) %>%
  filter(consonant_strength == "Fortis") %>%
  mutate(speech_rate = "fast") %>%
  ungroup()
## `summarise()` has grouped output by 'variety', 'age', 'speaker_group_label',
## 'speaker', 'place_of_articulation', 'consonant_strength', 'target_word',
## 'number_of_tokens_in_group'. You can override using the `.groups` argument.
# Per-word overlap tables of both speech rates stacked into one table
# (normal-rate rows first).
flo_per_word_all_rates_fortis_as_target <- rbind(
  flo_per_word_normal_rate_fortis_as_target,
  flo_per_word_fast_rate_fortis_as_target
)
# Normal rate: share of fortis tokens that fall inside the typical lenis
# region, one jittered point per table row, by place of articulation, with
# one panel per speaker group.
flo_per_place_of_articulation_normal_rate_fortis_as_target %>%
  ggplot(aes(y = inside, x = place_of_articulation)) +
  geom_jitter(aes(color = place_of_articulation)) +
  facet_grid(cols = vars(speaker_group_label)) +
  # The visible range starts below 0 so that jittered points at 0 stay
  # in view.
  coord_cartesian(ylim = c(-0.1,1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 1, by = 0.2)) +
  scale_color_discrete(name = "Place of articulation") +
  ylab("Fortis–lenis overlap") +
  xlab("Place of articulation")
# Normal rate: fortis-lenis overlap per target word, drawn once for every
# place of articulation listed in the loop vector (currently only
# "alveolar"). Inside a loop the plot has to be printed explicitly.
for (current_poa in c("alveolar")) {
  plot <- flo_per_word_normal_rate_fortis_as_target %>%
    filter(place_of_articulation == current_poa) %>%
    ggplot(aes(y = inside, x = target_word)) +
    geom_jitter(aes(color = target_word)) +
    facet_grid(cols = vars(speaker_group_label)) +
    # The visible range starts below 0 so that jittered points at 0 stay
    # in view.
    coord_cartesian(ylim = c(-0.1,1)) +
    scale_y_continuous(breaks = seq(from = 0, to = 1, by = 0.2)) +
    scale_color_discrete(name = "Target word") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    ylab("Fortis–lenis overlap") +
    xlab("Target word")
  print(plot)
}