## Load libraries and data:
# for time_in_sequence() function
library(tidyverse)
library(gridExtra)  # grid.arrange() for multi-panel figures
library(lmerTest)   # lmer() with Satterthwaite tests
library(emmeans)    # estimated marginal means
library(emuR)       # Emu speech database queries
library(glmmTMB)    # beta-regression mixed models
library(bbmle)      # AICtab() model comparison
# FIX: a second, duplicate library(emmeans) call was removed here
# Data and figure directories (earlier machines' paths kept for reference)
#pfad = "/Volumes/vdata/ERC2/Nasals23/analysis"
# pfad = "V:/Nasals23/analysis"
#pfad = "/Users/jmh/b/current/nt_nd/analysis"
#pfademu = "/Users/jmh/b/current/nt_nd"
pfad = "/Users/cunha/Documents/projects/MRI_E/language/submitted_v2/data_LMU/files"
pfademu = "/Users/cunha/Documents/projects/MRI_E/DB/version_oct2024"
# directory for saving figures
#pfadfigs = "/Volumes/vdata/ERC2/Nasals23/ntfigs"
#pfadfigs = "/Users/jmh/b/current/nt_nd/analysis/figs"
#pfadfigs2 = pfadfigs
# BUG FIX: the path was missing its leading "/" and was therefore relative
# to the working directory; every sibling path above is absolute.
pfadfigs2 = "/Users/cunha/Documents/projects/MRI_E/language/submitted_v2/figs2"
# Load the concatenated-signals table; it provides df_erc_prenasalraising.
load(file = file.path(pfad,
                      "erc_mrinasals_concatsigs_prenasalraising_v3_voicing_prevowelVokalCodapostcoda_A_tdn__IEAVe__i_pbfszS_01.Rda"))
dt.df = df_erc_prenasalraising %>%
  # Nasality: "n" if the coda starts with a nasal (m/n/N), otherwise "o"
  mutate(Nasality =
           case_when(substring(coda, 1, 1) %in% c("m", "n", "N") ~ "n",
                     TRUE ~ "o")) %>%
  # change vowel labels from SAMPA to IPA
  # BUG FIX: "E" previously mapped to the mojibake string "É›" (a UTF-8
  # "ɛ" mis-decoded as latin-1), which is not among the factor levels below,
  # so every /ɛ/ token would have become NA.
  mutate(vowel = case_when(vowel == "{" ~ "æ",
                           vowel == "E" ~ "ɛ",
                           vowel == "V" ~ "ʌ",
                           vowel == "I" ~ "ɪ",
                           vowel == "ei" ~ "eɪ",
                           # keep any other label unchanged; factor() below
                           # still turns labels outside the 5 levels into NA
                           TRUE ~ vowel)) %>%
  # change the factor order for vowels
  mutate(vowel = factor(vowel,
                        levels =
                          c("æ", "eɪ", "ʌ", "ɛ", "ɪ")))
# Console output: distribution of nasal ("n") vs oral ("o") codas.
table(dt.df$Nasality)##
## n o
## 724228 333263
##
# NOTE(review): the vowel counts below have no visible table(dt.df$vowel)
# call in this chunk — presumably produced by one; verify against the Rmd.
## æ eɪ ʌ ɛ ɪ
## 213916 235366 189961 210871 207377
# read in word table
words = read.table(file.path(pfad, "codeprosortho_list_corx.txt"))
# strip off final two characters in dt.df$labs
n = nchar(as.character(dt.df$labs))
w = substring(dt.df$labs, 1, n-2)
# match this to the first column of words
m = match(w, words[,1])
# check there are no NAs
any(is.na(m))## [1] FALSE
# these are the word labels
wfin = words[m,2]
# bind them in to dt.df
dt.df$orth = factor(wfin)
# divide USE speakers in 3 and 4 regions
# Console output: token counts per speaker.
table(dt.df$speaker)##
## S1 S10 S11 S12 S13 S14 S15 S16 S17 S18 S2 S20 S21
## 23860 21605 25493 25885 25575 25897 21329 24415 22544 18264 19352 22734 23653
## S22 S23 S24 S25 S26 S27 S28 S29 S3 S30 S31 S32 S33
## 19413 24744 27563 28316 23121 22452 25289 28080 25359 25054 22787 26655 29123
## S34 S35 S36 S37 S38 S39 S4 S40 S42 S43 S44 S45 S5
## 26483 23477 22702 24938 32066 28602 23464 27342 19538 26998 23416 22898 29201
## S6 S7 S8 S9
## 27408 21213 23657 25526
# 3 US regions - US_northeast, US_south, US_west
# region3: map each speaker to British (BR) or one of three US regions.
dt.df= dt.df %>%
mutate(region3 = recode (speaker, "S1" = "BR", "S10" = "BR","S11" = "BR","S12" = "BR","S13" = "BR",
"S14" = "US_northeast","S15" = "BR","S16" = "BR","S17" = "BR","S18" = "BR",
"S2" = "US_south","S20"="BR", "S21" = "BR","S22" = "BR","S23" = "BR","S24" = "US_northeast",
"S25" = "US_west", "S26" = "BR", "S27" = "US_west", "S28" = "US_south","S29" = "BR",
"S3" = "US_south","S30" = "US_northeast","S31" = "US_northeast","S32" = "BR","S33" = "BR",
"S34" = "US_northeast","S35" = "US_northeast",
"S36" = "BR","S37" = "BR","S38" = "BR","S39" = "BR","S4" = "US_west","S5" = "US_west",
"S6" = "US_northeast","S7" = "US_west", "S8" = "BR","S9" = "BR",
"S40" = "BR", "S42" = "US_west","S43" = "BR", "S44" = "BR","S45" = "BR"))
# split US_northeast (7/16 speakers)
# in US_Atlantic and Midland to obtain roughly the same amount of speakers per group
# region4: identical to region3 except US_northeast is split into
# US_Atlantic and US_midland.
dt.df= dt.df %>%
mutate(region4 = recode (speaker, "S1" = "BR", "S10" = "BR","S11" = "BR","S12" = "BR","S13" = "BR",
"S14" = "US_Atlantic","S15" = "BR","S16" = "BR","S17" = "BR","S18" = "BR",
"S2" = "US_south","S20"="BR", "S21" = "BR","S22" = "BR","S23" = "BR","S24" = "US_midland",
"S25" = "US_west", "S26" = "BR", "S27" = "US_west", "S28" = "US_south","S29" = "BR",
"S3" = "US_south","S30" = "US_Atlantic","S31" = "US_midland","S32" = "BR","S33" = "BR",
"S34" = "US_Atlantic","S35" = "US_midland",
"S36" = "BR","S37" = "BR","S38" = "BR","S39" = "BR","S4" = "US_west","S5" = "US_west",
"S6" = "US_midland","S7" = "US_west", "S8" = "BR","S9" = "BR",
"S40" = "BR", "S42" = "US_west","S43" = "BR", "S44" = "BR","S45" = "BR"))
# Console output: counts per region3 and, below, per region4.
# NOTE(review): the region4 counts have no visible table(dt.df$region4) call.
table(dt.df$region3)##
## BR US_northeast US_south US_west
## 664638 178669 70000 144184
##
## BR US_Atlantic US_south US_midland US_west
## 664638 77434 70000 101235 144184
# Load the per-segment articulatory landmark table (df_erc_prenasalraising).
load(file.path(pfad,
               "erc_mrinasals_allsubjects_artaku_allsegandsig_table_v2_rsnonan_addlang_addtt_sig.Rda"))
dim(df_erc_prenasalraising)
## [1] 12415 174
# rowind identifies a segment: speaker and trial number pasted together
df_erc_prenasalraising = df_erc_prenasalraising %>%
  mutate(rowind = paste(speaker, trialnumber, sep="."))
# keep only the velum-gesture landmark/track columns plus the join key
df_erc = df_erc_prenasalraising %>%
  select(velumopening_gesture_on,
         velumopening_gesture_off,
         velumopening_maxvel_on,
         velumopening_maxvel_off,
         velumopening_maxcon_on,
         velumopening_nucleus_on,
         velumopening_nucleus_off,
         velumopening_nucleus_dur,
         velum2USV_velumopening_maxvel_onset,
         velum2USV_velumopening_maxvel_offset,
         velum2US_velumopening_maxcon_onset,
         velum2US_velumopening_nucleus_midpoint,
         velum2US_velumopening_gesture_onset,
         Vokal_on,
         rowind
  )
# join to dt.df
dt.df = left_join(dt.df, df_erc,
                  by = "rowind")
rm(df_erc, df_erc_prenasalraising)
# obtain time series offset: first sample time of the vowel per segment
timeseriesoffset = dt.df %>%
  group_by (rowind) %>%
  filter(segment == "Vokal") %>%
  slice_head(n = 1) %>%
  select(time_in_sequence, rowind) %>%
  rename(timeseriesoffset = time_in_sequence) %>%
  ungroup()
# join to dt.df
dt.df = left_join(dt.df, timeseriesoffset, by = "rowind")
# obtain lineuptime and subtract it so that the velum landmarks are on the
# same time scale as time_in_sequence
dt.df = dt.df %>%
  mutate(
    lineuptime = Vokal_on - timeseriesoffset,
    velumopening_gesture_on =
      velumopening_gesture_on - lineuptime,
    velumopening_gesture_off =
      velumopening_gesture_off - lineuptime,
    velumopening_maxvel_on =
      velumopening_maxvel_on - lineuptime,
    velumopening_maxvel_off =
      velumopening_maxvel_off - lineuptime,
    velumopening_maxcon_on =
      velumopening_maxcon_on - lineuptime,
    velumopening_nucleus_on =
      velumopening_nucleus_on - lineuptime,
    velumopening_nucleus_off =
      velumopening_nucleus_off - lineuptime)
# Re-load the same table for the alveolar (tongue-tip) landmark columns.
# FIX: this load() was fused onto the end of the previous statement,
# which was a syntax error.
load(file.path(pfad,
               "erc_mrinasals_allsubjects_artaku_allsegandsig_table_v2_rsnonan_addlang_addtt_sig.Rda"))
dim(df_erc_prenasalraising)
## [1] 12415 174
# code for segment identification, as before
# FIX: an identical duplicate of this mutate() was removed (it was repeated
# twice in the original, a no-op but confusing)
df_erc_prenasalraising = df_erc_prenasalraising %>%
  mutate(rowind = paste(speaker, trialnumber, sep="."))
# keep the alveolar-constriction landmark/track columns plus the join key
df_erc = df_erc_prenasalraising %>%
  select(alveolarconstriction_maxcon_on,
         alveolarconstriction_nucleus_on,
         alveolarconstriction_nucleus_off,
         alveolarconstriction_maxvel_on,
         alveolarconstriction_maxvel_off,
         alveolarconstriction_nucleus_dur,
         alveolarconstriction_maxvel_dur,
         # displacements
         alvUS_alveolarconstriction_maxvel_onset,
         alvUS_alveolarconstriction_maxvel_offset,
         alvUS_alveolarconstriction_maxcon_onset,
         # position at gesture onset
         alvUS_alveolarconstriction_gesture_onset,
         # velocities
         alvUSV_alveolarconstriction_maxvel_onset,
         alvUSV_alveolarconstriction_maxvel_offset,
         Vokal_on,
         rowind)
# left join to dt.df
dt.df = left_join(dt.df, df_erc,
                  by = "rowind")
rm(df_erc, df_erc_prenasalraising)
# subtract lineuptime so the alveolar landmarks share the
# time_in_sequence time scale
dt.df = dt.df %>%
  mutate(alveolarconstriction_maxcon_on =
           alveolarconstriction_maxcon_on - lineuptime,
         alveolarconstriction_nucleus_off =
           alveolarconstriction_nucleus_off - lineuptime,
         alveolarconstriction_nucleus_on =
           alveolarconstriction_nucleus_on - lineuptime,
         alveolarconstriction_maxvel_on =
           alveolarconstriction_maxvel_on - lineuptime,
         alveolarconstriction_maxvel_off =
           alveolarconstriction_maxvel_off - lineuptime)
# Per-segment vowel end and midpoint times.
# FIX: this assignment was fused onto the end of the previous statement,
# which was a syntax error.
times.df = dt.df %>%
  group_by(rowind) %>%
  filter(segment == "Vokal") %>%
  slice_tail(n=1) %>%
  mutate(endv_time = time_in_sequence,
         midv_time = (endv_time+timeseriesoffset)/2) %>%
  select(rowind, midv_time, endv_time) %>%
  ungroup()
# now join this to dt.df
dt.df = left_join(dt.df, times.df,
                  by = "rowind")
# `df` has 4 columns obtained with the Emu script (at the end of this
# document) from the database ercnasals_audio_emuDB containing acoustic
# segmentations. The four columns in periodicV.df.txt are:
#   Vstart:  start of the vowel as defined in the database tier Vokal
#   Vend:    end of the vowel as defined in the database tier Vokal
#   V2start: acoustic onset of periodicity of the vowel extracted from the
#            database tier vowelfor
#   V2end:   end of the vowel as defined in the database tier vowelfor
#            (the original prose appended "= Vstart" here — NOTE(review):
#            garbled in the source; confirm the intended meaning)
df = read.table(file.path(pfad, "periodicV.df.txt"))
# make rowind match with that of dt.df
df$rowind = as.character(substring(df$rowind, 3,
                                   nchar(df$rowind)))
# join to dt.df
dt.df = left_join(dt.df, df, by="rowind")
# Verify that Vend - Vstart is the same as endv_time - timeseriesoffset.
# They are, except for a 2 ms difference on average (a rounding error):
# dt.df %>%
#   mutate(v1 = Vend - Vstart,
#          v2 = endv_time - timeseriesoffset,
#          d = v1 - v2) %>%
#   summarise(mean(d), sd(d))
# Now define the onset of periodicity in relation to timeseriesoffset.
# To do this, subtract Vstart from V2start then add that to timeseriesoffset.
dt.df = dt.df %>%
  # duration from the start of the vowel to the beginning of periodicity
  mutate(d = V2start - Vstart,
         # add this duration to `timeseriesoffset`, which is also the start
         # of the vowel but on a different time scale
         Vperiodiconset = timeseriesoffset + d)
# Verify for any segment that Vperiodiconset corresponds to the start of the
# vowel in the vowelfor tier. This was done for segment 1.51, a production
# of 'pant': the duration from the start of the vowel to the onset of
# periodicity is 0.1065625 s, which matches the Emu database bundle exactly.
#speaker.i = 1
#trial.i = 51
#i = paste(speaker.i, trial.i, sep=".")
#dt.df %>%
#  filter(rowind %in% i) %>%
#  mutate(d = Vperiodiconset - timeseriesoffset) %>%
#  pull(d) %>% unique()
# Add an additional column, Vowelonset2, which has the same start times as
# timeseriesoffset for /p, b/-initial words, otherwise Vperiodiconset.
dt.df = dt.df %>%
  mutate(Vowelonset2 =
           ifelse(onset %in% c("p", "b"),
                  timeseriesoffset, Vperiodiconset))
# Are there any missing values in the times of the velum gesture?
# NOTE(review): the original prose said "10 of them", but 12 rowinds are
# listed in the output below — confirm which run is current.
gaps = dt.df %>%
  filter(coda %in% c("n", "nd", "nz")) %>%
  filter(segment == "Vokal") %>%
  group_by(rowind) %>%
  slice_tail(n=1) %>%
  filter(is.na(velumopening_maxcon_on) |
           is.na(velumopening_maxvel_off) |
           is.na(velumopening_maxvel_on)) %>%
  pull(rowind)
gaps
## [1] "14.259" "24.48" "29.22" "3.299" "3.325" "30.261" "34.131" "38.147"
## [9] "38.216" "38.257" "39.136" "40.49"
# Add CV variable: keep only nasal-coda tokens whose alveolar kinematic
# landmarks are all present
tt.rowind = dt.df %>%
  filter(Nasality == "n") %>%
  filter(segment == "Vokal") %>%
  group_by(rowind) %>%
  slice_tail(n=1) %>%
  ungroup() %>%
  # choose ones that have values for the kinematic events
  filter(!is.na(alveolarconstriction_nucleus_on)) %>%
  filter(!is.na(alveolarconstriction_nucleus_off)) %>%
  pull(rowind)
tt.df = dt.df %>%
  filter(rowind %in% tt.rowind)
# number of distinct segments
# NOTE(review): the original comment said 1100 but the recorded output
# below is 1438 — confirm which run is current
tt.df %>% pull(rowind) %>% n_distinct()
## [1] 1438
## INFO: Checking if cache needs update for 44 sessions and 13577 bundles ...
## INFO: Performing precheck and calculating checksums (== MD5 sums) for _annot.json files ...
## INFO: Nothing to update!
## All segments of Vokal tier
# NOTE(review): phys_db (the emuR database handle) is created elsewhere —
# confirm it is loaded before this chunk runs.
vok.s = query(phys_db, "Vokal =~.*")
## as above without empty "" labels
vok.s = vok.s[vok.s$labels!="",]
## All segments from vowelfor tier
v.s = query(phys_db, "vowelfor =~.*")
## as above without empty "" labels
v.s = v.s[v.s$labels!="",]
## Test matching bundles
all(vok.s$bundle == v.s$bundle)
## [1] TRUE
## get speaker-id
speaker = substring(v.s$bundle, 20, 22)
speaker = substring(speaker, 2, nchar(speaker))
speaker = as.numeric(speaker)
## get Trial-id
trial = as.numeric(substring(v.s$bundle,
                             nchar(v.s$bundle)-3,
                             nchar(v.s$bundle)))
## paste speaker.id and trial.id together
## to turn it into a character(!). This is needed
## so that a character vector is stored and then
## a character vector is read back in
id = paste("T",
           paste(as.character(speaker),
                 as.character(trial), sep="."), sep=".")
# Simple ggplot theme: one uniform text size n for axis text and titles.
# FIX: this definition was fused onto the end of the previous statement,
# which was a syntax error.
theme_michigan1 <-
  function(n = 24){
    theme(
      axis.text = element_text(size=n),
      axis.title.x = element_text(size=n),
      axis.title.y = element_text(size=n),
      text = element_text(size=n))
  }
# Flexible ggplot theme for the paper figures.
# atextx/atexty: axis tick-label sizes; atitlex/atitley: axis title sizes;
# ptitle: plot title size; strip.text.x: facet strip size; lpos: legend
# position; othertext: base text size; ltitle: legend title *element* —
# NOTE(review): callers pass element_blank() here; the numeric default 24
# is not a valid theme element, so confirm no caller relies on the default.
theme_phon =
  function(atextx = 18, atexty = 18, atitlex =
             18, atitley = 18, ptitle=24, strip.text.x = 24,
           lpos = "bottom", othertext=24, ltitle =24){
    # e.g. atexty = element_blank()
    theme(
      axis.text.x = element_text(size=atextx),
      # BUG FIX: was size=atitley, which silently ignored atitlex
      axis.title.x = element_text(size=atitlex),
      axis.text.y = element_text(size=atexty),
      axis.title.y = element_text(size=atitley),
      plot.title = element_text(size = ptitle),
      legend.position=lpos,
      legend.title = ltitle,
      strip.text.x =element_text(size=strip.text.x),
      text = element_text(size=othertext))
  }
# Theme with a blank x axis, y tick labels at 24, title at 32, legend on
# top; n controls the base text size.
theme_michigan2 <-
  function(n = 24){
    # Collect the settings in a named list and splice them into theme();
    # equivalent to calling theme() with the same arguments directly.
    settings <- list(
      axis.text.x = element_blank(),
      axis.title.x = element_blank(),
      axis.text.y = element_text(size = 24),
      plot.title = element_text(size = 32),
      legend.position = "top",
      text = element_text(size = n))
    do.call(theme, settings)
  }
# Paper variant of theme_michigan2: blank x axis, y text/title at size k,
# plot title at size n, legend position configurable (default "none").
theme_michigan2apaper <-
function(n = 24, legend.pos="none", k = 18){
theme(
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=k),
axis.title.y = element_text(size=k),
plot.title = element_text(size = n),
legend.position=legend.pos,
text = element_text(size=n))
}
# Theme with no legend, y tick labels at 24 and title at 32; n controls
# the base text size.
theme_michigan3 <-
  function(n = 24){
    # Build the spec as a named list and splice it into theme(); this is
    # equivalent to calling theme() with the same arguments directly.
    spec <- list(
      legend.position = "none",
      axis.text.y = element_text(size = 24),
      plot.title = element_text(size = 32),
      text = element_text(size = n))
    do.call(theme, spec)
  }
# Theme with separate sizes for base/axis text (n), plot title (m) and
# axis titles (i); legend on top.
theme_michigan5 =
function(n = 18, m = 32, i = 20){
theme(
axis.text = element_text(size=n),
axis.title = element_text(size=i),
plot.title = element_text(size = m),
legend.position="top",
text = element_text(size=n))
}
# Theme for the nt/nd trajectory figures: legend on top without a title,
# facet strips at 32, plot title at 36, tick labels at 24; n controls the
# x-axis title and base text size.
theme_michigan6nt =
function(n = 32){
theme(
legend.position = "top",
strip.text.x = element_text(size = 32),
plot.title = element_text(size = 36),
axis.title.x = element_text(size=n),
axis.text.x = element_text(size=24),
axis.text.y = element_text(size=24),
legend.title= element_blank(),
text = element_text(size=n))
}
# NOTE(review): this is a duplicate redefinition of theme_phon (it appears
# earlier in the file with an identical body); since this later definition
# is the one in effect, the fix below is applied here too. Consider
# removing one of the two definitions.
theme_phon =
  function(atextx = 18, atexty = 18, atitlex =
             18, atitley = 18, ptitle=24, strip.text.x = 24,
           lpos = "bottom", othertext=24, ltitle =24){
    # e.g. atexty = element_blank()
    theme(
      axis.text.x = element_text(size=atextx),
      # BUG FIX: was size=atitley, which silently ignored atitlex
      axis.title.x = element_text(size=atitlex),
      axis.text.y = element_text(size=atexty),
      axis.title.y = element_text(size=atitley),
      plot.title = element_text(size = ptitle),
      legend.position=lpos,
      legend.title = ltitle,
      strip.text.x =element_text(size=strip.text.x),
      text = element_text(size=othertext))
  }
# line-up relative to acoustic vowel offset
# /nt, nd/ tokens with time re-expressed relative to the acoustic vowel
# offset, rounded to 5 ms steps.
x.df = dt.df %>%
filter(coda %in% c("nt", "nd")) %>%
mutate(reltime =
plyr::round_any(1000 * (time_in_sequence - endv_time), 5))
# obtain mean times of peak velum lowering velocity
# relative to acoustic vowel offset
pkvel.clos.t = x.df %>%
rename(Dialect = language) %>%
group_by(vowel, coda, Dialect) %>%
summarise(m1 = mean(velumopening_maxvel_off -
endv_time, na.rm=T) * 1000) %>%
ungroup()## `summarise()` has grouped output by 'vowel', 'coda'. You can override using the
## `.groups` argument.
# aggregates of the three parameters between -50 and 200 ms
# (pefac: voicing probability; peryin: 1 - YIN aperiodicity; SPL in dB)
voice.df = x.df %>%
filter(reltime > -50 & reltime < 200) %>%
group_by(language, coda, vowel, reltime) %>%
summarise(
pefac = mean(probvoicepefac),
peryin = mean(1-aperyin),
SPL = 20 * log(mean(SPL_LP), base=10)) %>%
ungroup() %>%
rename(Dialect = language) ## `summarise()` has grouped output by 'language', 'coda', 'vowel'. You can
## override using the `.groups` argument.
# Aggregates per observation between acoustic vowel offset and time of peak
# velocity of velum lowering
acmean.df = dt.df %>%
  rename(Dialect = language) %>%
  filter(coda %in% c("nt", "nd")) %>%
  filter(time_in_sequence >= endv_time) %>%
  filter(time_in_sequence <= velumopening_maxvel_off) %>%
  group_by(rowind, vowel, coda, Dialect, onset, speaker, CV) %>%
  summarise(
    pefac = mean(probvoicepefac),
    SPL = 20 * log(mean(SPL_LP), base=10),
    peryin = mean(1-aperyin)) %>%
  ungroup()
## `summarise()` has grouped output by 'rowind', 'vowel', 'coda', 'Dialect',
## 'onset', 'speaker'. You can override using the `.groups` argument.
# peryin (plot)
# NOTE(review): lwd and cols.coda are defined elsewhere — confirm they are
# in scope before plotting.
peryin1 = voice.df %>%
  ggplot +
  aes(y = peryin, x = reltime, col = coda,
      group = interaction(Dialect, coda, vowel)) +
  geom_line(linewidth=lwd) +
  scale_x_continuous("Time (ms)", breaks = c(0, 200)) +
  ylab("Probability of voicing") +
  facet_wrap(Dialect ~ vowel, ncol=5) +
  geom_vline(xintercept=0, lty=2, linewidth=1.5) +
  geom_vline(data = pkvel.clos.t,
             aes(xintercept=m1, col = coda),
             lty=3,linewidth=lwd) +
  theme_michigan6nt() +
  scale_colour_manual(values = cols.coda)
peryin1
# FIX: this ggsave() was fused onto the previous line (syntax error)
ggsave(filename = file.path(pfadfigs2, "peryin1.pdf"),
       plot = peryin1,
       device=cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 1235.1 -2448.2
## coda in (coda | speaker) 0 9 1119.2 -2220.3 231.861 2 < 2.2e-16
## Dialect in (Dialect | CV) 0 9 1227.4 -2436.7 15.489 2 0.0004331
##
## <none>
## coda in (coda | speaker) ***
## Dialect in (Dialect | CV) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## coda:Dialect 1 0.01560 0.01560 1 42.204 3.1035 0.08537 .
## Dialect 2 0.00014 0.00014 1 42.852 0.0278 0.86829
## coda 0 1.36988 1.36988 1 45.140 272.7404 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## peryin ~ coda + (coda | speaker) + (Dialect | CV)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## coda 1.3699 1.3699 1 45.14 272.74 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans:
# Estimated marginal means of voicing probability (peryin) by coda from
# the reduced model found above; console output follows.
acmean.df %>%
lmer(peryin ~ coda + (coda | speaker) + (Dialect | CV), .) %>%
emmeans(., ~coda)## coda emmean SE df lower.CL upper.CL
## nd 0.820 0.0220 48.4 0.776 0.864
## nt 0.578 0.0197 51.1 0.538 0.617
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
# Plot: SPL trajectories by coda, dialect and vowel
# NOTE(review): lwd and cols.coda are defined elsewhere — confirm they are
# in scope before plotting.
SPL1 =
  voice.df %>%
  ggplot +
  aes(y = SPL, x = reltime, col = coda,
      group = interaction(Dialect, coda, vowel)) +
  geom_line(linewidth=lwd) +
  scale_x_continuous("Time (ms)", breaks = c(0, 200)) +
  ylab("dB SPL") +
  facet_wrap(Dialect ~ vowel, ncol=5) +
  geom_vline(xintercept=0, lty=2, linewidth=1.5) +
  geom_vline(data = pkvel.clos.t,
             aes(xintercept=m1, col = coda),
             lty=3,linewidth=lwd) +
  theme_michigan6nt() +
  scale_colour_manual(values = cols.coda)
SPL1
# FIX: this ggsave() was fused onto the previous line (syntax error)
ggsave(filename = file.path(pfadfigs2, "SPL1.pdf"),
       plot = SPL1,
       device=cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")
## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -2652.1 5326.2
## coda in (coda | speaker) 0 9 -2723.9 5465.9 143.698 2 < 2.2e-16
## Dialect in (Dialect | CV) 0 9 -2657.5 5333.0 10.847 2 0.004413
##
## <none>
## coda in (coda | speaker) ***
## Dialect in (Dialect | CV) **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## coda:Dialect 1 2.448 2.448 1 41.470 0.5799 0.4506
## Dialect 2 0.033 0.033 1 41.467 0.0078 0.9298
## coda 0 292.433 292.433 1 46.552 69.3012 9.011e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## SPL ~ coda + (coda | speaker) + (Dialect | CV)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## coda 292.43 292.43 1 46.552 69.301 9.011e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Emmeans
# Estimated marginal means of SPL by coda from the reduced model found
# above; console output follows.
acmean.df %>%
lmer(SPL ~ coda + (coda | speaker) + (Dialect | CV), .) %>%
emmeans(., ~coda)## coda emmean SE df lower.CL upper.CL
## nd 51.2 1.18 44.5 48.8 53.6
## nt 48.3 1.10 45.2 46.1 50.5
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
# fmin() — NOTE(review): defined elsewhere, not visible in this chunk —
# determines the time point tp, between the peak velum velocity times
# (lowering and raising), at which the probability of voicing first falls
# below 0.5. If it is still > 0.5 at the time of peak velum raising
# velocity, tp becomes the time of peak velum raising velocity.
aperyin.df = dt.df %>%
  filter(coda %in% c("nt", "nd"),
         time_in_sequence >= endv_time,
         time_in_sequence <= velumopening_maxvel_off+100) %>%
  group_by(rowind, endv_time, Vperiodiconset) %>%
  # linear time-normalisation to 100 points
  reframe(T1n = approx(time_in_sequence,
                       1-aperyin, n = 100)$y,
          time_in_sequence = approx(time_in_sequence,
                                    1-aperyin, n = 100)$x,
          # apply lowess smoothing
          T1smooth = lowess(time_in_sequence, T1n, .3)$y,
          # find the time at which the signal is less
          # than p in the lowess-smoothed data...
          Tminsmooth.peryin = fmin(time_in_sequence, T1smooth),
          # and in the raw data
          Tmin.peryin = fmin(time_in_sequence, T1n)) %>%
  ungroup()
# Are there any times for which endv_time > Tminsmooth.peryin? No:
## # A tibble: 0 × 8
## # ℹ 8 variables: rowind <chr>, endv_time <dbl>, Vperiodiconset <dbl>,
## # T1n <dbl>, time_in_sequence <dbl>, T1smooth <dbl>, Tminsmooth.peryin <dbl>,
## # Tmin.peryin <dbl>
# choose a segment number
j = 46
# find the time when the signal is less
# than p in the lowess smoothed data
x1 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  pull(Tminsmooth.peryin) %>% unique()
# and in the raw data
x2 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  pull(Tmin.peryin) %>% unique()
# smoothed trajectory with its 0.5 threshold crossing
a1 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  ggplot +
  aes(y = T1smooth, x = time_in_sequence) +
  geom_line() +
  geom_hline(yintercept=.5) +
  geom_vline(xintercept=x1, col=2)
# raw trajectory with its 0.5 threshold crossing
a2 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  ggplot +
  aes(y = T1n, x = time_in_sequence) +
  geom_line() +
  geom_hline(yintercept=.5) +
  geom_vline(xintercept=x2, col=3)
grid.arrange(a1, a2, ncol=2)
# Read in the physiology data containing velum and tongue signals. We need
# this because dt.df was derived from a Praat database that did not contain
# any physiological signals.
# FIX: this read.table() was fused onto the previous line (syntax error)
dtphys.df = read.table(file.path(pfad, "dt.df.txt"))
dtphys.df$rowind = as.character(dtphys.df$rowind)
# Extract /nt, nd/
# These are the unique rows of aperyin.df; we only need unique rows because
# we only want to extract one time value per segment.
# NOTE(review): aperyin.short.df and nt.df are created elsewhere (not
# visible in this chunk) — confirm they exist before this join runs.
# inner join aperyin.short.df to nt.df
nt.df = inner_join(nt.df, aperyin.short.df %>%
                     select(rowind, Tminsmooth.peryin, Tmin.peryin),
                   by = "rowind")
# Replace Tminsmooth.peryin so that the right boundary equals
# velumopening_maxvel_off whenever Tminsmooth.peryin exceeds it.
# NOTE(review): the original prose said "remove any values for which
# endv_time < velumopening_maxvel_off", but the filter below uses
# velumopening_maxvel_on — confirm which is intended.
nt.df = nt.df %>%
  mutate(Tminsmooth2 =
           ifelse(Tminsmooth.peryin > velumopening_maxvel_off,
                  velumopening_maxvel_off, Tminsmooth.peryin)) %>%
  filter(endv_time > velumopening_maxvel_on)
# Area sum under the nasal curve between the acoustic vowel offset and the
# time at which pvoice < 0.5, for PERYIN
propc.peryin.df = nt.df %>%
  mutate(velum2US =
           velum2US - velum2US_velumopening_gesture_onset) %>%
  filter(time_in_sequence >= endv_time) %>%
  filter(time_in_sequence <= Tminsmooth2) %>%
  group_by(rowind, vowel, coda, language, onset, speaker, CV) %>%
  summarise(cprop = sum(velum2US)) %>%
  ungroup()
## `summarise()` has grouped output by 'rowind', 'vowel', 'coda', 'language',
## 'onset', 'speaker'. You can override using the `.groups` argument.
# Sum of nasalisation under the curve between the times of peak velum
# lowering and raising velocities
nasdur.df = nt.df %>%
  mutate(velum2US =
           velum2US - velum2US_velumopening_gesture_onset) %>%
  filter(time_in_sequence >= velumopening_maxvel_on) %>%
  filter(time_in_sequence <= velumopening_maxvel_off) %>%
  group_by(rowind) %>%
  summarise(nasalprop = sum(velum2US)) %>%
  ungroup()
# Ratio between the above two for PERYIN
propcnas.peryin.df = left_join(propc.peryin.df, nasdur.df,
                               by = "rowind") %>%
  mutate(ratio = cprop/nasalprop)
# Plot of Tmin (Fig. 18)
# Boxplots of the voiced proportion of the vowel-offset-to-peak-raising
# interval, by dialect, coda and vowel.
n =48; k = 32; i = 28
#tmin = dtphys.df %>%
tmin = nt.df %>%
  filter(coda %in% c("nt", "nd")) %>%
  mutate(
    den = velumopening_maxvel_off -
      endv_time,
    num = Tminsmooth.peryin - endv_time,
    ratio = num/den) %>%
  filter(!is.na(ratio)) %>%
  rename(Dialect = language) %>%
  ggplot +
  aes(y = ratio, col = coda, x = Dialect) +
  geom_boxplot(lwd=lwd) +
  facet_wrap(~ vowel, ncol = 5) +
  geom_hline(yintercept = 1, lty=2) +
  geom_hline(yintercept = 0, lty=2) +
  ylab("Proportion") +
  xlab("") +
  theme_phon(lpos="top",
             atextx=i,
             atexty=k,
             atitlex=k,
             atitley=k,
             strip.text.x = k,
             ptitle=n,
             ltitle=element_blank(),
             othertext=n) +
  #coord_cartesian(ylim = c(-3.5, .5)) +
  # ggtitle("Log. proportion of nasalization in coda-/n/") +
  #theme_michigan3(n = 32) +
  theme(axis.text.y =
          element_text(angle = 90, vjust = 1, hjust=1)) +
  coord_cartesian(ylim = c(0, 2)) +
  scale_colour_manual(values = cols.coda)
tmin
# FIX: this ggsave() was fused onto the previous line (syntax error);
# the device argument is now named, for consistency with the other
# ggsave() calls (it was passed positionally).
ggsave(filename = file.path(pfadfigs2, "tmin.pdf"),
       plot = tmin,
       device = cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")
# Plot of the ratio for peryin (Fig. 19)
# Boxplots of the nasal-area ratio (Fig. 19) by dialect, coda and vowel.
n =48; k = 32; i = 28
peryinfig =
  propcnas.peryin.df %>%
  rename(Dialect = language) %>%
  mutate(vowel = factor(vowel,
                        levels = c("æ", "eɪ", "ʌ", "ɛ", "ɪ"))) %>%
  ggplot +
  aes(y = ratio, x = Dialect, col=coda) +
  geom_boxplot(lwd=lwd) +
  facet_wrap(~ vowel, ncol = 5) +
  ylab("Area (proportion)") +
  xlab("") +
  theme_phon(lpos="top",
             atextx=i,
             atexty=k,
             atitlex=k,
             atitley=k,
             strip.text.x = k,
             ptitle=n,
             ltitle=element_blank(),
             othertext=n) +
  theme(axis.text.y =
          element_text(angle = 90, vjust = 1, hjust=1)) +
  geom_hline(yintercept=.5, lty=2) +
  scale_colour_manual(values = cols.coda)
peryinfig
# FIX: this ggsave() was fused onto the previous line (syntax error)
ggsave(filename = file.path(pfadfigs2, "peryinfig.pdf"),
       plot = peryinfig,
       device=cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")
# Statistics for PERYIN. Remove values at exactly 1 since these are not
# admissible for a beta distribution (there are none at zero). This model
# predicts only the mean.
# Drop boundary values (the beta family requires ratios strictly in (0,1)).
p.df <- propcnas.peryin.df %>%
  filter(ratio != 1 & ratio != 0) %>%
  rename(Dialect = language)
# Beta-regression mixed model for the mean: Dialect x coda with by-speaker
# coda slopes and by-CV Dialect slopes.
m1 <- glmmTMB(ratio ~ Dialect * coda +
                (coda|speaker) +
                (Dialect|CV),
              family=beta_family(),
              data=p.df)
summary(m1)
## Family: beta ( logit )
## Formula: ratio ~ Dialect * coda + (coda | speaker) + (Dialect | CV)
## Data: p.df
##
## AIC BIC logLik -2*log(L) df.resid
## -1024.3 -970.1 523.1 -1046.3 1000
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## speaker (Intercept) 0.53901 0.7342
## codant 0.19323 0.4396 -0.54
## CV (Intercept) 0.54281 0.7368
## DialectUSE 0.05217 0.2284 0.06
## Number of obs: 1011, groups: speaker, 43; CV, 19
##
## Dispersion parameter for beta family (): 9.32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7092 0.2234 3.174 0.001503 **
## DialectUSE -0.8436 0.2445 -3.450 0.000561 ***
## codant -0.8832 0.1087 -8.126 4.45e-16 ***
## DialectUSE:codant 0.2361 0.1725 1.369 0.171067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## coda Dialect contrast odds.ratio SE df null z.ratio p.value
## nd . BRE / USE 2.32 0.568 Inf 1 3.450 0.0022
## nt . BRE / USE 1.84 0.403 Inf 1 2.770 0.0224
## . BRE nd / nt 2.42 0.263 Inf 1 8.126 <.0001
## . USE nd / nt 1.91 0.265 Inf 1 4.662 <.0001
##
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
# Add another regression equation for the precision parameter phi. AIC
# says that the model m2 with precision predictors is a better fit.
m2 <- update(m1,
             dispformula = ~ Dialect * coda
             + (coda | speaker)
             # + (1 | CV)
)
bbmle::AICtab(m1,m2)## dAIC df
## m2 0.0 17
## m1 146.9 11
## Family: beta ( logit )
## Formula: ratio ~ Dialect * coda + (coda | speaker) + (Dialect | CV)
## Dispersion: ~Dialect * coda + (coda | speaker)
## Data: p.df
##
## AIC BIC logLik -2*log(L) df.resid
## -1171.1 -1087.5 602.6 -1205.1 994
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev. Corr
## speaker (Intercept) 0.30389 0.5513
## codant 0.17298 0.4159 -0.23
## CV (Intercept) 0.51740 0.7193
## DialectUSE 0.02732 0.1653 0.14
## Number of obs: 1011, groups: speaker, 43; CV, 19
##
## Dispersion model:
## Groups Name Variance Std.Dev. Corr
## speaker (Intercept) 0.6329 0.7955
## codant 0.3742 0.6117 -0.92
## Number of obs: 1011, groups: speaker, 43
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7804 0.2002 3.899 9.66e-05 ***
## DialectUSE -0.8411 0.1870 -4.498 6.87e-06 ***
## codant -0.9714 0.1022 -9.504 < 2e-16 ***
## DialectUSE:codant 0.2571 0.1620 1.587 0.112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.2921 0.1726 13.279 <2e-16 ***
## DialectUSE 0.2887 0.2801 1.031 0.303
## codant 0.2690 0.1789 1.504 0.133
## DialectUSE:codant -0.4559 0.2834 -1.609 0.108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Then use emmeans as usual, making sure type = "response", i.e. the
## predictions are on the 0-to-1 scale, not on the logit scale.
## coda Dialect contrast odds.ratio SE df null z.ratio p.value
## nd . BRE / USE 2.32 0.434 Inf 1 4.498 <.0001
## nt . BRE / USE 1.79 0.377 Inf 1 2.778 0.0219
## . BRE nd / nt 2.64 0.270 Inf 1 9.504 <.0001
## . USE nd / nt 2.04 0.267 Inf 1 5.461 <.0001
##
## P value adjustment: bonferroni method for 4 tests
## Tests are performed on the log odds ratio scale
# Unique speaker and trial codes for 'saint'
wordtype = "saint"
talkerinfo = dt.df %>%
  filter(orth==wordtype) %>%
  mutate(speaker = as.character(speaker),
         speaker = substring(speaker, 2, nchar(speaker)),
         speaker = as.numeric(speaker)) %>%
  select(speaker, trialnumber, language) %>%
  distinct()
# Choose a row number between 1 and nrow(talkerinfo) to display aligned
# tracks for any 'saint' word (see above)
i = 10
# i = 23
speakerid = talkerinfo$speaker[i]
trialid = talkerinfo$trialnumber[i]
# logical index into the Emu query results (speaker/trial built earlier)
temp = speaker == speakerid & trial == trialid
physid = paste(speakerid, trialid, sep=".")
# Nasalization relative to the acoustic VN boundary
## Method
segment.s = vok.s[temp,]
segment.s = segment.s %>%
mutate(start = start -200,
end = end + 300)
segment.wav = get_trackdata(phys_db, segment.s, "MEDIAFILE_SAMPLES")##
## INFO: parsing 1 wav segments/events
## | | | 0% | |======================================================================| 100%
# time of periodic onset of the vowel in Emudatabase: 0.7880938 * 1000
acvowelon.t =
dtphys.df %>% filter(rowind == physid) %>%
pull(V2start) * 1000
acvowelon.t = unique(acvowelon.t)
# Change time scale in segment.wav to make the periodic onset in the
# vowel have a time of zero ms
segment.wav = segment.wav %>%
mutate(times_orig =times_orig -acvowelon.t)Alignment of phys. data relative to the acoustic vowel onset
phys.id.df = dtphys.df %>%
mutate(
zerot = Vperiodiconset,
reltime = 1000 * (time_in_sequence - zerot)) %>%
filter(rowind == physid)Alignment of waveform relative to the acoustic vowel onset
acoustic.id.df = dt.df %>%
mutate(
zerot = Vperiodiconset,
reltime = 1000 * (time_in_sequence - zerot)) %>%
filter(rowind == physid)Alignment of probability of voicing times rel. to acoustic vowel onset
aperyin.id.df = aperyin.df %>%
mutate(
zerot = Vperiodiconset,
Tmin.peryin = 1000 * (Tmin.peryin - zerot),
Tminsmooth.peryin = 1000 * (Tminsmooth.peryin - zerot)) %>%
filter(rowind == physid) %>%
summarise(Tminsmooth.peryin = unique(Tminsmooth.peryin),
Tmin.peryin = unique(Tmin.peryin)) %>%
ungroup()Some time markers
# acoustic vowel offset time
VNbound.id = phys.id.df %>%
mutate(endv_time = 1000 * (endv_time - zerot)) %>%
pull(endv_time) %>% unique()
# time of peak velocity of velum lowering
pkvellower.id =
phys.id.df %>%
mutate(velumopening_maxvel_on =
1000 * (velumopening_maxvel_on - zerot)) %>%
pull(velumopening_maxvel_on) %>% unique()
# time of velum lowering peak
pkvel.id =
phys.id.df %>%
mutate(velumopening_maxcon_on =
1000 * (velumopening_maxcon_on - zerot)) %>%
pull(velumopening_maxcon_on) %>% unique()
# Time of peak velocity of velum raising (ms, rel. to vowel onset).
# FIX: the markdown sentence fused onto the unique() line by extraction
# (a syntax error, continued on the next source line) is restored as a
# comment below.
pkvelraise.id =
  phys.id.df %>%
  mutate(velumopening_maxvel_off =
           1000 * (velumopening_maxvel_off - zerot)) %>%
  pull(velumopening_maxvel_off) %>% unique()
# Times at which prob_voice is <= 0.5 (2 elements, first is
# Tminsmooth.peryin, the second is Tmin.peryin)
# Same devoicing-time computation as aperyin.id.df above, but coerced to
# a plain numeric vector: element 1 = Tminsmooth.peryin, element 2 =
# Tmin.peryin (ms, rel. to the periodic vowel onset).
# FIX: two markdown headings fused onto/after the as.numeric() line by
# extraction (a syntax error) are restored as comments below.
aperyin.times = aperyin.df %>%
  mutate(
    zerot = Vperiodiconset,
    Tmin.peryin = 1000 * (Tmin.peryin - zerot),
    Tminsmooth.peryin = 1000 * (Tminsmooth.peryin - zerot)) %>%
  filter(rowind == physid) %>%
  summarise(Tminsmooth.peryin = unique(Tminsmooth.peryin),
            Tmin.peryin = unique(Tmin.peryin)) %>%
  ungroup() %>%
  as.numeric()
# Set the scale on the x-axis
# NOTE(review): `timeaxis` (used by the plots below) appears to have been
# defined in a chunk lost during extraction -- confirm.
# Data frames for drawing the areas
# Samples between the acoustic vowel offset (t2) and the first devoicing
# time (t3); the two chained filters are combined into one call
# (conditions in filter() are ANDed).
data1 = phys.id.df %>%
  filter(reltime >= VNbound.id, reltime <= aperyin.times[1])
# Samples between peak velocity of velum lowering (t1) and the acoustic
# vowel offset (t2); the two chained filters are combined into one call.
data2 = phys.id.df %>%
  filter(reltime <= VNbound.id, reltime >= pkvellower.id)
# FIX: the assignments below were fused onto the end of the filter() line
# by extraction (a syntax error); restored as separate statements.
# y-axis layout constants for the vertical marker lines and their labels
ystart = -.1
yend.id = .75
yend.idtext = yend.id + .02
# base text size (pt) for annotation labels in the plots below
textsize = 20
# slightly smaller size for y-axis text and titles
textsize_yaxis = textsize - 4
# Panel 1: velum-lowering trajectory for this token, time-aligned so that
# the periodic vowel onset is at reltime = 0, with two shaded areas and
# dashed/dotted vertical markers labelled t0..t4.
# NOTE(review): `timeaxis`, `lwd`, and `cols` are not defined in this
# chunk -- presumably set in an earlier (not shown) chunk; confirm.
nasal_trajectory = phys.id.df %>%
ggplot +
# y = velum position relative to its value at velum-opening gesture onset
aes(y = velum2US - velum2US_velumopening_gesture_onset,
x = reltime) +
coord_cartesian(xlim = timeaxis, ylim = c(-.05, .77)) +
geom_line() +
ylab("Size of velum lowering") +
# blue area (fill = 4): between acoustic vowel offset and devoicing time
geom_area(
aes(y = velum2US - velum2US_velumopening_gesture_onset,
x = reltime),
data = data1,
fill = 4,
alpha = 0.5,
color = 1,
lwd = 0.5,
linetype = 1) +
# orange area: between peak velocity of velum lowering and vowel offset
geom_area(
aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
data = data2,
fill = "orange",
alpha = 0.5,
color = 1,
lwd = 0.5,
linetype = 1) +
# NOTE(review): axis.title = element_blank() is set alongside
# axis.title.y = element_text(...) -- check which wins for the y title.
theme(
axis.text.x = element_blank(),
axis.text.y = element_text(size=textsize_yaxis),
axis.title.y = element_text(size=textsize_yaxis),
axis.title = element_blank(),
legend.title = element_blank(),
legend.position = "none") +
scale_colour_manual(values = cols) +
# acoustic vowel offset (t2).
# In each geom_segment below, the constant x/y/xend/yend parameters
# override the aes() mapping, so one identical segment is drawn per data
# row (harmless overplotting).
geom_segment(aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
x = VNbound.id, y = ystart, xend = VNbound.id,
yend = yend.id, lty=3, linewidth=1) +
annotate("text", x=VNbound.id, y=yend.idtext,
label="italic(t[2])",
parse=TRUE,
size=textsize/.pt) +
# pk. velocity of velum lowering (t1)
geom_segment(aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
x = pkvellower.id, y = ystart, xend = pkvellower.id,
yend = yend.id, lty=2, linewidth=lwd) +
annotate("text", x=pkvellower.id, y=yend.idtext,
label="italic(t[1])",
parse=TRUE,
size=textsize/.pt) +
# probability of voicing drops below threshold (t3, red solid line)
geom_segment(aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
x = aperyin.times[1], y = ystart, xend = aperyin.times[1],
yend = yend.id, lty=1, linewidth=1, col="red") +
annotate("text", x=aperyin.times[1], y=yend.idtext,
label="italic(t[3])",
parse=TRUE,
size=textsize/.pt) +
# pk velocity of velum raising (t4)
geom_segment(aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
x = pkvelraise.id, y = ystart, xend = pkvelraise.id,
yend = yend.id, lty=2, linewidth=lwd) +
annotate("text", x=pkvelraise.id, y=yend.idtext,
label="italic(t[4])",
parse=TRUE,
size=textsize/.pt) +
# acoustic vowel onset (t0 at reltime = 0)
geom_segment(aes(y = velum2US -
velum2US_velumopening_gesture_onset, x = reltime),
x = 0, y = ystart, xend = 0,
yend = yend.id, lty=3, linewidth=1) +
annotate("text", x=0, y=yend.idtext,
label="italic(t[0])",
parse=TRUE,
size=textsize/.pt)
# Panel 2: oscillogram (track T1) of the extracted wav snippet, aligned
# to the periodic vowel onset; no axis decoration (the bottom panel
# carries the shared time axis).
waveform = segment.wav %>%
  ggplot(aes(x = times_orig, y = T1)) +
  geom_line() +
  coord_cartesian(xlim = timeaxis, ylim = c(-12000, 12000)) +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank()) +
  # dotted markers at acoustic vowel onset (0 ms) and offset
  geom_vline(xintercept = c(0, VNbound.id),
             linetype = 3, linewidth = 1) +
  # solid red marker at the devoicing time
  geom_vline(xintercept = aperyin.times[1],
             linetype = 1, linewidth = 1, colour = "red")
# Panel 3: periodicity trajectory (1 - aperyin) over time, with the same
# vertical time markers as the panels above and a horizontal reference
# line at the 0.5 threshold.
probability_voice = acoustic.id.df %>%
  ggplot +
  aes(y = 1 - aperyin,
      x = reltime) +
  coord_cartesian(xlim = timeaxis) +
  theme(
    axis.text = element_text(size = textsize),
    axis.title.x = element_text(size = textsize),
    axis.text.y = element_text(size = textsize_yaxis),
    axis.title.y = element_text(size = textsize_yaxis),
    text = element_text(size = textsize)) +
  geom_line() +
  ylab("Periodicity") +
  xlab("Time (ms)") +
  # acoustic vowel onset and offset (dotted)
  geom_vline(xintercept = c(0, VNbound.id),
             linewidth = 1, lty = 3) +
  # devoicing time (solid red)
  geom_vline(xintercept = aperyin.times[1],
             lty = 1, linewidth = 1, col = "red") +
  # FIX: was `lindewidth` (typo), which ggplot2 warned about and silently
  # ignored; `linewidth` now actually sets the reference line width.
  geom_hline(yintercept = .5, lty = 1, linewidth = .5)
# Convert the three panels to grobs, stack them into a single column with
# gtable rbind, and draw on a fresh page. The trailing "##" lines are the
# knitr console printout of the resulting TableGrob.
gA <- ggplotGrob(nasal_trajectory)
gB <- ggplotGrob(waveform)
gC <- ggplotGrob(probability_voice)
grid::grid.newpage()
figalignprob = grid.arrange(rbind(gA, gB, gC))## TableGrob (1 x 1) "arrange": 1 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]