Load libraries and data:

# for time_in_sequence() function
library(tidyverse)   # dplyr/tidyr pipelines and ggplot2 plotting
library(gridExtra)   # grid.arrange() for arranging plots side by side
library(lmerTest)    # lmer() with Satterthwaite tests and step()
library(emmeans)     # estimated marginal means for fitted models
library(emuR)        # load_emuDB()/query() for the Emu speech database
library(glmmTMB)     # beta-family mixed models
library(bbmle)
# NOTE(review): emmeans is loaded a second time here -- harmless but redundant
library(emmeans)

#pfad = "/Volumes/vdata/ERC2/Nasals23/analysis"
# pfad = "V:/Nasals23/analysis"

 #pfad = "/Users/jmh/b/current/nt_nd/analysis"
 #pfademu = "/Users/jmh/b/current/nt_nd"

# directory containing the data files, and the Emu database root
pfad = "/Users/cunha/Documents/projects/MRI_E/language/submitted_v2/data_LMU/files"
pfademu = "/Users/cunha/Documents/projects/MRI_E/DB/version_oct2024"

# directory for saving figures
#pfadfigs = "/Volumes/vdata/ERC2/Nasals23/ntfigs"
#pfadfigs = "/Users/jmh/b/current/nt_nd/analysis/figs"
#pfadfigs2 = pfadfigs
# fixed: the path was missing its leading "/" and therefore resolved
# relative to the working directory instead of as an absolute path
pfadfigs2 = "/Users/cunha/Documents/projects/MRI_E/language/submitted_v2/figs2"

# loads df_erc_prenasalraising (per-sample time-series data)
load(file = file.path(pfad,       
"erc_mrinasals_concatsigs_prenasalraising_v3_voicing_prevowelVokalCodapostcoda_A_tdn__IEAVe__i_pbfszS_01.Rda"))

1 Preliminaries

1.1 keep only monosyllables

# keep monosyllables only, i.e. rows with no second vowel annotated ("_")
df_erc_prenasalraising = filter(df_erc_prenasalraising, secvow == "_")

1.2 Create a segment identifier

This is just speaker and trial number pasted together

# unique per-segment identifier: speaker id and trial number joined by "."
df_erc_prenasalraising = df_erc_prenasalraising  %>%
  mutate(rowind = paste(speaker, trialnumber, sep="."))

1.3 Change speaker id to a factor

# prefix the numeric speaker id with "S" and convert to a factor
df_erc_prenasalraising = df_erc_prenasalraising  %>%
  mutate(speaker = factor(paste0("S", speaker)))

1.4 remove S41

# drop speaker S41; re-applying factor() removes the now unused
# "S41" level from the speaker factor
df_erc_prenasalraising = df_erc_prenasalraising  %>%
  filter(speaker!="S41") %>%
  mutate(speaker = factor(speaker))

1.5 Identify nasality and change vowel labels

dt.df = df_erc_prenasalraising %>%
  # code the coda as nasal ("n") vs. oral ("o"): nasal iff the coda
  # starts with m, n, or N (SAMPA velar nasal)
  mutate(Nasality = 
           case_when(substring(coda, 1, 1) %in% c("m", "n", "N") ~ "n", 
                              TRUE ~ "o")) %>%
  # change SAMPA vowel labels to IPA
  # (fixed: "E" previously mapped to the mojibake string "É›", which
  #  could never match the factor level "ɛ" below and would have
  #  turned every /E/ vowel into NA)
  mutate(vowel = case_when(vowel == "{" ~ "æ",
                           vowel == "E" ~ "ɛ",
                           vowel == "V" ~ "ʌ",
                            vowel == "I" ~ "ɪ",
                           vowel == "ei" ~ "eɪ")) %>%
  
# change the factor order for vowels
  mutate(vowel = factor(vowel, 
                        levels = 
                          c("æ", "eɪ", "ʌ", "ɛ", "ɪ")))

table(dt.df$Nasality)
## 
##      n      o 
## 724228 333263
table(dt.df$vowel)
## 
##      æ     eɪ      ʌ      ɛ      ɪ 
## 213916 235366 189961 210871 207377

1.6 give orthographic labels

# read in word table
# (column 1: segment codes; column 2: orthographic word forms --
#  presumably; verify against the file itself)
words = read.table(file.path(pfad, "codeprosortho_list_corx.txt"))
# strip off final two characters in dt.df$labs
n = nchar(as.character(dt.df$labs))
w = substring(dt.df$labs, 1, n-2)
# match this to the first column of words
m = match(w, words[,1])
# check there are no NAs, i.e. every stripped label was found
any(is.na(m))
## [1] FALSE
# these are the word labels
wfin = words[m,2]
# bind them in to dt.df
dt.df$orth = factor(wfin)

# divide USE speakers in 3 and 4 regions 
table(dt.df$speaker)
## 
##    S1   S10   S11   S12   S13   S14   S15   S16   S17   S18    S2   S20   S21 
## 23860 21605 25493 25885 25575 25897 21329 24415 22544 18264 19352 22734 23653 
##   S22   S23   S24   S25   S26   S27   S28   S29    S3   S30   S31   S32   S33 
## 19413 24744 27563 28316 23121 22452 25289 28080 25359 25054 22787 26655 29123 
##   S34   S35   S36   S37   S38   S39    S4   S40   S42   S43   S44   S45    S5 
## 26483 23477 22702 24938 32066 28602 23464 27342 19538 26998 23416 22898 29201 
##    S6    S7    S8    S9 
## 27408 21213 23657 25526
# 3 US regions  - US_northeast, US_south, US_west
# (all remaining speakers are coded "BR")
dt.df= dt.df %>%
  mutate(region3 = recode (speaker, "S1" = "BR", "S10" = "BR","S11" = "BR","S12" = "BR","S13" = "BR",
        "S14" = "US_northeast","S15" = "BR","S16" = "BR","S17" = "BR","S18" = "BR",
        "S2" = "US_south","S20"="BR", "S21" = "BR","S22" = "BR","S23" = "BR","S24" = "US_northeast",
        "S25" = "US_west", "S26" = "BR", "S27" = "US_west", "S28" = "US_south","S29" = "BR",
        "S3" = "US_south","S30" = "US_northeast","S31" = "US_northeast","S32" = "BR","S33" = "BR",
        "S34" = "US_northeast","S35" = "US_northeast",
        "S36" = "BR","S37" = "BR","S38" = "BR","S39" = "BR","S4" = "US_west","S5" = "US_west",
        "S6" = "US_northeast","S7" = "US_west", "S8" = "BR","S9" = "BR",
        "S40" = "BR", "S42" = "US_west","S43" = "BR", "S44" = "BR","S45" = "BR"))
# split US_northeast (7/16  speakers)
# into US_Atlantic and US_midland to obtain roughly the same number of speakers per group
dt.df= dt.df %>%
    mutate(region4 = recode (speaker, "S1" = "BR", "S10" = "BR","S11" = "BR","S12" = "BR","S13" = "BR",
        "S14" = "US_Atlantic","S15" = "BR","S16" = "BR","S17" = "BR","S18" = "BR",
        "S2" = "US_south","S20"="BR", "S21" = "BR","S22" = "BR","S23" = "BR","S24" = "US_midland",
        "S25" = "US_west", "S26" = "BR", "S27" = "US_west", "S28" = "US_south","S29" = "BR",
        "S3" = "US_south","S30" = "US_Atlantic","S31" = "US_midland","S32" = "BR","S33" = "BR",
        "S34" = "US_Atlantic","S35" = "US_midland",
        "S36" = "BR","S37" = "BR","S38" = "BR","S39" = "BR","S4" = "US_west","S5" = "US_west",
        "S6" = "US_midland","S7" = "US_west", "S8" = "BR","S9" = "BR",
        "S40" = "BR", "S42" = "US_west","S43" = "BR", "S44" = "BR","S45" = "BR"))
   table(dt.df$region3)
## 
##           BR US_northeast     US_south      US_west 
##       664638       178669        70000       144184
   table(dt.df$region4)
## 
##          BR US_Atlantic    US_south  US_midland     US_west 
##      664638       77434       70000      101235      144184

1.7 Read in kinematic markers for nasalisation

# load the per-segment kinematic marker table; this re-uses the name
# df_erc_prenasalraising (one row per segment: 12415 x 174)
load(file.path(pfad, 
"erc_mrinasals_allsubjects_artaku_allsegandsig_table_v2_rsnonan_addlang_addtt_sig.Rda"))
dim(df_erc_prenasalraising)
## [1] 12415   174
# code for segment identification, as before

df_erc_prenasalraising = df_erc_prenasalraising %>%
  mutate(rowind = paste(speaker, trialnumber, sep="."))


# keep only the velum-opening time landmarks, the velum2US* measures,
# the acoustic vowel onset, and the join key
df_erc = df_erc_prenasalraising %>%
  select(velumopening_gesture_on, 
         velumopening_gesture_off,
         velumopening_maxvel_on,
         velumopening_maxvel_off,
         velumopening_maxcon_on,
         velumopening_nucleus_on,
         velumopening_nucleus_off,
         velumopening_nucleus_dur,
         # velum2USV_* = velocities and velum2US_* = positions at the
         # landmarks -- presumably; confirm against the database docs
         velum2USV_velumopening_maxvel_onset,
         velum2USV_velumopening_maxvel_offset,
         velum2US_velumopening_maxcon_onset,
         velum2US_velumopening_nucleus_midpoint,
         velum2US_velumopening_gesture_onset,
         Vokal_on,
         rowind
         ) 

# join to dt.df
dt.df = left_join(dt.df, df_erc, 
             by = "rowind")
rm(df_erc, df_erc_prenasalraising)

# obtain time series offset: for each segment, the time of the first
# "Vokal" sample, i.e. the acoustic vowel onset on the concatenated
# time-series scale
timeseriesoffset = dt.df %>% 
group_by (rowind) %>%
filter(segment == "Vokal") %>%
slice_head(n = 1) %>%
select(time_in_sequence, rowind) %>%
  rename(timeseriesoffset = time_in_sequence) %>%
ungroup()

# join to dt.df (one offset value per segment, replicated per sample)
dt.df = left_join(dt.df, timeseriesoffset, by = "rowind")

# Move the velum time landmarks onto the time axis of the concatenated
# signals: lineuptime is the offset between the two time scales and is
# subtracted from every velum time landmark (the duration column and
# the velum2US* measures are deliberately left untouched).
dt.df = dt.df %>%
  mutate(lineuptime = Vokal_on - timeseriesoffset) %>%
  mutate(across(all_of(c("velumopening_gesture_on",
                         "velumopening_gesture_off",
                         "velumopening_maxvel_on",
                         "velumopening_maxvel_off",
                         "velumopening_maxcon_on",
                         "velumopening_nucleus_on",
                         "velumopening_nucleus_off")),
                ~ .x - lineuptime))

1.8 Read in kinematic markers for tongue tip

load(file.path(pfad,
"erc_mrinasals_allsubjects_artaku_allsegandsig_table_v2_rsnonan_addlang_addtt_sig.Rda"))
dim(df_erc_prenasalraising)
## [1] 12415   174
# code for segment identification, as before
# (fixed: this mutate was accidentally written twice in a row; the
#  second pass recomputed the identical rowind column and was removed)
df_erc_prenasalraising = df_erc_prenasalraising %>%
  mutate(rowind = paste(speaker, trialnumber, sep="."))
# keep only the tongue-tip (alveolar constriction) landmarks, the
# displacement / velocity measures, the acoustic vowel onset, and the
# join key
df_erc = df_erc_prenasalraising %>%
  select(alveolarconstriction_maxcon_on, 
alveolarconstriction_nucleus_on,
alveolarconstriction_nucleus_off,
alveolarconstriction_maxvel_on,
alveolarconstriction_maxvel_off,
alveolarconstriction_nucleus_dur,
alveolarconstriction_maxvel_dur,
# displacements
alvUS_alveolarconstriction_maxvel_onset,
alvUS_alveolarconstriction_maxvel_offset,
alvUS_alveolarconstriction_maxcon_onset,
# position at gesture onset
alvUS_alveolarconstriction_gesture_onset,
# velocities
alvUSV_alveolarconstriction_maxvel_onset,
alvUSV_alveolarconstriction_maxvel_offset,
Vokal_on,
rowind) 

# left join to dt.df
dt.df = left_join(dt.df, df_erc, 
             by = "rowind")
rm(df_erc, df_erc_prenasalraising)

# subtract lineuptime: move the tongue-tip time landmarks onto the
# same time axis as the concatenated signals (the duration columns
# are deliberately left untouched)

dt.df = dt.df %>%
  mutate(across(all_of(c("alveolarconstriction_maxcon_on",
                         "alveolarconstriction_nucleus_off",
                         "alveolarconstriction_nucleus_on",
                         "alveolarconstriction_maxvel_on",
                         "alveolarconstriction_maxvel_off")),
                ~ .x - lineuptime))

1.9 Acoustic times

# per segment: endv_time = time of the last "Vokal" sample (the
# acoustic vowel offset); midv_time = temporal midpoint of the vowel
times.df = dt.df %>%
  group_by(rowind) %>%
  filter(segment == "Vokal") %>%
  slice_tail(n=1) %>%
  mutate(endv_time = time_in_sequence,
         midv_time = (endv_time+timeseriesoffset)/2) %>%
  select(rowind, midv_time, endv_time) %>%
  ungroup()
# now join this to dt.df
dt.df = left_join(dt.df, times.df, 
             by = "rowind")

df has 4 columns of acoustic segmentation times that were obtained from the database ercnasals_audio_emuDB with the Emu script given at the end of this document. The four columns in periodicV.df.txt are:

  • Vstart: start of the vowel as defined in the database tier Vokal
  • Vend: end of the vowel as defined in the database tier Vokal
  • V2start: acoustic onset of periodicity of the vowel, extracted from the database tier vowelfor
  • V2end: end of the vowel as defined in the database tier vowelfor
# acoustic segmentation times exported from the Emu database; note
# that the periodicity-onset column is named V2start in this file
df = read.table(file.path(pfad, "periodicV.df.txt"))
# make rowind match with that of dt.df
# (drop the leading "T." prefix that was added when the ids were
#  written out as character strings -- see the `id` construction below)
df$rowind = as.character(substring(df$rowind, 3, 
                                   nchar(df$rowind)))
# join to dt.df
dt.df = left_join(dt.df, df, by="rowind")

Verify that Vend - Vstart is the same as endv_time - timeseriesoffset. They are except for a 2 ms difference on average (i.e. a rounding error).

# dt.df %>%
# mutate(v1 = Vend - Vstart, 
# v2 = endv_time - timeseriesoffset, 
#d = v1 - v2) %>%
# summarise(mean(d), sd(d))

Now define the onset of periodicity in relation to timeseriesoffset. To do this, subtract Vstart from V2start then add that to timeseriesoffset

dt.df = dt.df %>% 
  # this is the duration from the start of the vowel to the beginning of periodicity
  mutate(d = V2start - Vstart, 
         # add this duration to `timeseriesoffset` which is also the start of the vowel but on a different time scale
         Vperiodiconset = timeseriesoffset + d)

Verify for any segment that Vperiodiconset corresponds to the start of the vowel in the vowelfor tier. This is done for segment 1.51 which is a production of ‘pant’. This says that the duration from the start of the vowel to the onset of periodicity is 0.1065625 s.

#speaker.i = 1
#trial.i = 51
#i = paste(speaker.i, trial.i, sep=".")
#dt.df %>%
#  filter(rowind %in% i) %>%
#  mutate(d = Vperiodiconset - timeseriesoffset) %>% 
#  pull(d) %>% unique()

Get the corresponding bundle from the Emu database. This matches the above time exactly.

#temp = (speaker == speaker.i) & (trial == trial.i)
#(v.s$start[temp] - vok.s$start[temp])/1000

Add an additional column, Vowelonset2 which has the same start times as timeseriesoffset for /p, b/ initial words, otherwise Vperiodiconset

# Vowelonset2: for /p, b/-initial words use the acoustic vowel onset
# (timeseriesoffset); otherwise use the onset of periodicity
dt.df = dt.df %>%
  mutate(Vowelonset2 = 
    ifelse(onset %in% c("p", "b"), 
           timeseriesoffset, Vperiodiconset))

Are there any missing values in the times of the velum gesture? Yes, 12 of them:

  # ids of /n, nd, nz/-coda segments missing any of the three velum
  # velocity / constriction landmarks
  gaps = dt.df %>%
  filter(coda %in% c("n", "nd", "nz")) %>%
  filter(segment == "Vokal") %>%
  group_by(rowind) %>%
  slice_tail(n=1) %>%
  filter(is.na(velumopening_maxcon_on) |
           is.na(velumopening_maxvel_off) | 
           is.na(velumopening_maxvel_on)) %>%
  pull(rowind)
gaps
##  [1] "14.259" "24.48"  "29.22"  "3.299"  "3.325"  "30.261" "34.131" "38.147"
##  [9] "38.216" "38.257" "39.136" "40.49"
# remove these segments from further consideration
dt.df = dt.df %>%
  filter(!rowind %in% gaps) 

Add CV variable

# CV = onset x vowel combination coded as a single factor
dt.df = mutate(dt.df, CV = interaction(onset, vowel))

1.10 data-frame of tongue tip data

# ids of nasal-coda segments whose tongue-tip nucleus landmarks are
# both present (one row per segment before pulling the ids)
tt.rowind = dt.df %>%
  filter(Nasality == "n") %>%
  filter(segment == "Vokal") %>%
  group_by(rowind) %>%
  slice_tail(n=1) %>%
  ungroup() %>%
  # choose ones that have values for the kinematic events
  filter(!is.na(alveolarconstriction_nucleus_on)) %>%
  filter(!is.na(alveolarconstriction_nucleus_off)) %>%
  pull(rowind)

tt.df = dt.df %>%
  filter(rowind %in% tt.rowind)

# number of segments: 1438
tt.df %>% pull(rowind) %>% n_distinct()
## [1] 1438
# re-applying factor() drops the levels left unused by the subsetting
tt.df$orth = factor(tt.df$orth)
tt.df$onset = factor(tt.df$onset)
tt.df$vowel= factor(tt.df$vowel)
tt.df$coda = factor(tt.df$coda)

2 Acoustic data from Emu database

## Emu database: query the Vokal and vowelfor tiers and build an id
## that matches dt.df's rowind
phys_db = load_emuDB(file.path(pfademu, 
                               "ercnasals_audio_emuDB"))
## INFO: Checking if cache needs update for 44 sessions and 13577 bundles ...
## INFO: Performing precheck and calculating checksums (== MD5 sums) for _annot.json files ...
## INFO: Nothing to update!
## All segments of Vokal tier
vok.s = query(phys_db, "Vokal =~.*")
## as above without empty "" labels
vok.s = vok.s[vok.s$labels!="",]
## All segments from vowelfor tier
v.s = query(phys_db, "vowelfor =~.*")
## as above without empty "" labels
v.s = v.s[v.s$labels!="",]
## Test matching bundles
all(vok.s$bundle == v.s$bundle)
## [1] TRUE
## get speaker-id
## NOTE(review): the character positions (20, 22) and the trailing
## 4-character trial id below are hard-coded to this database's bundle
## naming scheme -- confirm if the bundle names ever change
speaker = substring(v.s$bundle, 20, 22)
speaker = substring(speaker, 2, nchar(speaker))
speaker = as.numeric(speaker)
## get Trial-id
trial = as.numeric(substring(v.s$bundle, 
                             nchar(v.s$bundle)-3, 
                           nchar(v.s$bundle)))
##paste speaker.id and trial.id together
## to turn it into a character(!). This is needed
## so that a character vector is stored and then
## a character vector is read back in
id = paste("T", 
          paste(as.character(speaker), 
               as.character(trial), sep="."), sep=".")

3 Themes

# ggplot theme: set the axis tick labels, both axis titles, and all
# remaining text to a single size n (default 24)
theme_michigan1 <- function(n = 24) {
  sized <- element_text(size = n)
  theme(
    axis.text = sized,
    axis.title.x = sized,
    axis.title.y = sized,
    text = sized
  )
}
# ggplot theme with separately adjustable text sizes.
# atextx/atexty: axis tick-label sizes; atitlex/atitley: axis title
# sizes; ptitle: plot title; strip.text.x: facet strip; lpos: legend
# position; othertext: all remaining text; ltitle: legend title
# (callers pass element_blank() here -- the numeric default is not a
#  valid theme element and only works because it is always overridden).
theme_phon = 
function(atextx = 18, atexty = 18, atitlex=
                 18, atitley = 18, ptitle=24, strip.text.x = 24,
             lpos = "bottom", othertext=24, ltitle =24){
        # e.g. atexty = element_blank()
        theme(
            axis.text.x = element_text(size=atextx),
            # fixed: axis.title.x previously used atitley, so the
            # atitlex argument was silently ignored (existing calls
            # pass atitlex == atitley, so their output is unchanged)
            axis.title.x = element_text(size=atitlex),
            axis.text.y = element_text(size=atexty),
            axis.title.y = element_text(size=atitley),
            plot.title = element_text(size = ptitle),
            legend.position=lpos,
            legend.title = ltitle,
            strip.text.x =element_text(size=strip.text.x),
            text = element_text(size=othertext))
    }
theme_michigan2 <-
function(n = 24){
    theme(
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(size=24),
        plot.title = element_text(size = 32),
        legend.position="top",
        text = element_text(size=n))
}
theme_michigan2apaper <-
function(n = 24, legend.pos="none", k = 18){
    theme(
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(size=k),
axis.title.y = element_text(size=k),
        plot.title = element_text(size = n),
        legend.position=legend.pos,
        text = element_text(size=n))
}
# ggplot theme: hide the legend, fix the y tick-label size (24) and
# the plot-title size (32); all remaining text at size n
theme_michigan3 <- function(n = 24) {
  theme(
    legend.position = "none",
    axis.text.y = element_text(size = 24),
    plot.title = element_text(size = 32),
    text = element_text(size = n)
  )
}
theme_michigan5 = 
function(n = 18, m = 32, i = 20){
    theme(    
        axis.text = element_text(size=n),
    axis.title = element_text(size=i),

        plot.title = element_text(size = m),
        legend.position="top",
        text = element_text(size=n))
}
theme_michigan6nt = 
function(n = 32){
    theme(
        legend.position = "top",
        strip.text.x = element_text(size = 32),
        plot.title = element_text(size = 36),
        axis.title.x = element_text(size=n),
        axis.text.x = element_text(size=24),
        axis.text.y = element_text(size=24), 
legend.title= element_blank(),
        text = element_text(size=n))
}
# NOTE(review): this is a verbatim re-definition of theme_phon from
# earlier in the script; it silently overwrites the earlier one and
# could be deleted. Same fix applied here: axis.title.x now uses
# atitlex (it previously used atitley, ignoring the atitlex argument;
# existing calls pass atitlex == atitley, so output is unchanged).
theme_phon = 
function(atextx = 18, atexty = 18, atitlex=
                 18, atitley = 18, ptitle=24, strip.text.x = 24,
             lpos = "bottom", othertext=24, ltitle =24){
        # e.g. atexty = element_blank()
        theme(
            axis.text.x = element_text(size=atextx),
            axis.title.x = element_text(size=atitlex),
            axis.text.y = element_text(size=atexty),
            axis.title.y = element_text(size=atitley),
            plot.title = element_text(size = ptitle),
            legend.position=lpos,
            legend.title = ltitle,
            strip.text.x =element_text(size=strip.text.x),
            text = element_text(size=othertext))
    }
cols.coda = c("orange", "darkgreen")
cols.dial = c("red", "blue")
lwd = 1.5

4 Probability of voicing (peryin) and SPL

# line-up relative to acoustic vowel offset; reltime in ms, rounded
# to the nearest 5 ms
x.df = dt.df %>%
    filter(coda %in% c("nt", "nd")) %>%
  mutate(reltime = 
           plyr::round_any(1000 * (time_in_sequence - endv_time), 5))

# obtain mean times of peak velum lowering velocity
# relative to acoustic vowel offset
# NOTE(review): velumopening_maxvel_off is treated in section 5 as the
# peak velum *raising* velocity -- confirm "lowering" in this comment
pkvel.clos.t = x.df %>%
  rename(Dialect = language) %>%
  group_by(vowel, coda, Dialect) %>%
  summarise(m1 = mean(velumopening_maxvel_off -  
                       endv_time, na.rm=T) * 1000) %>%
 ungroup()
## `summarise()` has grouped output by 'vowel', 'coda'. You can override using the
## `.groups` argument.
# aggregates of the three parameters between -50 and 200 ms;
# SPL is averaged in the linear domain before converting to dB
voice.df = x.df %>%
  filter(reltime > -50 & reltime < 200) %>%
group_by(language, coda, vowel, reltime)  %>%
    summarise(
      pefac = mean(probvoicepefac), 
      peryin = mean(1-aperyin), 
      SPL = 20 * log(mean(SPL_LP), base=10)) %>%
  ungroup() %>%
  rename(Dialect = language) 
## `summarise()` has grouped output by 'language', 'coda', 'vowel'. You can
## override using the `.groups` argument.

Aggregates per observation between acoustic vowel offset and time of peak velocity of velum lowering

# per-segment means of the three voicing parameters, computed over
# the window from the acoustic vowel offset (endv_time) to
# velumopening_maxvel_off; SPL is averaged in the linear domain
# before converting to dB
acmean.df = dt.df %>%
  rename(Dialect = language) %>%
  filter(coda %in% c("nt", "nd")) %>%
  filter(time_in_sequence >= endv_time) %>%
  filter(time_in_sequence <= velumopening_maxvel_off) %>%
  group_by(rowind, vowel, coda, Dialect, onset, speaker, CV) %>%
  summarise(
    pefac = mean(probvoicepefac),
    SPL = 20 * log(mean(SPL_LP), base=10),
    peryin = mean(1-aperyin)) %>%
  ungroup()
## `summarise()` has grouped output by 'rowind', 'vowel', 'coda', 'Dialect',
## 'onset', 'speaker'. You can override using the `.groups` argument.

4.1 Probability of voicing with peryin (plot)

 peryin1 = voice.df %>%
    ggplot + 
    aes(y = peryin, x = reltime, col = coda, 
        group = interaction(Dialect, coda, vowel)) +
  geom_line(linewidth=lwd) +
  scale_x_continuous("Time (ms)", breaks = c(0, 200)) +
    ylab("Probability of voicing") +
    facet_wrap(Dialect ~ vowel, ncol=5) +
    geom_vline(xintercept=0, lty=2, linewidth=1.5) +
  geom_vline(data = pkvel.clos.t, 
             aes(xintercept=m1,  col = coda),  
              lty=3,linewidth=lwd) +
 theme_michigan6nt() +
  scale_colour_manual(values = cols.coda)
peryin1

ggsave(filename = file.path(pfadfigs2, "peryin1.pdf"), 
      plot = peryin1, 
      device=cairo_pdf,
       width = 45, 
       height = 30, 
      units = "cm")

4.1.1 Statistics

acmean.df %>%
  lmer(peryin ~ coda  * Dialect + 
        (coda  | speaker) + (Dialect | CV),  .) %>%
step()
## Backward reduced random-effect table:
## 
##                           Eliminated npar logLik     AIC     LRT Df Pr(>Chisq)
## <none>                                 11 1235.1 -2448.2                      
## coda in (coda | speaker)           0    9 1119.2 -2220.3 231.861  2  < 2.2e-16
## Dialect in (Dialect | CV)          0    9 1227.4 -2436.7  15.489  2  0.0004331
##                              
## <none>                       
## coda in (coda | speaker)  ***
## Dialect in (Dialect | CV) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##              Eliminated  Sum Sq Mean Sq NumDF  DenDF  F value  Pr(>F)    
## coda:Dialect          1 0.01560 0.01560     1 42.204   3.1035 0.08537 .  
## Dialect               2 0.00014 0.00014     1 42.852   0.0278 0.86829    
## coda                  0 1.36988 1.36988     1 45.140 272.7404 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## peryin ~ coda + (coda | speaker) + (Dialect | CV)
# model found
acmean.df %>%
  lmer(peryin ~ coda + (coda | speaker) + (Dialect | CV),  .) %>%
 anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##      Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## coda 1.3699  1.3699     1 45.14  272.74 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans:
acmean.df %>%
  lmer(peryin ~ coda + (coda | speaker) + (Dialect | CV),  .) %>%
  emmeans(., ~coda)
##  coda emmean     SE   df lower.CL upper.CL
##  nd    0.820 0.0220 48.4    0.776    0.864
##  nt    0.578 0.0197 51.1    0.538    0.617
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

4.2 SPL

Plot

SPL1 = 
voice.df %>%
    ggplot + 
    aes(y = SPL, x = reltime, col = coda, 
        group = interaction(Dialect, coda, vowel)) +
  geom_line(linewidth=lwd) +
  scale_x_continuous("Time (ms)", breaks = c(0, 200)) +
    ylab("dB SPL") +
    facet_wrap(Dialect ~ vowel, ncol=5) +
    geom_vline(xintercept=0, lty=2, linewidth=1.5) +
  geom_vline(data = pkvel.clos.t, 
             aes(xintercept=m1,  col = coda),  
              lty=3,linewidth=lwd) +
 theme_michigan6nt() +
  scale_colour_manual(values = cols.coda)
SPL1

ggsave(filename = file.path(pfadfigs2, "SPL1.pdf"), 
      plot = SPL1, 
      device=cairo_pdf,
       width = 45, 
       height = 30, 
      units = "cm")

4.2.1 Statistics

acmean.df %>%
 lmer(SPL ~ coda * Dialect + 
         (coda | speaker) + (Dialect | CV),  .) %>%
  step()
## Backward reduced random-effect table:
## 
##                           Eliminated npar  logLik    AIC     LRT Df Pr(>Chisq)
## <none>                                 11 -2652.1 5326.2                      
## coda in (coda | speaker)           0    9 -2723.9 5465.9 143.698  2  < 2.2e-16
## Dialect in (Dialect | CV)          0    9 -2657.5 5333.0  10.847  2   0.004413
##                              
## <none>                       
## coda in (coda | speaker)  ***
## Dialect in (Dialect | CV) ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##              Eliminated  Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## coda:Dialect          1   2.448   2.448     1 41.470  0.5799    0.4506    
## Dialect               2   0.033   0.033     1 41.467  0.0078    0.9298    
## coda                  0 292.433 292.433     1 46.552 69.3012 9.011e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## SPL ~ coda + (coda | speaker) + (Dialect | CV)
# model found
acmean.df %>%
lmer(SPL ~ coda + (coda | speaker) + (Dialect | CV), .) %>%
anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##      Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## coda 292.43  292.43     1 46.552  69.301 9.011e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Emmeans
acmean.df %>%
lmer(SPL ~ coda + (coda | speaker) + (Dialect | CV), .) %>%
  emmeans(., ~coda)
##  coda emmean   SE   df lower.CL upper.CL
##  nd     51.2 1.18 44.5     48.8     53.6
##  nt     48.3 1.10 45.2     46.1     50.5
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

5 Find time point when probability of voicing first decays to 0.5

The function determines the time point, tp, between the peak velum velocity times (lowering and raising) at which the probability of voicing first falls below 0.5. If the probability of voicing is still above 0.5 at the time point of peak velum raising velocity, then tp is set equal to the time point of peak velum raising velocity.

5.1 Function

# function to test whether a signal, y, falls below a threshold
fmin = function(x, y, p =.5)
{
  # if y does not fall below p, then
  # return the last value of y
 if(!any(y <=p))
    v5 = x[length(x)]
 # otherwise the first value when y = p
  else
    v5 = x[y <=p][1]
  v5
}

5.2 Applied to peryin

# per segment: time-normalise 1-aperyin (probability of voicing) over
# the window from the acoustic vowel offset to 100 ms past
# velumopening_maxvel_off, then find the time at which it first falls
# to 0.5, in both the smoothed and the raw trajectory
aperyin.df  = dt.df %>%
    filter(coda %in% c("nt", "nd"), 
           time_in_sequence >= endv_time, 
           time_in_sequence <= velumopening_maxvel_off+100) %>% 
  group_by(rowind, endv_time, Vperiodiconset) %>%
  # linear time-normalisation to 100 points
  reframe(T1n = approx(time_in_sequence, 
                       1-aperyin, n = 100)$y, 
          time_in_sequence = approx(time_in_sequence, 
                         1-aperyin, n = 100)$x,
          # apply lowess smoothing (smoother span f = .3)
          T1smooth = lowess(time_in_sequence, T1n, .3)$y,
          # find the time at which the signal is first less
          # than p in the lowess-smoothed data...
          Tminsmooth.peryin = fmin(time_in_sequence, T1smooth),
          # and in the raw data
          Tmin.peryin = fmin(time_in_sequence, T1n)) %>%
  ungroup()

Are there any times for which endv_time > Tminsmooth.peryin? No.

aperyin.df %>% filter(endv_time > Tminsmooth.peryin)
## # A tibble: 0 × 8
## # ℹ 8 variables: rowind <chr>, endv_time <dbl>, Vperiodiconset <dbl>,
## #   T1n <dbl>, time_in_sequence <dbl>, T1smooth <dbl>, Tminsmooth.peryin <dbl>,
## #   Tmin.peryin <dbl>
# choose a segment number
j = 46
# find the time when the signal is less
# than p in the lowess smoothed data
x1 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  pull(Tminsmooth.peryin) %>% unique()
# and in the raw data
x2 = aperyin.df %>%
  filter(rowind == unique(rowind)[j]) %>%
  pull(Tmin.peryin) %>% unique()


a1 = aperyin.df %>%
   filter(rowind == unique(rowind)[j]) %>%
  ggplot +
  aes(y = T1smooth, x = time_in_sequence) +
  geom_line() +
  geom_hline(yintercept=.5) +
  geom_vline(xintercept=x1, col=2)

a2 = aperyin.df %>%
   filter(rowind == unique(rowind)[j]) %>%
  ggplot +
  aes(y = T1n, x = time_in_sequence) +
  geom_line() +
  geom_hline(yintercept=.5) +
  geom_vline(xintercept=x2, col=3)

grid.arrange(a1, a2, ncol=2)

Read in the physiology data containing velum and tongue signals. We need this because dt.df was derived from a Praat database that did not contain any physiological signals.

# per-sample physiological time series (velum and tongue signals)
dtphys.df = read.table(file.path(pfad, "dt.df.txt"))
# rowind must be character to match dt.df's rowind for joining
dtphys.df$rowind = as.character(dtphys.df$rowind)

Extract /nt, nd/

# keep only segments with /nt/ or /nd/ codas
nt.df = dtphys.df %>% 
  filter(coda %in% c("nd", "nt")) 

These are the unique rows of aperyin.df. We only need unique rows because we only want to extract one time value per segment.

# one row per segment (aperyin.df holds 100 time-normalised rows per
# segment, but the threshold times are constant within a segment)
aperyin.short.df = aperyin.df %>%
group_by(rowind) %>%
 slice_head(n = 1) 

inner join aperyin.short.df to nt.df.

# attach the two voicing-threshold times to the physiological data
nt.df = inner_join(nt.df, aperyin.short.df %>%
                    select(rowind, Tminsmooth.peryin, Tmin.peryin), 
             by = "rowind")

Replace Tminsmooth.peryin such that the right boundary is equal to velumopening_maxvel_off if Tminsmooth.peryin > velumopening_maxvel_off. Also remove any segments for which endv_time is not later than velumopening_maxvel_on.

# cap the threshold time at velumopening_maxvel_off, and keep only
# segments whose acoustic vowel offset follows velumopening_maxvel_on
nt.df = nt.df %>%
  mutate(Tminsmooth2 =
           ifelse(Tminsmooth.peryin > velumopening_maxvel_off, 
                  velumopening_maxvel_off, Tminsmooth.peryin)) %>% 
  filter(endv_time > velumopening_maxvel_on) 

This is the area sum under the nasal curve between the acoustic vowel offset and time at which pvoice < 0.5 for PERYIN

# area sum under the velum-opening signal (velum2US, re-zeroed at the
# position at gesture onset) between the acoustic vowel offset and
# the capped time at which voicing first falls below 0.5
propc.peryin.df = nt.df %>%
  mutate(velum2US = 
           velum2US - velum2US_velumopening_gesture_onset) %>%
    filter(time_in_sequence >= endv_time) %>%
  filter(time_in_sequence <= Tminsmooth2) %>%
  group_by(rowind, vowel, coda, language, onset, speaker, CV) %>%
  summarise(cprop = sum(velum2US)) %>%
  ungroup()
## `summarise()` has grouped output by 'rowind', 'vowel', 'coda', 'language',
## 'onset', 'speaker'. You can override using the `.groups` argument.

This is the sum of nasalisation under the curve between the times of peak velum lowering and raising velocities.

# total area sum under the (re-zeroed) velum-opening signal between
# the times of peak velum lowering and raising velocities; this is
# the denominator of the nasalisation ratio below
nasdur.df = nt.df %>%
  mutate(velum2US = 
           velum2US - velum2US_velumopening_gesture_onset) %>%
filter(time_in_sequence >= velumopening_maxvel_on) %>%
 filter(time_in_sequence <= velumopening_maxvel_off) %>%
  group_by(rowind) %>%
  summarise(nasalprop = sum(velum2US)) %>%
  ungroup()

This is the ratio between the above two for PERYIN

# ratio = proportion of the full nasal gesture area that falls within
# the voiced stretch following the acoustic vowel offset
propcnas.peryin.df = left_join(propc.peryin.df, nasdur.df, 
             by = "rowind") %>%
  mutate(ratio = cprop/nasalprop)

Plot of Tmin (Fig. 18)

# Fig. 18: boxplots of Tmin for nt/nd codas. For each token, `ratio`
# expresses the voicing-offset time (Tminsmooth.peryin) as a proportion of
# the interval from the acoustic vowel offset (endv_time) to the peak
# velocity of velum raising (velumopening_maxvel_off): 0 = at the vowel
# offset, 1 = at peak raising velocity.
# n, k, i are text sizes passed to theme_phon() — a custom theme defined
# elsewhere in this project, not shown here.
n =48; k = 32; i = 28
#tmin = dtphys.df %>%
tmin = nt.df %>%
filter(coda %in% c("nt", "nd")) %>%
  mutate(
    # denominator: vowel offset -> peak raising velocity
    den = velumopening_maxvel_off -
           endv_time, 
    # numerator: vowel offset -> voicing-offset time
    num = Tminsmooth.peryin - endv_time,
    ratio = num/den) %>%
  filter(!is.na(ratio)) %>%
  rename(Dialect =  language) %>%
  ggplot +
  aes(y = ratio, col = coda, x = Dialect) +
     # NOTE(review): `lwd` is a global set elsewhere in this file — confirm
     geom_boxplot(lwd=lwd) +
  facet_wrap(~ vowel, ncol = 5) +
  # dashed reference lines at the two endpoints of the interval (0 and 1)
  geom_hline(yintercept = 1, lty=2) +
  geom_hline(yintercept = 0, lty=2) + 
  ylab("Proportion") +
  xlab("") +
  theme_phon(lpos="top",
             atextx=i,
             atexty=k,
             atitlex=k,
             atitley=k,
             strip.text.x = k,
             ptitle=n,
             ltitle=element_blank(),
             othertext=n) +
#coord_cartesian(ylim = c(-3.5, .5)) +
# ggtitle("Log. proportion  of nasalization in coda-/n/") +
#theme_michigan3(n = 32) +
  theme(axis.text.y = 
element_text(angle = 90, vjust = 1, hjust=1)) +
  # zoom only (coord_cartesian clips the view without dropping data)
  coord_cartesian(ylim = c(0, 2)) + 
scale_colour_manual(values = cols.coda)
tmin

# Save Fig. 18 with the cairo_pdf device (needed to embed the IPA glyphs
# in the facet labels).
# FIX: `cairo_pdf` was passed positionally; it happened to land on the
# `device` argument by position, but naming it makes the call robust and
# consistent with the other ggsave() calls in this file.
ggsave(filename = file.path(pfadfigs2, "tmin.pdf"),
       plot = tmin,
       device = cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")

Plot of the ratio for peryin (Fig. 19)

# Fig. 19: boxplots of the PERYIN area ratio (propcnas.peryin.df$ratio,
# the proportion of the nasal gesture falling after the acoustic vowel
# offset), by Dialect and coda, faceted by vowel.
# n, k, i are text sizes for theme_phon() (custom theme defined elsewhere).
n =48; k = 32; i = 28
peryinfig = 
propcnas.peryin.df %>%
  rename(Dialect = language) %>%
  # fix the facet ordering of the vowels
  mutate(vowel = factor(vowel, 
                        levels = c("æ", "eɪ", "ʌ", "ɛ", "ɪ"))) %>%
  ggplot +
  aes(y = ratio, x = Dialect, col=coda) +
  # NOTE(review): `lwd` is a global set elsewhere in this file — confirm
  geom_boxplot(lwd=lwd) +
  facet_wrap(~ vowel, ncol = 5) +
  ylab("Area (proportion)") +
  xlab("") +
  theme_phon(lpos="top",
             atextx=i,
             atexty=k,
             atitlex=k,
             atitley=k,
             strip.text.x = k,
             ptitle=n,
             ltitle=element_blank(),
             othertext=n) + 
  theme(axis.text.y = 
element_text(angle = 90, vjust = 1, hjust=1)) +
  # reference line at the halfway point of the ratio
  geom_hline(yintercept=.5, lty=2) +
  scale_colour_manual(values = cols.coda) 
peryinfig

# Save Fig. 19 as a cairo_pdf so that the IPA glyphs in the facet labels
# are embedded correctly.
ggsave(file.path(pfadfigs2, "peryinfig.pdf"),
       plot = peryinfig,
       device = cairo_pdf,
       width = 45,
       height = 30,
       units = "cm")

Statistics for PERYIN. Remove the two values lying at exactly 1, since boundary values are not admissible for a beta distribution (there are none at exactly 0). This first model predicts only the mean; the precision parameter is held constant.

# Analysis data: exclude ratios at exactly 0 or 1, which lie on the
# boundary of the (0, 1) support of the beta distribution.
p.df <- propcnas.peryin.df %>%
  rename(Dialect = language) %>%
  filter(ratio != 1, ratio != 0)


# Beta GLMM (logit link) for the area ratio: Dialect x coda fixed effects,
# by-speaker random slopes for coda, by-CV random slopes for Dialect.
# Only the mean is modelled here; the precision parameter phi is constant.
m1 <- glmmTMB(ratio ~ Dialect * coda +
                (coda|speaker) +
                (Dialect|CV),
              family=beta_family(),
              data=p.df)
summary(m1)
##  Family: beta  ( logit )
## Formula:          ratio ~ Dialect * coda + (coda | speaker) + (Dialect | CV)
## Data: p.df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   -1024.3    -970.1     523.1   -1046.3      1000 
## 
## Random effects:
## 
## Conditional model:
##  Groups  Name        Variance Std.Dev. Corr  
##  speaker (Intercept) 0.53901  0.7342         
##          codant      0.19323  0.4396   -0.54 
##  CV      (Intercept) 0.54281  0.7368         
##          DialectUSE  0.05217  0.2284   0.06  
## Number of obs: 1011, groups:  speaker, 43; CV, 19
## 
## Dispersion parameter for beta family (): 9.32 
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         0.7092     0.2234   3.174 0.001503 ** 
## DialectUSE         -0.8436     0.2445  -3.450 0.000561 ***
## codant             -0.8832     0.1087  -8.126 4.45e-16 ***
## DialectUSE:codant   0.2361     0.1725   1.369 0.171067    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise comparisons: Dialect within each coda and coda within each
# Dialect, reported on the response (0-1) scale; p-values are
# Bonferroni-adjusted over the 4 tests (the tests themselves are computed
# on the log odds ratio scale).
emmeans(m1, ~ Dialect | coda, type = "response") %>% 
  pairs(simple = "each", combine = TRUE)
##  coda Dialect contrast  odds.ratio    SE  df null z.ratio p.value
##  nd   .       BRE / USE       2.32 0.568 Inf    1   3.450  0.0022
##  nt   .       BRE / USE       1.84 0.403 Inf    1   2.770  0.0224
##  .    BRE     nd / nt         2.42 0.263 Inf    1   8.126  <.0001
##  .    USE     nd / nt         1.91 0.265 Inf    1   4.662  <.0001
## 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

Add another regression equation for the precision parameter phi. AIC says that the model m2 with precision predictors is a better fit.

# m2: same mean model as m1, but additionally regresses the precision
# parameter phi on Dialect x coda with a by-speaker random slope for coda
# (dispformula).
m2 <- update(m1,
             dispformula = ~ Dialect * coda
             + (coda | speaker)
             # + (1 | CV)
             )
# AIC comparison: the model with precision predictors (m2) fits better.
bbmle::AICtab(m1,m2)
##    dAIC  df
## m2   0.0 17
## m1 146.9 11
# Full summary of m2: conditional (mean) model plus dispersion (phi) model.
summary(m2)
##  Family: beta  ( logit )
## Formula:          ratio ~ Dialect * coda + (coda | speaker) + (Dialect | CV)
## Dispersion:             ~Dialect * coda + (coda | speaker)
## Data: p.df
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##   -1171.1   -1087.5     602.6   -1205.1       994 
## 
## Random effects:
## 
## Conditional model:
##  Groups  Name        Variance Std.Dev. Corr  
##  speaker (Intercept) 0.30389  0.5513         
##          codant      0.17298  0.4159   -0.23 
##  CV      (Intercept) 0.51740  0.7193         
##          DialectUSE  0.02732  0.1653   0.14  
## Number of obs: 1011, groups:  speaker, 43; CV, 19
## 
## Dispersion model:
##  Groups  Name        Variance Std.Dev. Corr  
##  speaker (Intercept) 0.6329   0.7955         
##          codant      0.3742   0.6117   -0.92 
## Number of obs: 1011, groups:  speaker, 43
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         0.7804     0.2002   3.899 9.66e-05 ***
## DialectUSE         -0.8411     0.1870  -4.498 6.87e-06 ***
## codant             -0.9714     0.1022  -9.504  < 2e-16 ***
## DialectUSE:codant   0.2571     0.1620   1.587    0.112    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Dispersion model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         2.2921     0.1726  13.279   <2e-16 ***
## DialectUSE          0.2887     0.2801   1.031    0.303    
## codant              0.2690     0.1789   1.504    0.133    
## DialectUSE:codant  -0.4559     0.2834  -1.609    0.108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Then use emmeans as usual, making sure type = "response", i.e. the predictions are in the 0 to 1 scale, not in the logit scale.

# Same pairwise comparisons as for m1, now from the model with precision
# predictors; type = "response" reports estimates on the 0-1 scale rather
# than the logit scale.
emmeans(m2, ~ Dialect | coda, type = "response") %>% 
  pairs(simple = "each", combine = TRUE)
##  coda Dialect contrast  odds.ratio    SE  df null z.ratio p.value
##  nd   .       BRE / USE       2.32 0.434 Inf    1   4.498  <.0001
##  nt   .       BRE / USE       1.79 0.377 Inf    1   2.778  0.0219
##  .    BRE     nd / nt         2.64 0.270 Inf    1   9.504  <.0001
##  .    USE     nd / nt         2.04 0.267 Inf    1   5.461  <.0001
## 
## P value adjustment: bonferroni method for 4 tests 
## Tests are performed on the log odds ratio scale

5.3 Plot of velum signal, waveform, probability of voicing.

Unique speaker and trial codes for ‘saint’

# Unique speaker/trial/dialect combinations for the word 'saint'.
# Speaker ids were stored as factors like "S12" (see section 1.3); strip
# the leading "S" to recover the numeric speaker id.
wordtype = "saint"
talkerinfo = dt.df %>%
  filter(orth == wordtype) %>%
  mutate(speaker = as.numeric(sub("^S", "", as.character(speaker)))) %>%
  select(speaker, trialnumber, language) %>%
  distinct()

Choose a row number between 1 and nrow(talkerinfo) to display aligned tracks for any token of 'saint' (see above)

# Pick one 'saint' token by row index into talkerinfo (see above).
i = 10
# i = 23
speakerid = talkerinfo$speaker[i]
trialid = talkerinfo$trialnumber[i]
# NOTE(review): `speaker` and `trial` here are free-standing vectors that
# must already exist in the workspace (they are not columns of talkerinfo);
# presumably they parallel the emuR segment list rows — confirm upstream.
temp = speaker == speakerid & trial == trialid
# token identifier matching the rowind convention (speaker.trialnumber)
physid = paste(speakerid, trialid, sep=".")

# Nasalization relative to the acoustic VN boundary 
## Method

# Segment-list entry for the chosen token.
# NOTE(review): `vok.s` is presumably an emuR segment list built earlier in
# this file — confirm; `temp` is the logical selector defined above.
segment.s = vok.s[temp,]
# Widen the window: 200 before the segment start and 300 after its end
# (units assumed to be ms, as in emuR segment lists — TODO confirm).
segment.s =  segment.s %>%
  mutate(start = start -200,
         end = end + 300)
# Pull the raw audio samples for the widened window from the emu database.
segment.wav = get_trackdata(phys_db, segment.s, "MEDIAFILE_SAMPLES")
## 
##   INFO: parsing 1 wav segments/events
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
# Time (ms) of the periodic onset of the vowel for this token in the emu
# database (e.g. 0.7880938 s * 1000).
acvowelon.t = dtphys.df %>%
  filter(rowind == physid) %>%
  pull(V2start) * 1000
acvowelon.t = unique(acvowelon.t)

# Shift the waveform's time axis so that the periodic vowel onset is 0 ms.
segment.wav = segment.wav %>%
  mutate(times_orig = times_orig - acvowelon.t)

Alignment of phys. data relative to the acoustic vowel onset

# Physiological tracks for the chosen token, with time re-expressed in ms
# relative to the periodic vowel onset (zerot).
phys.id.df = dtphys.df %>%
  filter(rowind == physid) %>%
  mutate(zerot = Vperiodiconset,
         reltime = 1000 * (time_in_sequence - zerot))

Alignment of waveform relative to the acoustic vowel onset

# Acoustic tracks for the chosen token, time in ms relative to the
# periodic vowel onset (same alignment as phys.id.df).
acoustic.id.df = dt.df %>%
  filter(rowind == physid) %>%
  mutate(zerot = Vperiodiconset,
         reltime = 1000 * (time_in_sequence - zerot))

Alignment of probability of voicing times rel. to acoustic vowel onset

# PERYIN landmark times for the chosen token (smoothed and raw), in ms
# relative to the periodic vowel onset.
aperyin.id.df = aperyin.df %>%
  filter(rowind == physid) %>%
  mutate(zerot = Vperiodiconset,
         Tmin.peryin = 1000 * (Tmin.peryin - zerot),
         Tminsmooth.peryin = 1000 * (Tminsmooth.peryin - zerot)) %>%
  summarise(Tminsmooth.peryin = unique(Tminsmooth.peryin),
            Tmin.peryin = unique(Tmin.peryin)) %>%
  ungroup()

Some time markers

# Helper: extract a single per-token time landmark from a data frame and
# express it in ms relative to the periodic vowel onset (zerot column).
ms_rel = function(df, col) {
  df %>%
    mutate(.reltime = 1000 * ({{ col }} - zerot)) %>%
    pull(.reltime) %>%
    unique()
}

# acoustic vowel offset time
VNbound.id = ms_rel(phys.id.df, endv_time)

# time of peak velocity of velum lowering
pkvellower.id = ms_rel(phys.id.df, velumopening_maxvel_on)

# time of velum lowering peak
pkvel.id = ms_rel(phys.id.df, velumopening_maxcon_on)

# time of peak velocity of velum raising
pkvelraise.id = ms_rel(phys.id.df, velumopening_maxvel_off)

Times at which prob_voice is <= 0.5 (2 elements, first is Tminsmooth.peryin, the second is Tmin.peryin )

# Two-element numeric vector of times (ms, rel. to vowel onset) at which
# prob_voice <= 0.5: element 1 is Tminsmooth.peryin, element 2 Tmin.peryin.
aperyin.times = aperyin.df %>%
  filter(rowind == physid) %>%
  mutate(zerot = Vperiodiconset,
         Tmin.peryin = 1000 * (Tmin.peryin - zerot),
         Tminsmooth.peryin = 1000 * (Tminsmooth.peryin - zerot)) %>%
  summarise(Tminsmooth.peryin = unique(Tminsmooth.peryin),
            Tmin.peryin = unique(Tmin.peryin)) %>%
  ungroup() %>%
  as.numeric()

Set the scale on the x-axis

# Shared x-axis window for all three panels: 50 ms before the acoustic
# vowel onset (t = 0) to 50 ms after the peak velocity of velum raising.
timeaxis = c(-50, pkvelraise.id + 50)

Data frames for drawing the areas

# Shaded region 1: from the acoustic vowel offset (t2) to the time at
# which periodicity drops below 0.5 (t3).
data1 = phys.id.df %>%
  filter(reltime >= VNbound.id,
         reltime <= aperyin.times[1])

# Shaded region 2: from peak velum-lowering velocity (t1) to the acoustic
# vowel offset (t2).
data2 = phys.id.df %>%
  filter(reltime <= VNbound.id,
         reltime >= pkvellower.id)
# Layout constants for the vertical markers and their t-labels.
ystart = -.1; yend.id = .75; yend.idtext = yend.id + .02
textsize = 20
textsize_yaxis = textsize - 4
# Top panel: velum-lowering trajectory (re-zeroed at gesture onset) with
# the two shaded areas (data1 in blue, data2 in orange) and labelled
# vertical markers t0..t4.
 nasal_trajectory =  phys.id.df %>%
  ggplot + 
  aes(y = velum2US - velum2US_velumopening_gesture_onset, 
      x = reltime) +
   coord_cartesian(xlim = timeaxis, ylim = c(-.05, .77)) + 
  geom_line() +
  ylab("Size of velum lowering") + 
  # blue area: vowel offset (t2) -> voicing offset (t3)
  geom_area(
    aes(y = velum2US - velum2US_velumopening_gesture_onset, 
        x = reltime), 
        data = data1, 
    fill = 4,
            alpha = 0.5,
            color = 1,    
            lwd = 0.5,    
            linetype = 1) +
  # orange area: peak lowering velocity (t1) -> vowel offset (t2)
  geom_area(
    aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
        data = data2, 
    fill = "orange",
            alpha = 0.5,
            color = 1,    
            lwd = 0.5,    
            linetype = 1) +
  theme(
        axis.text.x = element_blank(),
        axis.text.y = element_text(size=textsize_yaxis), 
        axis.title.y = element_text(size=textsize_yaxis), 
        axis.title = element_blank(),
        legend.title = element_blank(),
        legend.position = "none") +
# NOTE(review): `cols` is a global palette defined elsewhere — confirm
scale_colour_manual(values = cols) +
   
  # acoustic vowel offset
  geom_segment(aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
             x = VNbound.id, y = ystart, xend = VNbound.id,
             yend = yend.id, lty=3, linewidth=1) +
   annotate("text", x=VNbound.id, y=yend.idtext, 
           label="italic(t[2])", 
           parse=TRUE, 
           size=textsize/.pt) +
   
   
   # pk. velocity of velum lowering
   geom_segment(aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
             x = pkvellower.id, y = ystart, xend = pkvellower.id,
             yend = yend.id, lty=2, linewidth=lwd) +
   annotate("text", x=pkvellower.id, y=yend.idtext, 
           label="italic(t[1])", 
           parse=TRUE, 
           size=textsize/.pt) +
   
   # probability of voicing
   geom_segment(aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
             x = aperyin.times[1], y = ystart, xend = aperyin.times[1],
             yend = yend.id, lty=1, linewidth=1, col="red") +
   annotate("text", x=aperyin.times[1], y=yend.idtext, 
           label="italic(t[3])", 
           parse=TRUE, 
           size=textsize/.pt) +
   
     # pk velocity of  velum raising
      geom_segment(aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
             x = pkvelraise.id, y = ystart, xend = pkvelraise.id,
             yend = yend.id, lty=2, linewidth=lwd) +
   annotate("text", x=pkvelraise.id, y=yend.idtext, 
           label="italic(t[4])", 
           parse=TRUE, 
           size=textsize/.pt) +
   
   # acoustic vowel onset
   geom_segment(aes(y = velum2US - 
          velum2US_velumopening_gesture_onset, x = reltime), 
             x = 0, y = ystart, xend = 0,
             yend = yend.id, lty=3, linewidth=1) +
   annotate("text", x=0, y=yend.idtext, 
           label="italic(t[0])", 
           parse=TRUE, 
           size=textsize/.pt) 

# Middle panel: audio waveform (channel T1) over the same time window,
# axes suppressed; vertical markers match the other panels.
waveform = segment.wav %>%
  ggplot +
  aes(y = T1, x = times_orig) +
  geom_line() +
   coord_cartesian(xlim = timeaxis, ylim=c(-12000, 12000)) +
  theme(
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.title = element_blank(),
        legend.position = "none") +
  # acoustic vowel onset and offset
  geom_vline(xintercept = 
               c(0, VNbound.id), 
             linewidth=1, lty=3) +
# red marker: time at which periodicity drops below 0.5 (t3)
geom_vline(xintercept = 
               aperyin.times[1],  
             lty=1, linewidth=1, col="red")

# Bottom panel: periodicity (1 - aperyin) over time, with the same
# vertical markers as the other panels and a reference line at 0.5.
probability_voice = acoustic.id.df %>%
  ggplot + 
  aes(y = 1-aperyin, 
      x = reltime) +
  coord_cartesian(xlim = timeaxis) + 
  theme(
    axis.text = element_text(size=textsize), 
    axis.title.x = element_text(size=textsize), 
    axis.text.y = element_text(size=textsize_yaxis), 
    axis.title.y = element_text(size=textsize_yaxis), 
    text = element_text(size=textsize)) +
  geom_line()  +
  ylab("Periodicity") + 
  xlab("Time (ms)") +
  # acoustic vowel onset and offset
  geom_vline(xintercept = 
               c(0, VNbound.id), 
             linewidth=1, lty=3) +
  geom_vline(xintercept = 
               aperyin.times[1],  
             lty=1, linewidth=1, col="red") +
  # FIX: argument was misspelled `lindewidth`, which ggplot2 ignored with
  # a warning; the intended value 0.5 is also ggplot2's default linewidth,
  # so the rendered figure is unchanged.
  geom_hline(yintercept=.5, lty=1, linewidth = .5)
# Stack the three aligned panels (velum trajectory, waveform, periodicity)
# into a single column; rbind on the grobs makes the panel widths line up.
gA <- ggplotGrob(nasal_trajectory)
gB <- ggplotGrob(waveform)
gC <- ggplotGrob(probability_voice)

grid::grid.newpage()
figalignprob = grid.arrange(rbind(gA, gB, gC))

figalignprob
## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
# Save the combined figure with cairo_pdf for proper glyph embedding.
ggsave(filename = file.path(pfadfigs2, "figalignprob.pdf"), 
       plot = figalignprob, 
       device = cairo_pdf,
      width = 30, 
       height = 20, 
       units = "cm")