library(tidyverse)
library(magrittr)
library(gridExtra)
library(wrassp)
library(emuR)
library(lme4)
library(lmerTest)
library(emmeans)
library(optimx)

0. Preliminaries

Open dataframe with all formant values (raw, time and speaker-normalised) from mid stem vowels

# Load the formant dataframe and derive the grouping variables used below:
# Gram (grammatical class), MET (metaphonic context) and an ordered Region.
D_basic <- read.table("D.txt")

# Add "Gram" as for the DCT dataframe. Words listed here are coded as
# nouns/adjectives ("NAdj"); every other word is coded as a verb ("V").
nadj_words <- c(
  "anelli", "anello", "bella",
  "bello", "cappelli", "cappello",
  "capretta", "capretti",
  "capretto", "cervelli", "cervello",
  "coltelli", "coltello", "dente",
  "denti", "donna",
  "donne", "ferri",
  "ferro", "letti", "letto",
  "mela", "mele", "mese",
  "mesi", "pecora", "pecore",
  "pesca", "pesche", "pettine",
  "pettini", "pezza", "pezzo",
  "piede", "piedi", "pietra",
  "pietre", "prete", "preti",
  "sedia", "sedie", "stella",
  "stelle", "venti", "vento",
  "vecchia", "vecchio", "verme", "vermi",
  "buona", "buone", "buoni",
  "buono", "corna", "corno",
  "cotta", "cotto", "cuore", "cuori",
  "foglia", "foglie", "fuochi", "fuoco",
  "grossa", "grosso", "lunga", "lungo", "lunghi",
  "morta", "morti", "morto", "nipote",
  "nipoti", "nuova", "nuovo",
  "occhi", "occhio", "ossa", "osso",
  "ponte", "ponti", "porci", "porco",
  "rosa", "rose", "ruota", "ruote",
  "sole", "sposa", "sposo",
  "topi", "topo", "uomini", "uomo",
  "uova", "uovo", "zoppa", "zoppo",
  "chiodo", "chiodi", "fiore", "fiori", "gioco", "giochi"
)

D_basic <- D_basic %>%
  mutate(Gram = ifelse(Word %in% nadj_words, "NAdj", "V"))

# Add "MET": TRUE when the suffix vowel is /i/ or /u/, FALSE otherwise.
# `%in%` already returns a logical vector, so no ifelse() wrapper is needed.
D_basic <- D_basic %>%
  mutate(MET = Suffix_vowel %in% c("i", "u"))

# Order the levels of "Region".
D_basic$Region <- factor(D_basic$Region, levels = c("MM", "ZZ", "MZ"))

Open dataframe with DCT-scores for each stem vowel token:

# DCT-score dataframe: one row per stem vowel token.
D <- read.table(file = "D_dct.txt")

Some information about our database.

How many speakers?

# List the distinct speaker codes (printed sorted, as factor levels).
D$speaker %>%
  factor() %>%
  levels()
##  [1] "CA01F" "CC01F" "CC01M" "CC02F" "CC02M" "CC03F" "CC03M" "CC04F" "CC05F"
## [10] "CC06F" "CC07F" "CC08F" "CV01F" "CV02F" "CV03F" "LA01F" "LI01M" "LI02M"
## [19] "LI03M" "LI04M" "LI05M" "LI06M" "MG01M" "MG02M" "MM02F" "MM03F" "MM03M"
## [28] "MM04F" "MM04M" "MM05F" "MM05M" "MM06F" "MM06M" "MM07F" "MM07M" "MM08F"
## [37] "MM08M" "MM09F" "MM09M" "MM10F" "MM11F" "MM11M" "MM12F" "MM12M" "MM13F"
## [46] "MM13M" "MM14F" "MM14M" "MM15F" "MM15M" "MM16F" "MM16M" "OM01F" "OM01M"
## [55] "OM02F" "OM02M" "OM03F" "RI01M" "RI02M" "SC01F" "SC01M" "SC02F" "SC02M"
## [64] "SC03F" "SC03M" "SC04M" "SC05M" "SC06M" "SC07M" "SD01F" "SH01M" "SL01F"
## [73] "SL01M" "SL02F" "SL02M" "SL03F" "TR01F" "TR01M" "TR02F" "TR02M" "TR03F"
## [82] "TR03M" "TR04F" "TR05F" "TR06F" "VB01F" "VB01M" "VB02F" "VB02M" "VB03F"
## [91] "VB04F" "VB05F" "VB06F" "VI01F" "VI01M" "VI02F" "VI02M" "VI03F" "VI03M"

In particular:

N.B.: “Region” here is based on the expected metaphonic patterns and does not strictly refer to geography. In particular:

1. DCT analysis of F1 in all suffix vowel contexts, verbs vs Nouns/adjectives

F1 values of /e/ and /o/- stems were analysed altogether since for both vowel types F1 lowering or raising corresponds to raising or lowering of the vowel respectively.

What to expect:

1.1. Overview: plots

Plot of aggregated formants (F1), separately by region, grammatical class, and metaphonic vs non-metaphonic forms. We do this to see what stem-vowel F1 generally looks like depending on the metaphonic context (TRUE = suffixes /i, u/; FALSE = suffixes /a, e/).

# Aggregate normalised F1 of the mid stem vowels: one mean value per
# speaker, grammatical class, normalised time point, MET, stem vowel and region.
v.mean.F1 <- D_basic %>%
  # keep the two stem vowels /e/ and /o/
  filter(Stem_vowel %in% c("e", "o")) %>%
  # one group per speaker and condition at each normalised time point
  group_by(speaker, Gram, times_norm, MET, Stem_vowel, Region) %>%
  # calculate mean F1; drop the grouping so the result is a plain tibble
  summarise(F1n = mean(F1n), .groups = "drop")

# Two-colour palette used for the MET colour scale.
cols <- c("darkorange", "purple")

# Smoothed normalised F1 over normalised time, colour-coded by MET,
# with grammatical class in rows and region in columns.
ggplot(
  v.mean.F1,
  aes(x = times_norm, y = F1n, col = MET, group = MET)
) +
  geom_smooth() +
  facet_grid(Gram ~ Region) +
  theme_light() +
  theme(
    strip.text.x = element_text(color = "black"),
    strip.text.y = element_text(color = "black"),
    text = element_text(size = 12),
    legend.position = "top"
  ) +
  ylab("Normalised F1 (standard deviations)") +
  xlab("Normalised time") +
  scale_x_continuous("Normalised time", breaks = c(0, 0.25, 0.5, 0.75)) +
  scale_color_manual(values = cols, name = "Metaphony")

Alternatively, we can also plot the stem vowels separately (suggested by Reviewer 2):

# Build the F1 trajectory plot for a single stem vowel.
#
# vowel: stem vowel label to keep ("e" or "o"); also used in the plot title.
# Returns a ggplot object: smoothed normalised F1 over normalised time,
# colour-coded by MET, with grammatical class in rows and region in columns.
plot_f1_by_stem_vowel <- function(vowel) {
  v.mean.F1 %>%
    # .env$ makes sure the function argument is used, not a data column
    filter(Stem_vowel == .env$vowel) %>%
    ggplot(aes(y = F1n, x = times_norm, col = MET, group = MET)) +
    facet_grid(Gram ~ Region) +
    geom_smooth() +
    theme_light() +
    ylab("Normalised F1") +
    xlab("Normalised time") +
    scale_x_continuous("Normalised time", breaks = c(0, 0.25, 0.5, 0.75)) +
    theme(
      strip.text.x = element_text(color = "black"),
      strip.text.y = element_text(color = "black"),
      text = element_text(size = 12),
      legend.position = "top"
    ) +
    scale_color_manual(values = cols, name = "Metaphony") +
    ggtitle(paste0("Stem-/", vowel, "/"))
}

a <- plot_f1_by_stem_vowel("e")
b <- plot_f1_by_stem_vowel("o")

# Stack the two stem-vowel plots vertically.
grid.arrange(a, b, nrow = 2)

k0, comparison between MET = T vs MET = F, by region and grammatical category:

# Add "MET" to the DCT dataframe: TRUE for suffix vowels /i, u/, FALSE
# otherwise. `%in%` already returns a logical, so no ifelse() is needed.
D <- D %>%
  mutate(MET = Suffix_vowel %in% c("i", "u"))

# Order the levels of "Region".
D$Region <- factor(D$Region, levels = c("MM", "ZZ", "MZ"))

# Fill colours for the grammatical categories (Gram).
cols.gram <- c("blue", "red")

# k0 (X0) by MET, dodged by grammatical category, one panel per region.
D %>%
  ggplot(aes(y = X0, x = MET, fill = Gram)) +
  geom_violin(position = position_dodge(0.8), color = NA, alpha = 0.2) +
  geom_boxplot(
    width = 0.3, alpha = 0.7, cex = .5, position = position_dodge(.8)
  ) +
  facet_wrap(. ~ Region) +
  # zoom the y axis without discarding data outside the limits
  coord_cartesian(ylim = c(-2, 2)) +
  theme_light() +
  xlab("Metaphony") +
  ylab(expression(k[0])) +
  theme(
    strip.text.x = element_text(color = "black"),
    strip.text.y = element_text(color = "black"),
    text = element_text(size = 12),
    legend.position = "top"
  ) +
  scale_fill_manual(values = cols.gram, name = "Grammatical category")

k1, comparison between MET = T vs MET = F, by region and grammatical category:

# k1 (X1) by MET, dodged by grammatical category, one panel per region.
ggplot(D, aes(x = MET, y = X1, fill = Gram)) +
  geom_violin(position = position_dodge(0.8), color = NA, alpha = 0.2) +
  geom_boxplot(
    width = 0.3, alpha = 0.7, cex = .5, position = position_dodge(.8)
  ) +
  facet_wrap(. ~ Region) +
  # zoom the y axis without discarding data outside the limits
  coord_cartesian(ylim = c(-1.3, 1.3)) +
  theme_light() +
  labs(x = "Metaphony", y = expression(k[1])) +
  theme(
    strip.text.x = element_text(color = "black"),
    strip.text.y = element_text(color = "black"),
    text = element_text(size = 12),
    legend.position = "top"
  ) +
  scale_fill_manual(values = cols.gram, name = "Grammatical category")

From the plots above it is clearly visible that verbs show overall less metaphony than both adjectives and nouns together. Also, we can see that metaphony/coarticulatory effects are less marked (if not absent) in verbs. The statistical analysis below confirms that these differences are significant.

1.2. Statistics

# k0: backward reduction (step) of the full model's random and fixed effects.
step(
  lmer(
    X0 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
    data = D
  )
)
## Backward reduced random-effect table:
## 
##                           Eliminated npar logLik   AIC     LRT Df Pr(>Chisq)
## <none>                                 22 -17702 35448                      
## Region in (Region | Stem)          0   17 -18557 37147 1709.25  5  < 2.2e-16
## MET in (MET | speaker)             0   20 -17784 35608  164.44  2  < 2.2e-16
##                              
## <none>                       
## Region in (Region | Stem) ***
## MET in (MET | speaker)    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                 Eliminated Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Region:Gram:MET          0 37.993  18.996     2 16710    43.8 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## X0 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker)
# k1: backward reduction (step) of the full model's random and fixed effects.
step(
  lmer(
    X1 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
    data = D
  )
)
## Backward reduced random-effect table:
## 
##                           Eliminated npar  logLik    AIC    LRT Df Pr(>Chisq)
## <none>                                 22 -1085.7 2215.3                     
## Region in (Region | Stem)          0   17 -1488.7 3011.4 806.05  5  < 2.2e-16
## MET in (MET | speaker)             0   20 -1138.2 2316.3 104.97  2  < 2.2e-16
##                              
## <none>                       
## Region in (Region | Stem) ***
## MET in (MET | speaker)    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                 Eliminated Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Region:Gram:MET          0 2.2324  1.1162     2 15704  17.749 1.996e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## X1 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker)

Models found by step() coincide with the ones suggested:

# k0: ANOVA table of the full model, fitted with optimx's nlminb optimiser.
anova(
  lmer(
    X0 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
    data = D,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  )
)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
## Region            0.549   0.275     2    57.1   0.6330  0.534700    
## Gram              2.429   2.429     1    56.0   5.6012  0.021429 *  
## MET              82.070  82.070     1   202.3 189.2289 < 2.2e-16 ***
## Region:Gram       0.046   0.023     2    53.5   0.0531  0.948292    
## Region:MET        6.018   3.009     2   198.3   6.9380  0.001224 ** 
## Gram:MET        154.438 154.438     1 17067.3 356.0887 < 2.2e-16 ***
## Region:Gram:MET  37.993  18.996     2 16709.6  43.8002 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# k1: ANOVA table of the full model, fitted with optimx's nlminb optimiser.
anova(
  lmer(
    X1 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
    data = D,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  )
)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## Region          0.3586  0.1793     2    88.8  2.8509  0.063096 .  
## Gram            0.0076  0.0076     1    56.0  0.1201  0.730266    
## MET             1.1152  1.1152     1   201.8 17.7336 3.827e-05 ***
## Region:Gram     0.3483  0.1742     2    51.0  2.7696  0.072132 .  
## Region:MET      0.6157  0.3078     2   195.3  4.8953  0.008425 ** 
## Gram:MET        3.9447  3.9447     1 17030.8 62.7285 2.518e-15 ***
## Region:Gram:MET 2.2324  1.1162     2 15704.5 17.7493 1.996e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

For both models, there is an interesting three-way interaction between region, suffix vowel, and grammatical category. This is why we do a post-hoc test.

#emm_options(pbkrtest.limit = 17259)

# k0 post-hoc: pairwise contrasts of MET and Region within each grammatical
# category (and vice versa, via simple = "each"), keeping only p <= 0.05.
# The data is passed by name rather than through the pipe's `.`, so that
# emmeans can look the dataset up again from the model call.
lmer(
  X0 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
  data = D,
  control = lmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
) %>%
  emmeans(., ~ MET * Region | Gram) %>%
  # combine = TRUE merges the simple-effect families into a single table
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## # A tibble: 8 × 9
##   Region Gram  MET   contrast     estimate     SE    df z.ratio p.value
##   <chr>  <chr> <chr> <chr>           <dbl>  <dbl> <dbl>   <dbl>   <dbl>
## 1 MM     NAdj  .     FALSE - TRUE    0.504 0.0440   Inf   11.4   0     
## 2 ZZ     NAdj  .     FALSE - TRUE    0.641 0.0477   Inf   13.4   0     
## 3 MZ     NAdj  .     FALSE - TRUE    1.08  0.0350   Inf   30.9   0     
## 4 .      NAdj  TRUE  MM - MZ         0.351 0.0780   Inf    4.50  0.0002
## 5 .      NAdj  TRUE  ZZ - MZ         0.220 0.0702   Inf    3.13  0.0412
## 6 MM     .     TRUE  NAdj - V       -0.743 0.201    Inf   -3.70  0.0053
## 7 ZZ     .     TRUE  NAdj - V       -0.723 0.214    Inf   -3.38  0.0175
## 8 MZ     .     TRUE  NAdj - V       -1.02  0.253    Inf   -4.03  0.0013
# k1 post-hoc: pairwise contrasts of MET and Region within each grammatical
# category (and vice versa, via simple = "each"), keeping only p <= 0.05.
# The data is passed by name rather than through the pipe's `.`, so that
# emmeans can look the dataset up again from the model call.
lmer(
  X1 ~ Region * Gram * MET + (Region | Stem) + (MET | speaker),
  data = D,
  control = lmerControl(
    optimizer = "optimx",
    optCtrl = list(method = "nlminb")
  )
) %>%
  emmeans(., ~ MET * Region | Gram) %>%
  # combine = TRUE merges the simple-effect families into a single table
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## # A tibble: 4 × 9
##   Region Gram  MET   contrast     estimate     SE    df z.ratio p.value
##   <chr>  <chr> <chr> <chr>           <dbl>  <dbl> <dbl>   <dbl>   <dbl>
## 1 ZZ     NAdj  .     FALSE - TRUE   0.213  0.0171   Inf   12.4   0     
## 2 MZ     NAdj  .     FALSE - TRUE   0.0567 0.0122   Inf    4.63  0.0001
## 3 .      NAdj  TRUE  MM - ZZ        0.282  0.0349   Inf    8.06  0     
## 4 .      NAdj  TRUE  ZZ - MZ       -0.217  0.0304   Inf   -7.12  0

As regards the differences between regions, results are in line with FPCA analysis by Greca et al. 2024: k1 plays a role mainly for ZZ in nouns and adjectives, while differences in k0 are significant between the regions MM-MZ and ZZ-MZ.

However, for MET=T vs F, k-score differences between regions are significant only for nouns and adjectives. Also, differences between grammatical categories are significant in metaphonic contexts for all three regions. In line with the previous plots, this confirms that verbs show overall significantly less metaphony than nouns and adjectives in all regions.

Consequently, the next step is to zoom into different verb lexical stems, to check whether there are differences between (and within) verbs (following Lausberg 1939; see Introduction in the paper).

2. Differences between verbal lexical stems

We plot verbal lexical stems separately by person: this is because, following Lausberg (1939), we may have metaphony in some persons but not in others, or metaphony in all persons, or no metaphony in any person. “Person” includes the suffix vowels grouped into three levels: non-high /a, e/ suffixes (morphologically marking the third person singular); high front /i/ (second person singular); high back /u/ (first person singular).

Below the code to order the verbal stems in conjugation order and to generate the new variable “Person”.

# Verb subset of the DCT dataframe, with stems in Latin conjugation order
# (NB: /vol/ comes from "volo, vis, velle", an irregular verb; in Italian it
# belongs to the second conjugation).
D_verb <- D %>%
  filter(Gram == "V")

D_verb$Stem <- factor(
  D_verb$Stem,
  levels = c("pens", "trov", "ten", "vol", "dorm", "ess", "mor")
)

# Create variable "Person" as described above and in the Method section of
# the paper: suffixes /a, e/ -> 3rd, /i/ -> 2nd, /u/ -> 1st person singular.
D_third <- D_verb %>%
  filter(Suffix_vowel %in% c("a", "e")) %>%
  mutate(Person = "3rd")

D_second <- D_verb %>%
  filter(Suffix_vowel == "i") %>%
  mutate(Person = "2nd")

D_first <- D_verb %>%
  filter(Suffix_vowel == "u") %>%
  mutate(Person = "1st")

D_verb <- rbind(D_third, D_second, D_first)

# Colours for the plots, keyed by Person. Without names, the colours would be
# matched to the alphabetical order "1st", "2nd", "3rd" (1st = grey), which
# contradicts the text, where grey is the 3rd and yellow/orange the 2nd person.
cols.suff <- c("3rd" = "darkgrey", "2nd" = "orange", "1st" = "darkgreen")

2.1 Overview: violin plots

Comparison between MM and MZ:

# k0 (X0) by person for each verbal stem in MM and MZ (stems "mor" and "vol"
# excluded); regions in rows, stems in columns.
D_verb %>%
  filter(
    Region %in% c("MM", "MZ"),
    !Stem %in% c("mor", "vol")
  ) %>%
  ggplot(aes(x = Person, y = X0, fill = Person)) +
  geom_violin(
    position = position_dodge(0.8), color = NA, alpha = 0.4, width = 1
  ) +
  geom_boxplot(
    width = 0.4, alpha = 0.7, cex = .5, position = position_dodge(.8)
  ) +
  # dashed reference line at k0 = 0
  geom_hline(yintercept = 0, linetype = "longdash", size = 0.3) +
  facet_grid(Region ~ Stem) +
  coord_cartesian(ylim = c(-2, 2)) +
  labs(x = "", y = expression(k[0])) +
  theme_light() +
  scale_fill_manual(values = cols.suff, name = "Person") +
  theme(
    strip.text.x = element_text(color = "black"),
    strip.text.y = element_text(color = "black"),
    text = element_text(size = 10),
    legend.position = "top"
  )

Mormanno, k0

/pens/: 1. Latin conjugation. There is a slight coarticulatory effect of suffix-/u/ pulling the signal’s mean downwards, otherwise no visible effects of suffix vowel. Please also consider that the 2. pers. sg. suffix here is /asi/ (yellow violin plot), so it is no surprise that coarticulatorily we have similar effects to suffix-/a/ (grey violin plot).

/trov/: 1. Latin conjugation. We find a very similar situation to /pens/.

/ten/: 2. Latin conjugation. k0 is slightly lower for both 1. and (surprisingly, given we are dealing with an /e/-suffix) 3. person singular, but differences do not look significant.

/dorm/: 4. Latin conjugation. There is no raising at all. Indeed, all values point to vowel lowering.

/ess/: 4. Latin conjugation. Values suggest the presence of mid-high /e/ in the stem, with slight raising for the second person singular (but differences do not appear to be significant). NB this could also be linked to the fact that often the pronunciation is rather /jE/ or /je/, so that the mean rises a bit compared to other dialects.

To sum-up: in Mormanno, vowel stems present overall mid-low vowels (with only 2 exceptions, /vol, ess/). Probably, the presence of raising before /u/-suffixes (which is however not that consistent) is due to “gradient” coarticulatory effects.

Mittelzone, k0

/pens/: 1. conjugation. Differences do not look significant.

/trov/: 1. conjugation. Differences do not look significant.

/ten/: 2. conjugation. Differences do not look significant.

/dorm/: 4. conjugation. Differences do not look significant. However, there may be a slight raising (part of the violin plots below 0)

/ess/: 4. conjugation. Values point to generalised vowel raising, especially marked in the second and third persons. Please also notice that, similarly as in other regions, the first person sg. may have a diphthongal vowel quality [jesk@], and this may lower the whole signal’s mean.

There is a very big variation in the scores (very long violin plots). For MZ, there may be a loss of metaphony for /dorm/ compared to the data by Lausberg (general high means, although many tokens appear below the 0-line), while the observations by Lausberg fit our data for /ess/ (less metaphony in the 1. person and metaphonic vowel raising in the other two).


Zwischenzone, k1 (signal’s slope, indirectly degree of diphthongisation)

values below 0 suggest more opening diphthongisation /O/ > [wo, uo]; /E/ > [jE, ie].

# k1 (X1) by person for each verbal stem in ZZ (stems "mor" and "vol"
# excluded); stems in columns.
D_verb %>%
  filter(
    Region == "ZZ",
    !Stem %in% c("mor", "vol")
  ) %>%
  ggplot(aes(x = Person, y = X1, fill = Person)) +
  geom_violin(
    position = position_dodge(0.8), color = NA, alpha = 0.4, width = 1
  ) +
  geom_boxplot(
    width = 0.4, alpha = 0.7, cex = .5, position = position_dodge(.8)
  ) +
  # dashed reference line at k1 = 0
  geom_hline(yintercept = 0, linetype = "longdash", size = 0.3) +
  facet_grid(Region ~ Stem) +
  coord_cartesian(ylim = c(-1.3, 1)) +
  labs(x = "", y = expression(k[1])) +
  theme_light() +
  scale_fill_manual(values = cols.suff, name = "Person") +
  theme(
    strip.text.x = element_text(color = "black"),
    strip.text.y = element_text(color = "black"),
    text = element_text(size = 10),
    legend.position = "top"
  )

/pens/: 1. conjugation. Apparently no significant differences between suffixes.

/trov/: 1. conjugation. Apparently no significant differences between suffixes.

/ten/: 2. conjugation. By observing the grey and yellow violin plots this would suggest that the 2. and 3. (suffix-/e/) p. singular may show metaphonic diphthongisation.

/dorm/: 4. conjugation. It looks like all three persons may diphthongise, with slightly lower k1 values in the first two persons. This is not exactly in line with what is described by Lausberg for two villages of the ZZ (one of which, Verbicaro, is also represented in our data). However, please also remember that the signal’s means for the verb were high (open stem vowels).

/ess/: 4. conjugation: the values indicate no diphthongisation.

In general, the presence of long violin plots points to great variation within the data. To sum-up: general diphthongisation for /dorm/ (an evolution compared to the data by Lausberg?) and unexpected metaphony of the 2. and 3. persons in /ten/. There may be slight metaphony in the second person of /vol/.

General summary: a general tendency to metaphonise less for all verbs also compared to the data by Lausberg.

To check what differences are significant or not, we need statistics.

2.2. Statistics

We test whether the differences above are significant: the hypothesis is that there are significant differences for some verbs between different persons. We test this hypothesis separately by DCT-score.

# Number of verb tokens entering the analysis (stems "mor" and "vol" excluded).

nrow(filter(D_verb, !Stem %in% c("mor", "vol")))
## [1] 1813
# Analysis

# k0: ANOVA table for Stem x Person x Region with a by-speaker random
# intercept (stems "mor" and "vol" excluded).

D_verb %>%
  filter(!Stem %in% c("mor", "vol")) %>%
  lmer(
    X0 ~ Stem * Person * Region + (1 | speaker),
    data = .,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  ) %>%
  anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##                     Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
## Stem               203.062  50.766     4 1709.50 152.6272 < 2.2e-16 ***
## Person               2.334   1.167     2 1711.85   3.5089  0.030144 *  
## Region               3.070   1.535     2   78.76   4.6151  0.012726 *  
## Stem:Person          5.834   0.729     8 1696.98   2.1923  0.025485 *  
## Stem:Region         77.776   9.722     8 1710.27  29.2292 < 2.2e-16 ***
## Person:Region        5.241   1.310     4 1711.95   3.9391  0.003459 ** 
## Stem:Person:Region  11.395   0.712    16 1697.14   2.1413  0.005335 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# k1: same model structure as for k0, with X1 as the response
D_verb %>%
  filter(!(Stem %in% c("mor", "vol"))) %>%
  lmer(
    formula = X1 ~ Stem * Person * Region + (1 | speaker),
    data = .,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  ) %>%
  anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##                     Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
## Stem               22.5897  5.6474     4 1727.20 103.2767 < 2.2e-16 ***
## Person              0.6426  0.3213     2 1731.75   5.8760  0.002862 ** 
## Region              0.1388  0.0694     2   78.82   1.2691  0.286756    
## Stem:Person         3.8868  0.4859     8 1707.38   8.8850 5.481e-12 ***
## Stem:Region         2.2349  0.2794     8 1728.31   5.1089 2.624e-06 ***
## Person:Region       0.2385  0.0596     4 1731.20   1.0902  0.359721    
## Stem:Person:Region  1.8196  0.1137    16 1707.70   2.0797  0.007190 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Below are the post-hoc tests for each of the models above (Reminder: the first person is indicated by /u/-suffixed forms; /e, a/ suffixes correspond to the third person; /i/-suffixes indicate the second person):

# k0: post-hoc pairwise comparisons on the verb model.
# Only contrasts with p <= 0.05 are kept and printed in full.

D_verb %>%
  filter(!Stem %in% c("mor", "vol")) %>%
  lmer(X0 ~ Stem * Person * Region + (1 | speaker),
    control = lmerControl(
      optimizer = "optimx", optCtrl = list(method = "nlminb")
    ), .
  ) %>%
  emmeans(., ~ Person * Region | Stem) %>%
  # simple = "each" requests the simple contrasts of every factor in turn;
  # combine = TRUE returns them as one combined table.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  # rounding to 4 decimals makes very small p-values print as 0
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and every column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## # A tibble: 60 × 9
##    Region Stem  Person contrast    estimate     SE    df t.ratio p.value
##    <chr>  <chr> <chr>  <chr>          <dbl>  <dbl> <dbl>   <dbl>   <dbl>
##  1 ZZ     ten   .      1st - 3rd     -0.586 0.156  1696.   -3.77  0.0304
##  2 ZZ     ten   .      2nd - 3rd     -0.659 0.156  1696.   -4.24  0.0043
##  3 MZ     ess   .      1st - 2nd      0.479 0.122  1692.    3.93  0.0159
##  4 .      ten   2nd    MM - ZZ        0.660 0.165   617.    4.00  0.0128
##  5 .      ten   2nd    ZZ - MZ       -0.823 0.149   570.   -5.53  0     
##  6 .      dorm  1st    MM - MZ        0.739 0.131   537.    5.66  0     
##  7 .      dorm  1st    ZZ - MZ        0.723 0.149   563.    4.86  0.0003
##  8 .      dorm  2nd    MM - MZ        0.821 0.142   695.    5.78  0     
##  9 .      dorm  2nd    ZZ - MZ        0.715 0.152   605.    4.71  0.0005
## 10 .      dorm  3rd    MM - MZ        0.800 0.146   730.    5.48  0     
## 11 .      dorm  3rd    ZZ - MZ        0.752 0.153   606.    4.91  0.0002
## 12 .      ess   2nd    MM - MZ        0.724 0.165   978.    4.40  0.0022
## 13 .      ess   3rd    MM - MZ        0.787 0.152   813.    5.18  0     
## 14 MM     .     1st    pens - ess     0.524 0.131  1707.    3.99  0.0126
## 15 MM     .     1st    trov - ess     0.631 0.138  1728.    4.59  0.0009
## 16 MM     .     1st    ten - ess      0.568 0.124  1719.    4.56  0.001 
## 17 MM     .     1st    dorm - ess     0.811 0.126  1712.    6.46  0     
## 18 MM     .     2nd    pens - ess     0.870 0.155  1705.    5.63  0     
## 19 MM     .     2nd    trov - ess     0.937 0.161  1710.    5.80  0     
## 20 MM     .     2nd    ten - ess      0.812 0.154  1723.    5.28  0     
## 21 MM     .     2nd    dorm - ess     0.958 0.156  1712.    6.14  0     
## 22 MM     .     3rd    pens - ess     0.643 0.142  1699.    4.52  0.0012
## 23 MM     .     3rd    trov - ess     0.865 0.158  1697.    5.48  0     
## 24 MM     .     3rd    ten - ess      0.578 0.137  1716.    4.22  0.0047
## 25 MM     .     3rd    dorm - ess     0.807 0.145  1701.    5.55  0     
## 26 ZZ     .     1st    pens - ess     0.764 0.156  1697.    4.89  0.0002
## 27 ZZ     .     1st    trov - ten     0.596 0.149  1693.    3.99  0.0124
## 28 ZZ     .     1st    trov - ess     1.17  0.155  1693.    7.59  0     
## 29 ZZ     .     1st    ten - dorm    -0.614 0.147  1692.   -4.19  0.0053
## 30 ZZ     .     1st    ten - ess      0.577 0.152  1696.    3.79  0.0281
## 31 ZZ     .     1st    dorm - ess     1.19  0.152  1692.    7.83  0     
## 32 ZZ     .     2nd    pens - ess     0.676 0.162  1700.    4.17  0.0056
## 33 ZZ     .     2nd    trov - ten     0.625 0.160  1695.    3.92  0.0168
## 34 ZZ     .     2nd    trov - ess     0.921 0.170  1702.    5.43  0     
## 35 ZZ     .     2nd    ten - dorm    -0.700 0.149  1691.   -4.69  0.0005
## 36 ZZ     .     2nd    dorm - ess     0.997 0.160  1698.    6.23  0     
## 37 ZZ     .     3rd    pens - ess     0.614 0.167  1700.    3.68  0.0428
## 38 ZZ     .     3rd    trov - ess     1.07  0.177  1696.    6.02  0     
## 39 ZZ     .     3rd    ten - ess      0.987 0.172  1695.    5.75  0     
## 40 ZZ     .     3rd    dorm - ess     1.05  0.165  1698.    6.37  0     
## 41 MZ     .     1st    pens - dorm    0.625 0.103  1697.    6.08  0     
## 42 MZ     .     1st    pens - ess     0.986 0.115  1708.    8.56  0     
## 43 MZ     .     1st    trov - dorm    0.786 0.105  1696.    7.51  0     
## 44 MZ     .     1st    trov - ess     1.15  0.117  1711.    9.79  0     
## 45 MZ     .     1st    ten - dorm     0.562 0.0972 1700.    5.78  0     
## 46 MZ     .     1st    ten - ess      0.923 0.110  1709.    8.38  0     
## 47 MZ     .     2nd    pens - dorm    0.785 0.108  1694.    7.25  0     
## 48 MZ     .     2nd    pens - ess     1.65  0.119  1701.   13.8   0     
## 49 MZ     .     2nd    trov - dorm    0.880 0.106  1698.    8.28  0     
## 50 MZ     .     2nd    trov - ess     1.74  0.117  1707.   14.8   0     
## 51 MZ     .     2nd    ten - dorm     0.838 0.101  1699.    8.30  0     
## 52 MZ     .     2nd    ten - ess      1.70  0.112  1708.   15.1   0     
## 53 MZ     .     2nd    dorm - ess     0.861 0.113  1703.    7.63  0     
## 54 MZ     .     3rd    pens - dorm    0.620 0.111  1696.    5.56  0     
## 55 MZ     .     3rd    pens - ess     1.41  0.116  1699.   12.2   0     
## 56 MZ     .     3rd    trov - dorm    0.587 0.108  1696.    5.45  0     
## 57 MZ     .     3rd    trov - ess     1.38  0.113  1710.   12.2   0     
## 58 MZ     .     3rd    ten - dorm     0.746 0.105  1702.    7.08  0     
## 59 MZ     .     3rd    ten - ess      1.54  0.110  1706.   13.9   0     
## 60 MZ     .     3rd    dorm - ess     0.794 0.112  1699.    7.11  0
# k1: post-hoc pairwise comparisons on the verb model.
# Only contrasts with p <= 0.05 are kept and printed in full.

D_verb %>%
  filter(!Stem %in% c("mor", "vol")) %>%
  lmer(X1 ~ Stem * Person * Region + (1 | speaker),
    control = lmerControl(
      optimizer = "optimx", optCtrl = list(method = "nlminb")
    ), .
  ) %>%
  emmeans(., ~ Person * Region | Stem) %>%
  # simple = "each" requests the simple contrasts of every factor in turn;
  # combine = TRUE returns them as one combined table.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  # rounding to 4 decimals makes very small p-values print as 0
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and every column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## # A tibble: 36 × 9
##    Region Stem  Person contrast    estimate     SE    df t.ratio p.value
##    <chr>  <chr> <chr>  <chr>          <dbl>  <dbl> <dbl>   <dbl>   <dbl>
##  1 ZZ     ten   .      1st - 2nd      0.399 0.0594 1692.    6.72  0     
##  2 ZZ     ten   .      1st - 3rd      0.348 0.0630 1700.    5.52  0     
##  3 MZ     ten   .      1st - 2nd      0.189 0.0395 1708.    4.79  0.0003
##  4 MZ     ten   .      1st - 3rd      0.215 0.0402 1711.    5.35  0     
##  5 MM     .     1st    pens - dorm    0.398 0.0506 1703.    7.88  0     
##  6 MM     .     1st    trov - dorm    0.317 0.0529 1730.    5.99  0     
##  7 MM     .     1st    ten - dorm     0.216 0.0474 1709.    4.55  0.001 
##  8 MM     .     1st    dorm - ess    -0.255 0.0507 1734.   -5.03  0.0001
##  9 MM     .     2nd    pens - ten     0.204 0.0557 1737.    3.66  0.0475
## 10 MM     .     2nd    pens - dorm    0.394 0.0567 1719.    6.95  0     
## 11 MM     .     2nd    trov - dorm    0.273 0.0594 1708.    4.59  0.0008
## 12 MM     .     2nd    dorm - ess    -0.255 0.0630 1730.   -4.05  0.0095
## 13 MM     .     3rd    pens - trov    0.278 0.0620 1712.    4.48  0.0014
## 14 MM     .     3rd    pens - ten     0.273 0.0532 1751.    5.13  0.0001
## 15 MM     .     3rd    pens - dorm    0.376 0.0569 1725.    6.61  0     
## 16 MM     .     3rd    dorm - ess    -0.226 0.0588 1712.   -3.84  0.0232
## 17 ZZ     .     1st    pens - dorm    0.404 0.0611 1701.    6.61  0     
## 18 ZZ     .     1st    trov - dorm    0.374 0.0605 1693.    6.18  0     
## 19 ZZ     .     1st    ten - dorm     0.444 0.0594 1694.    7.47  0     
## 20 ZZ     .     1st    dorm - ess    -0.293 0.0616 1695.   -4.75  0.0004
## 21 ZZ     .     2nd    pens - ten     0.428 0.0612 1706.    7.00  0     
## 22 ZZ     .     2nd    pens - dorm    0.499 0.0622 1710.    8.02  0     
## 23 ZZ     .     2nd    trov - ten     0.380 0.0646 1704.    5.89  0     
## 24 ZZ     .     2nd    trov - dorm    0.452 0.0655 1702.    6.89  0     
## 25 ZZ     .     2nd    ten - ess     -0.252 0.0639 1710.   -3.95  0.0149
## 26 ZZ     .     2nd    dorm - ess    -0.323 0.0648 1705.   -4.99  0.0001
## 27 ZZ     .     3rd    pens - ten     0.346 0.0640 1701.    5.40  0     
## 28 ZZ     .     3rd    pens - dorm    0.343 0.0610 1696.    5.62  0     
## 29 MZ     .     1st    pens - dorm    0.244 0.0416 1706.    5.88  0     
## 30 MZ     .     1st    pens - ess     0.177 0.0466 1726.    3.80  0.0273
## 31 MZ     .     1st    trov - dorm    0.156 0.0424 1706.    3.69  0.0422
## 32 MZ     .     1st    ten - dorm     0.214 0.0393 1710.    5.43  0     
## 33 MZ     .     2nd    pens - ten     0.216 0.0438 1720.    4.93  0.0002
## 34 MZ     .     2nd    pens - dorm    0.209 0.0439 1702.    4.76  0.0004
## 35 MZ     .     3rd    pens - ten     0.274 0.0448 1729.    6.11  0     
## 36 MZ     .     3rd    pens - dorm    0.259 0.0451 1706.    5.73  0

The statistics confirm that the differences observed above between persons are significant for /ten/ in ZZ (both k0 and k1) and slightly so for /ess/ in MZ (k0, 1st vs 2nd person). In all other cases, either the effects are non-significant, or metaphony is levelled across the whole paradigm, or it does not apply at all (see also Sections 3.2 and 3.3 in the paper).


3. Revisions: Alternative LMER models with four fixed factors

3.1. Statistics: step()-tests

Reviewer 2 suggested: “I recommend that the authors include an interaction between vowel and dialect as a predictor in the regression model(s)”, since (citing the editor) “differences in nouns/adjectives versus verbs could be because of different proportions of /e/ and /o/ in the lexical categories”.

A note: our models had “(lexical) stem” as a random factor (RF), so part of the variation between /e/- and /o/-stemmed words was already accounted for in our previous models.

library(lmerTest)
library(emmeans)

# Backward elimination (step()) starting from the full four-factor model for X0
D %>%
  lmer(
    formula = X0 ~ Region * Gram * MET * Stem_vowel +
      (Region | Stem) + (MET | speaker),
    data = .
  ) %>%
  step()
## boundary (singular) fit: see help('isSingular')
## Backward reduced random-effect table:
## 
##                           Eliminated npar logLik   AIC     LRT Df Pr(>Chisq)
## <none>                                 34 -17641 35349                      
## Region in (Region | Stem)          0   29 -18485 37028 1688.42  5  < 2.2e-16
## MET in (MET | speaker)             0   32 -17725 35513  168.03  2  < 2.2e-16
##                              
## <none>                       
## Region in (Region | Stem) ***
## MET in (MET | speaker)    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                            Eliminated Sum Sq Mean Sq NumDF   DenDF F value
## Region:Gram:MET:Stem_vowel          1  1.094  0.5472     2 16268.1  1.2699
## Region:Gram:Stem_vowel              2  0.707  0.3534     2    48.7  0.8202
## Region:MET:Stem_vowel               3  2.507  1.2534     2  8792.5  2.9090
## Region:Stem_vowel                   4  0.994  0.4968     2    49.3  1.1528
## Region:Gram:MET                     0 37.980 18.9900     2 16721.5 44.0694
## Gram:MET:Stem_vowel                 0  9.131  9.1311     1 16968.7 21.1902
##                               Pr(>F)    
## Region:Gram:MET:Stem_vowel   0.28087    
## Region:Gram:Stem_vowel       0.44634    
## Region:MET:Stem_vowel        0.05458 .  
## Region:Stem_vowel            0.32412    
## Region:Gram:MET            < 2.2e-16 ***
## Gram:MET:Stem_vowel        4.189e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## X0 ~ Region + Gram + MET + Stem_vowel + (Region | Stem) + (MET | speaker) + Region:Gram + Region:MET + Gram:MET + Gram:Stem_vowel + MET:Stem_vowel + Region:Gram:MET + Gram:MET:Stem_vowel
# Backward elimination (step()) starting from the full four-factor model for X1
D %>%
  lmer(
    formula = X1 ~ Region * Gram * MET * Stem_vowel +
      (Region | Stem) + (MET | speaker),
    data = .
  ) %>%
  step()
## Backward reduced random-effect table:
## 
##                           Eliminated npar  logLik    AIC    LRT Df Pr(>Chisq)
## <none>                                 34 -1078.2 2224.3                     
## Region in (Region | Stem)          0   29 -1349.9 2757.8 543.46  5  < 2.2e-16
## MET in (MET | speaker)             0   32 -1130.8 2325.6 105.34  2  < 2.2e-16
##                              
## <none>                       
## Region in (Region | Stem) ***
## MET in (MET | speaker)    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##                            Eliminated  Sum Sq Mean Sq NumDF   DenDF F value
## Region:Gram:MET:Stem_vowel          1 0.29073 0.14537     2 14471.9  2.3177
## Region:Gram:Stem_vowel              2 0.08186 0.04093     2    42.9  0.6525
## Region:Gram:MET                     0 2.16861 1.08431     2 14280.0 17.2870
## Region:MET:Stem_vowel               0 0.43591 0.21795     2  4109.7  3.4748
## Gram:MET:Stem_vowel                 0 0.69124 0.69124     1 16934.8 11.0204
##                               Pr(>F)    
## Region:Gram:MET:Stem_vowel  0.098538 .  
## Region:Gram:Stem_vowel      0.525815    
## Region:Gram:MET            3.173e-08 ***
## Region:MET:Stem_vowel       0.031058 *  
## Gram:MET:Stem_vowel         0.000903 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## X1 ~ Region + Gram + MET + Stem_vowel + (Region | Stem) + (MET | speaker) + Region:Gram + Region:MET + Gram:MET + Region:Stem_vowel + Gram:Stem_vowel + MET:Stem_vowel + Region:Gram:MET + Region:MET:Stem_vowel + Gram:MET:Stem_vowel

So the stem vowel seems to play a role (3-way interaction between Gram, MET, and Stem vowel).

3.2. LMER: anova() and post-hoc tests

k0:

library(lmerTest)
library(emmeans)

# ANOVA table for the full four-factor model of X0, fitted with the
# optimx / nlminb optimiser
D %>%
  lmer(
    formula = X0 ~ Region * Gram * MET * Stem_vowel +
      (Region | Stem) + (MET | speaker),
    data = .,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  ) %>%
  anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##                             Sum Sq Mean Sq NumDF   DenDF  F value    Pr(>F)    
## Region                       0.681   0.341     2    54.2   0.7904 0.4588286    
## Gram                         3.757   3.757     1    53.6   8.7195 0.0046645 ** 
## MET                         71.778  71.778     1   209.5 166.5977 < 2.2e-16 ***
## Stem_vowel                   3.233   3.233     1    53.6   7.5031 0.0083462 ** 
## Region:Gram                  0.063   0.032     2    50.7   0.0735 0.9292099    
## Region:MET                   6.014   3.007     2   205.2   6.9798 0.0011679 ** 
## Gram:MET                   134.149 134.149     1 17067.4 311.3640 < 2.2e-16 ***
## Region:Stem_vowel            0.692   0.346     2    50.5   0.8036 0.4533592    
## Gram:Stem_vowel              0.779   0.779     1    53.6   1.8090 0.1842884    
## MET:Stem_vowel               6.109   6.109     1 16963.9  14.1788 0.0001668 ***
## Region:Gram:MET             38.404  19.202     2 16462.3  44.5686 < 2.2e-16 ***
## Region:Gram:Stem_vowel       0.471   0.236     2    50.5   0.5469 0.5821348    
## Region:MET:Stem_vowel        0.863   0.432     2 16268.1   1.0021 0.3671378    
## Gram:MET:Stem_vowel          9.816   9.816     1 16960.0  22.7821  1.83e-06 ***
## Region:Gram:MET:Stem_vowel   1.094   0.547     2 16268.0   1.2700 0.2808572    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can apply the same model also to k1:

library(lmerTest)
library(emmeans)

# ANOVA table for the full four-factor model of X1, fitted with the
# optimx / nlminb optimiser
D %>%
  lmer(
    formula = X1 ~ Region * Gram * MET * Stem_vowel +
      (Region | Stem) + (MET | speaker),
    data = .,
    control = lmerControl(
      optimizer = "optimx",
      optCtrl = list(method = "nlminb")
    )
  ) %>%
  anova()
## Type III Analysis of Variance Table with Satterthwaite's method
##                            Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## Region                     0.4129  0.2065     2    82.2  3.2919  0.042144 *  
## Gram                       0.0049  0.0049     1    53.6  0.0775  0.781757    
## MET                        1.3028  1.3028     1   214.0 20.7718 8.695e-06 ***
## Stem_vowel                 0.0289  0.0289     1    53.5  0.4603  0.500385    
## Region:Gram                0.4166  0.2083     2    45.8  3.3208  0.044999 *  
## Region:MET                 0.6735  0.3368     2   205.9  5.3694  0.005332 ** 
## Gram:MET                   4.1091  4.1091     1 17016.9 65.5147 6.154e-16 ***
## Region:Stem_vowel          0.3069  0.1535     2    45.5  2.4469  0.097889 .  
## Gram:Stem_vowel            0.0016  0.0016     1    53.5  0.0262  0.871998    
## MET:Stem_vowel             0.1246  0.1246     1 16929.9  1.9859  0.158783    
## Region:Gram:MET            1.9617  0.9808     2 14763.1 15.6385 1.642e-07 ***
## Region:Gram:Stem_vowel     0.0683  0.0342     2    45.5  0.5448  0.583713    
## Region:MET:Stem_vowel      0.1010  0.0505     2 14473.9  0.8051  0.447058    
## Gram:MET:Stem_vowel        0.5526  0.5526     1 16924.1  8.8113  0.002998 ** 
## Region:Gram:MET:Stem_vowel 0.2907  0.1454     2 14471.9  2.3176  0.098543 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The 4-way interaction is never significant, but the interaction between Gram, MET, and stem vowel is significant for both models.

Post-hoc tests for k0:

# Post-hoc comparisons of Stem_vowel and MET within each level of Gram (k0).
# Uncomment the next line to enable d.f. adjustments for this large data set
# (emmeans warns that this may need a lot of time and memory):
#emm_options(pbkrtest.limit = 17259)

D %>%
  lmer(X0 ~ Region * Gram * MET * Stem_vowel + (Region | Stem) + (MET | speaker),
    control = lmerControl(
      optimizer = "optimx", optCtrl = list(method = "nlminb")
    ), .
  ) %>%
  emmeans(., ~ Stem_vowel * MET | Gram) %>%
  # simple = "each" requests the simple contrasts of every factor in turn;
  # combine = TRUE returns them as one combined table.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  # rounding to 4 decimals makes very small p-values print as 0
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and every column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## NOTE: Results may be misleading due to involvement in interactions
## # A tibble: 6 × 9
##   MET   Gram  Stem_vowel contrast     estimate     SE    df z.ratio p.value
##   <chr> <chr> <chr>      <chr>           <dbl>  <dbl> <dbl>   <dbl>   <dbl>
## 1 FALSE NAdj  .          e - o          -0.804 0.109    Inf   -7.36  0     
## 2 TRUE  NAdj  .          e - o          -0.472 0.109    Inf   -4.33  0.0002
## 3 .     NAdj  e          FALSE - TRUE    0.542 0.0308   Inf   17.6   0     
## 4 .     NAdj  o          FALSE - TRUE    0.874 0.0275   Inf   31.8   0     
## 5 TRUE  .     e          NAdj - V       -0.925 0.233    Inf   -3.96  0.0009
## 6 TRUE  .     o          NAdj - V       -0.690 0.208    Inf   -3.32  0.0109

There are differences between F1 means in /e/ and /o/ in both metaphonic and non-metaphonic contexts. However, such differences are significant only for N/Adj, not for verbs.

Same post-hoc tests for k1:

# Post-hoc comparisons of Stem_vowel and MET within each level of Gram (k1).
# Uncomment the next line to enable d.f. adjustments for this large data set
# (emmeans warns that this may need a lot of time and memory):
#emm_options(pbkrtest.limit = 17259)

D %>%
  lmer(X1 ~ Region * Gram * MET * Stem_vowel + (Region | Stem) + (MET | speaker),
    control = lmerControl(
      optimizer = "optimx", optCtrl = list(method = "nlminb")
    ), .
  ) %>%
  emmeans(., ~ Stem_vowel * MET | Gram) %>%
  # simple = "each" requests the simple contrasts of every factor in turn;
  # combine = TRUE returns them as one combined table.
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  pairs(., simple = "each", combine = TRUE) %>%
  as.data.frame() %>%
  filter(p.value <= 0.05) %>%
  as_tibble() %>%
  # rounding to 4 decimals makes very small p-values print as 0
  mutate(p.value = round(p.value, 4)) %>%
  # print every row and every column instead of the truncated tibble default
  print(., n = nrow(.), max_extra_cols = ncol(.))
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'pbkrtest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(pbkrtest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
## To enable adjustments, add the argument 'lmerTest.limit = 17259' (or larger)
## [or, globally, 'set emm_options(lmerTest.limit = 17259)' or larger];
## but be warned that this may result in large computation time and memory use.
## NOTE: Results may be misleading due to involvement in interactions
## # A tibble: 2 × 9
##   MET   Gram  Stem_vowel contrast     estimate      SE    df z.ratio p.value
##   <chr> <chr> <chr>      <chr>           <dbl>   <dbl> <dbl>   <dbl>   <dbl>
## 1 .     NAdj  e          FALSE - TRUE   0.139  0.0112    Inf   12.4        0
## 2 .     NAdj  o          FALSE - TRUE   0.0741 0.00987   Inf    7.50       0

To sum up: there are some differences in vowel height/F1 mean for /e/ vs /o/ in N/Adj only, no matter the suffix context. The verbs are not affected by these. So the results about differences in metaphony between N/Adj and verbs are still valid.

4. Relationship between stem and suffix vowel height

4.1 Data preparation

suff.df contains mean values (across all time points) of suffix vowels, when they were not deleted.

# Load the table of suffix-vowel mean values
suff.df <- read.table(file = "suff_mean.txt")

We only wish to analyse nouns and adjectives (NAdj).

# Keep only the rows labelled as nouns/adjectives
d.df <- filter(D_basic, Gram == "NAdj")

Join suff.df to d.df

# Rename the suffix formant columns so that they do not clash with the
# columns already present in `D_basic` / `d.df`
sx.df <- suff.df %>%
  rename(F1nx = F1n, F1x = F1) %>%
  select(bundle, F1nx, F1x)

# Join the suffix values onto the noun/adjective rows (d.df), matching on
# bundle; left_join keeps every row of d.df
f.df <- left_join(d.df, sx.df, by = "bundle")
# extract at temporal midpoint; near() compares with a small tolerance, so
# the filter does not depend on exact floating-point equality with 0.5
f5.df <- f.df %>% filter(near(times_norm, 0.5))
# should have same number of rows as bundle count
nrow(f5.df) == length(unique(d.df$bundle))
## [1] TRUE
# Share of midpoint rows that have no suffix-vowel value (36%)
mean(is.na(f5.df$F1nx))
## [1] 0.3598552

4.2 Regression of F1 at stem midpoint and F1 in suffix vowel

We wish to determine whether there is a predictable relationship between F1-stem and F1-suffix within each suffix vowel.

Remove ‘ZZ’

# Drop the ZZ rows, then rebuild Region as a factor of the remaining values
f5.df <- mutate(
  filter(f5.df, Region != "ZZ"),
  Region = factor(Region)
)

A note for the Methods section: how many tokens have phonetically realised suffixes?

# Count the rows whose Trig value is one of the four vowels a, e, i, u
f5.df %>%
  filter(Trig %in% c("a", "e", "i", "u")) %>%
  nrow()
## [1] 8223

NB: the statistical models below ignore rows for which there are no suffix vowel formant data.

This is the model:

# Mixed model relating stem F1 (F1n) to suffix F1 (F1nx), fully crossed with
# Region and Suffix_vowel; random effects are (Region | Stem) and a
# by-speaker intercept.
# The original formula had a doubled "+" before (Region | Stem); it is
# removed here. The extra "+" was a no-op, so the model terms are unchanged.
f5.lmer <- f5.df %>%
  lmer(
    F1n ~ F1nx * Region * Suffix_vowel +
      (Region | Stem) +
      (1 | speaker), .
  )
anova(f5.lmer)
## Type III Analysis of Variance Table with Satterthwaite's method
##                          Sum Sq Mean Sq NumDF  DenDF  F value    Pr(>F)    
## F1nx                      22.49  22.485     1 6902.7  83.1346 < 2.2e-16 ***
## Region                     0.47   0.470     1   57.3   1.7391  0.192503    
## Suffix_vowel             329.01 109.670     3 7541.0 405.4817 < 2.2e-16 ***
## F1nx:Region                1.98   1.983     1 6916.6   7.3331  0.006786 ** 
## F1nx:Suffix_vowel          0.75   0.250     3 7539.6   0.9234  0.428475    
## Region:Suffix_vowel       58.46  19.486     3 6462.6  72.0448 < 2.2e-16 ***
## F1nx:Region:Suffix_vowel   0.81   0.268     3 7549.6   0.9923  0.395255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test slopes for significance together with F1nx interaction with Region:

# Slopes of F1nx for each Region within each Suffix_vowel, plus the pairwise
# Region differences in slope; both tables are stacked by rbind() and
# adjusted together with the Sidak method.
emtrends(f5.lmer, pairwise ~ Region, by = "Suffix_vowel", var = "F1nx") %>%
  # or e.g. rbind(adjust = "bonferroni") %>%
  rbind(adjust = "sidak") %>%
  # infer = TRUE adds confidence limits and tests to the summary; TRUE is
  # spelled out because T is an ordinary, reassignable variable.
  summary(., infer = TRUE)
##  Suffix_vowel Region contrast F1nx.trend     SE  df asymp.LCL asymp.UCL z.ratio
##  a            MM     .           0.10909 0.0372 Inf   0.00275     0.215   2.932
##  a            MZ     .           0.05602 0.0218 Inf  -0.00631     0.118   2.569
##  e            MM     .           0.11917 0.0410 Inf   0.00208     0.236   2.909
##  e            MZ     .           0.11566 0.0285 Inf   0.03412     0.197   4.054
##  i            MM     .           0.15915 0.0418 Inf   0.03968     0.279   3.807
##  i            MZ     .           0.05084 0.0209 Inf  -0.00884     0.111   2.435
##  u            MM     .           0.17902 0.0374 Inf   0.07222     0.286   4.790
##  u            MZ     .           0.08470 0.0238 Inf   0.01673     0.153   3.561
##  a            .      MM - MZ     0.05307 0.0431 Inf  -0.07014     0.176   1.231
##  e            .      MM - MZ     0.00351 0.0499 Inf  -0.13910     0.146   0.070
##  i            .      MM - MZ     0.10831 0.0467 Inf  -0.02523     0.242   2.318
##  u            .      MM - MZ     0.09432 0.0443 Inf  -0.03225     0.221   2.130
##  p.value
##   0.0397
##   0.1159
##   0.0427
##   0.0006
##   0.0017
##   0.1649
##   <.0001
##   0.0044
##   0.9480
##   1.0000
##   0.2196
##   0.3331
## 
## Degrees-of-freedom method: asymptotic 
## Confidence level used: 0.95 
## Conf-level adjustment: sidak method for 12 estimates 
## P value adjustment: sidak method for 12 tests

Here is a plot (Figure 7 in the paper):

# Build a ggplot2 theme() carrying the font sizes and legend settings used
# for the figures.
#
# Arguments:
#   atextx, atexty    font size of the x / y axis tick labels
#   atitlex, atitley  font size of the x / y axis titles
#   ptitle            font size of the plot title
#   strip.text.x      font size of the horizontal facet strip labels
#   lpos              legend position, passed to `legend.position`
#   othertext         font size for the generic `text` element
#   ltitle            legend title: either a number (used as the font size)
#                     or a ready-made theme element such as element_blank()
#
# Returns the object produced by theme(), to be added to a plot with `+`.
theme_phon <- function(atextx = 18, atexty = 18, atitlex = 18, atitley = 18,
                       ptitle = 24, strip.text.x = 24,
                       lpos = "bottom", othertext = 24, ltitle = 24) {
  # `legend.title` expects a theme element, so a bare number (such as the
  # default 24) is wrapped in element_text(); elements supplied by the
  # caller (e.g. element_blank()) are passed through untouched.
  legend_title <- if (is.numeric(ltitle)) {
    element_text(size = ltitle)
  } else {
    ltitle
  }

  theme(
    axis.text.x = element_text(size = atextx),
    # previously used `atitley` here, which left `atitlex` without effect
    axis.title.x = element_text(size = atitlex),
    axis.text.y = element_text(size = atexty),
    axis.title.y = element_text(size = atitley),
    plot.title = element_text(size = ptitle),
    legend.position = lpos,
    legend.title = legend_title,
    strip.text.x = element_text(size = strip.text.x),
    text = element_text(size = othertext)
  )
}
# Font sizes passed to theme_phon() below
n <- 30
k <- 22
i <- 20
textsize <- 30

# One colour per suffix vowel (two consecutive entries each) and alternating
# line types (1, 3) within each colour pair
col.values <- c(
  rep("darkgreen", 2),
  rep("red", 2), rep("blue", 2), rep("black", 2)
)
lty.values <- rep(c(1, 3), 4)
lwd <- 1.5

# Regression lines of stem F1 on suffix F1 for every Region x Suffix_vowel
# combination, evaluated at the two ends of the observed F1nx range.
regmodel <- f5.lmer %>%
  # plotit = FALSE returns the plotting data instead of drawing; FALSE is
  # spelled out because F is an ordinary, reassignable variable.
  emmip(., Region * Suffix_vowel ~ F1nx,
    cov.reduce = range, plotit = FALSE
  ) %>%
  # NOTE(review): the formula and cov.reduce are repeated here exactly as in
  # the original call; they may be redundant for emmip_ggplot() - confirm
  # before removing.
  emmip_ggplot(., Region * Suffix_vowel ~ F1nx,
    cov.reduce = range,
    linearg = list(linewidth = lwd)
  ) +
  scale_color_manual(values = col.values) +
  scale_linetype_manual(values = lty.values) +
  ylab("Normalized F1 in stem vowel") +
  xlab("Normalized F1 in suffix vowel") +
  theme_light() +
  theme_phon(
    lpos = "top",
    atextx = i,
    atexty = i,
    atitlex = k,
    atitley = k,
    strip.text.x = k,
    ptitle = n,
    ltitle = element_blank(),
    othertext = n
  )

# Write the figure to disk as a 20 cm x 20 cm PNG, then display it
ggsave(
  "regression.png",
  plot = regmodel,
  units = "cm",
  width = 20,
  height = 20
)
regmodel