1 Preliminaries

These are the commands for analysing the secondary data associated with this publication.

Load libraries and data:

library(emuR)
library(tidyverse)
library(gridExtra)
library(lmerTest)
library(emmeans)

# Secondary dataset: change this path to the directory that holds
# prefix.df.txt and spectral.df.txt (both created by primary.Rmd)
pfad2 = "/Volumes/vdata/ERC3/Kenya/secondary"
# Directory in which the figures are stored (a sub-directory of pfad2)
pfad3 = file.path(pfad2, "figs")
# Create the figure directory if it is missing so that the later ggsave()
# calls have somewhere to write; nothing happens when it already exists
dir.create(pfad3, showWarnings = FALSE)

# read in data-frame derived from primary dataset
# (TRUE is spelled out because T is an ordinary, reassignable variable)
prefix.df = read.table(file.path(pfad2, "prefix.df.txt"),
                       fill = TRUE, sep = "")
# read in spectral data needed for fig. 2 (plotted as fig6 in this script)
spectral.df = read.table(file.path(pfad2, "spectral.df.txt"),
                         fill = TRUE, sep = "")

Check distribution of stems by dialect

# Token counts per stem (rows) and dialect (columns), printed as a
# plain data.frame
prefix.df %>%
  group_by(stem, dial) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = dial, values_from = count) %>%
  as.data.frame()
## `summarise()` has grouped output by 'stem'. You can override using the
## `.groups` argument.
##   stem  Ch  Im  Tg
## 1  ara 114 135 147
## 2  EBa 113 130 102
## 3 imba 117 139 146
## 4 ongo 129 137 154
## 5  ora 124 133 150

2 Analysis of plurals

2.1 A comparison between the three varieties

Recode the dialect labels and some place labels and some stem labels.

# Recode prefix.df in place:
#   dial   - two-letter dialect codes expanded to full names
#   labels - labial labels get a following glide ("b" -> "bj" etc.), then
#            converted to a factor with a fixed level order for plotting
#   stem   - two stems respelled in IPA, then converted to a factor
prefix.df = prefix.df %>%
  mutate(
    # any code other than Ch / Im / Tg has no matching branch and becomes NA
    dial = case_when(
      dial == "Ch" ~ "Chuka",
      dial == "Im" ~ "Imenti",
      dial == "Tg" ~ "Tigania"
    ),
    # TRUE (not T, which can be reassigned) is the catch-all branch that
    # leaves every other label unchanged
    labels = case_when(
      labels == "b" ~ "bj",
      labels == "β" ~ "βj",
      labels == "v" ~ "vj",
      TRUE ~ as.character(labels)
    ),
    # labels that are not among these levels become NA
    labels = factor(labels,
                    levels = c("bj", "βj", "vj",
                               "ð", "s",  "ts",
                               "ʃ", "tʃ", "ɕ",
                               "tɕ", "c", "k")),
    stem = case_when(
      stem == "EBa" ~ "eβa",
      stem == "ongo" ~ "oŋgo",
      TRUE ~ as.character(stem)
    ),
    stem = factor(stem)
  )
# Fig. 1: stacked counts of each plural-marker label, filled by dialect
cols = c("red", "black", "slategray")
fig1 = prefix.df %>%
  filter(category == "pl") %>%
  ggplot(aes(x = labels, fill = dial)) +
  geom_bar() +
  scale_fill_manual(values = cols) +
  labs(x = "", y = "Number of occurrences") +
  theme(text = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        legend.position = "top",
        legend.title = element_blank())
fig1
Distribution of phonetic variants of plural markers by dialect.

Figure 2.1: Distribution of phonetic variants of plural markers by dialect.

# Write fig1 to fig1.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig1.png"), plot = fig1,
       width = 12, height = 8, units = "cm")

It’s evident from the above that Chuka marks the plurals with a lingual consonant, Imenti with a labial, and Tigania with both. This is shown more clearly by collapsing the transcriptions into categories of labial = /b, β, v/ and (other) lingual consonants:

# Two-way place category: the three labial labels vs. everything else
# (anything that is not one of the three labial labels counts as lingual)
prefix.df = prefix.df %>%
  mutate(
    lab2 = if_else(labels %in% c("bj", "βj", "vj"), "labial", "lingual"),
    lab2 = factor(lab2, levels = c("labial", "lingual"))
  )

2.2 Analysis of plural prefixes in Tigania

The issue is why Tigania seems to have a split between labial and lingual consonants in their plural prefixes. Could it be due to sex, age, or the interviewer? For this purpose, create a new df of the plurals in Tigania only (age was also grouped into five categories with equal numbers per category although this was not used in any plot).

# Plural tokens produced by Tigania speakers only
tig.df = prefix.df %>%
  filter(dial == "Tigania", category == "pl")

There’s no influence of sex or age on the choice of labial vs. lingual in marking the plural prefix (just state this without any figures). However, there is an influence of the interviewer i.e. whether the interviewer was of a first language Imenti background vs. a local from the Tigania region.

# Fig. 2: proportion of labial vs. lingual plural markers in Tigania,
# one panel per stem, split by interviewer background
cols = c("black", "slategray")
fig2 = tig.df  %>%
  # "native" is shown as Loc; every other value (including NA) falls
  # through to the TRUE branch and is shown as Im
  mutate(interviewer =
           case_when(interviewer == "native" ~ "Loc",
                     TRUE ~ "Im")) %>%
  ggplot +
  aes(fill = lab2, x = interviewer) +
  facet_wrap(~stem, ncol = 5) +
  # position = "fill" scales every bar to 1 so the y-axis is a proportion
  geom_bar(position = "fill") +
  theme(axis.text = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols) +
  ylab("Proportion") +
  # the earlier xlab("") was dropped: this label always replaced it
  xlab("Interviewer background")
fig2
Proportion of labials and linguals for marking plurals in stem by interviewer background.

Figure 2.2: Proportion of labials and linguals for marking plurals in stem by interviewer background.

# Write fig2 to fig2.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig2.png"), plot = fig2,
       width = 12, height = 8, units = "cm")

The further issue is whether the division between labials and linguals is between speakers (one group of speakers uses labials, the other linguals) or within speakers (a given speaker varies freely in marking the plural between labial and lingual). This was analysed by counting by stem (i) the number of speakers who used both a labial and lingual for the same stem as opposed to (ii) those who mark a stem plural only with a labial and (iii) those speakers who mark a stem plural only with a lingual. This was also done separately for those speakers who were interviewed by the Imenti and local interviewer.

# One row per stem x speaker x interviewer, classified by whether that
# speaker marked the stem's plural with labials only, linguals only, or both
tig2.df = tig.df %>%
  # TRUE (not the reassignable T) is the catch-all branch: every value
  # other than "native" is labelled as the Imenti interviewer
  mutate(interviewer =
           case_when(interviewer == "native" ~ "Local interviewer",
                     TRUE ~ "Imenti interviewer")) %>%
  group_by(stem, spk, lab2, interviewer) %>%
  # spk is itself a grouping variable, so count is always 1: it only flags
  # that this speaker produced this place category for this stem
  summarise(count = n_distinct(spk)) %>%
  ungroup() %>%
  # one column per place category; NA means the category was never used
  pivot_wider(names_from = lab2,
              values_from = count) %>%
  mutate(place =
           case_when(lingual == 1 & labial == 1 ~ "both",
                     is.na(lingual) & labial == 1 ~ "labial-only",
                     is.na(labial) & lingual == 1 ~ "lingual-only"),
         # level order fixes the stacking and colour order in the plot
         place = factor(place,
                        levels =
                          c("labial-only",
                            "both", "lingual-only"))) %>%
  select(stem, place, interviewer)
## `summarise()` has grouped output by 'stem', 'spk', 'lab2'. You can override
## using the `.groups` argument.
# Fig. 3: number of speakers per stem in each usage category
# (labial-only / both / lingual-only), one panel per interviewer
cols = c("black", "red", "slategray")
fig3 = tig2.df %>%
  ggplot(aes(x = stem, fill = place)) +
  geom_bar() +
  facet_wrap(~interviewer) +
  scale_fill_manual(values = cols) +
  labs(x = "Stem", y = "Number of speakers") +
  theme(text = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        legend.position = "top",
        legend.title = element_blank())
fig3

# Write fig3 to fig3.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig3.png"), plot = fig3,
       width = 12, height = 8, units = "cm")

The leftmost bar of the above plot shows, for example, that 2 speakers (in slate gray) always used a lingual in marking a plural of the /ara/ stem, 2 varied between using a labial and lingual (in red), and 6 marked the /ara/ plural with a labial only (in black).

Overall, this plot shows:

  • more speakers used labial-only when the experiment was conducted by the Imenti interviewer; and more speakers used lingual-only when the experiment was conducted by the local interviewer
  • some speakers (red) flip between labial and lingual in marking the plural.

3 Analysis of linguals: Tigania vs. Chuka

The next part is concerned with determining how and whether there are differences between Chuka and Tigania for plurals marked with linguals (i.e. non-labials).

# extract only linguals in plurals
# Lingual plural tokens from the non-Imenti dialects only;
# factor() is re-applied to labels to drop the levels that no longer occur
cor.df = prefix.df %>%
  filter(dial != "Imenti",
         lab2 == "lingual",
         category == "pl") %>%
  mutate(labels = factor(labels))

The following plot (see also Fig. 1) shows that there seems to be a preference in Tigania for marking plurals with a dorsal component i.e. with /tɕ, ɕ/ whereas Chuka prefers non-dorsal i.e. apical/laminal /ts, s, tʃ, ʃ/. In the analysis below, the first group is referred to as dorsal and the second (which also includes the rare occurrence of /ð/) as apical. The following shows the proportion of Chuka and Tigania speakers who used an apical vs. dorsal separately for the five stems.

# Apical ("ap") vs. dorsal ("do") category: the five listed labels are
# apical, every other label is treated as dorsal
cor.df = cor.df %>%
  mutate(
    lab3 = if_else(labels %in% c("ð", "ts", "s", "tʃ", "ʃ"), "ap", "do"),
    lab3 = factor(lab3, levels = c("ap", "do"))
  )
# Fig. 4: share of each dialect within the apical and dorsal tokens,
# one panel per stem (bars are scaled to 1 by position = "fill")
cols = c("red", "slategray")
fig4 = cor.df %>%
  ggplot(aes(x = lab3, fill = dial)) +
  geom_bar(position = "fill") +
  facet_wrap(~stem, ncol = 5) +
  scale_fill_manual(values = cols) +
  labs(x = "", y = "Proportion") +
  theme(text = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        legend.position = "top",
        legend.title = element_blank())
fig4

# Write fig4 to fig4.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig4.png"), plot = fig4,
       width = 12, height = 8, units = "cm")

The pattern is similar across all five stems: Chuka prefers to mark plurals with apicals and Tigania with dorsals. The issue is once again whether this preference is between speakers (i.e. some Chuka speakers always use apicals while others always use dorsals) or instead within speakers (a speaker varies freely between apicals and dorsals in marking plurals). The following plot counts the number of speakers by dialect and separately by stem who (i) vary freely in marking plurals of a given stem with apicals or dorsals as opposed to (ii) those who always used an apical or (iii) always used a dorsal.

# Replace cor.df by a per-speaker summary: one row per stem x speaker x
# dialect, labelled by whether the speaker used apicals only, dorsals only,
# or both for that stem
cor.df = cor.df %>%
  group_by(stem, lab3, spk, dial) %>%
  # spk is a grouping variable, so count is 1 for every group that exists
  summarise(count = n_distinct(spk)) %>%
  ungroup() %>%
  # spread to the columns ap / do; NA marks a category the speaker never used
  pivot_wider(names_from = lab3, values_from = count) %>%
  mutate(
    place = case_when(
      ap == 1 & do == 1 ~ "both",
      is.na(ap) & do == 1 ~ "dorsal-only",
      is.na(do) & ap == 1 ~ "apical-only"
    )
  ) %>%
  select(stem, place, dial)
## `summarise()` has grouped output by 'stem', 'lab3', 'spk'. You can override
## using the `.groups` argument.
# Fig. 5: number of speakers per stem in each usage category
# (apical-only / both / dorsal-only), one panel per dialect
cols = c("black", "red", "slategray")
fig5 = cor.df %>%
  ggplot(aes(x = stem, fill = place)) +
  geom_bar() +
  facet_wrap(~dial) +
  scale_fill_manual(values = cols) +
  labs(x = "Stem", y = "Number of speakers") +
  theme(text = element_text(size = 11),
        axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        legend.position = "top",
        legend.title = element_blank())
fig5

# Write fig5 to fig5.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig5.png"), plot = fig5,
       width = 12, height = 8, units = "cm")

The above plot shows the following:

3.1 Further acoustic analysis of /ɕ ʃ tɕ tʃ/

First list what we have:

# Cross-tabulate the four sibilant labels against dialect (all categories,
# not only plurals); factor() drops the labels that were filtered out
prefix.df %>%
  select(labels, dial) %>%
  filter(labels %in% c("tʃ", "ʃ", "tɕ", "ɕ")) %>%
  mutate(labels = factor(labels)) %>%
  table()
##       dial
## labels Chuka Imenti Tigania
##     ʃ     18      0       1
##     tʃ    17      0      33
##     ɕ     64      0      36
##     tɕ    57      5     118

In order to listen to these fricatives:

#serve(db, useViewer = F, 
#      seglist =  prefix.df %>%
#  filter(labels %in% c("tʃ", "tɕ", "ʃ", "ɕ" )))

Are /tʃ, ʃ/ really different from /tɕ,ɕ/? The region of interest is likely to be around 1-8 kHz. Also, there’s no point separating the fricatives from the affricates so the analysis will be of the temporal midpoint of AP vs DO where AP = /tʃ, ʃ/ and DO = /tɕ,ɕ/ (I am assuming by the way that the annotations for tʃ, tɕ include only the fricative part, and not the preceding closure). The following computes aggregated spectra in this frequency range to compare AP = /tʃ, ʃ/ vs. DO = /tɕ,ɕ/ separately for Chuka and Tigania.

# Fig. 6: line plot of dctsmooth against freq, one colour per level of
# dorlabs (columns of spectral.df, which is created by primary.Rmd).
# The palette is set here explicitly, as for every other figure; before,
# this chunk silently reused whatever `cols` the preceding figure left
# behind. The values are the same ones that were previously inherited.
cols = c("black", "red", "slategray")
fig6 =
spectral.df %>%
  ggplot +
  aes(x = freq, y = dctsmooth, col = dorlabs) +
  geom_line() +
# per-dialect panels are currently disabled
#  facet_wrap(~dial) +
  xlab("Frequency (Hz)") +
  ylab("Intensity (dB)") +
  theme(axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_colour_manual(values = cols)
fig6

# Write fig6 to fig6.png in the figure directory (12 x 8 cm)
ggsave(file.path(pfad3, "fig6.png"), plot = fig6,
       width = 12, height = 8, units = "cm")