These are the commands for analysing the secondary data associated with this publication.
Load libraries and data:
library(emuR)
library(tidyverse)
library(gridExtra)
library(lmerTest)
library(emmeans)
# Secondary dataset: change the path to the location
# of prefix.df.txt and of spectral.df.txt created by primary.Rmd
pfad2 = "/Volumes/vdata/ERC3/Kenya/secondary"
# Directory for storing figures: create this directory before running
# the ggsave() calls below
pfad3 = file.path(pfad2, "figs")
# read in data-frame derived from primary dataset
# (sep = "" splits on any whitespace; fill = TRUE pads short rows)
prefix.df = read.table(file.path(pfad2, "prefix.df.txt"),
                       fill = TRUE, sep = "")
# read in spectral data needed for the spectral figure (fig6)
spectral.df = read.table(file.path(pfad2, "spectral.df.txt"),
                         fill = TRUE, sep = "")
Check distribution of stems by dialect
# Count the tokens per stem and dialect, then spread the dialects into
# columns so that each stem is one row of the printed table.
prefix.df %>%
  group_by(stem, dial) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  pivot_wider(names_from = dial,
              values_from = count) %>%
  as.data.frame()
## `summarise()` has grouped output by 'stem'. You can override using the
## `.groups` argument.
## stem Ch Im Tg
## 1 ara 114 135 147
## 2 EBa 113 130 102
## 3 imba 117 139 146
## 4 ongo 129 137 154
## 5 ora 124 133 150
Recode the dialect labels and some place labels and some stem labels.
# Recode prefix.df in place:
#  - dial:   two-letter codes -> full dialect names (any other code -> NA)
#  - labels: the labials b, β, v get a trailing "j"; other labels are kept
#  - stem:   "EBa" -> "eβa", "ongo" -> "oŋgo"; other stems are kept
prefix.df = prefix.df %>%
  mutate(dial =
           case_when(
             dial == "Ch" ~ "Chuka",
             dial == "Im" ~ "Imenti",
             dial == "Tg" ~ "Tigania"),
         labels =
           case_when(labels == "b" ~ "bj",
                     labels == "β" ~ "βj",
                     labels == "v" ~ "vj",
                     TRUE ~ as.character(labels)),
         # fixed level order = order of the bars in fig1;
         # a label that is not listed here becomes NA
         labels = factor(labels,
                         levels = c("bj", "βj", "vj",
                                    "ð", "s", "ts",
                                    "ʃ", "tʃ", "ɕ",
                                    "tɕ", "c", "k")),
         stem =
           case_when(stem == "EBa" ~ "eβa",
                     stem == "ongo" ~ "oŋgo",
                     TRUE ~ as.character(stem)),
         stem = factor(stem))
# Fig. 1: stacked counts of each plural-marker transcription, filled by
# dialect. One colour per dialect, matched to the dialects in sorted order.
cols = c("red", "black", "slategray")
fig1 = prefix.df %>%
  filter(category == "pl") %>%
  ggplot(aes(fill = dial, x = labels)) +
  geom_bar() +
  xlab("") +
  ylab("Number of occurrences") +
  theme(axis.text = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols)
fig1
Figure 2.1: Distribution of phonetic variants of plural markers by dialect.
# Write fig1 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(file.path(pfad3, "fig1.png"), plot = fig1,
       width = 12, height = 8, units = "cm")
It’s evident from the above that Chuka marks the plurals with a lingual consonant, Imenti with a labial, and Tigania with both. This is shown more clearly by collapsing the transcriptions into categories of labial = /b,β, v/ and (other) lingual consonants:
# Collapse the transcriptions into a two-way place category: bj, βj, vj are
# "labial"; every other row (including rows whose label is NA) falls through
# to "lingual".
prefix.df =
  prefix.df %>%
  mutate(lab2 =
           case_when(labels %in% c("bj", "βj", "vj") ~ "labial",
                     TRUE ~ "lingual"),
         lab2 = factor(lab2, levels = c("labial", "lingual")))
The issue is why Tigania seems to have a split between labial and lingual consonants in their plural prefixes. Could it be due to sex, age, or the interviewer? For this purpose, create a new df of the plurals in Tigania only (age was also grouped into five categories with equal numbers per category although this was not used in any plot).
# Plural tokens of the Tigania dialect only.
tig.df = prefix.df %>%
  filter(dial == "Tigania") %>%
  filter(category == "pl")
There’s no influence of sex or age on the choice of labial vs. lingual in marking the plural prefix (just state this without any figures). However, there is an influence of the interviewer i.e. whether the interviewer was of a first language Imenti background vs. a local from the Tigania region.
# Fig. 2: proportion of labial vs. lingual plural markers in Tigania,
# by interviewer background, one panel per stem.
cols = c("black", "slategray")
fig2 = tig.df %>%
  # "native" = local interviewer; every other value is labelled "Im"
  mutate(interviewer =
           case_when(interviewer == "native" ~ "Loc",
                     TRUE ~ "Im")) %>%
  ggplot(aes(fill = lab2, x = interviewer)) +
  facet_wrap(~stem, ncol = 5) +
  geom_bar(position = "fill") +
  theme(axis.text = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols) +
  ylab("Proportion") +
  # the earlier empty xlab("") was dropped: this later label replaced it
  xlab("Interviewer background")
fig2
Figure 2.2: Proportion of labials and linguals for marking plurals in stem by interviewer background.
# Write fig2 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(file.path(pfad3, "fig2.png"), plot = fig2,
       width = 12, height = 8, units = "cm")
The further issue is whether the division between labials and linguals is between speakers (one group of speakers uses labials, the other linguals) or within speakers (a given speaker varies freely in marking the plural between labial and lingual). This was analysed by counting by stem (i) the number of speakers who used both a labial and lingual for the same stem as opposed to (ii) those who mark a stem plural only with a labial and (iii) those speakers who mark a stem plural only with a lingual. This was also done separately for those speakers who were interviewed by the Imenti and local interviewer.
# One row per stem x speaker x interviewer, classified by whether that
# speaker marked the stem's plural with a labial only, a lingual only, or both.
tig2.df = tig.df %>%
  mutate(interviewer =
           case_when(interviewer == "native" ~ "Local interviewer",
                     TRUE ~ "Imenti interviewer")) %>%
  group_by(stem, spk, lab2, interviewer) %>%
  # spk is a grouping variable, so count is 1 for every combination present
  summarise(count = n_distinct(spk)) %>%
  ungroup() %>%
  # one column per place category; NA = the speaker never used that category
  pivot_wider(names_from = lab2,
              values_from = count) %>%
  mutate(place =
           case_when(lingual == 1 & labial == 1 ~ "both",
                     is.na(lingual) & labial == 1 ~ "labial-only",
                     is.na(labial) & lingual == 1 ~ "lingual-only"),
         place = factor(place,
                        levels =
                          c("labial-only",
                            "both", "lingual-only"))) %>%
  select(stem, place, interviewer)
## `summarise()` has grouped output by 'stem', 'spk', 'lab2'. You can override
## using the `.groups` argument.
# Fig. 3: number of Tigania speakers per stem who used labial-only (black),
# both (red) or lingual-only (slategray) plural markers, by interviewer.
cols = c("black", "red", "slategray")
fig3 = tig2.df %>%
  ggplot(aes(x = stem, fill = place)) +
  facet_wrap(~interviewer) +
  geom_bar() +
  ylab("Number of speakers") +
  xlab("Stem") +
  theme(axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols)
fig3
# Write fig3 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(filename = file.path(pfad3, "fig3.png"),
       plot = fig3,
       width = 12,
       height = 8,
       units = "cm")
The leftmost bar of the above plot shows, for example, that 2 speakers (in slate gray) always used a lingual in marking a plural of the /ara/ stem, 2 varied between using a labial and lingual, and 6 marked the /ara/ plural with a labial only.
Overall, this plot shows:
The next part is concerned with determining how and whether there are differences between Chuka and Tigania for plurals marked with linguals (i.e. non-labials).
# extract only linguals in plurals (Chuka and Tigania; Imenti is excluded)
cor.df =
  prefix.df %>%
  filter((dial != "Imenti") &
           (lab2 == "lingual") &
           (category == "pl")) %>%
  # re-create the factor so that unused label levels are dropped
  mutate(labels = factor(labels))
The following plot (see also Fig. 1) shows that there seems to be a preference in Tigania for marking plurals with a dorsal component i.e. with /tɕ, ɕ/ whereas Chuka prefers non-dorsal i.e. apical/laminal /ts, s, tʃ, ʃ/. In the analysis below, the first group is referred to as dorsal and the second (which also includes the rare occurrence of /ð/) as apical. The following shows the proportion of Chuka and Tigania speakers who used an apical vs. dorsal separately for the five stems.
# Two-way split of the linguals: ð, ts, s, tʃ, ʃ are "ap" (apical);
# every other label falls through to "do" (dorsal).
cor.df = cor.df %>%
  mutate(lab3 =
           case_when(labels %in%
                       c("ð", "ts", "s", "tʃ", "ʃ") ~ "ap",
                     TRUE ~ "do"),
         lab3 = factor(lab3,
                       levels = c("ap", "do")))
# Fig. 4: for each stem, the share of Chuka vs. Tigania tokens within the
# apical ("ap") and dorsal ("do") plural markers.
cols = c("red", "slategray")
fig4 = cor.df %>%
  ggplot(aes(x = lab3, fill = dial)) +
  geom_bar(position = "fill") +
  facet_wrap(~stem, ncol = 5) +
  xlab("") +
  theme(axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols) +
  ylab("Proportion")
fig4
# Write fig4 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(filename = file.path(pfad3, "fig4.png"),
       plot = fig4,
       width = 12,
       height = 8,
       units = "cm")
The pattern is similar across all five stems: Chuka prefers to mark plurals with apicals and Tigania with dorsals. The issue is once again whether this preference is between speakers (i.e. some Chuka speakers always use apicals while others always use dorsals) or instead within speakers (a speaker varies freely between apicals and dorsals in marking plurals). The following plot counts the number of speakers by dialect and separately by stem who (i) vary freely in marking plurals of a given stem with apicals or dorsals as opposed to (ii) those who always used an apical or (iii) always used a dorsal.
# Replace cor.df by one row per stem x speaker x dialect, classified by
# whether that speaker used apicals only, dorsals only, or both for the stem.
cor.df = cor.df %>%
  group_by(stem, lab3, spk, dial) %>%
  # spk is a grouping variable, so count is 1 for every combination present
  summarise(count = n_distinct(spk)) %>%
  ungroup() %>%
  # one column per category (ap, do); NA = the speaker never used it
  pivot_wider(names_from = lab3,
              values_from = count) %>%
  mutate(place =
           case_when(ap == 1 & do == 1 ~ "both",
                     is.na(ap) & do == 1 ~ "dorsal-only",
                     is.na(do) & ap == 1 ~ "apical-only")) %>%
  select(stem, place, dial)
## `summarise()` has grouped output by 'stem', 'lab3', 'spk'. You can override
## using the `.groups` argument.
# Fig. 5: number of speakers per stem who used apicals only, both, or
# dorsals only, in separate panels for the two dialects.
cols = c("black", "red", "slategray")
fig5 =
  cor.df %>%
  ggplot(aes(x = stem, fill = place)) +
  facet_wrap(~dial) +
  geom_bar() +
  ylab("Number of speakers") +
  xlab("Stem") +
  theme(axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_fill_manual(values = cols)
fig5
# Write fig5 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(filename = file.path(pfad3, "fig5.png"),
       plot = fig5,
       width = 12,
       height = 8,
       units = "cm")
The above plot shows the following:
First list what we have:
# Cross-tabulate the four (alveolo-)palatal labels against dialect.
prefix.df %>%
  filter(labels %in% c("tʃ", "ʃ", "tɕ", "ɕ")) %>%
  # re-create the factor so the table only has rows for these four labels
  mutate(labels = factor(labels)) %>%
  select(labels, dial) %>%
  table()
## dial
## labels Chuka Imenti Tigania
## ʃ 18 0 1
## tʃ 17 0 33
## ɕ 64 0 36
## tɕ 57 5 118
In order to listen to these fricatives:
#serve(db, useViewer = F,
# seglist = prefix.df %>%
# filter(labels %in% c("tʃ", "tɕ", "ʃ", "ɕ" )))
Are /tʃ, ʃ/ really different from /tɕ, ɕ/? The region of interest is likely to be around 1-8 kHz. Also, there’s no point separating the fricatives from the affricates so the analysis will be of the temporal midpoint of AP vs DO where AP = /tʃ, ʃ/ and DO = /tɕ, ɕ/ (I am assuming by the way that the annotations for tʃ, tɕ include only the fricative part, and not the preceding closure). The following computes aggregated spectra in this frequency range to compare AP = /tʃ, ʃ/ vs. DO = /tɕ, ɕ/ separately for Chuka and Tigania.
# Fig. 6: smoothed spectra (dctsmooth against freq), one line per value
# of dorlabs.
# NOTE(review): `cols` is not set here; the plot reuses whatever palette was
# last assigned (the three colours set for fig5) - confirm this is intended.
fig6 =
  spectral.df %>%
  ggplot(aes(x = freq, y = dctsmooth, col = dorlabs)) +
  geom_line() +
  # facet_wrap(~dial) +
  xlab("Frequency (Hz)") +
  ylab("Intensity (dB)") +
  theme(axis.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.title.x = element_text(size = 11),
        axis.title.y = element_text(size = 11),
        text = element_text(size = 11),
        legend.title = element_blank(),
        legend.position = "top") +
  scale_colour_manual(values = cols)
fig6
# Write fig6 as a 12 cm x 8 cm PNG into the figures directory (pfad3).
ggsave(filename = file.path(pfad3, "fig6.png"),
       plot = fig6,
       width = 12,
       height = 8,
       units = "cm")