# Build `prefix.df`: query the emuDB `db` for segments whose `prefix` label is
# one of the listed consonants, then add word / following-segment labels,
# metadata sliced out of the bundle and session names, and recoded
# word, category, stem, pair and interviewer columns.
#
# Wrapped in local() so the two intermediate segment lists do not leak into
# the calling environment; only `prefix.df` is created, as before.
prefix.df <- local({
  # NOTE(review): as far as EQL goes, `^` links the prefix item to an item on
  # MAU and `#` marks MAU as the level that is returned — confirm against the
  # EQL documentation.
  # The line break inside the query string belongs to the original literal
  # and is kept unchanged, hence the unindented continuation line.
  prefix_segs <- query(
    db,
    "[prefix == b|c|ç|ɕ|ð|k|K|p|s|ʃ|tɕ|ts|tʃ|v|ʔ|β
^ #MAU =~ .*]"
  )

  # Segment at sequence offset 1 from each match. Requeried once and reused
  # for labels, start and end instead of three identical database requeries.
  next_segs <- requery_seq(db, prefix_segs, 1)

  prefix_segs %>%
    mutate(
      # Label of the linked item on the ORT-MAU level.
      wordlabs = requery_hier(db, prefix_segs, "ORT-MAU")$labels,
      # Label and times of the segment at offset 1.
      vlabs = next_segs$labels,
      vstart = next_segs$start,
      vend = next_segs$end,
      # Label of the segment at offset 2.
      stemcons = requery_seq(db, prefix_segs, 2)$labels,
      # Metadata taken from fixed character positions of the bundle and
      # session names.
      # NOTE(review): presumably dialect, speaker, sex, age and conversation
      # partner codes — confirm against the file-naming scheme.
      dial = substring(bundle, 1, 2),
      spk = substring(bundle, 1, 4),
      sex = substring(bundle, 5, 5),
      age = substring(session, 8, 9),
      c_partner = substring(session, 13, 13),
      # Bundle name from character 9 onwards, minus its last two characters.
      pair = gsub(".{2}$", "", substring(bundle, 9)),
      # `age` is still a character column here. Comparing it directly with 40
      # would coerce 40 to "40" and compare as strings, so convert first.
      age_group = ifelse(as.numeric(age) > 40, "old", "young"),
      # Normalise spelling variants of the word label and insert a "."
      # between prefix and stem. Labels not listed are left unchanged.
      # NOTE(review): Biura/Biora map to "Bi.ura" while the ci-/si- forms map
      # to "*.ora"; the stem recode below still yields "ora" — confirm the
      # spelling is intended.
      word_morpho = recode(
        wordlabs,
        "Biara" = "Bi.ara",
        "BiEBa" = "Bi.EBa",
        "Bieni" = "Bi.Eni",
        "BiEni" = "Bi.Eni",
        "Biimba" = "Bi.imba",
        "Biongo" = "Bi.ongo",
        "Biura" = "Bi.ura",
        "Biora" = "Bi.ura",
        "ciao" = "ciO",
        "siao" = "siO",
        "ciara" = "ci.ara",
        "ciEBa" = "ci.EBa",
        "cieni" = "ci.Eni",
        "ciEni" = "ci.Eni",
        "ciimba" = "ci.imba",
        "cimba" = "ci.imba",
        "ciola" = "ci.ora",
        "ciongo" = "ci.ongo",
        "ciora" = "ci.ora",
        "ciura" = "ci.ora",
        "siara" = "si.ara",
        "sjara" = "si.ara",
        "siEBa" = "si.EBa",
        "siEni" = "si.Eni",
        "siimba" = "si.imba",
        "simba" = "si.imba",
        "siola" = "si.ora",
        "siora" = "si.ora",
        "siura" = "si.ora",
        "siongo" = "si.ongo"
      ),
      # Category per raw word label: Bi- and si- forms are "pl", ci- forms
      # are "sg" (presumably plural / singular). The duplicate "ciora" entry
      # of the original was dropped; recode() matches on the input values, so
      # the result is the same.
      category = recode(
        wordlabs,
        "Biara" = "pl",
        "BiEBa" = "pl",
        "Bieni" = "pl",
        "BiEni" = "pl",
        "Biimba" = "pl",
        "Biongo" = "pl",
        "Biura" = "pl",
        "Biora" = "pl",
        "ciao" = "sg",
        "ciO" = "sg",
        "siao" = "pl",
        "siO" = "pl",
        "ciara" = "sg",
        "ciEBa" = "sg",
        "cieni" = "sg",
        "ciEni" = "sg",
        "ciimba" = "sg",
        "cimba" = "sg",
        "ciola" = "sg",
        "ciora" = "sg",
        "ciongo" = "sg",
        "ciura" = "sg",
        "siara" = "pl",
        "sjara" = "pl",
        "siEBa" = "pl",
        "siEni" = "pl",
        "siimba" = "pl",
        "simba" = "pl",
        "siola" = "pl",
        "siora" = "pl",
        "siura" = "pl",
        "siongo" = "pl"
      ),
      # Stem = normalised word form without its prefix. Forms without a "."
      # (e.g. "ciO", "siO") have no entry and pass through unchanged.
      stem = recode(
        word_morpho,
        "Bi.ara" = "ara",
        "Bi.EBa" = "EBa",
        "Bi.Eni" = "Eni",
        "Bi.imba" = "imba",
        "Bi.ongo" = "ongo",
        "Bi.ura" = "ora",
        "ci.ara" = "ara",
        "ci.EBa" = "EBa",
        "ci.Eni" = "Eni",
        "ci.imba" = "imba",
        "ci.ora" = "ora",
        "ci.ongo" = "ongo",
        "si.ara" = "ara",
        "si.EBa" = "EBa",
        "si.Eni" = "Eni",
        "si.imba" = "imba",
        "si.ora" = "ora",
        "si.ongo" = "ongo"
      ),
      # Harmonise variant pair names (overwrites `pair` computed above).
      pair = recode(
        pair,
        "N02_ciola" = "N02_ciora",
        "N02_ciura" = "N02_ciora",
        "N05_cimba" = "N05_ciimba",
        "N07_cieni" = "N07_ciEni",
        "N04_cieba" = "N04_ciEBa",
        "u07_kwanda" = "kwanda",
        "l21_kwanda" = "kwanda",
        "l32_kudara" = "kudara"
      ),
      # Spell out the conversation-partner code.
      interviewer = recode(
        c_partner,
        "C" = "native",
        "K" = "Imenti"
      ),
      # Fix the order of the consonant labels.
      # NOTE(review): the query also matches "K" and "p", which are not among
      # these levels and therefore become NA — confirm this is intended.
      labels = factor(
        labels,
        levels = c(
          "b", "β", "v", "ð",
          "ts", "s", "tʃ", "ʃ",
          "tɕ", "ɕ", "ç", "c", "k", "ʔ"
        )
      )
    )
})
Exclude /siO/ and /ciO/ because there are so few tokens of them, and exclude the stem /Eni/ because it occurs in only two dialects. Also exclude kudara, kwanda, and the one with no name ("").
Also make age numeric.