library(tidyverse)
library(gridExtra)

# Root of the archived project; the input tables live in its
# "01_input_data" subdirectory.
base_directory <- "~/ABM-Archiv/2022_23"

# One CSV per variety (file prefixes "df_sd" and "df_wb"). Going by the file
# names, each holds stops in real words at normal and fast speech rate.
df_sd <- base_directory %>%
  file.path("01_input_data/df_sd-stops-real_words-normal_and_fast.e92a42b.csv") %>%
  read_csv()
df_wb <- base_directory %>%
  file.path("01_input_data/df_wb-stops-real_words-normal_and_fast.0136fb4.csv") %>%
  read_csv()

# Stack both varieties into one table and derive the grouping variables used
# further down. All derivations happen in a single mutate(); dplyr evaluates
# its arguments in order, so later columns may refer to earlier ones.
df <- rbind(df_sd, df_wb) %>%
  mutate(
    # Release type of the stop, decided by the following sound or, when that
    # sound is one of "_h", "_t", "h", by the sound after it. Rows matching
    # none of the rules get NA.
    # NOTE(review): the sound labels look like SAMPA symbols -- confirm.
    release_type = case_when(
      following_sound %in% c("6", "@", "i", "I") ~ "oral",
      following_sound %in% c("m", "n", "N") ~ "nasal",
      following_sound %in% c("_h", "_t", "h") &
        second_next_sound %in% c("6", "@", "i", "I") ~ "oral",
      following_sound %in% c("_h", "_t", "h") &
        second_next_sound %in% c("m", "n", "N") ~ "nasal",
      following_sound %in% c("_h") &
        second_next_sound %in% c("l") ~ "lateral"
    ),
    # Every SD row counts as "legal"; for WB the label depends on the
    # category. Rows of any other variety get NA.
    # NOTE(review): presumably phonotactic legality -- confirm.
    legal = case_when(
      variety == "SD" ~ "legal",
      variety == "WB" ~ recode(
        category,
        "V:C" = "legal",
        "VC:" = "legal",
        "VC" = "illegal",
        "V:C:" = "illegal"
      )
    ),
    # Combined key "<age>_<variety>", e.g. "Y_SD".
    speaker_group = paste0(age, "_", variety),
    # Display labels; the level order also fixes the order of the groups.
    # Keys other than the three listed become NA.
    speaker_group_label = factor(
      speaker_group,
      levels = c("Y_SD", "Y_WB", "O_WB"),
      labels = c("Standard German", "Dialect, younger", "Dialect, older")
    ),
    # Numeric position from older dialect speakers (1) via younger dialect
    # speakers (2) to younger Standard German speakers (3); NA otherwise.
    standard_dialect_continuum = case_when(
      age == "O" & variety == "WB" ~ 1,
      age == "Y" & variety == "WB" ~ 2,
      age == "Y" & variety == "SD" ~ 3
    ),
    # Split the category code into its consonant part ("C" = Lenis,
    # "C:" = Fortis) ...
    consonant_strength = recode(
      category,
      "V:C" = "Lenis",
      "VC" = "Lenis",
      "V:C:" = "Fortis",
      "VC:" = "Fortis"
    ),
    # ... and its vowel part ("V" = Short, "V:" = Long).
    vowel_length = recode(
      category,
      "V:C" = "Long",
      "VC" = "Short",
      "V:C:" = "Long",
      "VC:" = "Short"
    ),
    # Map "schnell" to "fast", then make rate an ordered factor with
    # normal < fast. TRUE is spelled out because `T` is an ordinary variable
    # that can be reassigned.
    rate = factor(
      recode(rate, "normal" = "normal", "schnell" = "fast"),
      ordered = TRUE,
      levels = c("normal", "fast")
    )
  )

# Per-rate subsets of the full table.
df_normal <- filter(df, rate == "normal")

df_fast <- filter(df, rate == "fast")


# Plot colours: n_colors hues spaced evenly around the HCL colour wheel,
# starting at 15 degrees, with luminance 65 and chroma 100. The sequence has
# n_colors + 1 points because its last hue (375) wraps around to the first
# (15); only the first two entries are used.
# The palette is computed once, and `length.out` is spelled out in full
# rather than relying on partial matching of `length`.
n_colors <- 2
hue_palette <- hcl(h = seq(15, 375, length.out = n_colors + 1), l = 65, c = 100)
color_red <- hue_palette[1]
color_blue <- hue_palette[2]





# Items of category "VC" (short vowel + lenis consonant) from every speaker
# group except the younger dialect speakers ("Y_WB"), with the two group
# keys expected to remain replaced by the names shown in the figure.
df_shortshort <- df %>%
  filter(category == "VC" & speaker_group != "Y_WB") %>%
  mutate(
    speaker_group = recode(
      speaker_group,
      "O_WB" = "Bavarian (older)",
      "Y_SD" = "Standard German"
    )
  )

# Reference values: mean dur_c of the younger dialect speakers' "VC" items,
# one row per speech rate. These are the speakers left out of df_shortshort.
df_control_values <- df %>%
  filter(category == "VC" & speaker_group == "Y_WB") %>%
  group_by(rate) %>%
  summarise(mean = mean(dur_c))

print(df_control_values)


# ---- Figure 2 ---------------------------------------------------------------
# Two stacked panels written to "figure2.png": density curves of dur_c on
# top, and a mean +/- SD summary for the normal rate below.

# Relative heights of the upper and lower panel. Their sum is also used as
# the pixel height of the PNG, so the two numbers are defined only once.
panel_heights <- c(2962, 843)

# Shared look for both panels.
plot_theme <- theme_bw() +
  theme(
    title = element_text(size = 14),
    text = element_text(size = 16),
    axis.text = element_text(size = 16),
    panel.grid.major = element_line(linewidth = 1)
  )

# Upper panel: one density curve per speaker group (colour) and speech rate
# (line type). The black vertical lines mark the mean dur_c of the younger
# dialect speakers at normal (solid) and fast (dotted) rate.
# NOTE(review): "Bavarian (younger)" has no curve of its own; it is
# presumably listed in the colour scale so that the legend explains the
# black lines -- confirm.
p1 <- ggplot(
  df_shortshort,
  aes(x = dur_c, color = speaker_group, linetype = rate)
) +
  plot_theme +
  theme(legend.position = "top") +
  ggtitle("Distribution of stop closure duration by speaker group and speech rate") +
  xlab("Stop closure duration [ms]") +
  ylab("Probability density") +
  scale_x_continuous(limits = c(0, 200)) +
  # No ticks or tick labels on the density axis.
  scale_y_continuous(labels = NULL, breaks = NULL) +
  scale_color_manual(
    name = "",
    limits = c("Standard German", "Bavarian (older)", "Bavarian (younger)"),
    values = c(
      "Standard German" = color_red,
      "Bavarian (older)" = color_blue,
      "Bavarian (younger)" = "black"
    )
  ) +
  scale_linetype_discrete(
    name = "",
    breaks = c("fast" = "fast", "normal" = "normal")
  ) +
  geom_density(linewidth = 1) +
  geom_vline(
    xintercept = df_control_values$mean[df_control_values$rate == "normal"],
    linetype = "solid",
    color = "black",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = df_control_values$mean[df_control_values$rate == "fast"],
    linetype = "dotted",
    color = "black",
    linewidth = 1
  )

# Mean and standard deviation of dur_c per speaker group at normal rate.
# `.groups = "drop"` returns an ungrouped table and silences the regrouping
# message summarise() prints after a multi-variable group_by().
df_summary <- df_shortshort %>%
  filter(rate == "normal") %>%
  group_by(speaker_group, rate) %>%
  summarise(
    mean = mean(dur_c),
    sd = sd(dur_c),
    .groups = "drop"
  )

print(df_summary)

# Lower panel: a "boxplot" drawn from precomputed statistics. Lower hinge,
# median and upper hinge are all set to the mean, so the box collapses to a
# single line; the whiskers reach one SD to either side. coord_flip() turns
# it horizontal, giving the value axis the same 0-200 range as the upper
# panel's x axis.
p2 <- ggplot(df_summary, aes(x = speaker_group, color = speaker_group)) +
  plot_theme +
  theme(legend.position = "none") +
  geom_boxplot(
    aes(
      lower = mean,
      upper = mean,
      middle = mean,
      ymin = mean - sd,
      ymax = mean + sd
    ),
    stat = "identity",
    linewidth = 1
  ) +
  # No ticks or tick labels on the group axis; colour identifies the group.
  scale_x_discrete(labels = NULL, breaks = NULL) +
  scale_y_continuous(limits = c(0, 200)) +
  scale_color_manual(
    name = "",
    values = c(
      "Standard German" = color_red,
      "Bavarian (older)" = color_blue
    )
  ) +
  xlab(" ") +
  coord_flip()

# Open the PNG device only now that both panels exist, so an error while
# building them cannot leave a graphics device open.
png("figure2.png", width = 4800, height = sum(panel_heights), res = 600)

grid.arrange(
  p1, p2,
  ncol = 1,
  nrow = 2,
  heights = panel_heights
)

dev.off()
