---
title: "Complete set of interaction plots"
author: "Markus Jochim and Felicitas Kleber"
date: "2022-07-08"
output:
  html_document:
    toc: true
    number_sections: true
    toc_float:
      collapsed: true
    df_print: paged
    code_folding: show
  html_notebook:
    toc: true
    number_sections: true
    toc_float:
      collapsed: true
    df_print: paged
    code_folding: show
---


```{r init, include=FALSE}
library(ggplot2)
library(gridExtra)
library(tidyverse)
library(data.table)

library(rlist)
source("modelScript/functions/simulations.R")

resultsDirectory = "logs/"

average_consonant_duration = read_csv("data/average_consonant_duration.csv")

plotParameterAverageAcrossTime = function(simulation_names,
                                          labeller,
                                          results_directory,
                                          run_id,
                                          central_tendency = "median",
                                          additional_plot_parts = NULL,
                                          snapshotStepSize = 1) {
  for (current_simulation_name in simulation_names) {
    current_parameter_set = get_params(results_directory, current_simulation_name)
    
    files = Sys.glob(paste0(results_directory, "/", current_simulation_name, "/", run_id, "/pop.*.rds"))

    files = tibble(path = files) %>%
      mutate (snapshot = sub(path, pattern = ".*pop\\.([0-9]*)\\.rds.*", replacement = "\\1"),
              snapshot = as.numeric(snapshot)) %>%
      filter (snapshot %% snapshotStepSize == 0)

    p <- rbindlist(lapply(files$path, function(x) {
      r <- readRDS(x)
      r[, `:=` (condition = condition * current_parameter_set$interactionsPerSnapshot)] #,
                # P1 = P1
      #)]
      r[valid == TRUE,]
    }))
    
    df.average <- p %>% group_by(group, initial, condition) %>% dplyr::summarise(P1_mean = mean(P1),
                                                                                 P1_q25  = quantile(P1, 0.25),
                                                                                 P1_q50  = quantile(P1, 0.50),
                                                                                 P1_q75  = quantile(P1, 0.75))

    p1 <- ggplot(df.average)
    
    if (!is.null(additional_plot_parts)) {
      for (part in additional_plot_parts) {
        p1 = p1 + part
      }
    }
    
    
    ylabel = switch (current_parameter_set$features,
                     "dur_c" = "Stop clousre duration [ms]",
                     "log_dur_c" = "Stop closure duration [ln(ms)]",
                     current_parameter_set$features)
    
    p1 <- p1 +
      aes(x = condition, color = group) +
      xlab("Interactions") +
      ylab(ylabel) +
      scale_x_continuous(labels = scales::comma) +
      theme(text = element_text(size = 16),
            legend.position = "top") +
      ggtitle(label    = paste0(current_simulation_name, " (", run_id, ")"),
              subtitle = labeller(current_parameter_set))
    
    if (central_tendency == "median") {
      p1 <- p1 +
        geom_line(aes(y = P1_q50), size = 1.2) +
        geom_line(aes(y = P1_q25), linetype = 2) +
        geom_line(aes(y = P1_q75), linetype = 2) +
        geom_point(aes(y = P1_q50), size = 1.2) +
        geom_point(aes(y = P1_q25), alpha = 1) +
        geom_point(aes(y = P1_q75), alpha = 1)
    } else {
      p1 <- p1 +
        geom_line(aes(y = P1_mean), size = 1.2) +
        geom_point(aes(y = P1_mean), size = 1.2)
    }
    
    print(p1)
  }
}

plotInputDistributionPerAgent = function(simulation_names,
                                         labeller,
                                         results_directory,
                                         run_id,
                                         additional_plot_parts = NULL) {
  for (current_simulation_name in simulation_names) {
    interactionLog <- readRDS(file.path(results_directory, current_simulation_name, run_id, "intLog.rds"))
    pop <- readRDS(file.path(results_directory, current_simulation_name, run_id, "pop.0.rds"))
    groups <- pop %>% group_by(agentID, group) %>% dplyr::summarise()

    x <- interactionLog %>%
      left_join(groups, by = c("producerID" = "agentID")) %>%
      rename(producerGroup = group) %>%
      left_join(groups, by = c("perceiverID" = "agentID")) %>%
      rename(perceiverGroup = group)

    barplot <- ggplot(x) +
      aes(x = producerGroup,
          fill = perceiverGroup) +
      geom_bar() +
      scale_y_continuous(name = "Number of tokens perceived", labels = scales::comma) +
      facet_wrap(~perceiverID)
    
    if (!is.null(additional_plot_parts)) {
      for (part in additional_plot_parts) {
        barplot = barplot + part
      }
    }

    print(barplot)


    dotplot <- x %>%
      dplyr::group_by(perceiverGroup, perceiverID) %>%
      dplyr::count(producerGroup) %>%
      tidyr::spread(key = producerGroup, value = n) %>%
      dplyr::mutate(totalInteractions = sum(SD + WB),
                    dialectProportion = WB / totalInteractions) %>%

      ggplot() +
      aes(x = perceiverGroup, y = dialectProportion) +
      geom_point()

    print(dotplot)
  }
}


## Prepare static parts that can later be added to plots (lines representing the control group of speakers)

youngdialect_fast_reference_values = average_consonant_duration %>% 
  filter(dataset == "dataset1", rate == "schnell", speaker_group == "Y_WB")

youngdialect_normal_reference_values = average_consonant_duration %>% 
  filter(dataset == "dataset1", rate == "normal", speaker_group == "Y_WB")

youngdialect_fast_reference_lines = c(
  geom_hline(yintercept = youngdialect_fast_reference_values$median),
  geom_hline(yintercept = youngdialect_fast_reference_values$quartile1, linetype = 2),
  geom_hline(yintercept = youngdialect_fast_reference_values$quartile3, linetype = 2))

youngdialect_normal_reference_lines = c(
  geom_hline(yintercept = youngdialect_normal_reference_values$median),
  geom_hline(yintercept = youngdialect_normal_reference_values$quartile1, linetype = 2),
  geom_hline(yintercept = youngdialect_normal_reference_values$quartile3, linetype = 2))

youngdialect_fast_log_reference_lines = c(
  geom_hline(yintercept = youngdialect_fast_reference_values$median_log),
  geom_hline(yintercept = youngdialect_fast_reference_values$quartile1_log, linetype = 2),
  geom_hline(yintercept = youngdialect_fast_reference_values$quartile3_log, linetype = 2))

youngdialect_normal_log_reference_lines = c(
  geom_hline(yintercept = youngdialect_normal_reference_values$median_log),
  geom_hline(yintercept = youngdialect_normal_reference_values$quartile1_log, linetype = 2),
  geom_hline(yintercept = youngdialect_normal_reference_values$quartile3_log, linetype = 2))


## Prepare static parts that can later be added to plots (legend)

agent_population_scale = scale_color_discrete(name = "Agent population",
                                              breaks = c("WB", "SD"),
                                              labels = c("Bavarian", "Standard German"))

perceiver_population_scale = scale_fill_discrete(name = "Agent-listener population",
                                                 breaks = c("WB", "SD"),
                                                 labels = c("Bavarian", "Standard German"))

producer_population_scale = scale_x_discrete(name = "Agent-speaker population",
                                             breaks = c("WB", "SD"),
                                             labels = c("Bav", "SG"))

```

# Description

This R notebook contains all simulation results referred to in Chapter 5.3 of Markus Jochim’s *Internal and external factors in a Bavarian sound change*. All simulations have been carried out with version 3bc0dfb of the ABM code. The code is available in the `modelscript/` subdirectory of this distribution of supplementary materials or at https://github.com/IPS-LMU/ABM/commit/3bc0dfb194738b614f9757728f44b091d3b53ccd.

The results of the simulations are stored in the `logs/` directory, which has eight subdirectories, according to the following code listing:

```{r}
maximum_contact_scenario_linear    = "ABM20200113145453"
maximum_contact_scenario_log       = "ABM20200113131820"
asymmetric_contact_scenario_linear = "ABM20200118120147"
asymmetric_contact_scenario_log    = "ABM20200116005311"
symmetric_contact_scenario_linear  = "ABM20200117151520"
symmetric_contact_scenario_log     = "ABM20200203161833"
null_contact_scenario_linear       = "ABM20200115170020"
null_contact_scenario_log          = "ABM20200115173019"
```

The parameters used during the simulations are found in the respective subdirectory in the file `params.yaml`. Some parameters were set by calculating a formula; in these cases, `params.yaml` contains only the value and the formula is given in this notebook.

# Maximum contact scenario

The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`.

## Linear dur_c

### Amount of input

```{r}
simulations = maximum_contact_scenario_linear

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = maximum_contact_scenario_linear

additional_plot_parts = c(
  youngdialect_normal_reference_lines,
  agent_population_scale
)

for (run_id in 1:10) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 25,
                                 labeller = function (currentParameterSet) {})
}
```

## log_dur_c

### Amount of input

```{r}
simulations = maximum_contact_scenario_log

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))

```

### Parameter over time

```{r}
simulations = maximum_contact_scenario_log

additional_plot_parts = c(
  youngdialect_normal_log_reference_lines,
  agent_population_scale
)

for (run_id in 1:10) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 25,
                                 labeller = function (currentParameterSet) {})
}
```


# Asymmetric contact scenario

The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`.

## Linear dur_c

### Amount of input

```{r}
simulations = asymmetric_contact_scenario_linear

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = asymmetric_contact_scenario_linear

additional_plot_parts = c(
  youngdialect_normal_reference_lines,
  agent_population_scale
)

for (run_id in 1:7) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 15,
                                 labeller = function (currentParameterSet) {})
}
```

## log_dur_c

### Amount of input

```{r}
simulations = asymmetric_contact_scenario_log

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = asymmetric_contact_scenario_log

additional_plot_parts = c(
  youngdialect_normal_log_reference_lines,
  agent_population_scale
)

for (run_id in 1:8) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 500,
                                 labeller = function (currentParameterSet) {})
}
```


# Symmetric contact scenario

The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`.

## Linear dur_c

### Amount of input

```{r}
simulations = symmetric_contact_scenario_linear

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = symmetric_contact_scenario_linear

additional_plot_parts = c(
  youngdialect_normal_reference_lines,
  agent_population_scale
)

for (run_id in c(1:3, 5:8)) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 1500,
                                 labeller = function (currentParameterSet) {})
}
```

## log_dur_c

### Amount of input

```{r}
simulations = symmetric_contact_scenario_log

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = symmetric_contact_scenario_log

additional_plot_parts = c(
  youngdialect_normal_log_reference_lines,
  agent_population_scale
)

for (run_id in 1:7) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 15,
                                 labeller = function (currentParameterSet) {})
}
```


# Speech rate / Null contact scenario

The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`.

## linear dur_c

### Amount of input

```{r}
simulations = null_contact_scenario_linear

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = null_contact_scenario_linear

additional_plot_parts = c(
  youngdialect_fast_reference_lines,
  agent_population_scale
)

for (run_id in 1:10) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 50,
                                 labeller = function (currentParameterSet) {})
}
```

## log_dur_c

### Amount of input

```{r}
simulations = null_contact_scenario_log

plotInputDistributionPerAgent(simulations,
                              resultsDirectory,
                              1,
                              labeller = NULL,
                              additional_plot_parts = c(perceiver_population_scale,
                                                        producer_population_scale))
```

### Parameter over time

```{r}
simulations = null_contact_scenario_log

additional_plot_parts = c(
  youngdialect_fast_log_reference_lines,
  agent_population_scale
)

for (run_id in 1:10) {
  plotParameterAverageAcrossTime(simulations,
                                 resultsDirectory,
                                 run_id,
                                 central_tendency = "median",
                                 additional_plot_parts = additional_plot_parts,
                                 snapshotStepSize = 50,
                                 labeller = function (currentParameterSet) {})
}
```