--- title: "Complete set of interaction plots" author: "Markus Jochim and Felicitas Kleber" date: "2022-07-08" output: html_document: toc: true number_sections: true toc_float: collapsed: true df_print: paged code_folding: show html_notebook: toc: true number_sections: true toc_float: collapsed: true df_print: paged code_folding: show --- ```{r init, include=FALSE} library(ggplot2) library(gridExtra) library(tidyverse) library(data.table) library(rlist) source("modelScript/functions/simulations.R") resultsDirectory = "logs/" average_consonant_duration = read_csv("data/average_consonant_duration.csv") plotParameterAverageAcrossTime = function(simulation_names, labeller, results_directory, run_id, central_tendency = "median", additional_plot_parts = NULL, snapshotStepSize = 1) { for (current_simulation_name in simulation_names) { current_parameter_set = get_params(results_directory, current_simulation_name) files = Sys.glob(paste0(results_directory, "/", current_simulation_name, "/", run_id, "/pop.*.rds")) files = tibble(path = files) %>% mutate (snapshot = sub(path, pattern = ".*pop\\.([0-9]*)\\.rds.*", replacement = "\\1"), snapshot = as.numeric(snapshot)) %>% filter (snapshot %% snapshotStepSize == 0) p <- rbindlist(lapply(files$path, function(x) { r <- readRDS(x) r[, `:=` (condition = condition * current_parameter_set$interactionsPerSnapshot)] #, # P1 = P1 #)] r[valid == TRUE,] })) df.average <- p %>% group_by(group, initial, condition) %>% dplyr::summarise(P1_mean = mean(P1), P1_q25 = quantile(P1, 0.25), P1_q50 = quantile(P1, 0.50), P1_q75 = quantile(P1, 0.75)) p1 <- ggplot(df.average) if (!is.null(additional_plot_parts)) { for (part in additional_plot_parts) { p1 = p1 + part } } ylabel = switch (current_parameter_set$features, "dur_c" = "Stop clousre duration [ms]", "log_dur_c" = "Stop closure duration [ln(ms)]", current_parameter_set$features) p1 <- p1 + aes(x = condition, color = group) + xlab("Interactions") + ylab(ylabel) + scale_x_continuous(labels = scales::comma) + theme(text = element_text(size = 16), legend.position = "top") + ggtitle(label = paste0(current_simulation_name, " (", run_id, ")"), subtitle = labeller(current_parameter_set)) if (central_tendency == "median") { p1 <- p1 + geom_line(aes(y = P1_q50), size = 1.2) + geom_line(aes(y = P1_q25), linetype = 2) + geom_line(aes(y = P1_q75), linetype = 2) + geom_point(aes(y = P1_q50), size = 1.2) + geom_point(aes(y = P1_q25), alpha = 1) + geom_point(aes(y = P1_q75), alpha = 1) } else { p1 <- p1 + geom_line(aes(y = P1_mean), size = 1.2) + geom_point(aes(y = P1_mean), size = 1.2) } print(p1) } } plotInputDistributionPerAgent = function(simulation_names, labeller, results_directory, run_id, additional_plot_parts = NULL) { for (current_simulation_name in simulation_names) { interactionLog <- readRDS(file.path(results_directory, current_simulation_name, run_id, "intLog.rds")) pop <- readRDS(file.path(results_directory, current_simulation_name, run_id, "pop.0.rds")) groups <- pop %>% group_by(agentID, group) %>% dplyr::summarise() x <- interactionLog %>% left_join(groups, by = c("producerID" = "agentID")) %>% rename(producerGroup = group) %>% left_join(groups, by = c("perceiverID" = "agentID")) %>% rename(perceiverGroup = group) barplot <- ggplot(x) + aes(x = producerGroup, fill = perceiverGroup) + geom_bar() + scale_y_continuous(name = "Number of tokens perceived", labels = scales::comma) + facet_wrap(~perceiverID) if (!is.null(additional_plot_parts)) { for (part in additional_plot_parts) { barplot = barplot + part } } print(barplot) dotplot <- x %>% dplyr::group_by(perceiverGroup, perceiverID) %>% dplyr::count(producerGroup) %>% tidyr::spread(key = producerGroup, value = n) %>% dplyr::mutate(totalInteractions = sum(SD + WB), dialectProportion = WB / totalInteractions) %>% ggplot() + aes(x = perceiverGroup, y = dialectProportion) + geom_point() print(dotplot) } } ## Prepare static parts that can later be added to plots (lines representing the control group of speakers) youngdialect_fast_reference_values = average_consonant_duration %>% filter(dataset == "dataset1", rate == "schnell", speaker_group == "Y_WB") youngdialect_normal_reference_values = average_consonant_duration %>% filter(dataset == "dataset1", rate == "normal", speaker_group == "Y_WB") youngdialect_fast_reference_lines = c( geom_hline(yintercept = youngdialect_fast_reference_values$median), geom_hline(yintercept = youngdialect_fast_reference_values$quartile1, linetype = 2), geom_hline(yintercept = youngdialect_fast_reference_values$quartile3, linetype = 2)) youngdialect_normal_reference_lines = c( geom_hline(yintercept = youngdialect_normal_reference_values$median), geom_hline(yintercept = youngdialect_normal_reference_values$quartile1, linetype = 2), geom_hline(yintercept = youngdialect_normal_reference_values$quartile3, linetype = 2)) youngdialect_fast_log_reference_lines = c( geom_hline(yintercept = youngdialect_fast_reference_values$median_log), geom_hline(yintercept = youngdialect_fast_reference_values$quartile1_log, linetype = 2), geom_hline(yintercept = youngdialect_fast_reference_values$quartile3_log, linetype = 2)) youngdialect_normal_log_reference_lines = c( geom_hline(yintercept = youngdialect_normal_reference_values$median_log), geom_hline(yintercept = youngdialect_normal_reference_values$quartile1_log, linetype = 2), geom_hline(yintercept = youngdialect_normal_reference_values$quartile3_log, linetype = 2)) ## Prepare static parts that can later be added to plots (legend) agent_population_scale = scale_color_discrete(name = "Agent population", breaks = c("WB", "SD"), labels = c("Bavarian", "Standard German")) perceiver_population_scale = scale_fill_discrete(name = "Agent-listener population", breaks = c("WB", "SD"), labels = c("Bavarian", "Standard German")) producer_population_scale = scale_x_discrete(name = "Agent-speaker population", breaks = c("WB", "SD"), labels = c("Bav", "SG")) ``` # Description This R notebook contains all simulation results referred to in Chapter 5.3 of Markus Jochim’s *Internal and external factors in a Bavarian sound change*. All simulations have been carried out with version 3bc0dfb of the ABM code. The code is available in the `modelscript/` subdirectory of this distribution of supplementary materials or at https://github.com/IPS-LMU/ABM/commit/3bc0dfb194738b614f9757728f44b091d3b53ccd. The results of the simulations are stored in the `logs/` directory, which has eight subdirectories, according to the following code listing: ```{r} maximum_contact_scenario_linear = "ABM20200113145453" maximum_contact_scenario_log = "ABM20200113131820" asymmetric_contact_scenario_linear = "ABM20200118120147" asymmetric_contact_scenario_log = "ABM20200116005311" symmetric_contact_scenario_linear = "ABM20200117151520" symmetric_contact_scenario_log = "ABM20200203161833" null_contact_scenario_linear = "ABM20200115170020" null_contact_scenario_log = "ABM20200115173019" ``` The parameters used during the simulations are found in the respective subdirectory in the file `params.yaml`. Some parameters were set by calculating a formula; in these cases, `params.yaml` contains only the value and the formula is given in this notebook. # Maximum contact scenario The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`. ## Linear dur_c ### Amount of input ```{r} simulations = maximum_contact_scenario_linear plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = maximum_contact_scenario_linear additional_plot_parts = c( youngdialect_normal_reference_lines, agent_population_scale ) for (run_id in 1:10) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 25, labeller = function (currentParameterSet) {}) } ``` ## log_dur_c ### Amount of input ```{r} simulations = maximum_contact_scenario_log plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = maximum_contact_scenario_log additional_plot_parts = c( youngdialect_normal_log_reference_lines, agent_population_scale ) for (run_id in 1:10) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 25, labeller = function (currentParameterSet) {}) } ``` # Asymmetric contact scenario The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`. ## Linear dur_c ### Amount of input ```{r} simulations = asymmetric_contact_scenario_linear plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = asymmetric_contact_scenario_linear additional_plot_parts = c( youngdialect_normal_reference_lines, agent_population_scale ) for (run_id in 1:7) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 15, labeller = function (currentParameterSet) {}) } ``` ## log_dur_c ### Amount of input ```{r} simulations = asymmetric_contact_scenario_log plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = asymmetric_contact_scenario_log additional_plot_parts = c( youngdialect_normal_log_reference_lines, agent_population_scale ) for (run_id in 1:8) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 500, labeller = function (currentParameterSet) {}) } ``` # Symmetric contact scenario The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`. ## Linear dur_c ### Amount of input ```{r} simulations = symmetric_contact_scenario_linear plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = symmetric_contact_scenario_linear additional_plot_parts = c( youngdialect_normal_reference_lines, agent_population_scale ) for (run_id in c(1:3, 5:8)) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 1500, labeller = function (currentParameterSet) {}) } ``` ## log_dur_c ### Amount of input ```{r} simulations = symmetric_contact_scenario_log plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = symmetric_contact_scenario_log additional_plot_parts = c( youngdialect_normal_log_reference_lines, agent_population_scale ) for (run_id in 1:7) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 15, labeller = function (currentParameterSet) {}) } ``` # Speech rate / Null contact scenario The parameter `mahalanobisThreshold` was set to `qchisq(0.85, df=1)`. ## linear dur_c ### Amount of input ```{r} simulations = null_contact_scenario_linear plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = null_contact_scenario_linear additional_plot_parts = c( youngdialect_fast_reference_lines, agent_population_scale ) for (run_id in 1:10) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 50, labeller = function (currentParameterSet) {}) } ``` ## log_dur_c ### Amount of input ```{r} simulations = null_contact_scenario_log plotInputDistributionPerAgent(simulations, resultsDirectory, 1, labeller = NULL, additional_plot_parts = c(perceiver_population_scale, producer_population_scale)) ``` ### Parameter over time ```{r} simulations = null_contact_scenario_log additional_plot_parts = c( youngdialect_fast_log_reference_lines, agent_population_scale ) for (run_id in 1:10) { plotParameterAverageAcrossTime(simulations, resultsDirectory, run_id, central_tendency = "median", additional_plot_parts = additional_plot_parts, snapshotStepSize = 50, labeller = function (currentParameterSet) {}) } ```