# Aim:    this file is a script that calculates the bootstrap standard error (SE) of a metric
# Author: Tanja Krüger
# Input1: the predicted values
# Input2: the true values
# Input3: potentially the number of times it is bootstrapped
# Input4: the metric that I am interested in: for the sake of this example, the accuracy score
# Output: the standard error
########################################################################################################################
# Imports:
import random

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef

########################################################################################################################
# Input:
# Data: two equally long series, one holding the predictions and the other the real labels
y_pred = pd.Series([1,1,1,1,1,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0])  # predictions
y_true = pd.Series([1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0])  # real
# Number of bootstraps:
n_boot = 1000
# Metric:
metric = "accuracy"  # options are accuracy or mcc at this point in time

########################################################################################################################
# Resolve the metric once, up front: an unsupported name fails immediately with
# a clear error instead of surfacing later as an unrelated NameError in the loop.
metric_functions = {
    "accuracy": accuracy_score,
    "mcc": matthews_corrcoef,
}
if metric not in metric_functions:
    raise ValueError(
        "At the moment the script can only handle accuracy or mcc. \nPlease select a metric from the available options"
    )
metric_function = metric_functions[metric]

# The length of the dataset
l = len(y_true)
# Each resample pairs a prediction with its true label, so both series must
# cover the same (non-empty) set of observations.
if l == 0 or len(y_pred) != l:
    raise ValueError("y_true and y_pred must be non-empty and have the same length")

# Pre-sized list that stores the chosen metric for each bootstrap sample
metric_list = [None] * n_boot
########################################################################################################################

# Pull a bootstrap sample n_boot times
for i in range(n_boot):
    # Draw l positions with replacement (random.choices, unlike random.sample,
    # can pick the same position more than once).
    bootstrap_sample_index = random.choices(range(l), k=l)
    # Select by position so that the same observations are taken from both
    # series, independent of how each series happens to be labelled.
    bootstrap_sample_ytrue = y_true.iloc[bootstrap_sample_index]
    bootstrap_sample_ypred = y_pred.iloc[bootstrap_sample_index]
    # Calculate the metric for this bootstrap sample and store it
    boot_metric = metric_function(bootstrap_sample_ytrue, bootstrap_sample_ypred)
    metric_list[i] = boot_metric


# Calculate the average and the standard error for the metric of interest.
# The standard error is taken as the standard deviation of the metric across
# the bootstrap samples (np.std with its default ddof=0).
average_metric = np.mean(metric_list)
metric_se = np.std(metric_list)


# Print the results for the chosen metric
print(f"the average {metric} is: {average_metric},\nwith an Standard Error (SE) of: {metric_se}")

