#!/usr/bin/env python3
# coding: utf-8


###################################################################################################################
# Author: Tanja Krüger
# Aim: This file produces a single surprise plot for two datasets at a time
# Input: csv table holding the surprise of each amino acid on its diagonal (calculated from two datasets)
# Output: a png file depicting the surprise for each amino acid
# The manual from the logomaker is found under https://logomaker.readthedocs.io/en/latest/

####################################################################################################################
# Import of all packages needed
import pandas as pd
import logomaker
import matplotlib.pyplot as plt
import re
import argparse

####################################################################################################################
# Colour palette chosen to remain distinguishable for colour-blind readers.
CB_color_cycle = [
    '#EABA49',
    '#B1041B',
    '#61BDD2',
    '#2156B5',
]

####################################################################################################################
# Path prefix that is prepended to the log file and figure locations.
# The default (empty prefix) is meant for running the code with make from the project folder.
cl = ""
# To execute this file from the Code/python folder instead, uncomment the next line.
# cl="../../"

####################################################################################################################
# One-letter amino acid codes, listed in the order of the diagonal of the surprise matrix.
amino_acids = list("ACDEFGHIKLMNPQRSTVWY")

# Colours for every supported dataset pair, keyed by the pair name embedded in the
# input path. Each value is (colour for surprise >= 0, colour for surprise < 0).
PAIR_COLORS = {
    "animal_toxins_animal_control": ('#B1041B', '#EABA49'),
    "animal_toxins_combined_short_animal_control_proteins_short": ('#B1041B', '#EABA49'),
    "bacterial_toxins_bacterial_control": ('#2156B5', '#61BDD2'),
    "animal_toxins_bacterial_toxins": ('#B1041B', '#2156B5'),
    "animal_control_bacterial_control": ('#EABA49', '#61BDD2'),
}

# The dataset pair name is the part of the path between "/forLogoMaker/" and "_LogoMaker".
# A raw string is used so that no invalid escape sequence ends up in the pattern.
MATRIX_NAME_PATTERN = re.compile(r"/forLogoMaker/(.*)_LogoMaker")


def extract_pair_name(matrix_path):
    """Return the dataset pair name embedded in *matrix_path*.

    The name is whatever sits between "/forLogoMaker/" and "_LogoMaker" in the
    path. Returns None when the path does not follow that layout.
    """
    found = MATRIX_NAME_PATTERN.search(matrix_path)
    return found.group(1) if found else None


def build_color_scheme(pair_name, surprise, residues):
    """Map every residue to a colour depending on the sign of its surprise.

    pair_name: dataset pair name, looked up in PAIR_COLORS.
    surprise:  table indexable with ``.iloc[i, i]``; the i-th diagonal entry is
               taken as the surprise of the i-th residue.
    residues:  residue letters in the order of the diagonal.

    Returns a dict residue -> colour. For an unknown pair name a message is
    printed once and an empty dict is returned, so the caller can still plot.
    """
    pair = PAIR_COLORS.get(pair_name)
    if pair is None:
        # The pair name does not depend on the residue, so report it once
        # instead of once per residue.
        print("the wrong kind of file was provided, probably matrix in the wrong order")
        return {}
    above, below = pair
    return {
        residue: above if surprise.iloc[position, position] >= 0 else below
        for position, residue in enumerate(residues)
    }


def main():
    """Parse the command line, draw the surprise logo and save it as a png.

    Reads the csv given as the only positional argument, appends a few lines to
    the log file under Data/derived, and writes the figure to
    Figures/logomaker_<pair name>.png (both relative to the ``cl`` prefix).
    Exits with a usage error when the pair name cannot be derived from the path.
    """
    # Define the arguments.
    parser = argparse.ArgumentParser(prog="visualization_logomaker.py",
                                     description="Creates a png plot depicting the surprise of each amino acid ")
    parser.add_argument("matrix",
                        type=str,
                        help="surprise matrix")

    args = parser.parse_args()
    # The path must contain "/forLogoMaker/<pair name>_LogoMaker", see MATRIX_NAME_PATTERN.
    df = pd.read_csv(args.matrix)

    # The context manager closes the log even if a later step raises.
    with open(f"{cl}Data/derived/log.log", "a") as log_file:
        print(f"####\nthe program: {parser.prog} was executed", file=log_file)
        print(f"argument passed: {args.matrix}", file=log_file)
        print("number of required arguments: 1", file=log_file)
        print("argument should contain: the surprise matrix with the surprise values on its diagonal for each aa",
              file=log_file)

        # Derive the dataset pair from the file name; it selects the colours and names the output.
        pair_name = extract_pair_name(args.matrix)
        if pair_name is None:
            parser.error(f"cannot derive the dataset pair name from {args.matrix!r}: "
                         "expected a path containing '/forLogoMaker/<name>_LogoMaker'")
        colors = build_color_scheme(pair_name, df, amino_acids)

        # Define a style before the figure is created. Matplotlib 3.6 renamed the
        # bundled seaborn styles to "seaborn-v0_8"; fall back to the old name on older versions.
        plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

        # Create Logo object.
        crp_logo = logomaker.Logo(df, flip_below=False, color_scheme=colors, figsize=[10, 5])
        crp_logo.style_spines(visible=False)
        crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
        crp_logo.style_xticks(fmt='%d', anchor=0)
        plt.title(pair_name, fontsize=26)
        plt.xlabel("amino acids", fontsize=20)
        plt.ylabel("surprise", fontsize=20)
        plt.grid()
        plt.tight_layout()
        save_name = f"Figures/logomaker_{pair_name}.png"
        plt.savefig(f"{cl}{save_name}")

        # logs:
        print(f"created file : {save_name}", file=log_file)
        print(f"{cl}{save_name} was stored under Figures", file=log_file)


if __name__ == "__main__":
    main()

