#!/usr/bin/env python3
# coding: utf-8


# ######################################################################################################################
# Author: Tanja Krüger 



########################################################################################################################
# downloaded
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import re, argparse
from datetime import datetime
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
import logomaker
from scipy.stats import gaussian_kde
import csv
# #################################################################################################
# Path prefix prepended to project-relative paths. The default (empty string) assumes the code is run with make from the project folder.
cl=""
# To execute this file from the Code/python folder instead, uncomment the next line.
#cl="../../"

########################################################################################################################
# Get the arguments from the command line.
# Command-line interface: three positional FASTA paths, given in the order
# animal, fungal, bacterial.
# NOTE: argparse expands help strings with %-formatting, so a literal percent
# sign must be written as "%%"; a bare "%" garbles the output of --help.
parser = argparse.ArgumentParser(
    prog="data_analysis_3.py",
    description="shared visualizing protein length, PI and aromaticity",
)
parser.add_argument(
    "at100",
    type=str,
    help="fasta file animal toxins 100%% reduced",
)
parser.add_argument(
    "ft100",
    type=str,
    help="fasta file fungal toxins 100%% reduced",
)
parser.add_argument(
    "bt100",
    type=str,
    help="fasta file bacterial toxins 100%% reduced",
)



args = parser.parse_args()


########################################################################################################################

def _read_fasta(path):
    """Load a FASTA file into a DataFrame indexed by record id.

    Args:
        path: Path of the FASTA file to read.

    Returns:
        A DataFrame with one row per record id and the columns "info"
        (the record description) and "seq" (the sequence as a plain string).
        A file without records yields an empty frame that still has both
        columns.
    """
    with open(path) as handle:
        records = {
            record.id: [record.description, str(record.seq)]
            for record in SeqIO.parse(handle, "fasta")
        }
    # Naming the columns at construction keeps this working for a file without
    # records, where assigning two column names afterwards would raise.
    return pd.DataFrame.from_dict(records, orient="index", columns=["info", "seq"])


# Open the fasta files and turn each one into a usable dataframe.
at100 = _read_fasta(args.at100)
bt100 = _read_fasta(args.bt100)
ft100 = _read_fasta(args.ft100)

# One-letter amino acid codes, used for the labels in the plot.
amino_acids = list("ACDEFGHIKLMNPQRSTVWY")



# One (length, origin) table per toxin group; every row pairs the length of a
# sequence with the label of the group it came from.
lens_at = pd.DataFrame(
    [(n, "animal\ntoxins") for n in map(len, at100["seq"])],
    columns=["length", "origin"],
)
lens_ft = pd.DataFrame(
    [(n, "fungal\ntoxins") for n in map(len, ft100["seq"])],
    columns=["length", "origin"],
)
lens_bt = pd.DataFrame(
    [(n, "bacterial\ntoxins") for n in map(len, bt100["seq"])],
    columns=["length", "origin"],
)

# Stack the three tables (fungal, animal, bacterial) under a fresh 0..n-1 index.
df_all_lens = pd.concat(
    [lens_ft, lens_at, lens_bt],
    ignore_index=True,
)


def writer(df, path=None):
    """Write the rows of *df* to a CSV file, without header and without index.

    Args:
        df: DataFrame whose rows are written in order, one CSV record per row.
        path: Destination file. When omitted, the file
            ``Data/derived/length_output_fungal.csv`` below the module-level
            path prefix ``cl`` is used.
    """
    if path is None:
        path = f"{cl}Data/derived/length_output_fungal.csv"
    # The csv module asks for newline="": it keeps line breaks embedded in
    # quoted fields intact and avoids doubled line endings on Windows.
    with open(path, "w", newline="") as f:
        csv_writer = csv.writer(f)
        # Iterate the rows by position. Feeding the index labels to .iloc only
        # worked while the index happened to be 0..n-1.
        csv_writer.writerows(df.itertuples(index=False, name=None))

# Write the combined length table of all three toxin groups to the CSV file.
writer(df_all_lens)
