#!/usr/bin/env python3
# coding: utf-8

# Author: Tanja Krueger
# Aim: this script reads in the CD-HIT 40 reduced data and rank-orders the toxins and the non-toxins according to their histidine ratio
# Input: the CDHIT40 results of the bacterial exotoxins and the control proteins
# Output: the split data
#######################################################################################################################
# Import libraries needed.
import argparse
import atexit
import os
import re
from datetime import datetime

import pandas as pd
from Bio import SeqIO

####################################################################################################################
# Path prefix prepended to project-relative paths (it is used for the log file path).
# An empty prefix is meant for running the code with make from the project folder.
cl = ""
# Prefix for executing this file directly from the Code/python folder.
# NOTE(review): this override is currently active, so the empty default above
# never takes effect; comment the next line out to run from the project folder.
cl = "../../"

#######################################################################################################################
# Get the arguments from the command line.
# Both arguments are positional, so argparse treats them as required and
# exits with a usage message when either one is missing.
parser = argparse.ArgumentParser(
    prog="rankordering.py",
    # Shown by --help; summarises the aim stated in the file header.
    description=(
        "Read the CD-HIT 40 reduced exotoxin and control protein data and "
        "rank-order the sequences by their histidine ratio."
    ),
)
parser.add_argument(
    "tox",
    type=str,
    help="the exotoxins data",
)
parser.add_argument(
    "cp",
    type=str,
    help="the control protein data",
)

# Parsed values are available as args.tox and args.cp.
args = parser.parse_args()

####################################################################################################################
# Open the log file in append mode and record this run.
# The handle stays bound to out_file so that any later part of the script can
# keep logging to it; atexit makes sure it is flushed and closed when the
# interpreter exits (closing an already closed file is a harmless no-op).
out_file = open(f"{cl}Data/derived/log.log", "a")
atexit.register(out_file.close)

# Timestamp of this run, formatted as day/month/year hour:minute:second.
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

# Write the run header to the log file (and echo the start line to stdout).
print("##########", file=out_file)
print(f"program {parser.prog} was executed at {dt_string}", file=out_file)
print(f"program {parser.prog} was executed at {dt_string}")
# Separate the two paths so they do not run together in the log.
print(f"argument passed: {args.cp} {args.tox}", file=out_file)
# Every argument in the parsed namespace is a required positional, so the
# size of the namespace is the number of required arguments.
print(f"number of required arguments: {len(vars(args))}", file=out_file)
####################################################################################################################
# Read the control proteins from their FASTA file into a DataFrame indexed by
# record id. Only the control data (args.cp) is read in this section.

# Column headers for better human handling.
head_cp = ["info", "seq"]

with open(args.cp) as handle:
    # One entry per record id: [description line, sequence as a plain string].
    # If an id occurs more than once, the last record with that id wins.
    records = {
        record.id: [record.description, str(record.seq)]
        for record in SeqIO.parse(handle, "fasta")
    }

# Build the frame row-wise with its final column names. Unlike building it
# column-wise and renaming after a transpose, this also works for an empty
# FASTA file (it yields an empty frame with the expected columns instead of
# raising a length-mismatch error).
df_cp = pd.DataFrame.from_dict(records, orient="index", columns=head_cp)
print(df_cp)
