#!/usr/bin/env python3
# coding: utf-8
# Author: Tanja Krueger
# Aim: explore the feature importance after a PCA
# Input1: the merged_exotoxins fasta file CDHIT100 reduced
# Input2: the controlproteins fasta file CD 100 reduced
# Output: not known yet, probably scores and loadings plot of a PCA


#Open the two fasta files
#Make one long concatenated dataframe out of it
#Calculate the ratio of amino acids per ID and keep the label
#
#######################################################################################################################
# Import libraries needed.
import os
import re
import argparse
from datetime import datetime
from Bio import SeqIO
import pandas as pd

####################################################################################################################
# Path prefix prepended to project-relative paths such as the log file.
# An empty prefix fits running the script from the project folder (e.g. via make).
cl = ""
# Override for running the script from inside the Code/python folder.
# NOTE(review): this override is currently active, so the empty prefix above
# is never used; comment the next line out to run from the project folder.
cl = "../../"

#######################################################################################################################
# Build the command-line interface: two required positional arguments,
# "bt" (bacterial toxins FASTA) followed by "cp" (control proteins FASTA).
parser = argparse.ArgumentParser(
    prog="PCA_exploration_part1.py",
    description="this script explores the feature importance of the data ",
)
parser.add_argument(
    "bt", type=str, help=" bacterial toxins, CDHIT100 reduced in fasta format"
)
parser.add_argument(
    "cp", type=str, help="  control proteins, CDHIT100 reduced in fasta format "
)

# Parse sys.argv; argparse exits with a usage message if an argument is missing.
args = parser.parse_args()

####################################################################################################################
# Append a record of this run to the project log file.
# The log is opened in append mode inside a context manager so the handle is
# closed (and its buffer flushed) as soon as the record has been written,
# instead of staying open for the rest of the script.
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
with open(f"{cl}Data/derived/log.log", "a") as out_file:
    print("##########", file=out_file)
    print(f"program {parser.prog} was executed at {dt_string}", file=out_file)
    # Separate the two input paths with a space; written back to back they
    # cannot be told apart in the log.
    print(f"argument passed: {args.bt} {args.cp}", file=out_file)
    print("number of required arguments: 2", file=out_file)

# Echo the start-up line on the console as well.
print(f"program {parser.prog} was executed at {dt_string}")

########################################################################################################################
# Load both FASTA inputs into DataFrames, one row per sequence record.
def _fasta_to_dataframe(path):
    """Read the FASTA file at *path* into a DataFrame with one row per record.

    The index holds each record's id; the two columns hold the record's
    description and its sequence converted to a plain string. Rows are
    collected in a dict keyed by record id, so records that share an id
    overwrite one another and only the last one is kept.
    """
    with open(path) as handle:
        records = {
            record.id: [record.description, str(record.seq)]
            for record in SeqIO.parse(handle, "fasta")
        }
    # The dict maps id -> [description, sequence], which pandas reads as one
    # column per id; transpose so that each record becomes a row.
    return pd.DataFrame(records).T


# Open the bacterial toxins file. It is passed as the "bt" argument; the
# parser defines no "cf" attribute, so reading args.cf raised AttributeError.
df_bt = _fasta_to_dataframe(args.bt)
# Define headers for better human handling.
# NOTE(review): head_bt is not assigned to df_bt.columns in the code shown
# here, so the columns are still labelled 0 and 1 — confirm whether intended.
head_bt = ["info", "seq"]

# Open the control proteins file.
df_cp = _fasta_to_dataframe(args.cp)
# Define headers for better human handling.
# NOTE(review): head_cp is likewise not applied to df_cp.columns here.
head_cp = ["info", "seq"]

#



