#!/usr/bin/env python3
# coding: utf-8

#######################################################################################################################
#
# This file removes the predicted signal peptides from proteins

# Input: the file with the predicted signal peptide locations as a txt file
# Input: the original sequences as a fasta file
# Output: the file without the predicted signal peptides


#The general steps that need to happen
# Open the fasta and make a dataframe
# Open the original sequences and wrangle it to a dataframe as well
# Get the same column to combine the two dataframes
# Get each sequence and only keep the part of the sequence that is NOT the signal peptide

########################################################################################################################
# downloaded
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import re, argparse, csv, collections, random
from datetime import datetime
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio import SeqIO
import logomaker
########################################################################################################################
# Step1: Read the two required positional arguments from the command line.
parser = argparse.ArgumentParser(
    prog="signal_removal.py",
    description="removal of the signal peptides predicted by SignalP6",
)

# Each entry is (argument name, help text); both arguments are plain string paths
# and are registered in this order, which fixes their position on the command line.
for arg_name, arg_help in (
    ("OG_seqs1", "the original sequences as fasta file part1"),
    ("pred_signals1", "the predicted signal Peptides part1"),
):
    parser.add_argument(arg_name, type=str, help=arg_help)

args = parser.parse_args()
# #################################################################################################
# Option depending on where the user wants to run the code from; the default is
# running the code with make from the project folder.
# `cl` is the prefix prepended to the relative log-file path used further down.
cl = ""
# If one wants to execute this file from the Code/python folder, uncomment the next line.
#cl = "../../"

########################################################################################################################
# Step2: Log
# Step2.1: Open the general logfile in append mode so earlier runs are preserved.
# The handle is intentionally left open under the name `out_file` so that code
# further down the script can keep writing to the same log.
out_file = open(f"{cl}Data/derived/log.log", "a")

# Step2.2: Get the date and time of this run (local time, day/month/year order).
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

# Step2.3: Write the run header to the general logfile.
# The attribute names must match the argparse destinations (OG_seqs1 / pred_signals1);
# the previous names (OG_seqs / pred_signals) do not exist and raised AttributeError.
print(f"""########## \n
program {parser.prog} was executed at {dt_string} \n
argments passed: the original fasta with proteins still containing siganl peptides: {args.OG_seqs1}\n
                the predicted signal peptide locations: {args.pred_signals1}\n
number of required arguments:2""",file=out_file)
# Flush right away so the header reaches disk even if a later step fails.
out_file.flush()
########################################################################################################################
# Step3: Opening the data to dataframes
# Step3.1: Open the original unchanged fasta
# NOTE(review): no code for this step is present in this section yet.

# Step3.2: Open the predicted location of the signal peptides.
# The parsing below (skip two leading lines, split on tabs) fits a tab-separated
# prediction table rather than a fasta file, so the prediction-file argument is
# read here; previously the fasta argument (OG_seqs1) was opened by mistake.
with open(args.pred_signals1, 'r') as f:
    file_lines1 = f.readlines()

# The first two lines are skipped - presumably the header lines of the SignalP
# output; TODO confirm against an actual prediction file.
# Line terminators are stripped so the last column does not carry a trailing
# newline, and blank lines are dropped so they do not turn into empty rows.
keymap_tox1 = pd.DataFrame(
    [line.rstrip("\r\n").split('\t') for line in file_lines1[2:] if line.strip()]
)
print(keymap_tox1)