#!/usr/bin/env python3
# coding: utf-8
# Author: Tanja Krueger & code from: Luisa Jimenez Soto

#######################################################################################################################
# This program adds a hashtag to the beginning of a single raw fasta file.
# Content of the hashtag is extracted from the title and identifies the type of toxin.

# Input: a single multifasta file.
# Output: if the file is missing the leading # line, it is prepended with a # + the identification of the toxin

#######################################################################################################################
# Import libraries needed.
import os
import re
import argparse
from datetime import datetime
from Bio import SeqIO

#######################################################################################################################
# Command-line interface: exactly one positional argument, the path of the
# fasta file to inspect.
parser = argparse.ArgumentParser(
    prog="hashToFile.py",
    description="adds a hashtag to the beginning of a single file",
)
parser.add_argument(
    "Data",
    type=str,
    help="the file where a hash might be missing ",
)
# Parse sys.argv; argparse prints usage and exits if the argument is missing.
args = parser.parse_args()

####################################################################################################################
# Append a run record (program name, timestamp, argument) to the project log.
# The path is relative to the working directory: it has to be amended if the
# code is run from the Code folder instead of via the makefile.
dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
# The context manager guarantees the log is flushed and closed right after the
# record is written, even if a later step of the script fails. The name
# `out_file` stays bound afterwards; calling close() on it again is a no-op.
with open("Data/derived/log.log", "a") as out_file:
    # NOTE(review): the last message mentions "the data directory" although the
    # argument is a single fasta file; text kept unchanged to preserve the log format.
    for log_line in (
        "##########",
        f"program {parser.prog} was executed at {dt_string}",
        f"argument passed: {args.Data}",
        "number of required arguments: 1",
        "argument should contain: the data directory of the project",
    ):
        print(log_line, file=out_file)

####################################################################################################################
# Get the file.
f = args.Data
# Extract the name parts from the input path. The file name is expected to look
# like ".../derived/<part1>_<part2>_<part3>_<part4>.fasta"; the first three
# parts form the hashtag text, the fourth is discarded.
# A raw string is used because "\/" and "\." are not valid Python string
# escapes (SyntaxWarning on Python 3.12+); the compiled pattern is unchanged.
m = re.search(r"/derived/(.*)_(.*)_(.*)_(.*)\.fasta", f)
if m is None:
    # Fail with a readable message instead of an AttributeError on `None`.
    out_file.close()
    raise SystemExit(
        f"{f} does not match the expected pattern "
        "'.../derived/<a>_<b>_<c>_<d>.fasta'; cannot derive the hashtag"
    )
savename = " ".join(m.group(1, 2, 3))
# Open the file for reading and in-place rewriting.
with open(f, "r+") as file:
    first_line = file.readline()
    # The hashtag line written below always starts with '#', so test the start
    # of the line: a '#' elsewhere in a fasta header must not count as present.
    if first_line.startswith("#"):
        print(f"{f} hast the right format")
    # In the absence of a leading #, add a # line with the information
    # extracted from the file name.
    else:
        print(f"{f} was changed, a # with the file content description was added to the fasta file")
        # Read the rest, rewind, and rewrite the whole file with the hashtag
        # line first. The new content is longer than the old, so no truncation
        # is needed.
        content = file.read()
        file.seek(0)
        file.write(f"#{savename}\n{first_line}{content}")

# Closing is harmless if the log handle has already been closed.
out_file.close()
########################################################################################################################


