from pathlib import Path
from Bio import SeqIO

def load_ids_from_fasta(fasta_path):
    """Collect the identifiers of every record in a FASTA file.

    Args:
        fasta_path: FASTA input, handed unchanged to ``SeqIO.parse`` with
            the ``"fasta"`` format.

    Returns:
        A set containing the ``id`` attribute of each parsed record, so
        repeated IDs collapse to a single entry.
    """
    records = SeqIO.parse(fasta_path, "fasta")
    return {entry.id for entry in records}

def load_ids_from_pdb_folder(pdb_folder):
    """Collect the base names of the ``*.pdb`` entries directly inside a folder.

    Args:
        pdb_folder: Directory to scan; only its immediate children are
            matched (the glob pattern is not recursive).

    Returns:
        A set of file stems, i.e. each matching name with its final
        ``.pdb`` suffix removed.
    """
    folder = Path(pdb_folder)
    stems = set()
    for pdb_file in folder.glob("*.pdb"):
        stems.add(pdb_file.stem)
    return stems

def check_consistency(fasta_ids, pdb_ids, label="Group"):
    """Print a report comparing expected FASTA IDs with the PDB IDs found.

    Args:
        fasta_ids: Set of IDs expected (taken from a FASTA file).
        pdb_ids: Set of IDs actually present (taken from a PDB folder).
        label: Name of the group, used in the report heading.

    Returns:
        None. All results are written to standard output.
    """
    # Raw dump of both ID sets, kept as part of the established output.
    print("sanity check1")
    print(fasta_ids)
    print("sanity check2")
    print(pdb_ids)

    missing = fasta_ids - pdb_ids  # expected but no matching PDB file
    extra = pdb_ids - fasta_ids    # PDB file present but not expected

    print(f"\n {label} Consistency Check")
    print(f" Total expected from FASTA: {len(fasta_ids)}")
    print(f" Total found in PDB folder: {len(pdb_ids)}")

    if missing:
        print(f" Missing PDBs (in FASTA but not in folder): {len(missing)}")
        # Sorted so the listing is stable from run to run.
        for m in sorted(missing):
            print(f"   - {m}")
    else:
        print("No missing PDB files.")

    if extra:
        print(f" Extra PDBs (in folder but not in FASTA): {len(extra)}")
        for e in sorted(extra):
            print(f"   - {e}")
    else:
        # Previously a bare string expression with no print(), so this
        # branch silently produced no output.
        print("No extra PDB files.")

def main(fasta_a, folder_a, fasta_b, folder_b):
    """Run the FASTA-versus-PDB consistency report for groups A and B.

    Args:
        fasta_a: FASTA file for group A.
        folder_a: PDB folder for group A.
        fasta_b: FASTA file for group B.
        folder_b: PDB folder for group B.
    """
    groups = (
        ("Group A", fasta_a, folder_a),
        ("Group B", fasta_b, folder_b),
    )
    # Each group is loaded and reported in full before the next one starts.
    for group_label, fasta_path, pdb_folder in groups:
        expected = load_ids_from_fasta(fasta_path)
        found = load_ids_from_pdb_folder(pdb_folder)
        check_consistency(expected, found, label=group_label)

if __name__ == "__main__":
    # Command-line entry point: parse the four required paths and hand
    # them to main() in group order (A first, then B).
    import argparse

    cli = argparse.ArgumentParser(
        description="Check consistency between FASTA IDs and PDB folders."
    )
    required_options = (
        ("--fasta_a", "FASTA file for group A"),
        ("--folder_a", "PDB folder for group A"),
        ("--fasta_b", "FASTA file for group B"),
        ("--folder_b", "PDB folder for group B"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    options = cli.parse_args()
    main(options.fasta_a, options.folder_a, options.fasta_b, options.folder_b)
