import pandas as pd
import os
from tqdm import tqdm
import argparse
from datetime import date
import numpy

def arg_directory(path):
    """Validate a command-line argument that must name an existing directory.

    Intended to be used as an ``argparse`` ``type=`` callback.

    Args:
        path: The raw argument string given on the command line.

    Returns:
        ``path`` unchanged when it points to an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not an existing directory.
    """
    # Guard clause: reject anything that is not a directory up front.
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path

def process_data(args):
    """Merge the detection text files of a folder into one CSV file.

    Every regular file in ``args.path_to_data`` is read as a space-separated
    table with the columns ``espece, x, y, w, h, conf``. The rows of all files
    are stacked and three columns are added:

    * ``file``  - source file name with its last extension replaced by ``.wav``
    * ``idx``   - text between the last ``_`` of the source file name and the
      first ``.`` that follows it
    * ``annot`` - class label looked up in ``names`` using the integer class
      id stored in ``espece``

    The result is written to ``YOLO_detection_<day>_<month>.csv`` inside
    ``args.direction``.

    Args:
        args: Object exposing ``path_to_data`` (input folder) and
            ``direction`` (output folder), e.g. an ``argparse.Namespace``.

    Raises:
        ValueError: If the input folder contains no files, or if ``names`` is
            filled in but too short for the class ids found in the data.
    """
    annots = args.path_to_data
    outdir = args.direction
    today = date.today()
    out_path = os.path.join(outdir, f'YOLO_detection_{today.day}_{today.month}.csv')

    # Load and process data.
    # Sorted so the row order does not depend on the file system; entries that
    # are not regular files (e.g. sub-directories) cannot be parsed and are
    # skipped.
    columns = ['espece', 'x', 'y', 'w', 'h', 'conf']
    label_files = sorted(
        f for f in os.listdir(annots)
        if os.path.isfile(os.path.join(annots, f))
    )
    if not label_files:
        raise ValueError(f'No detection files found in `{annots}`')

    df = pd.concat(
        {f: pd.read_csv(os.path.join(annots, f), sep=' ', names=columns)
         for f in tqdm(label_files)},
        names=['file'],
    )
    # Turn the file-name index level into a column and drop the per-file
    # row counter.
    df = df.reset_index(level=0).reset_index(drop=True)

    # `idx` must be extracted before the file name is rewritten below.
    df['idx'] = df['file'].str.split('_').str[-1].str.split('.').str[0]
    # Vectorised replacement of the last extension by `.wav`.
    df['file'] = df['file'].str.rsplit('.', n=1).str[0] + '.wav'

    # Define class names (fill this list to get readable labels).
    names = []
    # Size the mapping from the largest class id rather than the number of
    # distinct classes, so non-contiguous ids can still be looked up.
    max_id = int(df['espece'].max())
    if not names:
        red, reset = '\033[91m', '\033[0m'
        print(f'{red}!! No class names were put into the `names` list of process_data, '
              f'so labels will automatically be from 0 to {max_id} !!{reset}')
        names = list(range(max_id + 1))
    elif len(names) <= max_id:
        raise ValueError(
            f'`names` has {len(names)} entries but the data contains class id {max_id}'
        )
    df['annot'] = df['espece'].apply(lambda x: names[int(x)])

    # Save the processed DataFrame to a CSV file.
    df.to_csv(out_path, index=False)
    print(f'Saved as {out_path}')

if __name__ == "__main__":
    # Command-line entry point: both folders are mandatory and must already
    # exist (checked by `arg_directory`).
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='TODO',
    )
    cli.add_argument(
        '-p', '--path_to_data',
        type=arg_directory,
        help='Path of the folder that contains the .txt files',
        required=True,
    )
    cli.add_argument(
        '-d', '--direction',
        type=arg_directory,
        help='Directory to which the dataframe will be stored',
        required=True,
    )

    process_data(cli.parse_args())