import pandas as pd
import os
import argparse
import yaml
from datetime import date
from tqdm import tqdm

def arg_directory(path):
    """Argparse ``type=`` validator that accepts only existing directories.

    Args:
        path: Path string received from the command line.

    Returns:
        ``path`` unchanged when it points to an existing directory.

    Raises:
        argparse.ArgumentTypeError: If ``path`` is not an existing directory.
    """
    # Guard clause: reject anything that is not a directory up front.
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f'`{path}` is not a valid path')
    return path

def process_annotations(annotations_folder, duration, outdir, sr, names):
    """Merge per-chunk detection ``.txt`` files into one CSV.

    Every ``*.txt`` file directly inside ``annotations_folder`` is read as a
    space-separated table with columns ``espece x y w h [conf]``. The
    normalised box coordinates are converted to time and frequency values and
    all rows are written to
    ``YOLO_detection_<day>_<month>_freq_<sr>_duration_<duration>.csv`` in
    ``outdir``.

    Files without any data are skipped. If no detection is found at all, a
    CSV containing only the header row is written.

    Args:
        annotations_folder: Directory that holds the ``.txt`` files.
        duration: Length of one spectrogram along the x axis; ``x`` and ``w``
            are multiplied by it.
        outdir: Directory in which the CSV is created.
        sr: Sampling rate; ``y`` and ``h`` are scaled by ``sr / 2``.
        names: Container indexed by the ``espece`` value that yields the
            label stored in ``annot`` (list or dict, as loaded by the caller).

    Raises:
        ValueError: If the last ``_``-separated part of a file name (up to its
            first ``.``) is not an integer.
    """
    today = date.today()
    out_file = f'YOLO_detection_{today.day}_{today.month}_freq_{sr}_duration_{duration}'
    out_path = os.path.join(outdir, f'{out_file}.csv')

    raw_columns = ['espece', 'x', 'y', 'w', 'h', 'conf']
    derived_columns = ['file', 'idx', 'annot', 'midl', 'freq_center',
                       'freq_min', 'freq_max', 'start', 'stop', 'duration']
    half_sr = sr / 2

    df_list = []

    # Sorted so that the row order of the output does not depend on the
    # arbitrary order returned by os.listdir.
    for file_name in tqdm(sorted(os.listdir(annotations_folder))):
        if not file_name.endswith('.txt'):
            continue

        file_path = os.path.join(annotations_folder, file_name)
        try:
            # A missing trailing `conf` field is filled with NaN by pandas.
            annotation_df = pd.read_csv(file_path, sep=' ', names=raw_columns)
        except pd.errors.EmptyDataError:
            # No detection in this file: nothing to collect.
            continue
        except pd.errors.ParserError:
            # Kept from the original fallback: retry without the `conf` column.
            annotation_df = pd.read_csv(file_path, sep=' ', names=raw_columns[:-1])

        # Both values are identical for every row of the file, so they are
        # computed once instead of row by row.
        idx = file_name.split('_')[-1].split('.')[0]
        annotation_df['file'] = file_name.rsplit('.', 1)[0] + '.wav'
        annotation_df['idx'] = idx

        annotation_df['annot'] = annotation_df['espece'].apply(lambda class_id: names[class_id])

        # `idx` is added to the in-chunk position, so it is presumably the
        # offset of the chunk inside the recording - TODO confirm.
        annotation_df['midl'] = (annotation_df['x'] * duration) + int(idx)
        # `y` is inverted: y == 0 maps to sr / 2 and y == 1 maps to 0.
        annotation_df['freq_center'] = (1 - annotation_df['y']) * half_sr
        half_height = (annotation_df['h'] * half_sr) / 2
        annotation_df['freq_min'] = annotation_df['freq_center'] - half_height
        annotation_df['freq_max'] = annotation_df['freq_center'] + half_height
        half_width = (annotation_df['w'] * duration) / 2
        annotation_df['start'] = annotation_df['midl'] - half_width
        annotation_df['stop'] = annotation_df['midl'] + half_width
        annotation_df['duration'] = annotation_df['stop'] - annotation_df['start']

        df_list.append(annotation_df)

    if df_list:
        result_df = pd.concat(df_list, ignore_index=True)
    else:
        # pd.concat raises on an empty list; emit a header-only CSV instead.
        result_df = pd.DataFrame(columns=raw_columns + derived_columns)

    result_df.to_csv(out_path, index=False)
    print(f'Saved as {out_path}')

if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the class names
    # from the YAML file and run the conversion.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Collect detections and return a complete dataframe',
    )

    # Positional arguments (the two folders must already exist).
    cli.add_argument(
        'path_to_data',
        type=arg_directory,
        help='Path of the folder that contains the .txt files',
    )
    cli.add_argument(
        'directory',
        type=arg_directory,
        help='Directory where the dataframe will be stored',
    )
    cli.add_argument(
        'names',
        type=str,
        help='path to YOLOv5 custom_data.yaml file',
    )

    # Spectrogram parameters.
    cli.add_argument(
        '-s', '--sr',
        type=int,
        help='Sampling Rate of the spectrogram',
        required=True,
    )
    cli.add_argument(
        '--duration',
        type=int,
        help='Duration of the spectrogram',
        default=8,
    )
    options = cli.parse_args()

    # The label lookup is stored under the `names` key of the YAML file.
    with open(options.names, 'r') as yaml_handle:
        class_names = yaml.safe_load(yaml_handle)['names']

    process_annotations(
        annotations_folder=options.path_to_data,
        duration=options.duration,
        outdir=options.directory,
        sr=options.sr,
        names=class_names,
    )
