import pandas as pd
import numpy as np
import soundfile as sf
import os
from tqdm import tqdm
import multiprocessing
from functools import partial

def gaussian_noise(f, folder):
    """Write copies of a clean recording with additive Gaussian noise.

    The recording ``<folder>/clean/<f>`` is read and standardised (mean
    removed, divided by the standard deviation) using the samples between
    29.5 s and 30.5 s as the reference. For every noise level a zero-mean
    Gaussian noise whose standard deviation is ``sqrt(10**(db/10))`` is
    added, and the result is written to ``<folder>/SNR_<x>dB/<f>`` with the
    original sample rate.

    Args:
        f: File name of the recording inside ``<folder>/clean/``.
        folder: Dataset root directory; it holds ``clean/`` and receives
            the ``SNR_*dB/`` output directories (created if missing).
    """
    sig, fs = sf.read(os.path.join(folder, 'clean', f))

    # Reference window used to standardise the recording.
    # NOTE(review): presumably the event of interest sits around 30 s -
    # confirm against the dataset description.
    ref = sig[int(29.5*fs):int(30.5*fs)]
    if len(ref) == 0:
        # Recording shorter than 29.5 s: an empty window would yield NaN
        # statistics and an all-NaN output, so use the whole recording.
        ref = sig
    sig = (sig - np.mean(ref))/np.std(ref)

    # A generator seeded from OS entropy on every call. The global
    # np.random state is copied verbatim into forked pool workers, which
    # would make them all draw the same noise sequences.
    rng = np.random.default_rng()

    # Noise power in dB relative to the unit-variance reference window,
    # paired with the output folder; the folder label is the negated value.
    levels = zip([-24, -18, -12, -6, 0, 6],
                 ['SNR_24dB', 'SNR_18dB', 'SNR_12dB', 'SNR_6dB', 'SNR_0dB', 'SNR_-6dB'])
    for db, dbfolder in levels:
        amplitud = np.sqrt(10**(db/10))
        # Draw noise with the full shape of the signal so that
        # multi-channel recordings are handled as well as mono ones.
        noisy = sig + rng.normal(0, amplitud, sig.shape)
        out_dir = os.path.join(folder, dbfolder)
        os.makedirs(out_dir, exist_ok=True)
        sf.write(os.path.join(out_dir, f), noisy, fs)


def boat_noise(f, folder, boats):
    """Write copies of a clean recording mixed with a random boat recording.

    The recording ``<folder>/clean/<f>`` and one randomly chosen entry of
    ``boats`` (also read from ``<folder>/clean/``) are truncated to their
    common length and each standardised over its whole duration. For every
    level the boat recording is scaled by ``sqrt(10**(db/10))``, added to
    the clean recording, and written to ``<folder>/boat/SNR_<x>dB/<f>``.

    Args:
        f: File name of the recording inside ``<folder>/clean/``.
        folder: Dataset root directory; it holds ``clean/`` and receives
            the ``boat/SNR_*dB/`` output directories (created if missing).
        boats: pandas Series of candidate boat-noise file names; one is
            picked uniformly at random by position.
    """
    sig, fs = sf.read(os.path.join(folder, 'clean', f))

    # A generator seeded from OS entropy on every call. The global
    # np.random state is copied verbatim into forked pool workers, which
    # would make them all pick the same sequence of boat files.
    rng = np.random.default_rng()
    # Index over however many candidates were supplied, not a fixed 10.
    boat_file = boats.iloc[rng.integers(len(boats))]

    # Keep the clean recording's sample rate for the output, since the
    # output file is named after it; the boat file's rate is discarded.
    # NOTE(review): both files are assumed to share a sample rate - no
    # resampling is performed.
    sig2, _ = sf.read(os.path.join(folder, 'clean', boat_file))

    # Truncate both recordings to the shorter one, then standardise each.
    n = min(len(sig), len(sig2))
    sig = sig[:n]
    sig = (sig - np.mean(sig))/np.std(sig)
    sig2 = sig2[:n]
    sig2 = (sig2 - np.mean(sig2))/np.std(sig2)

    # Boat-noise power in dB relative to the standardised recording,
    # paired with the output folder; the folder label is the negated value.
    levels = zip([-24, -18, -12, -6, 0, 6],
                 ['SNR_24dB', 'SNR_18dB', 'SNR_12dB', 'SNR_6dB', 'SNR_0dB', 'SNR_-6dB'])
    for db, dbfolder in levels:
        amplitud = np.sqrt(10**(db/10))
        new = sig + amplitud*sig2
        out_dir = os.path.join(folder, 'boat', dbfolder)
        os.makedirs(out_dir, exist_ok=True)
        sf.write(os.path.join(out_dir, f), new, fs)

# Driver: mix every file of the clean folder with one of 10 randomly sampled
# boat recordings, spreading the work over 8 worker processes.
#
# The guard is required for multiprocessing: under the "spawn" start method
# each worker re-imports this module, and without the guard it would try to
# start its own pool again.
if __name__ == '__main__':
    folder = './PhyseterMacrocephalus/'
    clean_files = os.listdir(folder+'clean/')

    # Boat recordings are the files whose name contains 'negatif'.
    boats = [f for f in clean_files if 'negatif' in f]
    boats = pd.Series(boats).sample(10)

    # The context manager releases the worker processes once the blocking
    # map call has returned.
    with multiprocessing.Pool(processes=8) as pool:
        pool.map(partial(boat_noise, folder=folder, boats=boats), clean_files)



