import os
import scipy.io.wavfile
import shutil
import random

# Build a train/validation split listing for the 50-class bird subset.
#
# Phase 1: for every entry found in CMTfinalspectro_CHECKED/TRAIN1..TRAIN50,
#          derive the recording base name, read the matching .wav to get its
#          duration, and pull the class id out of the matching XML metadata.
# Phase 2: randomly send each collected recording to the TRAIN subset
#          (random draw > 0.3) or to the VALID subset, log the decision in
#          'lsBIRDS50class.txt' and copy the .wav into the matching folder
#          with the class id appended to its file name.
dirname = '/NAS3/SABIOD/public_data/DIENS_DYNI/NEW_CHALLENGE_2015/'
wav_dir = dirname + 'LIFECLEF2015BIRDCHALLENGE_TRAIN_TEST_SETS/'
xml_dir = dirname + 'LIFECLEF2015BIRDCHALLENGE_TRAIN_TEST_SETS_XML_METADATA/xml/train/'

# Each entry is [wav file name, duration as a string, class id].
lstot = []

# The listing file is opened (and truncated) before the scan starts; the
# `with` block guarantees it is closed even if a copy fails part-way through.
with open('lsBIRDS50class.txt', 'w') as a:
    for i in range(50):
        ls_wav = os.listdir(dirname + 'CMTfinalspectro_CHECKED/TRAIN' + str(i + 1))
        for wav in ls_wav:
            # Drop the last 8 characters of the listed name to get the
            # recording base name (presumably a suffix plus extension added
            # when the spectrograms were produced -- confirm).
            base = wav[0:-8]
            try:
                freq, sample = scipy.io.wavfile.read(wav_dir + base + '.wav')
                # Duration in whole seconds: floor division keeps the integer
                # result that Python 2's `/` gives for two ints.
                length = len(sample) // freq
                # `with` closes the metadata file even when parsing fails.
                with open(xml_dir + base + '.xml') as xml:
                    data = ' '.join(xml.readlines())
                # Text after the first 'ClassId>' runs up to the '</' of the
                # closing tag; the final slice strips that trailing '</'.
                classid = data.split('ClassId>')[1].split('</ClassId')[0][0:-2]
                lstot.append([base + '.wav', str(length), classid])
            except Exception:
                # Best effort: report the entry that could not be processed
                # and carry on. Catching Exception (not a bare except) keeps
                # Ctrl-C and SystemExit working.
                print(wav)

    for info in lstot:
        k = random.random()
        # Roughly 70% of the recordings go to TRAIN, the rest to VALID.
        if k > 0.3:
            label, subset = 'TRAIN', 'SUBTRAIN'
        else:
            label, subset = 'VALID', 'SUBVALID'
        a.write(label + ' ' + str(info) + '\n')
        # Destination name: original stem + '_' + class id + '.wav'.
        shutil.copy2(wav_dir + info[0],
                     dirname + 'BD50CLASSES/' + subset + '/' + info[0][0:-4] + '_' + info[2] + '.wav')
