# -*- coding: utf-8 -*- 
# Nicolas Enfon 13/05/14
import numpy as np
from matplotlib.pyplot import *
import os

#Import data
data=np.loadtxt('ALL_CLEAN.txt') #puis enelever les fichiers avec ping
[day, month, h1, znb, zici, z, pnb, pici, p, anb, a ] = [data[:,3], data[:,4], data[:,6] \
,data[:,12],data[:,13],data[:,14] \
,data[:,15],data[:,16],data[:,17] \
,data[:,18], data[:,19]]

#remove NaN
#sansNaN=[nb for nb in avecNaN if np.isnan(nb) == False]

#Truncate to keep only plausible data (ie IPI within a given range, nb of clicks not too high...)
#data2 = [part2, file2, day2, h12, h22, db2, ratio2, ping2, znb2, zici2, pnb2, pici2, moon2] = [ [] for i in xrange(len(data)) ]
#for i in xrange(len(data[0])):
#    if 0.005 <= zici[i] <= 0.17 and 0.2 <= pici[i] <= 3 and znb[i] <= 1000 and pnb[i] <= 400:        
#		for j in xrange(len(data)):
#			data2[j].append( data[j][i] )
#np.savetxt('TRUNC_part_file_day_h1_h2_db_ratio_ping_znb_zici_pnb_pici_moon', data2, delimiter=',')

#Make it boxplotable
def makeboxplotable(y,x):
    """Returns a array containing an array of y values, for each x value"""
    y_x = []
    for i in xrange( len( np.unique(x) ) ):
        y_x.append([ y[j] for j in xrange(len(y)) if x[j] == np.unique(x)[i]   ])
    return y_x
#truncz=[data[i,:] for i in xrange(len(data)) if data[i,14]==1]
zici_day = makeboxplotable(zici,day)
znb_day = makeboxplotable(znb,day)
z_day = makeboxplotable(z,day)
pici_day = makeboxplotable(pici,day)
pnb_day = makeboxplotable(pnb,day)
p_day = makeboxplotable(z,day)

#zici_moon = makeboxplotable(zici,moon)
#znb_moon = makeboxplotable(znb,moon)
#pici_moon = makeboxplotable(pici,moon)
#pnb_moon = makeboxplotable(pnb,moon)

zici_h1 = makeboxplotable(zici,h1)
znb_h1 = makeboxplotable(znb,h1)
z_h1 = makeboxplotable(z,h1)
pici_h1 = makeboxplotable(pici,h1)
pnb_h1 = makeboxplotable(pnb,h1)
p_h1 = makeboxplotable(p,h1)

#Simple plots X vs Y
os.chdir('/net/nas-lsis-3/SABIOD/public_data/ANTARESV3_21/STAT_ANALYSIS/TRUNC_SIMPLE_PLOTS_X_VS_Y')
all = [zici_day, znb_day, z_day, pici_day, pnb_day, p_day, zici_h1, znb_h1, z_h1, pici_h1, pnb_h1, p_h1]
allname = ['Zihpius_ici.day','Ziphius_nb.day','Zihpius_bool.day'\
,'Physeter_ici.day','Physeter_nb.day','Physeter_bool.day' \
,'Zihpius_ici.h1','Ziphius_nb.h1','Ziphius_bool.h1'\
,'Physeter_ici.h1','Physeter_nb.h1','Physeter_bool.h1']
for i in xrange(len(all)):
	figure(figsize=(20,5))
	boxplot(all[i],sym='')
	grid(True)
	title(allname[i].split('.')[0]+' VS '+allname[i].split('.')[1])
	ylabel(allname[i].split('.')[0])
	xlabel(allname[i].split('.')[1])
	savefig(  allname[i].split('.')[0]+'_vs_'+allname[i].split('.')[1]+'.png' ) 

#Simple histograms
os.chdir('/net/nas-lsis-3/SABIOD/public_data/ANTARESV3_21/STAT_ANALYSIS/TRUNC_HISTOGRAMS')
all = [znb, zici, z, pnb, pici, p]
allname = ['ziphius_NB', 'ziphius_ICI', 'ziphius_bool', 'physeter_NB', 'physeter_ICI', 'physeter_bool']
for i in xrange(len(all)):
	figure(figsize=(10,10))
	b = 50
	hist(all[i],bins = b)
	title('Histogram of '+allname[i])
	ylabel('Number of occurences')
	xlabel(allname[i]+' ('+str(b)+' bins)')
	savefig('histo_'+allname[i]+'.png')

#2D histograms
os.chdir('/net/nas-lsis-3/SABIOD/public_data/ANTARESV3_21/STAT_ANALYSIS/TRUNC_HISTOGRAMS_2D')
#zici, pici = np.log10(zici), np.log10(pici)
all = [day, h1, znb, zici, z, pnb, pici,p]
allname = ['day','hour','ziphius_NB','ziphius_ICI','ziphius_bool','physeter_NB','physeter_ICI','physeter_bool']
for i in xrange(len(all)):
	for j in xrange(i+1, len(all)):
		figure(figsize=(10,10))
		h, x, y = np.histogram2d(all[j],all[i], bins=(20,10))
		imshow(h, interpolation='nearest',extent=[y[0],y[-1],x[0],x[-1]],aspect='auto',cmap='bone',origin='lower')
		colorbar()
		xlabel(allname[i])
		ylabel(allname[j])
		title('Histogram 2D of '+allname[i]+' and '+allname[j])
		savefig('histo2d_'+allname[i]+'_'+allname[j])


"""
#R script
data=read.table('part_file_day_h1_h2_db_ratio_ping_znb_zici_pnb_pici_moon',sep=',')
data = t(data) #invert lines and rows
day = data[,3]
h1 = data[,4]
db = data[,6]
ratio = data[,7]
ping = data[,8]
znb = data[,9]
zici = data[,10]
pnb = data[,11]
pici = data[,12]
moon = data[,13]
#CAUTION: for normal anova we must have gaussian distribution and homoscedasticity
#normality:
shapiro.test(X) #if ties (= duplicated values)
ks.test(X,"pnorm",mean(X),sd(X)) #else, kolmogorov test
#homosc:
bartlett.test(Y~X) #must have gaussian distrib too...
fligner.test(Y~X) #alternative test
#if hypothesis verified:
analysis=anova(lm(Y~X));summary(analysis);analysis
#else, non parametric ranked test, doesn't need assumptions on the distribution:
kruskal.test(Y~X) #kruskal-wallis
"""


"""
#In case we need to reload the data
data = np.loadtxt('ToutReadmeAntaresV3_21houronly.txt',delimiter=',')
part = data[:,0]
file = data[:,1]
day_raw = data[:,2]
month = data[:,3]
h1 = data[:,5]
h2 = data[:,6]
wavsq = data[:,7]
ratio = data[:,8]
ping = data[:,9]  
znb = data[:,10]
zici = data[:,11]
pnb = data[:,14]
pici = data[:,15]
day = []
for i in xrange(len(month)):
    if month[i] == 8:
        day.append(0)
    else:
        day.append(day_raw[i] + (month[i] - 9) * 30 )
#conversions
db = np.log10(wavsq)
zici = 0.000512 * zici
pici = 0.000512 * pici
#Create the moon data: first 10 days are full moon, then nothing ...
moon = np.zeros(len(day))
i = day.index(9)
moon[:i] = 1
j = day.index(23)
k = day.index(38)
moon[j:k] = 1
l = day.index(52)
moon[l:] = 1

out = [part,file,day,h1,h2,db,ratio,ping,znb,zici,pnb,pici,moon]
np.savetxt('part_file_day_h1_h2_db_ratio_ping_znb_zici_pnb_pici_moon', out, delimiter=',')
"""

"""
#To make the index table for days:
tab = [ [], [], [] ]
nb_z=znb[0]
nb_p=pnb[0]
for i in xrange(1,len(day)):
	if i == len(np.unique(day)) - 1:
		tab[0].append(day[i-1])
		tab[1].append(z)
		tab[2].append(p)
		                
	if day[i] != day[i-1]:
		tab[0].append(day[i-1])
		tab[1].append(z)
		tab[2].append(p)
		z,p = znb[i],pnb[i]
	else:
		p += pnb[i]
		z += znb[i]		
"""	
