import cPickle
import gzip
import os
import sys
import time
import os
import numpy
from numpy import *
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer

# Directory holding the pickled validation minibatches; evaluate_lenet5 reads
# the files named batch_nb_<i>.pkl from it.
windowsvalid = '/NAS3/SABIOD/METHODES/NICOLAS/CNN2D_LIFEBIRD/DATA/VALID_SOMESP'
# Directory holding the pickled training minibatches (batch_nb_<i>.pkl).
windowstrain = '/NAS3/SABIOD/METHODES/NICOLAS/CNN2D_LIFEBIRD/DATA/TRAIN_SOMESP'
# Path prefix of the results file: evaluate_lenet5 appends its run summary to
# resultdir + 'resultsCNN2Dspecies12_25', so the trailing slash matters.
resultdir = '/NAS3/SABIOD/METHODES/NICOLAS/CNN2D_LIFEBIRD/RESULTS_V1/'

class LeNetConvPoolLayer(object):
    """Convolutional layer followed by max-pooling and a tanh non-linearity.

    After construction the instance exposes:
      input  -- the symbolic input the layer was built on
      W      -- shared filter weights, shaped like ``filter_shape``
      b      -- shared bias vector, one entry per output feature map
      output -- symbolic expression tanh(max_pool(conv(input, W)) + b)
      params -- ``[W, b]``, the trainable parameters of the layer
    """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """Build the symbolic graph of the layer.

        rng          -- random generator providing ``uniform(low, high, size)``;
                        used once to draw the initial filter weights
        input        -- symbolic image tensor handed to ``conv.conv2d``
        filter_shape -- (number of filters, number of input feature maps,
                        filter height, filter width)
        image_shape  -- shape of ``input``; its entry 1 (number of input
                        feature maps) must equal ``filter_shape[1]``
        poolsize     -- downsampling factor handed to ``max_pool_2d``
        """
        # Original author's note: the pool may also be (1, 2) or (2, 1).
        assert image_shape[1] == filter_shape[1]
        self.input = input

        n_filters = filter_shape[0]
        float_type = theano.config.floatX

        # Every hidden unit is fed by
        # "input feature maps * filter height * filter width" values ...
        fan_in = numpy.prod(filter_shape[1:])
        # ... and every unit of the lower layer gets gradient from
        # "output feature maps * filter height * filter width" / pooling size.
        fan_out = n_filters * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)

        # Filters start uniformly distributed in [-limit, limit].
        limit = numpy.sqrt(6. / (fan_in + fan_out))
        initial_filters = rng.uniform(low=-limit, high=limit,
                                      size=filter_shape)
        self.W = theano.shared(
            numpy.asarray(initial_filters, dtype=float_type),
            borrow=True)

        # One bias per output feature map, initialised to zero.
        self.b = theano.shared(
            value=numpy.zeros((n_filters,), dtype=float_type),
            borrow=True)

        # Convolve the input feature maps with the filters.
        feature_maps = conv.conv2d(input=input, filters=self.W,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape)

        # Max-pool every feature map on its own.
        pooled = downsample.max_pool_2d(input=feature_maps,
                                        ds=poolsize, ignore_border=True)

        # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts over the mini-batch and over feature map height/width.
        broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled + broadcast_bias)

        # Parameters to be fitted by gradient descent.
        self.params = [self.W, self.b]

def _load_batch(directory, batch_index):
    """Load the pickled minibatch ``batch_nb_<batch_index>.pkl`` from *directory*.

    The pickle is indexed as ``batch[0]`` (inputs) and ``batch[1]`` (labels).
    Returns ``(inputs, labels)`` as a float32 array and an int32 array, the
    dtypes the compiled Theano functions are called with.

    NOTE: unpickling executes arbitrary code; only point this at batch files
    from a trusted location.
    """
    path = os.path.join(directory, 'batch_nb_' + str(batch_index) + '.pkl')
    # The context manager closes the file even if unpickling fails.
    with open(path, 'rb') as batch_file:
        batch = cPickle.load(batch_file)
    inputs = numpy.asarray(batch[0], dtype=numpy.float32)
    labels = numpy.asarray(batch[1], dtype=numpy.int32)
    return inputs, labels


def evaluate_lenet5(learning_rate, n_epochs,
                    dataset='mnist.pkl.gz',
                    nkerns=[1,2], batch_size=500):
    """Train a two-stage convolutional network with minibatch SGD.

    Training minibatches are read from ``windowstrain`` and validation
    minibatches from ``windowsvalid`` (files ``batch_nb_<i>.pkl``).  The
    validation error is measured every ``validation_frequency`` iterations
    and drives patience-based early stopping.  At the end a summary line is
    appended to ``resultdir + 'resultsCNN2Dspecies12_25'``.

    learning_rate -- step size of the gradient descent updates
    n_epochs      -- maximum number of passes over the training minibatches
    dataset       -- unused; kept so existing callers keep working
    nkerns        -- number of filters of the first and second convolutional
                     layer (the list default is never mutated here)
    batch_size    -- number of images per minibatch; the input is reshaped
                     with it, so it has to match the pickled batches

    Returns None.  Progress is printed to stdout, the run time to stderr.
    """
    rng = numpy.random.RandomState(23455)

    # TODO: derive the batch counts from the data instead of hard-coding them.
    n_train_batches = 10
    n_valid_batches = 4

    # symbolic variables for the data
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    imx, imy = 56, 2000    # image size
    kerx, kery = 13, 401   # kernel size
    # Each stage is a valid convolution (size - kernel + 1) followed by 2x2
    # max-pooling with ignore_border=True, i.e. a floor division by 2.
    outx, outy = (imx - kerx + 1) // 2, (imy - kery + 1) // 2
    out2x, out2y = (outx - kerx + 1) // 2, (outy - kery + 1) // 2

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images of shape (batch_size, imx*imy)
    # to a 4D tensor, compatible with LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, imx, imy))

    # First convolutional pooling layer:
    # 4D output tensor is of shape (batch_size, nkerns[0], outx, outy).
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 1, imx, imy),
            filter_shape=(nkerns[0], 1, kerx, kery), poolsize=(2, 2))

    # Second convolutional pooling layer:
    # 4D output tensor is of shape (batch_size, nkerns[1], out2x, out2y).
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], outx, outy),
            filter_shape=(nkerns[1], nkerns[0], kerx, kery), poolsize=(2, 2))

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, nkerns[1] * out2x * out2y).
    layer2_input = layer1.output.flatten(2)

    # Fully-connected tanh layer.  The pooled sizes are floored per dimension
    # before being multiplied, so odd intermediate sizes are handled too.
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=nkerns[1] * out2x * out2y,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=12)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # Function computing the mistakes made by the model on one minibatch.
    # Test-set evaluation is currently disabled (see test_score below); the
    # same function can be reused once test batches are available.
    validate_model = theano.function([x, y], layer3.errors(y))

    # all model parameters to be fit by gradient descent, and their gradients
    params = layer3.params + layer2.params + layer1.params + layer0.params
    grads = T.grad(cost, params)

    # Plain SGD: one (parameter, updated value) pair per model parameter.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function([x, y], cost, updates=updates)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.998
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    # validation error measured at every validation step, in order
    memoryscore = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        print('epoch nb %s' % epoch)
        for minibatch_index in range(n_train_batches):
            print('Training: minibatch_index nb %s' % minibatch_index)
            batch_x, batch_y = _load_batch(windowstrain, minibatch_index)

            iteration = (epoch - 1) * n_train_batches + minibatch_index
            print('current time:   %s' % time.ctime())
            print('training @ iter =  %s' % iteration)

            train_model(batch_x, batch_y)

            if (iteration + 1) % validation_frequency == 0:
                print('Validation')
                validation_losses = []
                for i in range(n_valid_batches):
                    print('Validation: minibatch nb %s' % i)
                    valid_x, valid_y = _load_batch(windowsvalid, i)
                    validation_losses.append(validate_model(valid_x, valid_y))
                this_validation_loss = numpy.mean(validation_losses)
                memoryscore.append(this_validation_loss)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100.))
                print(memoryscore)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # No test set is evaluated yet: placeholder score.
                    test_score = 9999999
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    print('memoryscore: %s' % (memoryscore,))

    # 'a' appends, so the summaries of successive runs accumulate.
    with open(resultdir + 'resultsCNN2Dspecies12_25', 'a') as results_file:
        results_file.write(
            '\n\nlearning_rate=' + str(learning_rate) +
            ', n_epochs=' + str(n_epochs) +
            ', nkerns=' + str(nkerns) +
            ', batch_size=' + str(batch_size) +
            ', n_valid_batches=' + str(n_valid_batches) +
            ', n_train_batches=' + str(n_train_batches) +
            ', kernel:' + str(kerx) + ', ' + str(kery))
        results_file.write('Score: ' + str(memoryscore))
# Script entry point: run one training epoch with a single filter in each
# convolutional layer.
if __name__ == '__main__':
    evaluate_lenet5(learning_rate=0.1, n_epochs=1, nkerns=[1,1], batch_size=500)
# The initial value for nkerns was [20, 50].

def experiment(state, channel):
    """Run evaluate_lenet5 with hyper-parameters taken from *state*.

    state   -- object exposing ``learning_rate`` and ``dataset`` attributes,
               and optionally ``n_epochs``
    channel -- unused
               (NOTE(review): the (state, channel) signature looks like an
               experiment-runner hook -- confirm against the caller)

    evaluate_lenet5 requires ``n_epochs``; the previous call omitted it and
    therefore always raised TypeError.  When *state* does not provide it, a
    single epoch is run, matching the script entry point of this file.
    """
    try:
        n_epochs = state.n_epochs
    except (AttributeError, KeyError):
        # KeyError covers dict-backed state objects with attribute access.
        n_epochs = 1
    evaluate_lenet5(state.learning_rate, n_epochs, dataset=state.dataset)