a
    Df                     @   sx  d dl Zd dlmZ d dlmZmZmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZ d dlZd dlZd dlmZ zd dlZW n$ ey   ed ed	dY n0 zd dlZd d
lmZ W n$ ey   ed eddY n0 G dd deZdd Zd,ddZd-ddZd.ddZd/ddZdd Zdd Zd0d!d"Z d1d$d%Z!d2d&d'Z"G d(d) d)ej#j$Z%G d*d+ d+ej&Z'dS )3    N)UMAP)warncatch_warningsfilterwarnings)TypingError)spectral_layout)check_random_state)KDTreea  The umap.parametric_umap package requires Tensorflow > 2.0 to be installed.
    You can install Tensorflow at https://www.tensorflow.org/install
    
    or you can install the CPU version of Tensorflow using 

    pip install umap-learn[parametric_umap]

    z/umap.parametric_umap requires Tensorflow >= 2.0)opszEThe umap.parametric_umap package requires Keras >= 3 to be installed.z#umap.parametric_umap requires Kerasc                       s   e Zd Zddddddddddi f fdd	Zd fdd	Zd fd	d
	Zdd Z fddZdd Zdd Z	dd Z
dddZ  ZS )ParametricUMAPNF      ?r   c                    s   t  jf i | || _|| _|| _|| _|| _|| _|| _|| _	d| _
|
| _|	| _|| _d| _d| _tjjddd| _| jdur|jd jd | jkrtd	|jd jd | jdS )
ab  
        Parametric UMAP subclassing UMAP-learn, based on keras/tensorflow.
        There is also a non-parametric implementation contained within to compare
        with the base non-parametric implementation.

        Parameters
        ----------
        batch_size : int, optional
            size of batch used for batch training, by default None
        dims :  tuple, optional
            dimensionality of data, if not flat (e.g. (32x32x3 images for ConvNet), by default None
        encoder : keras.Sequential, optional
            The encoder Keras network
        decoder : keras.Sequential, optional
            the decoder Keras network
        parametric_reconstruction : bool, optional
            Whether the decoder is parametric or non-parametric, by default False
        parametric_reconstruction_loss_fcn : bool, optional
            What loss function to use for parametric reconstruction,
            by default keras.losses.BinaryCrossentropy
        parametric_reconstruction_loss_weight : float, optional
            How to weight the parametric reconstruction loss relative to umap loss, by default 1.0
        autoencoder_loss : bool, optional
            [description], by default False
        reconstruction_validation : array, optional
            validation X data for reconstruction loss, by default None
        global_correlation_loss_weight : float, optional
            Whether to additionally train on correlation of global pairwise relationships (>0), by default 0
        keras_fit_kwargs : dict, optional
            additional arguments for model.fit (like callbacks), by default {}
        
   N   MbP?      @Z	clipvaluer   zNDimensionality of embedder network output ({}) doesnot match n_components ({}))super__init__dimsencoderdecoderparametric_reconstruction%parametric_reconstruction_loss_weight"parametric_reconstruction_loss_fcnautoencoder_loss
batch_sizeloss_report_frequencyglobal_correlation_loss_weightreconstruction_validationkeras_fit_kwargsparametric_modeln_training_epochskeras
optimizersAdam	optimizeroutputsshapen_components
ValueErrorformat)selfr   r   r   r   r   r   r   r   r   r   r    kwargs	__class__ a/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/umap/parametric_umap.pyr   &   s2    .
zParametricUMAP.__init__c                    s@   | j dkr.|d u rtd|| _t ||S t ||S d S NprecomputedzTPrecomputed distances must be supplied if metric                     is precomputed.)metricr*   _Xr   fitr,   XyZprecomputed_distancesr.   r0   r1   r6   }   s    
zParametricUMAP.fitc                    s@   | j dkr.|d u rtd|| _t ||S t ||S d S r2   )r4   r*   r5   r   fit_transformr7   r.   r0   r1   r:      s    
zParametricUMAP.fit_transformc                 C   s   | j jt|| j| jdS )aw  Transform X into the existing embedded space and return that
        transformed output.
        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            New data to be transformed.
        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Embedding of the new data in low-dimensional space.
        r   verbose)r   predictnp
asanyarrayr   r<   r,   r8   r0   r0   r1   	transform   s    zParametricUMAP.transformc                    s2   | j r"| jjt|| j| jdS t |S dS )a   Transform X in the existing embedded space back into the input
        data space and return that transformed output.
        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            New points to be inverse transformed.
        Returns
        -------
        X_new : array, shape (n_samples, n_features)
            Generated data points new data in data space.
        r;   N)	r   r   r=   r>   r?   r   r<   r   inverse_transformr@   r.   r0   r1   rB      s
    z ParametricUMAP.inverse_transformc                 C   s:   | j }t| j| j| j| j| j| j| j|| j	| j
d
| _dS )zDefine the model in keras)negative_sample_rater   r   !parametric_reconstruction_loss_fnr   r   r   r   N)r   	UMAPModelZ_a_brC   r   r   r   r   r   r   r!   )r,   Zprlwr0   r0   r1   _define_model   s    zParametricUMAP._define_modelc                 C   s  | j dkr| j}| jd u r.t|d g| _n*t| jdkrXt|t|gt| j }| jrt	|dkszt
|dk rtd t|| j| j| j| j| j\}| _}}}| _tt|tjd| _tt|tjd| _d }	t|}
t| j| j| j| j|
| j|	\| _| _|   t|| j | j }| jr| j d urt| jdkrzt| j t| j gt| j | _ | j t!| j fd| j if}nd }| j"j#|f| j| j$ ||d	| j%}|j&| _'| jj(|| j)d
}|i fS )Nr3   r   r   r           zMData should be scaled to the range 0-1 for cross-entropy reconstruction loss.r   reconstruction)Zepochssteps_per_epochvalidation_data)r<   )*r4   r5   r   r>   r(   lenreshapelistr   maxminr   construct_edge_datasetgraph_n_epochsr   r   Zedge_weightr
   arrayexpand_dimsastypeint64headtailprepare_networksr   r   r)   rG   intr   r   Z
zeros_liker!   r6   r"   r    historyZ_historyr=   r<   )r,   r8   rS   initrandom_stateedge_datasetZn_edgesrX   rY   init_embeddingn_datarJ   rK   r\   	embeddingr0   r0   r1   _fit_embed_data   s    

"	

zParametricUMAP._fit_embed_datac                 C   s   t dd | j D S )Nc                 s   s,   | ]$\}}t ||r|d vr||fV  qdS ))r&   r   r   r!   N)should_pickle).0kvr0   r0   r1   	<genexpr>1  s   z.ParametricUMAP.__getstate__.<locals>.<genexpr>)dict__dict__items)r,   r0   r0   r1   __getstate__/  s    zParametricUMAP.__getstate__Tc              	   C   s2  | j d ur6tj|d}| j | |r6td| | jd urltj|d}| j| |rltd| | jd urtj|d}| j| |rtd| t	 t t
d tj|d}t|d	 }t| |tj W d    n1 s0    Y  |rtd
| W d    n1 s$0    Y  d S )Nencoder.keraszKeras encoder model saved to {}decoder.keraszKeras decoder model saved to {}zparametric_model.keraszKeras full model saved to {}ignore	model.pklwbz*Pickle of ParametricUMAP model saved to {})r   ospathjoinsaveprintr+   r   r!   r   r   openpickledumpHIGHEST_PROTOCOL)r,   save_locationr<   encoder_outputdecoder_outputparametric_model_outputmodel_outputoutputr0   r0   r1   ru   7  s,    


.zParametricUMAP.save)NN)NN)T)__name__
__module____qualname__r   r6   r:   rA   rB   rG   rc   rl   ru   __classcell__r0   r0   r.   r1   r   %   s(   Wbr   c                 C   s   |   }|  |jd }|du r:|jd dkr6d}nd}d|j|j|j t| k < |  ||j }|j}|j}|j}||||||fS )a<  
    gets elements of graphs, weights, and number of epochs per edge

    Parameters
    ----------
    graph_ : scipy.sparse.csr.csr_matrix
        umap graph of probabilities
    n_epochs : int
        maximum number of epochs per edge

    Returns
    -------
    graph scipy.sparse.csr.csr_matrix
        umap graph
    epochs_per_sample np.array
        number of epochs to train each sample for
    head np.array
        edge head
    tail np.array
        edge tail
    weight np.array
        edge weight
    n_vertices int
        number of vertices in graph
    r   Nr   '  i     rH   )	ZtocooZsum_duplicatesr(   datarO   floatZeliminate_zerosrowcol)rR   rS   graph
n_verticesepochs_per_samplerX   rY   weightr0   r0   r1   get_graph_elementsX  s    

r   spectralc                 C   sD  |du rt d}t|trF|dkrF|jdd|jd |fdtj}nt|tr|dkrt| |||||d}dt	|
  }	||	 tj|jd	|jd |gd
tj }nt|}
t|
jdkr@tj|
ddjd |
jd k r<t|
}|j|
dd\}}t|dddf }|
|jd| |
jd
tj }n|
}|S )a*  Initialize embedding using graph. This is for direct embeddings.

    Parameters
    ----------
    init : str, optional
        Type of initialization to use. Either random, or spectral, by default "spectral"

    Returns
    -------
    embedding : np.array
        the initialized embedding
    Nrandomg      $g      $@r   )lowhighsizer   )r4   Zmetric_kwds-C6?)Zscaler      axis)rf   r   r   )r   
isinstancestruniformr(   rV   r>   float32r   absrO   normalrT   rL   uniquer	   querymean)Z	_raw_datar   r)   r^   r4   Z_metric_kwdsr]   rb   ZinitialisationZ	expansionZ	init_datatreedistindZnndistr0   r0   r1   init_embedding_from_graph  sP    	
 
r   r   c                 C   s   t || d|    S )a  
     convert distance representation into log probability,
        as a function of a, b params

    Parameters
    ----------
    distances : array
        euclidean distance between two points in embedding
    a : float, optional
        parameter based on min_dist, by default 1.0
    b : float, optional
        parameter based on min_dist, by default 1.0

    Returns
    -------
    float
        log probability in embedding space
    r   )r
   log1p)Z	distancesabr0   r0   r1   #convert_distance_to_log_probability  s    r   r   c                 C   s>   |  t | }d|   t ||  | }|| }|||fS )a  
    Compute cross entropy between low and high probability

    Parameters
    ----------
    probabilities_graph : array
        high dimensional probabilities
    log_probabilities_distance : array
        low dimensional log probabilities
    EPS : float, optional
        offset to ensure log is taken of a positive number, by default 1e-4
    repulsion_strength : float, optional
        strength of repulsion between negative samples, by default 1.0

    Returns
    -------
    attraction_term: float
        attraction term for cross entropy loss
    repellant_term: float
        repellent term for cross entropy loss
    cross_entropy: float
        cross entropy umap loss

    r   )r
   Zlog_sigmoid)probabilities_graphlog_probabilities_distanceZEPSrepulsion_strengthZattraction_termZrepellant_termZCEr0   r0   r1   compute_cross_entropy  s    r   c                 C   s   | du r^t t jj|dt j t jjdddt jjdddt jjdddt jj|ddg} |du r|rt t jj|fdt jjdddt jjdddt jjdddt jjt|ddd	t j|g}| |fS )
a`  
    Generates a set of keras networks for the encoder and decoder if one has not already
    been predefined.

    Parameters
    ----------
    encoder : keras.Sequential
        The encoder Keras network
    decoder : keras.Sequential
        the decoder Keras network
    n_components : int
        the dimensionality of the latent space
    dims : tuple of shape (dim1, dim2, dim3...)
        dimensionality of data
    n_data : number of elements in dataset
        # of elements in training dataset
    parametric_reconstruction : bool
        Whether the decoder is parametric or non-parametric
    init_embedding : array (optional, default None)
        The initial embedding, for nonparametric embeddings

    Returns
    -------
    encoder: keras.Sequential
        encoder keras network
    decoder: keras.Sequential
        decoder keras network
    N)r(   d   Zrelu)units
activationz)r   nameZrecon)r   r   r   )	r#   Z
SequentiallayersZInputFlattenZDenser>   productZReshape)r   r   r)   r   ra   r   r`   r0   r0   r1   rZ     s0    &
rZ   c                    sD   fdd j d dkrdnd fdd}fd	d
}t||\}}	}
}}}du rrtt|dgt|
|	dt||	d }}tjt	t
|}|| tj}|| tj}tjj||f}| }|d}|jdd}|j|tjjjd}|j|tjjjd}|d}|t
||
||fS )a  
    Construct a tf.data.Dataset of edges, sampled by edge weight.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        New data to be transformed.
    graph_ : scipy.sparse.csr.csr_matrix
        Generated UMAP graph
    n_epochs : int
        # of epochs to train each edge
    batch_size : int
        batch size
    parametric_reconstruction : bool
        Whether the decoder is parametric or non-parametric
    c                    s    |  S Nr0   )index)r8   r0   r1   gather_indexh  s    z,construct_edge_dataset.<locals>.gather_indexg&.>g      ?TFc                    sV   r6t | gt jgd }t |gt jgd }nt  | }t  |}||fS )Nr   )tfZpy_functionr   gather)Zedge_toZ	edge_fromedge_to_batchedge_from_batch)r8   r   gather_indices_in_pythonr0   r1   gather_Xo  s    z(construct_edge_dataset.<locals>.gather_Xc                    s8   dt d i}dkr | |d< r,| |d< | |f|fS )Numapr   global_correlationrI   )r
   repeat)r   r   r'   )r   r   r   r0   r1   get_outputsy  s    z+construct_edge_dataset.<locals>.get_outputsNi  r[   r   )Zdrop_remainder)Znum_parallel_callsr   )nbytesr   r[   r>   rP   r   rV   r   ZpermutationrangerL   rW   r   r   ZDatasetZfrom_tensor_slicesshufflebatchmapZexperimentalZAUTOTUNEZprefetch)r8   rR   rS   r   r   r   r   r   _r   rX   rY   r   r   Zedges_to_expZedges_from_expZshuffle_maskr_   r0   )r8   r   r   r   r   r   r1   rQ   P  s:    




rQ   c                 C   s   z0t t|d }tt | d}W n tjtjj	t
tjjtjjtttfy } ztd| | W Y d}~dS d}~0  ty } z(td|  d| d|  W Y d}~dS d}~0 0 dS )	a  
    Checks if a dictionary item can be pickled

    Parameters
    ----------
    key : try
        key for dictionary element
    val : None
        element of dictionary

    Returns
    -------
    picklable: bool
        whether the dictionary item can be pickled
    base64zDid not pickle {}: {}NFzFailed at pickling :z due to T)codecsencoderx   dumpsdecodeloadsPicklingErrorr   errorsZInvalidArgumentError	TypeErrorZInternalErrorZNotFoundErrorOverflowErrorr   AttributeErrorr   r+   r*   )keyvalZpickledr   er0   r0   r1   rd     s&    
rd   Tc                 C   s   t j| d}tt|d}|r0td| t j| d}t j|rjt	j
||_|rjtd| t j| d}t j|rt	j
||_td| t j| d}t j|rt	j
||_td	| |S )
a  
    Load a parametric UMAP model consisting of a umap-learn UMAP object
    and corresponding keras models.

    Parameters
    ----------
    save_location : str
        the folder that the model was saved in
    verbose : bool, optional
        Whether to print the loading steps, by default True

    Returns
    -------
    parametric_umap.ParametricUMAP
        Parametric UMAP objects
    rp   rbz-Pickle of ParametricUMAP model loaded from {}rm   z"Keras encoder model loaded from {}rn   z"Keras decoder model loaded from {}r!   zKeras full model loaded from {})rr   rs   rt   rx   loadrw   rv   r+   existsr#   modelsZ
load_modelr   r   r!   )r{   r<   r   modelr|   r}   r~   r0   r0   r1   load_ParametricUMAP  s(    r   Fc                 C   s  t | } | t j| ddd } |du rF| }t j| t | d|d}n4t j|| jd}|t j|ddd }t| jd g}dg}t j|dd}t j|dd}t | }t |}t |d }t |d }t 	|||f}	t 	|||f}
t 	|||f}	t 	|||f}
t 
|	t |
t || j }t 	||d df}t |}t 	|t |dd ||f }|st j|dd	}|S )
zAdapted from TF Probability.r   Tr   keepdimsNdtyper   int32r   r   )r
   Zconvert_to_tensorr   Zconjr   rL   r(   castZ	transposerM   matmulsqueeze)xr9   r   Z
event_axisZsample_axisZx_permedZy_permedZn_eventsZ	n_samplesZx_permed_flatZy_permed_flatZcovr0   r0   r1   
covariance  s`    





r   c                 C   s>   | t j| ddd } |d ur0|t j|ddd }t| ||dS )Nr   Tr   r   r9   r   )r
   stdr   r   r0   r0   r1   correlationD  s    r   c                   @   s   e Zd Zdd ZdS )StopGradientc                 C   s
   t |S r   )r
   stop_gradient)r,   r   r0   r0   r1   callQ  s    zStopGradient.callN)r   r   r   r   r0   r0   r0   r1   r   P  s   r   c                       sJ   e Zd Zd fdd	Zdd	 Zdd
dZdddZdd Zdd Z  Z	S )rE   NFr   rH   
umap_modelc                    s   t  j|d || _|| _|| _|
| _|	| _|| _|| _|| _	|| _
|pVtjjddd}| j|d tj | _tj | _|d u rtjjdd| _n|| _d S )N)r   r   r   r   )r&   T)Zfrom_logits)r   r   r   r   r   r   r   rC   umap_loss_aumap_loss_br   r#   r$   r%   compiler   r   flattenr   ZSeedGeneratorseed_generatorlossesZBinaryCrossentropyrD   )r,   r   r   rC   r   r   r&   rD   r   r   r   r   r   r.   r0   r1   r   V  s*    
zUMAPModel.__init__c                 C   sZ   |\}}|  |}|  |}||d}| jrV| jr>| |}n| t|}||d< |S )N)embedding_toembedding_fromrI   )r   r   r   r   r
   r   )r,   inputsZto_xZfrom_xr   r   y_predZembedding_to_reconr0   r0   r1   r     s    

zUMAPModel.callc                 K   sx   g }| j D ]}|tj|tj d q
|| | | jdkrV|| 	|| | j
rn|| || t|S )Nr   r   )r   appendr
   r   r#   backendZfloatx
_umap_lossr   _global_correlation_lossr   _parametric_reconstruction_losssum)r,   r   r9   r   Zsample_weightr-   r   lossr0   r0   r1   compute_loss  s    

zUMAPModel.compute_lossc                 C   s  |d }|d }t j|| jdd}t j|| jdd}t |d }tjjt || jd}tj	
 dkrvt||}	n|| }	t jt j|| ddt j||	 ddgdd}
t|
| j| j}t |d }t jt |ft || j fgdd}t|||d\}}}t |S )	Nr   r   r   r   seed
tensorflowr   )r   )r
   r   rC   r(   r#   r   r   Zaranger   configr   r   r   Zconcatenatenormr   r   r   Zoneszerosr   r   )r,   r   r   r   r   Zembedding_neg_toZ
repeat_negZrepeat_neg_batch_dimZshuffled_indicesZembedding_neg_fromZdistance_embeddingr   r   r   Zattraction_lossZrepellant_lossZce_lossr0   r0   r1   r     sF    	

	zUMAPModel._umap_lossc           	      C   s   |  |d }|  |d }dd }||}||}t|dd}t|dd}tj|dd  |d d  dd	}tj|dd  |d d  dd	}|tjj|j| jd
d  }t	t
t|dt|dd}| | j S )Nr   r   c                 S   s   | t |  t |  S r   )r
   r   r   )r   r0   r0   r1   z_score  s    z3UMAPModel._global_correlation_loss.<locals>.z_scoreir   r   r   r   r  g|=)r   r9   )r   r
   Zclipr  r#   r   r   r(   r   r   r   rU   r   )	r,   r9   r   r   Zz_xr  ZdxZdzZcorr_dr0   r0   r1   r     s     ""z"UMAPModel._global_correlation_lossc                 C   s   |  |d |d }|| j S )NrI   )rD   r   )r,   r9   r   r   r0   r0   r1   r     s    z)UMAPModel._parametric_reconstruction_loss)NNFr   rH   Fr   )NNNN)r   )
r   r   r   r   r   r   r   r   r   r   r0   r0   r.   r1   rE   U  s          ) 

5rE   )r   )r   r   )r   r   )N)T)NF)NF)(numpyr>   r   r   warningsr   r   r   Znumbar   rr   Zumap.spectralr   Zsklearn.utilsr   r   rx   Zsklearn.neighborsr	   r  r   ImportErrorr#   r
   r   r   r   r   r   rZ   rQ   rd   r   r   r   r   ZLayerr   ZModelrE   r0   r0   r0   r1   <module>   sZ   
  57 
:
 
5 
DY(
2  
C  
