a
    Df                     @   s6  d dl mZ d dlZd dlZd dlZd dlZd dlm	Z	m
Z
mZmZmZ d dlmZmZ d dlZd dlmZ eejd dZeejjd Zeejjd Zed	g d
Zejddd Zej dddddf Z!ej"ddd Z#ej Z$e%ededfZ&ejddd Z'ej(dd e)dD ejdZ*ej+ej,-ejddd ejddd ee$fejdddddf ejddd ej.ddd ej/ej/ejddd ejejej/ej/ej/ej/d	dddddd Z0ej+ej,-ejddd ejddd e#e$fej"dddddf ejddd ej.ddd ej/ej/ej"ddd ejejej/ej/ej/ej/d	dddddd Z1ej+ej,-ejddd ejddd ee$fejdddddf ejddd ej.ddd ej/ej/ejddd ejejej/ej/ej/ej/d	dddddd Z2ej+dddej,jddd ej,jddd ej,jej,j/dddd Z3ej+dddddd Z4ej+dej,jej,jd d!dmd$d%Z5ej+dej,6e&ej,jej,jd&d!dnd'd(Z7ej+dej,6e&ej,jej,jd&d!dod)d*Z8ej+dej,jej,jd d!dpd+d,Z9ej+dej,jej,jd d!dqd-d.Z:ej+dd/drd1d2Z;ej+dd/dsd3d4Z<ej+dd/dtd5d6Z=ej+d7ej,>ej,j?ej,jdddd8ej,jej,j?ej,jdddd8ej,j?ej,j.ddd0d8gdej,jej,j@ej,jAd9dd:d;d< ZBej+d=ej,>ej,j?ej,j"dddd8ej,jej,j?ej,j"dddd8ej,j?ej,j.ddd0d8gdej,jej,j@ej,jAd9dd:d>d? ZCej+d@ej,j?ej,jdddd8ej,j?ej,jdddd8ej,j?ej,jdAddd8ej,j?ej,jdddd8ej,j?ej,jdAddd8ej,j?ej,jdddd8ej,j?ej,j.ddd0d8gej,j/ej,j>dBddCdDdE ZDej+dFej,j?ej,jdddd8ej,j?ej,j"dddd8ej,j?ej,j"dAddd8ej,j?ej,jdddd8ej,j?ej,jdAddd8ej,j?ej,jdddd8ej,j?ej,j.ddd0d8gej,j/ej,j>dBddCdGdH ZEej+dddIdJdK ZFej+dLej,j/iddCdMdN ZGdudOdPZHej+dd/dQdR ZIdSdT ZJdUdV ZKdWdX ZLe+ dYdZ ZMej+dd[d\d] ZNd^d_ ZOd ZPdZQdAZRd`ZSdaZTdbdc ZUddde ZVej+dej.ddd ejej/dfd0dgdhdi ZWej+ddLejid0djdkdl ZXdS )v    )warnN)
sparse_mulsparse_diff
sparse_sumarr_intersectsparse_dot_product)tau_rand_intnorm)
namedtupleCg:0yE>   FlatTreehyperplanesoffsetschildrenindices	leaf_sizec                 C   s   g | ]}t |d qS )1)bincount.0i r   a/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/pynndescent/rp_trees.py
<listcomp>*       r      dtype)	n_leftn_righthyperplane_vectorhyperplane_offsetmargindr   
left_indexright_indexT)localsfastmathnogilcachec                 C   s  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }t| | }t| | }	t|tk rd}t|	tk rd}	tj|tjd}
t|D ](}| ||f | | ||f |	  |
|< qt|
}t|tk rd}t|D ]}|
| | |
|< qd}d}t|j d tj	}t|j d D ]}d}t|D ]"}||
| | || |f  7 }qBt|tk rt|d ||< || dkr|d7 }n|d7 }n,|dkrd||< |d7 }nd||< |d7 }q2|dks|dkr8d}d}t|j d D ]6}t|d ||< || dkr,|d7 }n|d7 }q tj|tj
d}tj|tj
d}d}d}t|j d D ]>}|| dkr|| ||< |d7 }n|| ||< |d7 }qn|||
dfS )M  Given a set of ``graph_indices`` for graph_data points from ``graph_data``, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses cosine distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The original graph_data to be split
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r         ?r               )shaper   r	   absEPSnpemptyfloat32rangeint8int32)datar   	rng_statedimr(   r)   leftright	left_norm
right_normr$   r'   hyperplane_normr"   r#   sider   r&   indices_leftindices_rightr   r   r   angular_random_projection_split.   sv    ,

 





rF   c                 C   s  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }d}d}	tj|d tjd}
|
d| }|
|d }t|D ]D}| ||f | ||f A }|| ||f @ ||< || ||f @ ||< qd}t|D ]<}|t|
|  7 }|t| ||f  7 }|	t| ||f  7 }	qd}d}t|j d tj}t|j d D ]}d}t|D ]F}|t|| | || |f @  7 }|t|| | || |f @  8 }q^t|t	k rt|d ||< || dkr|d7 }n|d7 }n,|dkrd||< |d7 }nd||< |d7 }qN|dks*|dkrxd}d}t|j d D ]6}t|d ||< || dkrl|d7 }n|d7 }q@tj|tj
d}tj|tj
d}d}d}t|j d D ]>}|| dkr|| ||< |d7 }n|| ||< |d7 }q|||
dfS )r.   r   r   r0   r1   r    N)r2   r   r5   r6   uint8r8   popcntr9   r3   r4   r:   )r;   r   r<   r=   r(   r)   r>   r?   r@   rA   r$   Zpositive_hyperplane_componentZnegative_hyperplane_componentr'   Z
xor_vectorrB   r"   r#   rC   r   r&   rD   rE   r   r   r   )angular_bitpacked_random_projection_split   st    ,
 $





rI   c                 C   sp  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }d}tj|tjd}	t|D ]H}
| ||
f | ||
f  |	|
< ||	|
 | ||
f | ||
f   d 8 }qtd}d}t|j d tj}t|j d D ]}|}t|D ] }
||	|
 | || |
f  7 }qt|tk r^tt|d ||< || dkrT|d7 }n|d7 }q|dkrzd||< |d7 }qd||< |d7 }q|dks|dkrd}d}t|j d D ]6}t|d ||< || dkr|d7 }n|d7 }qtj|tj	d}tj|tj	d}d}d}t|j d D ]>}|| dkrL|| ||< |d7 }n|| ||< |d7 }q$|||	|fS )aP  Given a set of ``graph_indices`` for graph_data points from ``graph_data``, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses euclidean distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The original graph_data to be split
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r   r0   r           @r1   )
r2   r   r5   r6   r7   r8   r9   r3   r4   r:   )r;   r   r<   r=   r(   r)   r>   r?   r%   r$   r'   r"   r#   rC   r   r&   rD   rE   r   r   r   !euclidean_random_projection_split5  sd    ,
"






rK   )normalized_left_datanormalized_right_datarB   r   )r+   r,   r-   r*   c           "      C   sJ  t ||jd  }t ||jd  }|||k7 }||jd  }|| }|| }| || ||d   }	||| ||d   }
| || ||d   }||| ||d   }t|
}t|}t|tk rd}t|tk rd}|
| tj}|| tj}t|	|||\}}t|}t|tk r*d}t	|jd D ]}|| | ||< q8d}d}t
|jd tj}t	|jd D ]}d}| |||  ||| d   }||||  ||| d   }t||||\}}|D ]}||7 }qt|tk r(t |d ||< || dkr|d7 }n|d7 }n,|dkrDd||< |d7 }nd||< |d7 }qz|dksl|dkrd}d}t	|jd D ]6}t |d ||< || dkr|d7 }n|d7 }qtj
|tjd}tj
|tjd} d}d}t	|jd D ]>}|| dkr|| ||< |d7 }n|| | |< |d7 }qt||f}!|| |!dfS )  Given a set of ``graph_indices`` for graph_data points from a sparse graph_data set
    presented in csr sparse format as inds, graph_indptr and graph_data, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses cosine distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    inds: array
        CSR format index array of the matrix
    indptr: array
        CSR format index pointer array of the matrix
    data: array
        CSR format graph_data array of the matrix
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r   r/   r0   r1   r    )r   r2   r	   r3   r4   astyper5   r7   r   r8   r6   r9   r   r:   vstack)"indsindptrr;   r   r<   r(   r)   r>   r?   	left_inds	left_data
right_inds
right_datar@   rA   rL   rM   hyperplane_indshyperplane_datarB   r'   r"   r#   rC   r   r&   i_indsi_data_mul_datavalrD   rE   
hyperplaner   r   r   &sparse_angular_random_projection_split  s    *  





r_   )r+   r,   r-   c                 C   s  t t||jd  }t t||jd  }|||k7 }||jd  }|| }|| }| || ||d   }	||| ||d   }
| || ||d   }||| ||d   }d}t|	|
||\}}t|	|
||\}}|d }t||||t j\}}|D ]}||8 }qd}d}t 	|jd t j
}t|jd D ]}|}| |||  ||| d   }||||  ||| d   }t||||\}}|D ]}||7 }qt|tk rtt|d ||< || dkr|d7 }n|d7 }n,|dkrd||< |d7 }nd||< |d7 }qB|dks8|dkrd}d}t|jd D ]:}tt|d ||< || dkr~|d7 }n|d7 }qNt j	|t jd}t j	|t jd}d}d}t|jd D ]>}|| dkr|| ||< |d7 }n|| ||< |d7 }qt ||f}||||fS )rN   r   r   r0   rJ   r1   r    )r5   r3   r   r2   r   r   r   rO   r7   r6   r9   r8   r4   r:   rP   )rQ   rR   r;   r   r<   r(   r)   r>   r?   rS   rT   rU   rV   r%   rW   rX   Zoffset_indsoffset_datar]   r"   r#   rC   r   r&   rY   rZ   r[   r\   rD   rE   r^   r   r   r   (sparse_euclidean_random_projection_split6  sz       





ra   )left_node_numright_node_num)r,   r*         c	                 C   s  |j d |kr|dkrt| ||\}	}
}}t| |	|||||||d 	 t|d }t| |
|||||||d 	 t|d }|| || |t|t|f |tjdgtjd nJ|tjdgtjd |tj	  |tdtdf || d S Nr   r   r   r    g      )
r2   rK   make_euclidean_treelenappendr5   r:   arrayr7   infr;   r   r   r   r   point_indicesr<   r   	max_depthleft_indicesright_indicesr^   offsetrb   rc   r   r   r   rg     sP    



rg   )r   rb   rc   c	                 C   s  |j d |kr|dkrt| ||\}	}
}}t| |	|||||||d 	 t|d }t| |
|||||||d 	 t|d }|| || |t|t|f |tjdgtjd nJ|tjdgtjd |tj	  |tdtdf || d S rf   )
r2   rF   make_angular_treerh   ri   r5   r:   rj   r7   rk   rl   r   r   r   rr     sP    



rr   c	                 C   s  |j d |kr|dkrt| ||\}	}
}}t| |	|||||||d 	 t|d }t| |
|||||||d 	 t|d }|| || |t|t|f |tjdgtjd nJ|tjdgtjd |tj	  |tdtdf || d S )Nr   r   r   r       )
r2   rI   make_bit_treerh   ri   r5   r:   rj   rG   rk   rl   r   r   r   rt   0  sP    



rt   c                 C   s"  |j d |	kr|
dkrt| ||||\}}}}t| |||||||||	|
d  t|d }t| |||||||||	|
d  t|d }|| || |t|t|f |tjdgtjd nP|tjdgdggtjd |tj	  |tdtdf || d S rf   )
r2   ra   make_sparse_euclidean_treerh   ri   r5   r:   rj   float64rk   rQ   rR   r;   r   r   r   r   rm   r<   r   rn   ro   rp   r^   rq   rb   rc   r   r   r   ru   t  s\    



ru   c                 C   s"  |j d |	kr|
dkrt| ||||\}}}}t| |||||||||	|
d  t|d }t| |||||||||	|
d  t|d }|| || |t|t|f |tjdgtjd nP|tjdgdggtjd |tj	  |tdtdf || d S rf   )
r2   r_   make_sparse_angular_treerh   ri   r5   r:   rj   rv   rk   rw   r   r   r   rx     sZ    


rx   )r,   Fc                 C   s   t | jd t j}tjjt	}tjjt
}tjjt}tjjt}	|rpt| |||||	|||d	 nt| |||||	|||d	 |}
|	D ]}t||
krtt|}
qt||||	|
}|S )Nr   rn   )r5   aranger2   rO   r:   numbatypedList
empty_listdense_hyperplane_typeoffset_typechildren_typepoint_indices_typerr   rg   rh   r   r;   r<   r   angularrn   r   r   r   r   rm   max_leaf_sizepointsresultr   r   r   make_dense_tree  sD    r   c                 C   s   t |jd d t j}tjjt	}tjjt
}	tjjt}
tjjt}|rxt| |||||	|
||||d nt| |||||	|
||||d |}|D ]}t||krtt|}qt||	|
||S )Nr   r   ry   )r5   rz   r2   rO   r:   r{   r|   r}   r~   sparse_hyperplane_typer   r   r   rx   ru   rh   r   )rQ   rR   Zspdatar<   r   r   rn   r   r   r   r   rm   r   r   r   r   r   make_sparse_tree,  sJ    
r   c                 C   s   t | jd t j}tjjt	}tjjt
}tjjt}tjjt}	|rpt| |||||	|||d	 ntd|}
|	D ]}t||
krtt|}
qt||||	|
}|S )Nr   ry   z,Euclidean bit trees are not implemented yet.)r5   rz   r2   rO   r:   r{   r|   r}   r~   bit_hyperplane_typer   r   r   rt   NotImplementedErrorrh   r   r   r   r   r   make_dense_bit_treeb  s0    r   zb1(f4[::1],f4,f4[::1],i8[::1]))readonly)r&   r=   r'   )r+   r*   r-   c                 C   st   |}|j d }t|D ]}|| | ||  7 }qt|tk r`tt|d }|dkrZdS dS n|dkrldS dS d S Nr   r1   r   )r2   r8   r3   r4   r5   r   r^   rq   pointr<   r&   r=   r'   rC   r   r   r   select_side  s    
r   zb1(u1[::1],f4,u1[::1],i8[::1])c                 C   s   |}|j d }t|D ]8}|t| | || @  7 }|t| ||  || @  8 }qt|tk rtt|d }|dkrzdS dS n|dkrdS dS d S r   )r2   r8   rH   r3   r4   r5   r   r   r   r   r   select_side_bit  s    
r   z<i4[::1](f4[::1],f4[:,::1],f4[::1],i4[:,::1],i4[::1],i8[::1])r1   )noderC   )r*   r-   c                 C   sn   d}||df dkrNt || || | |}|dkr@||df }q||df }q|||df  ||df   S Nr   r   )r   r   r   r   r   r   r<   r   rC   r   r   r   search_flat_tree  s    r   z<i4[::1](u1[::1],u1[:,::1],f4[::1],i4[:,::1],i4[::1],i8[::1])c                 C   sn   d}||df dkrNt || || | |}|dkr@||df }q||df }q|||df  ||df   S r   )r   r   r   r   r   search_flat_bit_tree  s    r   )r+   r-   c           
      C   s   |}| j d }| d|d f dk r,|d8 }q| dd |f tj}| dd |f }|t||||7 }t|tk rt|d }	|	dkrdS dS n|dkrdS dS d S )Nr   r   r0   r1   )r2   rO   r5   r:   r   r3   r4   r   )
r^   rq   
point_inds
point_datar<   r&   Zhyperplane_sizerW   rX   rC   r   r   r   sparse_select_side   s"    

r   r   c           	      C   sp   d}||df dkrPt || || | ||}|dkrB||df }q||df }q|||df  ||df   S r   )r   )	r   r   r   r   r   r   r<   r   rC   r   r   r   search_sparse_flat_tree  s    r   c
              
      s  g }
du rt dt||du r(d}|jtt|dfdtjztj	
rtj|dd fdd	t|D }
n^|rtj|dd fd
d	t|D }
n,tj|dd fdd	t|D }
W n" tttfy   td Y n0 t|
S )zBuild a random projection forest with ``n_trees``.

    Parameters
    ----------
    data
    n_neighbors
    n_trees
    leaf_size
    rng_state
    angular

    Returns
    -------
    forest: list
        A list of random projection trees.
    N
   r      )size	sharedmemn_jobsrequirec              
   3   s4   | ],}t tjjj|  d V  qdS ry   N)joblibdelayedr   r   rR   r;   r   r   r;   r   rn   Z
rng_statesr   r   	<genexpr>T  s   
zmake_forest.<locals>.<genexpr>c                 3   s*   | ]"}t t|  d V  qdS r   )r   r   r   r   r   r   r   r   a  s   c                 3   s*   | ]"}t t|  d V  qdS r   )r   r   r   r   r   r   r   r   l  s   zRandom Projection forest initialisation failed due to recursionlimit being reached. Something is a little strange with your graph_data, and this may take longer than normal to compute.)maxr5   r:   randint	INT32_MIN	INT32_MAXrO   int64scipysparseZisspmatrix_csrr   Parallelr8   RuntimeErrorRecursionErrorSystemErrorr   tuple)r;   Zn_neighborsZn_treesr   r<   Zrandom_stater   r   Zbit_treern   r   r   r   r   make_forest,  s2    




r   c                 C   s   d}t t| jD ]0}| j| d dkr| j| d dkr|d7 }qtj||fdtjd}d}t t| jD ]V}| j| d dks| j| d dkrl| j| jd }| j| ||d |f< |d7 }ql|S )Nr   r   r   r    )r8   rh   r   r5   fullr:   r   r2   )treer   n_leavesr   r   Z
leaf_indexr   r   r   r   get_leaves_from_tree  s    $
$
r   c                    s8   t dd | D  tjddd fdd| D }|S )Nc                 S   s   g | ]
}|j qS r   )r   r   Zrp_treer   r   r   r     r   z.rptree_leaf_array_parallel.<locals>.<listcomp>r   r   r   c                 3   s   | ]}t t| V  qd S N)r   r   r   r   r   r   r   r     s   z-rptree_leaf_array_parallel.<locals>.<genexpr>)r5   r   r   r   )	rp_forestr   r   r   r   rptree_leaf_array_parallel  s
    r   c                 C   s,   t | dkrtt| S tdggS d S )Nr   r   )rh   r5   rP   r   rj   )r   r   r   r   rptree_leaf_array  s    r   c           
   
   C   s   | j | d dk rZ|t| j|  }| ||df< | ||df< | j| |||< ||fS | j| ||< | j| ||< |d ||df< |}	t| |||||d || j | d \}}|d ||	df< t| |||||d || j | d \}}||fS d S r   )r   rh   r   r   r   recursive_convert
r   r   r   r   r   Znode_numZ
leaf_startZ	tree_nodeZleaf_endZold_node_numr   r   r   r     s@    

r   c           
   
   C   s  | j | d dk rZ|t| j|  }| ||df< | ||df< | j| |||< ||fS | j| ||d d d | j| jd f< | j| ||< |d ||df< |}	t| |||||d || j | d \}}|d ||	df< t| |||||d || j | d \}}||fS d S r   )r   rh   r   r   r2   r   recursive_convert_sparser   r   r   r   r     sF    

r   )r-   c                 C   sP   d}d}t t| jD ]0}| j| d dk r>|d7 }|d7 }q|d7 }q||fS r   )r8   rh   r   )r   n_nodesr   r   r   r   r   num_nodes_and_leaves  s    

r   c              
   C   s0  t | \}}d}| jd jdkr\| jd jtjkr<|d }n|}tj||f| jd jd}n4d}|}tj|d|ftjd}d|d d dd d f< tj|tjd}tdtj	|dftjd }	tdtj	|tjd }
|rt
| |||	|
ddt| jd  n t| |||	|
ddt| jd  t|||	|
| jS )NFr   r   r1   r    Tr   )r   r   ndimr!   r5   rG   zerosr7   r:   Zonesr   rh   r   r   r   r   )r   	data_sizeZdata_dimr   r   Z	is_sparseZhyperplane_dimr   r   r   r   r   r   r   convert_tree_format  s,    
r   r      c                 C   s   | j | j| j| j| jf}|S r   r   r   r   r   r   r   denumbaify_tree&  s    r   c                 C   s(   t | t | t | t | t | t }|S r   )r   FLAT_TREE_HYPERPLANESFLAT_TREE_OFFSETSFLAT_TREE_CHILDRENFLAT_TREE_INDICESFLAT_TREE_LEAF_SIZEr   r   r   r   renumbaify_tree2  s    r   )intersectionr   r   )parallelr*   r-   c                 C   sr   d}t |jd D ]H}t|| | j| j| j| j|}t|| |}|t 	|jd dk7 }q|t 	|jd  S )Nr0   r   r   )
r{   Zpranger2   r   r   r   r   r   r   r7   )r   neighbor_indicesr;   r<   r   r   Zleaf_indicesr   r   r   r   
score_tree>  s    
r   )r,   r*   r-   c                 C   s   d}t | j}t|D ]}t|}| j| d }| j| d }|dkr|dkrt| j| jd D ]>}| j| | }	t||	 | j| }
|t|
jd dk7 }qdq|t|jd  S )Nr0   r   r   r   )	rh   r   r8   r{   r:   r   r2   r   r7   )r   r   r   r   r   r   Z
left_childZright_childjidxr   r   r   r   score_linked_treeW  s    

r   )rd   re   )rd   re   )rd   re   )rd   re   )rd   re   )rd   Fre   )rd   Fre   )rd   Fre   )NFFre   )Ywarningsr   localenumpyr5   r{   Zscipy.sparser   Zpynndescent.sparser   r   r   r   r   Zpynndescent.utilsr   r	   r   collectionsr
   	setlocale
LC_NUMERICr4   Ziinfor:   minr   r   r   r   r7   r   rv   r   rG   r   r   Ztypeofr   r   rj   r8   rH   ZnjittypesTupler   Zuint32rF   rI   rK   r_   ra   rg   ZListTyperr   rt   ru   rx   r   r   r   booleanArrayZintpZuint16r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s  "2
r"2
o"2
d

v  <
  ;
  <  D  B
)
   5
	
	




    
T

&
(

 	
