a
    vDf	                     @  sP   d Z ddlmZ ddlZddlZddlmZ dd Z	dd Z
d	d
 Zdd ZdS )uc  Implementation of HyperLogLog

This implements the HyperLogLog algorithm for cardinality estimation, found
in

    Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier.
        "HyperLogLog: the analysis of a near-optimal cardinality estimation
        algorithm". 2007 Conference on Analysis of Algorithms. Nice, France
        (2007)

    )annotationsN)hash_pandas_objectc                 C  s:   t j| dt d> }|jddt}d|jdd S )zGCompute the position of the first nonzero bit for each int in an array.       Zaxis!   )npZbitwise_andouterarangeZcumsumastypeboolsum)abits r   g/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/dataframe/hyperloglog.pycompute_first_bit   s    r   c           	      C  s   d|  krdksn t dd| }d|> }t| dd}t|tjrL|j}|tj}||? }t	|}t
||d}|d	 d
 }|jt|ddjtjS )N      zb should be between 8 and 16r   r   F)index)j	first_bitr   r   r   )Z
fill_value)
ValueErrorr   
isinstancepdZSeriesZ_valuesr   r   Zuint32r   Z	DataFramegroupbymaxZreindexr
   valuesZuint8)	objbZnum_bits_discardedmhashesr   r   ZdfZseriesr   r   r   compute_hll_array   s    r"   c                 C  s(   d|> }|  t| | |} | jddS )Nr   r   r   )Zreshapelenr   )Msr   r    r   r   r   reduce_state6   s    r%   c                 C  s   d|> }t | |}ddd|   }|| d|d    | }|d| k rp|dk }|rp|t||  S |dkrd	t| d
  S |S )Nr   gZӼ?g$C?g       @Zf8g      @r   gAl     l        )r%   r   r   r   loglog1p)r$   r   r    MalphaEVr   r   r   estimate_count?   s    
 r,   )__doc__
__future__r   numpyr   Zpandasr   Zpandas.utilr   r   r"   r%   r,   r   r   r   r   <module>   s   	