a
    uDfX                     @  s   d dl mZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ dddZdd	d
Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    )annotationsN)partial)islice)Bagc                 C  s   t | ||d}|t|S )a  Chooses k unique random elements from a bag.

    Returns a new bag containing elements from the population while
    leaving the original population unchanged.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.sample(b, 3).compute())  # doctest: +SKIP
    [1, 3, 5]
    
populationksplit_every)_samplemap_partitions_finalize_sampler   r   r	   res r   \/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/bag/random.pysample   s    r      c                 C  s   t | ||d}|t|S )a7  
    Return a k sized list of elements chosen with replacement.

    Parameters
    ----------
    population: Bag
        Elements to sample.
    k: integer, optional
        Number of elements to sample.
    split_every: int (optional)
        Group partitions into groups of this size while performing reduction.
        Defaults to 8.

    Examples
    --------
    >>> import dask.bag as db
    >>> from dask.bag import random
    >>> b = db.from_sequence(range(5), npartitions=2)
    >>> list(random.choices(b, 3).compute())  # doctest: +SKIP
    [1, 1, 5]
    r   )_sample_with_replacementr   r   r   r   r   r   choices(   s    r   c                 C  s   g }g }d}| D ]4}|\}}| | ||7 }t|}	|||	f q||krZ|sZ||fS g }
|D ]*\}}	|	dkrb||	|  }|
|g|	 7 }
qb|rtjnt}|||
|d|fS )aq  
    Reduce function used on the sample and choice functions.

    Parameters
    ----------
    reduce_iter : iterable
        Each element is a tuple coming generated by the _sample_map_partitions function.
    replace: bool
        If True, sample with replacement. If False, sample without replacement.

    Returns a sequence of uniformly distributed samples;
    r   )r   weightsr   )extendlenappendrndr   &_weighted_sampling_without_replacement)reduce_iterr   replaceZns_kssniZs_iZn_iZk_ipZp_iZsample_funcr   r   r   _sample_reduceB   s$    
r!   c                   s4   fddt tD } fddt||D S )zk
    Source:
        Weighted random sampling with a reservoir, Pavlos S. Efraimidis, Paul G. Spirakis
    c                   s&   g | ]}t t  |  |fqS r   )mathlogr   random).0r   )r   r   r   
<listcomp>m       z:_weighted_sampling_without_replacement.<locals>.<listcomp>c                   s   g | ]} |d   qS )r   r   )r%   x)r   r   r   r&   n   r'   )ranger   heapqnlargest)r   r   r   eltr   )r   r   r   r   h   s    r   c                 C  s4   |dk rt d| jtt|dtt|ddt|dS )Nr   z(Cannot take a negative number of samplesr   Fr   r   Zout_typer	   )
ValueError	reductionr   _sample_map_partitionsr!   r   r   r   r   r   r
   q   s    
r
   c                 C  s    | d }t ||k rtd|S )Nr   zSample larger than population)r   r0   )r   r   r   r   r   r   r   |   s    r   c           	      C  s   g d }}t | }t||D ]}|| |d7 }qttt | }|d t| }t	||D ]N\}}||kr||t
|< |ttt | 9 }|t|7 }|d7 }qf||fS )z
    Reservoir sampling strategy based on the L algorithm
    See https://en.wikipedia.org/wiki/Reservoir_sampling#An_optimal_algorithm
    r   r   )iterr   r   r"   expr#   r   r$   
_geometric	enumerate	randrange)	r   r   	reservoirstream_lengthstreamewnxtr   r   r   r   r2      s    



r2   c                 C  s$   | j tt|dtt|ddt|dS )Nr-   Tr.   r/   )r1   r   '_sample_with_replacement_map_partitionsr!   r   r   r   r   r   r      s    
r   c                   s   t | }t|  fddt|D d }}dd t|D }dd |D }t|}t|dD ]n\} ||krt|D ]D\}	}
|
|krv ||	< ||	  t 9  < ||	  t||	 7  < qvt|}|d7 }q^||fS )z
    Reservoir sampling with replacement, the main idea is to use k reservoirs of size 1
    See Section Applications in http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
    c                   s   g | ]} qS r   r   r%   _r;   r   r   r&      r'   z;_sample_with_replacement_map_partitions.<locals>.<listcomp>r   c                 S  s   g | ]}t  qS r   )r   r$   r?   r   r   r   r&      r'   c                 S  s   g | ]}t |qS r   )r5   )r%   Zwir   r   r   r&      r'   )r3   nextr)   minr6   r   r$   r5   )r   r   r:   r8   r9   r<   r=   Zmin_nxtr   jr   r   rA   r   r>      s     
r>   c                 C  s(   t ttddtd|   d S )Nr   r   )intr"   r#   r   uniform)r    r   r   r   r5      s    r5   )N)r   N)
__future__r   r*   r"   r$   r   	functoolsr   	itertoolsr   Zdask.bag.corer   r   r   r!   r   r
   r   r2   r   r>   r5   r   r   r   r   <module>   s    

&		