a
    vDfy9                     @  s|  d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZm Z  d d	l!m"Z"m#Z#m$Z$ d d
l%m&Z& eZ'eZ(dcddZ)ddddZ*e&dddd Z+dd Z,deddZ-dfddZ.dd Z/dd Z0dd Z1dd  Z2d!d" Z3d#d$ Z4d%d& Z5dgd(d)Z6d*d+ Z7d,d- Z8d.d/ Z9d0d1 Z:d2d3 Z;d4d5 Z<d6d7 Z=dhd8d9Z>did:d;Z?djd<d=Z@d>d? ZAd@dA ZBdBdC ZCdDdE ZDdFdG ZEdkdHdIZFdJdK ZGdLdM ZHdNdO ZIdPdQ ZJdRdS ZKdTdU ZLdVdW ZMdXdY ZNdZd[ ZOd\d] ZPd^d_ ZQeeOd`daZReeOdbdaZSeePd`daZTeePdbdaZUeeQd`daZVeeQdbdaZWdS )l    )annotationsN)partial)is_extension_array_dtype)PerformanceWarning)	partition)PANDAS_GE_131PANDAS_GE_140PANDAS_GE_200!check_apply_dataframe_deprecation$check_applymap_dataframe_deprecationcheck_convert_dtype_deprecationcheck_observed_deprecation)	concatconcat_dispatchgroup_split_dispatchhash_object_dispatchis_categorical_dtypeis_categorical_dtype_dispatchtolisttolist_dispatchunion_categoricals)is_dataframe_likeis_index_likeis_series_like)_deprecated_kwargc                 C  s$   |du r| j | S | j ||f S dS )z"
    .loc for known divisions
    N)locdfZiindexercindexer r   c/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/dataframe/methods.pyr   .   s    
r   c                 C  s   | j d d |f S N)iloc)r   r   r   r   r    r"   8   s    r"   Zconvert_dtypec              	   O  sn   t  T t . | j|i |W  d    W  d    S 1 sB0    Y  W d    n1 s`0    Y  d S r!   )r   r
   applyr   argskwargsr   r   r    r#   <   s    r#   c                 O  s:   t    | j|i |W  d    S 1 s,0    Y  d S r!   )r   applymapr$   r   r   r    r'   C   s    r'   c                 C  s>   zt | ||W S  ty8   | dj dd|f  Y S 0 dS )z$
    .loc for unknown divisions
    r   N)r   KeyErrorheadr   r   r   r    try_locH   s    r*   Tc           
      C  s(  t | jdkr| S tr6|dur,tjdtd i }d}n|p<d}d|i}|dkr| jjs|dur|rr| | j|k } n| | j|k } |dur|r| | j|k } n| | j|k  } | S t| ||| }|s|dur|jj|dfi |}|j	d| }|s$|dur$|jj|dfi |}	|j	|	d }|S )	aY  Index slice start/stop. Can switch include/exclude boundaries.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [10, 20, 30, 40, 50]}, index=[1, 2, 2, 3, 4])
    >>> boundary_slice(df, 2, None)
        x
    2  20
    2  30
    3  40
    4  50
    >>> boundary_slice(df, 1, 3)
        x
    1  10
    2  20
    2  30
    3  40
    >>> boundary_slice(df, 1, 3, right_boundary=False)
        x
    1  10
    2  20
    2  30

    Empty input DataFrames are returned

    >>> df_empty = pd.DataFrame()
    >>> boundary_slice(df_empty, 1, 3)
    Empty DataFrame
    Columns: []
    Index: []
    r   NzXThe `kind` argument is no longer used/supported. It will be dropped in a future release.)categoryr   kindleftright)
lenindexr   warningswarnFutureWarningis_monotonic_increasinggetattrZget_slice_boundr"   )
r   startstopZright_boundaryZleft_boundaryr,   Z	kind_optsresultZright_indexZ
left_indexr   r   r    boundary_sliceR   s<     r9   c                 C  s   t |  S r!   )pdZnotnullsumxr   r   r    index_count   s    r>   c                 C  sh   zDt jdd$ t d | | W  d    W S 1 s80    Y  W n tyb   ttj Y S 0 d S )NT)recordalways)r1   catch_warningssimplefilterZeroDivisionErrornpfloat64nan)snr   r   r    mean_aggregate   s    
,rI   c                 C  s(   t | tjst | tr$tj| |dS | S Nr0   
isinstancerD   Zndarraylistr:   Series)Z	array_varr0   r   r   r    wrap_var_reduction   s    rP   c                 C  s(   t | tjst | tr$tj| |dS | S rJ   rL   )Z
array_skewr0   r   r   r    wrap_skew_reduction   s    rQ   c                 C  s(   t | tjst | tr$tj| |dS | S rJ   rL   )Zarray_kurtosisr0   r   r   r    wrap_kurtosis_reduction   s    rR   c                 C  s   t | |g}|j|dS rJ   )r:   r   reindex)Znumeric_varZtimedelta_varcolumnsvarsr   r   r    var_mixed_concat   s    rV   c                 C  sf   t | dksJ g }tdd | D t d}|D ] }|D ]}||vr6|| q6q.tj| ddd|S )Nr   c                 s  s   | ]}|j V  qd S r!   rK   ).0r=   r   r   r    	<genexpr>       z%describe_aggregate.<locals>.<genexpr>)key   F)axissort)r/   sortedappendr:   r   rS   )valuesnamesZvalues_indexesZidxnamesnamer   r   r    describe_aggregate   s    rc   Fc                 C  s<  t | dksJ | \}}}}}}	t|r6t| }
nt|}
|rxt|}t|}t|}t|	}	|dd }|rt|}t|	}	|dd }|r|
||gddgd}n|
||||gg dd}d	d
 t|j	D |_	t|r|
t|kr| }|
|	gdgd}t
|||gdd}t|r8||_|S )N   c                 S  s
   t | S r!   )r:   to_timedeltar<   r   r   r    <lambda>   rY   z,describe_numeric_aggregate.<locals>.<lambda>c                 S  s
   t | S r!   )r:   to_datetimer<   r   r   r    rf      rY   countminrK   )rh   meanstdri   c                 S  s   g | ]}|d  ddqS )d   g%r   )rW   lr   r   r    
<listcomp>   rY   z.describe_numeric_aggregate.<locals>.<listcomp>maxF)r]   )r/   r   typeZto_framer:   re   r#   rg   r   r0   r   rb   )statsrb   Zis_timedelta_colZis_datetime_colrh   rj   rk   ri   qrq   typZpart1Zpart3r8   r   r   r    describe_numeric_aggregate   s4    






rv   c                 C  sZ  t | }|dk}|dk}|s$|s$J |r4| \}}}n| \}}}}}	t |dkrddg}
ddg}d }|
tjtjg |ddg t}tj|
|||d}|S |jd }|jd }g d	}||g}|r<|j	}t
|}|jd ur|d ur||}n
||}tj
||d
}tj
|	|d
}|ddg |||||g n|||g tj|||dS )N      r   rh   uniquetopfreq)r0   dtyperb   )ry   rh   rz   r{   )tzfirstlast)r0   rb   )r/   extendrD   rF   objectr:   rO   r0   r"   r}   	TimestamptzinfoZ
tz_convertZtz_localize)rs   rb   Zargs_lenZis_datetime_columnZis_categorical_columnZnuniquerh   Ztop_freqZmin_tsZmax_tsdatar0   r|   r8   rz   r{   r`   r}   r~   r   r   r   r    describe_nonnumeric_aggregate   s@    



r   c                 C  s   |du r|S | ||S dS )zApply aggregation function within a cumulative aggregation

    Parameters
    ----------
    aggregate: function (a, a) -> a
        The aggregation function, like add, which is used to and subsequent
        results
    x:
    y:
    Nr   )Z	aggregater=   yr   r   r    _cum_aggregate_apply/  s    r   c                 C  s$   | d u r|S |d u r| S | | S d S r!   r   r=   r   r   r   r    cumsum_aggregate@  s
    r   c                 C  s$   | d u r|S |d u r| S | | S d S r!   r   r   r   r   r    cumprod_aggregateI  s
    r   c                 C  sF   t | st| r2| j| |k |  B || jd dS | |k r>| S |S d S Nr[   r\   r   r   whereisnullndimr   r   r   r    cummin_aggregateR  s    "r   c                 C  sF   t | st| r2| j| |k|  B || jd dS | |kr>| S |S d S r   r   r   r   r   r    cummax_aggregateY  s    "r   c                 G  s   t td|}tt|t| j@ o(t }| jt|d} t : tj	ddt
d | D ]\}}|| |< q\W d    n1 s0    Y  | S )N   )deepignorez DataFrame is highly fragmented *)messager+   )dictr   boolsetrT   r   copyr1   rA   filterwarningsr   items)r   pairsr   rb   valr   r   r    assign`  s    
(r   c                 C  s*   |   }t|s&t|s&tj||d}|S )N)rb   )ry   r   r   r:   rO   )r=   Zseries_nameoutr   r   r    ry   r  s    ry   c                 K  sB   t  ( | jf ddi| W  d    S 1 s40    Y  d S )Nlevelr   )r   groupbyr;   )r=   r]   	ascendinggroupby_kwargsr   r   r    value_counts_combine{  s    r   c                 K  sN   t | fi |}|r,||d ur"|n|  }|r<|j|d}trJ|rJd|_|S )N)r   Z
proportion)r   r;   Zsort_valuesr	   rb   )r=   total_lengthr]   r   	normalizer   r   r   r   r    value_counts_aggregate  s    r   c                 C  s   | j S r!   )nbytesr<   r   r   r    r     s    r   c                 C  s   | j S r!   )sizer<   r   r   r    r     s    r   c                 C  s   | j }t|r|t}|S r!   )r`   r   astyper   )r   r`   r   r   r    r`     s    
r`   c                 C  s,   t j|}t| dkr(| j|||dS | S )Nr   )Zrandom_statefracreplace)rD   randomZRandomStater/   sample)r   stater   r   rsr   r   r    r     s    r   c                 C  s    | j |dd} | j|| _| S r   )ZdroprT   r   )r   rT   r|   r   r   r    drop_columns  s    r   c                 C  s@   |rt | | }n|  }|r<| jjdd r<td|S )Nr   r   zAll NaN partition encountered in `fillna`. Try using ``df.repartition`` to increase the partition size, or specify `limit` in `fillna`.)r5   Zfillnar   r`   allany
ValueError)r   methodcheckr   r   r   r    fillna_check  s    r   c                 C  s   | j ddd S Nr   F)r   observed)r   r;   r   r   r   r    	pivot_agg  s    r   c                 C  s   | j ddd S r   )r   r~   r   r   r   r    pivot_agg_first  s    r   c                 C  s   | j ddd S r   )r   r   r   r   r   r    pivot_agg_last  s    r   c              	   C  s   t j| |||ddddS )Nr;   Fr0   rT   r`   ZaggfuncZdropnar   r:   pivot_tabler   r0   rT   r`   r   r   r    	pivot_sum  s    r   c              	   C  s    t j| |||ddddtjS )Nrh   Fr   )r:   r   r   rD   rE   r   r   r   r    pivot_count  s    r   c              	   C  s   t j| |||ddddS )Nr~   Fr   r   r   r   r   r    pivot_first  s    r   c              	   C  s   t j| |||ddddS )Nr   Fr   r   r   r   r   r    
pivot_last  s    r   c                 C  s   |   } || _| S r!   )r   r0   )r   indr   r   r    assign_index  s    r   c                 C  sJ   | j rd }n,t| r| n| j}t| ||d |d gg}tj|g ddS )Nr   	monotonicr~   r   )r   rT   )emptyr   r"   r5   r:   	DataFrame)r=   propr   r   r   r    _monotonic_chunk  s
    r   c                 C  sf   | j rd }nHt| ddg   }| d  o:t||}||jd |jd gg}tj|g ddS )Nr~   r   r   r   r   r   )rT   )	r   r:   rO   to_numpyravelr   r5   r"   r   )concatenatedr   r   rG   Zis_monotonicr   r   r    _monotonic_combine  s    r   c                 C  s0   t | ddg   }| d  o.t||S )Nr~   r   r   )r:   rO   r   r   r   r5   )r   r   rG   r   r   r    _monotonic_aggregate  s    r   r4   )r   Zis_monotonic_decreasing)N)N)N)TTN)NFF)N)TF)NTFF)T)X
__future__r   r1   	functoolsr   numpyrD   Zpandasr:   Zpandas.api.typesr   Zpandas.errorsr   Ztlzr   Zdask.dataframe._compatr   r   r	   r
   r   r   r   Zdask.dataframe.dispatchr   r   r   r   r   r   r   r   r   Zdask.dataframe.utilsr   r   r   Z
dask.utilsr   Zhash_dfZgroup_splitr   r"   r#   r'   r*   r9   r>   rI   rP   rQ   rR   rV   rc   rv   r   r   r   r   r   r   r   ry   r   r   r   r   r`   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zmonotonic_increasing_chunkZmonotonic_decreasing_chunkZmonotonic_increasing_combineZmonotonic_decreasing_combineZmonotonic_increasing_aggregateZmonotonic_decreasing_aggregater   r   r   r    <module>   s   $,






K	 
*1		
	
 
	


