a
    vDfV                     @  s  d dl mZ d dlZd dlZd dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0m1Z1m2Z2m3Z3m4Z4 e5de6fi Z7dd Z8dd Z9e%e.ddddddZ:dd Z;dd Z<d d! Z=d"d# Z>G d$d% d%ZG d&d' d'eZ?dS )(    )annotationsN)Integral)is_datetime64_any_dtype)Rolling)normalize_arg)tokenize)BlockwiseDepDict)methods)check_axis_keyword_deprecation)Scalar_Frame_get_divisions_map_partitions_get_meta_map_partitions_maybe_from_pandasapply_and_enforcenew_dd_objectpartitionwise_graph)from_pandas)_maybe_align_partitions)insert_meta_param_descriptionis_dask_collectionis_dataframe_likeis_series_like)unpack_collections)HighLevelGraph)
no_default)Mapplyderived_fromfuncnamehas_keywordCombinedOutputc                 C  s   d}| d ur,t |tr,| jd |kr,t||d urTt |trT|jd |krTt|dd | ||fD }t|}t|| d urt| nd |d urt|nd fS )NzqPartition size is less than overlapping window size. Try using ``df.repartition`` to increase the partition size.r   c                 S  s   g | ]}|d ur|qS N ).0pr#   r#   c/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/dataframe/rolling.py
<listcomp>9       z#_combined_parts.<locals>.<listcomp>)
isinstancer   shapeNotImplementedErrorr	   concatr!   len)Z	prev_partZcurrent_partZ	next_partbeforeaftermsgpartscombinedr#   r#   r&   _combined_parts*   s     
r3   c                 O  s   dd |D }|d \}}}dd |D }| |i |}	|d u rDd }t |tjrT|}d }
|jd dkrz|	jd |jd  }
|r|
r||
9 }|d u r|	j|d  S t |tjr|}|r|
r||
9 }|	j||  S )Nc                 S  s   g | ]}t |tr|qS r#   r)   r!   r$   dfr#   r#   r&   r'   F   r(   z!overlap_chunk.<locals>.<listcomp>r   c                 S  s"   g | ]}t |tr|d  n|qS r   r4   r$   argr#   r#   r&   r'   I   r(   )r)   datetime	timedeltar*   iloc)funcr.   r/   argskwargsdfsr2   Zprev_part_lengthZnext_part_lengthoutZ	expansionr#   r#   r&   overlap_chunkE   s(    rB   T)metaenforce_metadatatransform_divisionsalign_dataframesc             
     s  t |st|r"t|s"t|dn|}|f| }ttrDtt trXt  ttj	spt tj	rt
|jjjstdn,ttrdkrt tr dkstd|	dd}
|	dd}t| sJ |
durt g|R i |	}n(dt|  }
t|  g|R i |	}|
 d	| }
|rt|}zt|}W n6 ty } zt| d
|W Y d}~n
d}~0 0 dd |D }t||| |	|tdd |D r|
dft| tdd |D f|	fi}tj|
||d}t||
S g }g }t|||| ||	 fdd}|D ]l}t|trV||}| | | | q&t!|}t"|\}}|r| | |#| n
| | q&i }d}|	$ D ]:\}}t!|}t"|\}}|#| |||< |rd}qt%| dr&dd t&dd D }|'dt(| | fdd} |rTt)t*|
|  g|R |t+d|}n4|r^|	n|}t)t+|
|  g|R i |d|i}tj|
||d}t,||
S )a]	  Apply a function to each partition, sharing rows with adjacent partitions.

    Parameters
    ----------
    func : function
        The function applied to each partition. If this function accepts
        the special ``partition_info`` keyword argument, it will receive
        information on the partition's relative location within the
        dataframe.
    df: dd.DataFrame, dd.Series
    args, kwargs :
        Positional and keyword arguments to pass to the function.
        Positional arguments are computed on a per-partition basis, while
        keyword arguments are shared across all partitions. The partition
        itself will be the first positional argument, with all other
        arguments passed *after*. Arguments can be ``Scalar``, ``Delayed``,
        or regular Python objects. DataFrame-like args (both dask and
        pandas) will be repartitioned to align (if necessary) before
        applying the function; see ``align_dataframes`` to control this
        behavior.
    enforce_metadata : bool, default True
        Whether to enforce at runtime that the structure of the DataFrame
        produced by ``func`` actually matches the structure of ``meta``.
        This will rename and reorder columns for each partition,
        and will raise an error if this doesn't work,
        but it won't raise if dtypes don't match.
    before : int, timedelta or string timedelta
        The rows to prepend to partition ``i`` from the end of
        partition ``i - 1``.
    after : int, timedelta or string timedelta
        The rows to append to partition ``i`` from the beginning
        of partition ``i + 1``.
    transform_divisions : bool, default True
        Whether to apply the function onto the divisions and apply those
        transformed divisions to the output.
    align_dataframes : bool, default True
        Whether to repartition DataFrame- or Series-like args
        (both dask and pandas) so their divisions align before applying
        the function. This requires all inputs to have known divisions.
        Single-partition inputs will be split into multiple partitions.

        If False, all inputs must have either the same number of partitions
        or a single partition. Single-partition inputs will be broadcast to
        every partition of multi-partition inputs.
    $META

    See Also
    --------
    dd.DataFrame.map_overlap
       zMMust have a `DatetimeIndex` when using string offset for `before` and `after`r   z*before and after must be positive integerstokenNparent_metazoverlap--zx. If you don't want the partitions to be aligned, and are calling `map_overlap` directly, pass `align_dataframes=False`.c                 S  s   g | ]}t |tr|qS r#   )r)   r   r5   r#   r#   r&   r'      r(   zmap_overlap.<locals>.<listcomp>c                 s  s   | ]}t |tV  qd S r"   )r)   r   r8   r#   r#   r&   	<genexpr>   r(   zmap_overlap.<locals>.<genexpr>c                 S  s   g | ]}|j d fqS r7   )_namer8   r#   r#   r&   r'      r(   dependenciesc                   s   i }t | \}}|| t|  \}}|| dt|  }tt||  |D ]*\}\}}	}
||f}t||	|
 f||< qTtj	||| gd}t
||S )Nzoverlap-concat-rM   )_get_previous_partitionsupdate_get_nexts_partitionsr   	enumeratezipZ__dask_keys__r3   r   from_collectionsr   )r9   dskZprevs_parts_dskprevsZnexts_parts_dsknextsname_aiprevcurrentnextkeygraph)r/   r.   	divisionsrC   r#   r&   _handle_frame_argument   s    

z+map_overlap.<locals>._handle_frame_argumentTFpartition_infoc                 S  s   i | ]\}}|f||d qS ))numberdivisionr#   )r$   rY   rc   r#   r#   r&   
<dictcomp>  s   zmap_overlap.<locals>.<dictcomp>c                   s    |i |d| iS )Nra   r#   )ra   r>   r?   )	orig_funcr#   r&   r=     s    zmap_overlap.<locals>.func)rN   Z_func_metarN   )-r   r   r   r   r)   strpdZto_timedeltar:   r;   r   index_meta_nonemptyZinferred_type	TypeErrorr   
ValueErrorpopcallabler   r   r   r   r   allr   tupler   rT   r   r   r   appendr   r   extenditemsr    rR   insertr   r   r   rB   r   )r=   r6   r.   r/   rC   rD   rE   rF   r>   r?   namerI   rH   er@   Zlayerr^   Zargs2rN   r`   r9   Zarg2collectionsZkwargs3simplekvra   rU   Zkwargs4r#   )r/   r.   r_   rC   rf   r&   map_overlap`   s    B









r|   c           
      C  s  i }| j }d}dt| | }|rrt|trrg }td| jD ]*}||f}tj||f|f||< || q:|d nt|t	j
rt| j jdd }	||	k rt|g }td| jD ]2}||f}t||d f||f|f||< || q|d ndg| j }||fS )zE
    Helper to get the nexts partitions required for the overlap
    ziPartition size is less than specified window. Try using ``df.repartition`` to increase the partition sizezoverlap-append-rG   Nre   r   )rL   r   r)   r   rangenpartitionsr   headrr   r:   r;   ri   Seriesr_   diffr<   anyrm   _head_timedelta)
r6   r/   rU   df_nameZtimedelta_partition_messageZname_brW   rY   r]   deltasr#   r#   r&   rQ   =  s0    rQ   c                   s  i }| j  dt| | }|rjt|trjdg}t| jd D ]*}||f}tj |f|f||< || q:nLt|t	j
rt| j}| jdd }||k r\|d }	dg}t| jd D ]}||d  }
t|
| |	}|| | }}||kr|dkr|||  }|d }q||f}t fddt||d D  |d f|f||< || qnLdg}t| jd D ]6}||f}t |fg |d f|f||< || qpndg| j }||fS )zH
    Helper to get the previous partitions required for the overlap
    zoverlap-prepend-NrG   re   r   c                   s   g | ]} |fqS r#   r#   )r$   rz   r   r#   r&   r'     r(   z,_get_previous_partitions.<locals>.<listcomp>)rL   r   r)   r   r}   r~   r   tailrr   r:   r;   ri   r   r_   r   r<   r   max_tail_timedelta)r6   r.   rU   rX   rV   rY   r]   Zdivsr   Zpt_zZpt_iZlbfirstjr#   r   r&   rO   b  sP    


rO   c                 C  s   ||j | j  | k  S )zReturn rows of ``next_`` whose index is before the last
    observation in ``current`` + ``after``.

    Parameters
    ----------
    current : DataFrame
    next_ : DataFrame
    after : timedelta

    Returns
    -------
    overlapped : DataFrame
    )rj   r   )r[   Znext_r/   r#   r#   r&   r     s    r   c                   s   t  fdd| D }|S )a4  Return the concatenated rows of each dataframe in ``prevs`` whose
    index is after the first observation in ``current`` - ``before``.

    Parameters
    ----------
    current : DataFrame
    prevs : list of DataFrame objects
    before : timedelta

    Returns
    -------
    overlapped : DataFrame
    c                   s$   g | ]}||j j    k qS r#   )rj   min)r$   rZ   r.   r[   r#   r&   r'     r(   z#_tail_timedelta.<locals>.<listcomp>)r	   r,   )rV   r[   r.   selectedr#   r   r&   r     s    r   c                   @  s>  e Zd ZdZddddefddZdd Zedd	 Ze	d
d Z
dd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeedd Zeed.ddZeed/dd Zeed!d" Zeed#d$ Zeed%d& Zeed0d(d)Zeed*d+ ZeZd,d- ZdS )1r   z%Provides rolling window calculations.NFc                 C  sj   || _ || _|| _|| _|| _|| _|jjf i |   t	| jt
rJd nd| _| jdv rftdt d S )Nfreq)rj   rG   rowszTUsing axis=1 in Rolling has been deprecated and will be removed in a future version.)objwindowmin_periodscenteraxiswin_typerg   rolling_rolling_kwargsr)   int	_win_typewarningswarnFutureWarning)selfr   r   r   r   r   r   r#   r#   r&   __init__  s    	
zRolling.__init__c                 C  s.   | j | j| j| jd}| jtur*| j|d< |S )N)r   r   r   r   r   )r   r   r   r   r   r   r   r?   r#   r#   r&   r     s    

zRolling._rolling_kwargsc                 C  s,   | j dv p*t| jtr | jdkp*| jjdkS )zm
        Indicator for whether the object has a single partition (True)
        or multiple (False).
        )rG   columnsrG   )r   r)   r   r   r   r~   r   r#   r#   r&   _has_single_partition  s
    

zRolling._has_single_partitionc                 O  sJ   t    | jf i |}W d    n1 s,0    Y  t|||i |S r"   )r
   r   getattr)r6   rolling_kwargsrv   r>   r?   r   r#   r#   r&   pandas_rolling_method  s    .zRolling.pandas_rolling_methodc                 O  s   |   }| j| jj||g|R i |}| jrT| jj| j||g|R ||d|S | jrt| jd }| j| d }n*| jdkrt	
| j}d}n| jd }d}t| j| j||||g|R ||d|S )N)rH   rC      rG   r   r   )r   r   r   rk   r   Zmap_partitionsr   r   r   ri   Z	Timedeltar|   )r   method_namer>   r?   r   rC   r.   r/   r#   r#   r&   _call_method  sX    





zRolling._call_methodc                 C  s
   |  dS )Ncountr   r   r#   r#   r&   r   /  s    zRolling.countc                 C  s
   |  dS )Ncovr   r   r#   r#   r&   r   3  s    zRolling.covc                 C  s
   |  dS )Nsumr   r   r#   r#   r&   r   7  s    zRolling.sumc                 C  s
   |  dS )Nmeanr   r   r#   r#   r&   r   ;  s    zRolling.meanc                 C  s
   |  dS )Nmedianr   r   r#   r#   r&   r   ?  s    zRolling.medianc                 C  s
   |  dS )Nr   r   r   r#   r#   r&   r   C  s    zRolling.minc                 C  s
   |  dS )Nr   r   r   r#   r#   r&   r   G  s    zRolling.maxrG   c                 C  s   | j dddS )NstdrG   ddofr   r   r   r#   r#   r&   r   K  s    zRolling.stdc                 C  s   | j dddS )NvarrG   r   r   r   r#   r#   r&   r   O  s    zRolling.varc                 C  s
   |  dS )Nskewr   r   r#   r#   r&   r   S  s    zRolling.skewc                 C  s
   |  dS )Nkurtr   r   r#   r#   r&   r   W  s    zRolling.kurtc                 C  s   |  d|S )Nquantiler   )r   r   r#   r#   r&   r   [  s    zRolling.quantilecythonc              	   C  s(   |pi }|pd}| j d||||||dS )Nr#   r   )rawengineengine_kwargsr>   r?   r   )r   r=   r   r   r   r>   r?   r#   r#   r&   r   _  s    
zRolling.applyc                 O  s   | j d|g|R i |S )Naggr   )r   r=   r>   r?   r#   r#   r&   	aggregateu  s    zRolling.aggregatec              	   C  sJ   dd }|   }| j|d< | j|d< dddd t| |d	D S )
Nc                 S  s    | \}}dddddd}|| S )Nr   rG   r         r   r   r   r   r   r#   )itemrz   r{   _orderr#   r#   r&   order|  s    zRolling.__repr__.<locals>.orderr   r   zRolling [{}],c                 s  s(   | ] \}}|d ur| d| V  qd S )N=r#   )r$   rz   r{   r#   r#   r&   rK     s   z#Rolling.__repr__.<locals>.<genexpr>)r]   )r   r   r   formatjoinsortedrt   )r   r   r   r#   r#   r&   __repr__{  s    


zRolling.__repr__)rG   )rG   )Fr   NNN)__name__
__module____qualname____doc__r   r   r   propertyr   staticmethodr   r   r   
pd_Rollingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r#   r#   r&   r     s`   


(









     
r   c                      sJ   e Zd Zd fdd	Z fddZedddd	d
Z fddZ  ZS )RollingGroupbyNFr   c           	        s   |j | _ |j| _|j}| jd urnt| jtr6| jg}n
t| j}t|jtrZ||j n|	|j || }t
 j||||||d d S )Nr   )_groupby_kwargsZ_slice_groupby_slicer   r)   rh   listZbyrr   rs   superr   )	r   groupbyr   r   r   r   r   r   Zsliced_plus	__class__r#   r&   r     s&    	


zRollingGroupby.__init__c                   s(   t   }|dd dv r$|d |S )Nr   )r   rj   )r   r   getrn   r   r   r#   r&   r     s    

zRollingGroupby._rolling_kwargsgroupby_kwargsgroupby_slicec          	      O  sH   | j f i |}|r|| }|jf i |}t|||i |jddS )Nre   )level)r   r   r   Z
sort_index)	r6   r   rv   r   r   r>   r?   r   r   r#   r#   r&   r     s
    
z$RollingGroupby.pandas_rolling_methodc                   s$   t  j|g|R | j| jd|S )Nr   )r   r   r   r   )r   r   r>   r?   r   r#   r&   r     s    zRollingGroupby._call_method)NNFNr   )	r   r   r   r   r   r   r   r   __classcell__r#   r#   r   r&   r     s        !r   )@
__future__r   r:   r   Znumbersr   Zpandasri   Zpandas.api.typesr   Zpandas.core.windowr   r   Zdask.array.corer   Z	dask.baser   Zdask.blockwiser   Zdask.dataframer	   Zdask.dataframe._compatr
   Zdask.dataframe.corer   r   r   r   r   r   r   r   Zdask.dataframe.ior   Zdask.dataframe.multir   Zdask.dataframe.utilsr   r   r   r   Zdask.delayedr   Zdask.highlevelgraphr   Zdask.typingr   Z
dask.utilsr   r   r   r   r    typerq   r!   r3   rB   r|   rQ   rO   r   r   r   r#   r#   r#   r&   <module>   sH   (
 ]%A L