a
    vDfWe                     @  s  U d dl mZ d dlZd dlmZ d dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lmZmZmZmZmZ d dlmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8m9Z9m:Z: d dl;m<Z<m=Z= d dl>m?Z?m@Z@mAZA G dd deZBeddeBddZCe4DejEdd ZFe4DejGe4DejHe4DejIe4DejJdd ZFe(DejejfdbddZFe(DejdcddZFejejejejKfZLdeMd< zd dlNmOZP eLePjQf7 ZLW n eRy0   Y n0 e-DejfddddZSe/Dejfdd  ZTe"Dejfd!d" ZUe,Dejd#d$ ZVe+DejWe(DejWded%d&ZXe)DeLdfd'd(ZYe+DeZd)d* Z[e+Dejd+d, Z\e+Dejd-d. Z]e+Dejdgd/d0Z^e*Ded1d2 Z_e*Dej`d3d4 Zae1Dejejejejbfdhd6d7Zce#Dejd8d9 Zde#Dejd:d; Zee#Dejd<d= Zfe#Ded>d? Zge#DeZd@dA Zhe&DejejejfdidDdEZiG dFdG dGe<ejZke$DejejejfdjdHdIZle!DejejejfdkdKdLZmeDejejejfdldMdNZne0Dej`ejejejbfdOdP Zoe'DejejejpjqjrejEfdQdR Zse%DejejfdSdT ZteDejejfdmdVdWZue.DejejejfdXdY ZvG dZd[ d[eBZweCxdew  e!yd\e"yd\e$yd\e#yd\e&yd\e+yd\e(yd\e)yd\eyd\e/yd\e0yd\d]d^ Zze*yd_e0yd_d`da Z{dS )n    )annotationsN)Iterable)	is_scalarunion_categoricals)Array)percentile_lookup_percentile)CreationDispatchDaskBackendEntrypoint)PANDAS_GE_220is_any_real_numeric_dtype)	DataFrameIndexScalarSeries_Frame)categorical_dtype_dispatchconcatconcat_dispatchfrom_pyarrow_table_dispatchget_parallel_typegroup_split_dispatchgrouper_dispatchhash_object_dispatchis_categorical_dtype_dispatchmake_meta_dispatchmake_meta_objmeta_lib_from_arraymeta_nonemptypartd_encode_dispatchpyarrow_schema_dispatchto_pandas_dispatchto_pyarrow_table_dispatchtolist_dispatchunion_categoricals_dispatch)make_array_nonemptymake_scalar)_empty_series_nonempty_scalar_scalar_from_dtypeis_float_na_dtypeis_integer_na_dtype)SimpleSizeofsizeof)is_arraylikeis_series_liketypenamec                   @  s   e Zd ZdZedddddZeddd	d
ZeddddZeddddZeddddZ	edddddZ
dS )DataFrameBackendEntrypointzoDask-DataFrame version of ``DaskBackendEntrypoint``

    See Also
    --------
    PandasBackendEntrypoint
    dictint)datanpartitionsc                K  s   t dS )a  Create a DataFrame collection from a dictionary

        Parameters
        ----------
        data : dict
            Of the form {field : array-like} or {field : dict}.
        npartitions : int
            The desired number of output partitions.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.io.from_dict
        NNotImplementedError)r5   r6   kwargs r:   d/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/dataframe/backends.py	from_dict9   s    z$DataFrameBackendEntrypoint.from_dictz
str | list)pathc                 K  s   t dS )a$  Read Parquet files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.parquet.core.read_parquet
        Nr7   r=   r9   r:   r:   r;   read_parquetL   s    z'DataFrameBackendEntrypoint.read_parquet)url_pathc                 K  s   t dS )a  Read json files into a DataFrame collection

        Parameters
        ----------
        url_path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.json.read_json
        Nr7   )r@   r9   r:   r:   r;   	read_json]   s    z$DataFrameBackendEntrypoint.read_jsonc                 K  s   t dS )a  Read ORC files into a DataFrame collection

        Parameters
        ----------
        path : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.orc.core.read_orc
        Nr7   r>   r:   r:   r;   read_orcn   s    z#DataFrameBackendEntrypoint.read_orc)urlpathc                 K  s   t dS )a  Read CSV files into a DataFrame collection

        Parameters
        ----------
        urlpath : str or list
            Source path(s).
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.csv.read_csv
        Nr7   )rC   r9   r:   r:   r;   read_csv   s    z#DataFrameBackendEntrypoint.read_csvstr)patternkeyc                 K  s   t dS )aT  Read HDF5 files into a DataFrame collection

        Parameters
        ----------
        pattern : str or list
            Source path(s).
        key : str
            Group identifier in the store.
        **kwargs :
            Optional backend kwargs.

        See Also
        --------
        dask.dataframe.io.hdf.read_hdf
        Nr7   )rF   rG   r9   r:   r:   r;   read_hdf   s    z#DataFrameBackendEntrypoint.read_hdfN)__name__
__module____qualname____doc__staticmethodr<   r?   rA   rB   rD   rH   r:   r:   r:   r;   r2   1   s   r2   Z	dataframepandasdataframe_creation_dispatch)module_namedefaultZentrypoint_classnamec                 C  s   t | S N)r*   dtyper:   r:   r;   _   s    rV   c                 C  s   | S rS   r:   xr:   r:   r;   rV      s    c                 C  s*   | j d d jdd}|jjdd|_|S Nr   T)deep)iloccopyindex)rX   r]   outr:   r:   r;   rV      s    c                 C  s   | dd j ddS rY   r\   rX   r]   r:   r:   r;   rV      s    ztuple[type, ...]meta_object_typesc                 C  s   dd l }|jj| |dS )Nr   )preserve_index)pyarrowZSchemafrom_pandas)objrb   par:   r:   r;   get_pyarrow_schema_pandas   s    rg   c                 K  s   dd l }|jj| fi |S Nr   )rc   ZTablerd   )re   r9   rf   r:   r:   r;   get_pyarrow_table_from_pandas   s    ri   c                   s>   dd l ddd fdd}|d|}|jf d|i|S )Nr   zpa.DataTypeobject)pyarrow_dtypereturnc                   s4   |    hv r0td jjv r0tdS d S )Nrc   )Zlarge_stringstringpdZStringDtypedtypesvalues)rk   metarf   r:   r;   default_types_mapper   s    
z?get_pandas_dataframe_from_pyarrow.<locals>.default_types_mappertypes_mapper)rc   popZ	to_pandas)rr   tabler9   rs   rt   r:   rq   r;   !get_pandas_dataframe_from_pyarrow   s    
rw   c                 C  s   ddl m} |S )Nr   )PandasBlocks)Zpartdrx   )rV   rx   r:   r:   r;   partd_pandas_blocks   s    ry   c                 C  s   t | S rS   )r)   r`   r:   r:   r;   make_meta_pandas_datetime_tz   s    rz   c                   s@  t | r| jr| dd S  dur*t  t| trTtj fdd|  D  dS t| trt	| dkrt
| d | d  dS t| trt| tstdd	 | D std
|  tj fdd| D dd | D  dS t| ds| durzt| }t|W S  ty   Y n0 t| r.t| S td|  dS )a  Create an empty pandas object containing the desired metadata.

    Parameters
    ----------
    x : dict, tuple, list, pd.Series, pd.DataFrame, pd.Index, dtype, scalar
        To create a DataFrame, provide a `dict` mapping of `{name: dtype}`, or
        an iterable of `(name, dtype)` tuples. To create a `Series`, provide a
        tuple of `(name, dtype)`. If a pandas object, names, dtypes, and index
        should match the desired output. If a dtype or scalar, a scalar of the
        same dtype is returned.
    index :  pd.Index, optional
        Any pandas index to use in the metadata. If none provided, a
        `RangeIndex` will be used.

    Examples
    --------

    >>> make_meta_object([('a', 'i8'), ('b', 'O')])
    Empty DataFrame
    Columns: [a, b]
    Index: []
    >>> make_meta_object(('a', 'f8'))
    Series([], Name: a, dtype: float64)
    >>> make_meta_object('i8')
    1
    Nr   c                   s    i | ]\}}|t || d qS r]   r(   .0cdr|   r:   r;   
<dictcomp>(      z$make_meta_object.<locals>.<dictcomp>r|         c                 s  s$   | ]}t |tot|d kV  qdS )r   N)
isinstancetuplelenr   ir:   r:   r;   	<genexpr>-  r   z#make_meta_object.<locals>.<genexpr>z2Expected iterable of tuples of (name, dtype), got c                   s    i | ]\}}|t || d qS r{   r}   r~   r|   r:   r;   r   0  r   c                 S  s   g | ]\}}|qS r:   r:   r~   r:   r:   r;   
<listcomp>1  r   z$make_meta_object.<locals>.<listcomp>)columnsr]   rU   z'Don't know how to create metadata from )r/   shaper   r   r3   rn   r   itemsr   r   r(   r   rE   all
ValueErrorhasattrnprU   r*   	Exceptionr   r)   	TypeError)rX   r]   rU   r:   r|   r;   make_meta_object  s6    



r   c                 C  s*   t | rt| S tdtt|  dS )zCreate a nonempty pandas object from the given metadata.

    Returns a pandas DataFrame, Series, or Index that contains two rows
    of fake data.
    z>Expected Pandas-like Index, Series, DataFrame, or scalar, got N)r   r)   r   r1   typerW   r:   r:   r;   meta_nonempty_objectE  s    
r   c                 C  s   t | j}t }t }tt| jD ]N}| jd d |f }|j}||vrft| jd d |f |d||< || ||< q$t	j
||tt| jd}| j|_| j|_|S )N)idx)r]   r   )r   r]   r3   ranger   r   r[   rU   _nonempty_seriesrn   r   r   Zarangeattrs)rX   r   Z	dt_s_dictr5   r   Zseriesdtresr:   r:   r;   meta_nonempty_dataframeU  s    
r   c                 C  s  t | }|tju r&tjd| j| jdS t| rD|ddg| j| jdS |tju rd}ztj|d| j| j	| jdW S  t
y   | jd u r|dgnd }tj||d| j| j	| jd Y S 0 n|tju rtjdd| j| jdS |tju rbtdd}ztj|d| j| jdW S  t
y\   tdd}| jd u r<||d gnd }tj||d| j| jd Y S 0 n8|tju rt| jd	krtjt| j| jd
}ntjjdd	g| j| jd}tj|| jdS |tju r*dd | jD }dd | jD }ztj||| jdW S  ty&   tj||| jd Y S 0 np|tju rt | jtjv rbtjt| j| j| jdS | jtkrtjddg| jdS tjddg| j| jdS tdt t |  d S )Nr   )rR   rU   r   
1970-01-01)startperiodsfreqtzrR   z
1970-01-02)r   r   r   rR   Dr   )ordered
categoriesr   rR   c                 S  s   g | ]}t |qS r:   )_nonempty_index)r   lr:   r:   r;   r     r   z#_nonempty_index.<locals>.<listcomp>c                 S  s   g | ]}d d gqS )r   r:   r   r:   r:   r;   r     r   )levelscodesnames)r   labelsr   )rU   rR   TFabz'Don't know how to handle index of type )!r   rn   Z
RangeIndexrR   rU   r   ZDatetimeIndexZ
date_ranger   r   r   ZPeriodIndexZperiod_rangeZTimedeltaIndexr   Ztimedelta64Ztimedelta_rangeCategoricalIndexr   r   Categoricalr   r   
from_codes
MultiIndexr   r   r   r   r&   _lookupboolr1   )r   typr   r5   r   r   r:   r:   r;   r   f  sn    


r   c                 C  s  |d u rt | j}| j}t| dkr8| jd gd }nt|tjr`tjd|j	d}||g}nXt|tj
rt| jjr| jjd gd }| jj}nt | jj}| jjd d }tj||| jjd}nt|rtjdd g|d}nt|rtjdd g|d}nt|tjr,|j}td	|td
|g}nt|tjrXt|j}tj||g|d}n`t|tjrt|j}tj||g|d}n4t|tjv rt|}nt|}tj||g|d}tj|| j|d}| j|_|S )Nr   r   r   )r   r   r   rT   g      ?2000Z2001)rR   r]   ) r   r]   rU   r   r[   r   rn   DatetimeTZDtype	Timestampr   CategoricalDtypecatr   r   r   r,   arrayr+   ZPeriodDtyper   PeriodZSparseDtyper*   subtypeZIntervalDtyper   r&   r   r   r   rR   r   )sr   rU   r5   entryZcatsr   r^   r:   r:   r;   r     sF    




r   c                 C  s
   t | jS rS   )r   _metarW   r:   r:   r;   _meta_lib_from_array_da  s    r   c                 C  s   t S rS   )rn   rW   r:   r:   r;   _meta_lib_from_array_numpy  s    r   Fc                 C  s   t jjj| ||dS )N)sort_categoriesignore_order)rn   apitypesr   )Zto_unionr   r   r:   r:   r;   union_categoricals_pandas  s    r   c                 C  s   t S rS   )r   rV   r:   r:   r;   get_parallel_type_series  s    r   c                 C  s   t S rS   )r   r   r:   r:   r;   get_parallel_type_dataframe  s    r   c                 C  s   t S rS   )r   r   r:   r:   r;   get_parallel_type_index  s    r   c                 C  s
   t | jS rS   )r   r   )or:   r:   r;   get_parallel_type_frame  s    r   c                 C  s   t S rS   )r   r   r:   r:   r;   get_parallel_type_object  s    r   Tutf8c                 C  s   t jj| ||||dS )N)r]   encodinghash_key
categorize)rn   utilZhash_pandas_object)re   r]   r   r   r   r:   r:   r;   hash_object_pandas  s    
r   c                      s"   e Zd Zdd fddZ  ZS )ShuffleGroupResultr4   )rl   c                   s8   t   }|  D ] \}}|t|7 }|t|7 }q|S )ag  
        The result of the shuffle split are typically small dictionaries
        (#keys << 100; typically <= 32) The splits are often non-uniformly
        distributed. Some of the splits may even be empty. Sampling the
        dictionary for size estimation can cause severe errors.

        See also https://github.com/dask/distributed/issues/4962
        )super
__sizeof__r   r.   )selfZ
total_sizekdf	__class__r:   r;   r     s
    	
zShuffleGroupResult.__sizeof__)rI   rJ   rK   r   __classcell__r:   r:   r   r;   r     s   r   c                   s|   t |r|j}tjj|jtjdd|\}}| 	| |
 } fddt|d d |dd  D }ttt||S )NFr_   c                   s8   g | ]0\}}r& j || jd dn j || qS T)Zdrop)r[   reset_index)r   r   r   Zdf2ignore_indexr:   r;   r   *  s   z&group_split_pandas.<locals>.<listcomp>r   r   )r0   rp   rn   Z_libsZalgosZgroupsort_indexerastyper   ZintpZtakeZcumsumzipr   r   )r   r   r   r   Zindexer	locationspartsr:   r   r;   group_split_pandas!  s    
r   outerc                   s  | dd}|dkr,tj f||d|S t d tjrxt d tjrtdt D ]&}t | tjs\ | d |< q\tjt	 |d d j
dS t d tjrb d  dd   }	tfd	d
|	D r
 fddtjD }
tjj|
jdS jftdd
 |	D  }t|}ztjj|jdW S  ty`   t| Y S 0  d  dd  S  d j}t|tjpt|tjotdd
 |jD }|rdd  D }tdd  D }n }d }|rt|d tjrntdd
 |D r|str0|}|d jdk}nzdd |D }t R tdt  |rftdt! tjdd |D fd|i| }W d    n1 s0    Y  t|tj"r| r||  jtjfdd|D fd|i|}|j}|j#D ]}|D ] }|$|}|d ur q.qg }|D ]X}||j%v rV|||  n6tj&t|ddd}tj'(||j)j*|j)j+}|| q6t	||d||< t|s||_q|j,|jd}nXt > tdt  |rtdt! tj||dd}W d    n1 s0    Y  nt|d j-tj.rl|d u rNtdd |D }tj"t	||d||d j
dS t 8 |rtdt! tj|fd|i|}W d    n1 s0    Y  |d ur||_|S )Nr   Fr   )axisjoinr   category)r   r   c                 3  s&   | ]}t |tjo|j jkV  qd S rS   )r   rn   r   nlevels)r   r   )firstr:   r;   r   K  s   z concat_pandas.<locals>.<genexpr>c                   s"   g | ] t  fd dD qS )c                   s   g | ]}|  qS r:   )Z_get_level_valuesr   nr:   r;   r   P  r   z,concat_pandas.<locals>.<listcomp>.<listcomp>)r   )r   )dfsr   r;   r   O  s   z!concat_pandas.<locals>.<listcomp>)r   c                 s  s   | ]}|j V  qd S rS   )Z_values)r   r   r:   r:   r;   r   U  r   c                 s  s   | ]}t |tjV  qd S rS   )r   rn   r   r   r:   r:   r;   r   b  r   c                 S  s   g | ]}|j d dqS r   )r   r   r   r:   r:   r;   r   f  r   c                 S  s   g | ]
}|j qS r:   r|   r   r:   r:   r;   r   g  r   c                 s  s   | ]}t |tjV  qd S rS   )r   rn   r   r   r:   r:   r;   r   p  r   c                 S  s2   g | ]*}t |tjr|n| j|jd idqS )r   r   )r   rn   r   to_framerenamerR   r   r:   r:   r;   r   y  s   
ignorec                 S  s   g | ]}|j d k jqS )r   )ro   r   Tr   r:   r:   r;   r     r   r   c                   s   g | ]}||j   qS r:   )r   intersectionr   )not_catr:   r;   r     r   r   i8rT   r   )r   sortc                 S  s   g | ]
}|j qS r:   r|   r   r:   r:   r;   r     r   )r]   rR   )/ru   rn   r   r   r   r   r   r   r   r   rR   r   r   r   Zfrom_arraysr   rp   r   r   Zconcatenatefrom_tuplesr   appendr]   anyr   r   r   ro   warningscatch_warningssimplefilterRuntimeWarningFutureWarningr   
differencegetr   fullr   r   r   r   r   ZreindexrU   r   )r   r   r   uniformZfilter_warningr   r9   r   r   restZarraysZ	to_concatZ
new_tuplesZ
dfs0_indexZhas_categoricalindexZdfs2indZdfs3Zcat_maskr^   Ztemp_indcolr   sampler   r   r5   r:   )r   r   r   r;   concat_pandas1  s    





*





2


6
r  c                 C  s   t jjj| |dS )Nr   )rn   r   r   r   r   r:   r:   r;   categorical_dtype_pandas  s    r  c                 C  s   |   S rS   tolistre   r:   r:   r;   tolist_numpy_or_pandas  s    r  c                 C  s"   t | dr| j}n| }t|tjS )NrU   )r   rU   r   rn   r   )re   rU   r:   r:   r;   is_categorical_dtype_pandas  s    
r  c                 C  s
   t jjjS rS   )rn   coregroupbyZGrouperr  r:   r:   r;   get_grouper_pandas  s    r  linearc                 C  s   t | ||S rS   r   )r   qinterpolationr:   r:   r;   
percentile  s    r  c                 K  s   | S rS   r:   )r5   r9   r:   r:   r;   to_pandas_dispatch_from_pandas  s    r  c                   @  s.   e Zd ZdZedd ZeddddZdS )	PandasBackendEntrypointzPandas-Backend Entrypoint Class for Dask-DataFrame

    Note that all DataFrame-creation functions are defined
    and registered 'in-place' within the ``dask.dataframe``
    ``io`` module.
    c                 C  s   t S rS   )r"   )clsr:   r:   r;   to_backend_dispatch  s    z+PandasBackendEntrypoint.to_backend_dispatchr   )r5   c                 K  s2   t |jtjtjtjfr|S |j|  fi |S rS   )r   r   rn   r   r   r   Zmap_partitionsr  )r  r5   r9   r:   r:   r;   
to_backend  s    z"PandasBackendEntrypoint.to_backendN)rI   rJ   rK   rL   classmethodr  r   r:   r:   r:   r;   r    s
   
r  cudfc                  C  s   dd l } d S rh   Z	dask_cudfr#  r:   r:   r;   _register_cudf  s    r$  cupyc                    sX   z@dd l  dd l} t| j fdd}t| jdd }W n tyR   Y n0 d S )Nr   c                   s    S rS   r:   rW   r"  r:   r;   meta_lib_from_array_cupy  s    z8_register_cupy_to_cudf.<locals>.meta_lib_from_array_cupyc                 S  s   |   S rS   r  rW   r:   r:   r;   tolist_cupy!  s    z+_register_cupy_to_cudf.<locals>.tolist_cupy)r"  r%  r   registerndarrayr$   ImportError)r%  r'  r(  r:   r&  r;   _register_cupy_to_cudf  s    

r,  )N)N)N)N)N)N)FF)Tr   NT)F)r   r   FTF)NF)r  )|
__future__r   r  collections.abcr   numpyr   rN   rn   Zpandas.api.typesr   r   Zdask.array.corer   Zdask.array.dispatchr   Zdask.array.percentiler	   Zdask.backendsr
   r   Zdask.dataframe._compatr   r   Zdask.dataframe.corer   r   r   r   r   Zdask.dataframe.dispatchr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   Zdask.dataframe.extensionsr&   r'   Zdask.dataframe.utilsr(   r)   r*   r+   r,   Zdask.sizeofr-   r.   Z
dask.utilsr/   r0   r1   r2   rO   r)  rU   rV   r   Z	Timedeltar   ZIntervalr   ra   __annotations__Zscipy.sparsesparsespZspmatrixr+  rg   ri   rw   ry   r   rz   r   rj   r   r   r   r   r   r*  r   r   r   r   r   r   r   r   r   r3   r   r   r  r  r  r   
extensionsZExtensionDtyper  r  r  r  r  Zregister_backendZregister_lazyr$  r,  r:   r:   r:   r;   <module>   s   Ts












A




E
,










       



