a
    vDf\.                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZ d d	lmZmZ erenejZdd
ddddefddZdddZdddZdS )    )annotationsN)is_list_like	is_scalar)methods)PANDAS_GE_200)	DataFrameSeriesapply_concat_applymap_partitionshas_known_categories)
no_default)Mget_meta_library_Fc              
     s   t  tjtjfr4tj f|||||||d|S d}	d}
t  trjt sXt|	t st|
nt  tr|du r j	dk
 rt|	 j	dk
 rt|	 jjdgdj}nt fd	d
|D st|	t fdd
|D st|
tt j f|||||||d|S )a
  
    Convert categorical variable into dummy/indicator variables.

    Data must have category dtype to infer result's ``columns``.

    Parameters
    ----------
    data : Series, or DataFrame
        For Series, the dtype must be categorical.
        For DataFrame, at least one column must be categorical.
    prefix : string, list of strings, or dict of strings, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : string, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix.`
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `category` dtype will be converted.
    sparse : bool, default False
        Whether the dummy columns should be sparse or not.  Returns
        SparseDataFrame if `data` is a Series or if all columns are included.
        Otherwise returns a DataFrame with some SparseBlocks.

        .. versionadded:: 0.18.2

    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.

    dtype : dtype, default bool
        Data type for new columns. Only a single dtype is allowed.

        .. versionadded:: 0.18.2

    Returns
    -------
    dummies : DataFrame

    Examples
    --------
    Dask's version only works with Categorical data, as this is the only way to
    know the output shape without computing all the data.

    >>> import pandas as pd
    >>> import dask.dataframe as dd
    >>> s = dd.from_pandas(pd.Series(list('abca')), npartitions=2)
    >>> dd.get_dummies(s)
    Traceback (most recent call last):
        ...
    NotImplementedError: `get_dummies` with non-categorical dtypes is not supported...

    With categorical data:

    >>> s = dd.from_pandas(pd.Series(list('abca'), dtype='category'), npartitions=2)
    >>> dd.get_dummies(s)  # doctest: +NORMALIZE_WHITESPACE
    Dask DataFrame Structure:
                       a      b      c
    npartitions=2
    0              bool  bool  bool
    2                ...    ...    ...
    3                ...    ...    ...
    Dask Name: get_dummies, 2 graph layers
    >>> dd.get_dummies(s).compute()  # doctest: +ELLIPSIS
           a      b      c
    0   True  False  False
    1  False   True  False
    2  False  False   True
    3   True  False  False

    See Also
    --------
    pandas.get_dummies
    )prefix
prefix_sepdummy_nacolumnssparse
drop_firstdtypez`get_dummies` with non-categorical dtypes is not supported. Please use `df.categorize()` beforehand to convert to categorical dtype.z`get_dummies` with unknown categories is not supported. Please use `column.cat.as_known()` or `df.categorize()` beforehand to ensure known categoriesNobjectstringcategory)includec                 3  s   | ]}t  | V  qd S N)r   is_categorical_dtype.0cdata c/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/dataframe/reshape.py	<genexpr>       zget_dummies.<locals>.<genexpr>c                 3  s   | ]}t  | V  qd S r   r   r   r!   r#   r$   r%      r&   )
isinstancepdr   r   get_dummiesr   r   NotImplementedErrorr   dtypesany_metaZselect_dtypesr   allr
   r   )r"   r   r   r   r   r   r   r   kwargsZnot_cat_msgZunknown_cat_msgr#   r!   r$   r)      s`    Z	




r)   meanc                 C  sd  t |r|du rtdt |r(|du r0tdt| | sFtdt| | sZtdt|rttdd |D st |stdg d	}t |r||vrtd
ddd |D  tj	| | j
j|d}t |r|}ntjjt||fd|gd}|dv rtt |r0tj|| | jt| j| d}nBtj|t| j| d}|D ]"}	||	 | | j|	 ||	< qNntj|tjt| j| d}|||d}
|dv rt| gtjtj|d|
d}|dv rt| gtjtj|d|
d}|dkr|S |dkr|S |dkr|| S |dkr8t| gtjtj|d|
dS |dkr\t| gtjtj|d|
dS tdS ) a  
    Create a spreadsheet-style pivot table as a DataFrame. Target ``columns``
    must have category dtype to infer result's ``columns``.
    ``index``, ``columns``, and ``aggfunc`` must be all scalar.
    ``values`` can be scalar or list-like.

    Parameters
    ----------
    df : DataFrame
    index : scalar
        column to be index
    columns : scalar
        column to be columns
    values : scalar or list(scalar)
        column(s) to aggregate
    aggfunc : {'mean', 'sum', 'count', 'first', 'last'}, default 'mean'

    Returns
    -------
    table : DataFrame

    See Also
    --------
    pandas.DataFrame.pivot_table
    Nz.'index' must be the name of an existing columnz0'columns' must be the name of an existing columnz 'columns' must be category dtypezs'columns' must have known categories. Please use `df[columns].cat.as_known()` beforehand to ensure known categoriesc                 S  s   g | ]}t |qS r#   )r   )r   vr#   r#   r$   
<listcomp>   r&   zpivot_table.<locals>.<listcomp>z4'values' must refer to an existing column or columns)r0   sumcountfirstlastzaggfunc must be either z, c                 s  s   | ]}d | d V  qdS )'Nr#   )r   xr#   r#   r$   r%      r&   zpivot_table.<locals>.<genexpr>)name)names)r5   r6   )r   r   index)r   r;   )r;   r   values)r3   r0   Zpivot_table_sum)chunkZ	aggregatemetatokenZchunk_kwargs)r4   r0   Zpivot_table_countr3   r4   r0   r5   Zpivot_table_firstr6   Zpivot_table_last)r   
ValueErrorr   r   r   r   r.   joinr(   ZCategoricalIndexcat
categoriesZ
MultiIndexZfrom_productsortedr   r   Indexr-   Zastyper+   npfloat64r	   Z	pivot_sumZ	pivot_aggZpivot_countZpivot_firstZpivot_agg_firstZ
pivot_lastZpivot_agg_last)Zdfr;   r   r<   ZaggfuncZavailable_aggfuncsZcolumns_contentsZnew_columnsr>   Z	value_colr/   Zpv_sumZpv_countr#   r#   r$   pivot_table   s    

"
	
	




	rH   valuec                 C  sP   t jddi, | jtjt|||||ddW  d   S 1 sB0    Y  dS )a  
    Unpivots a DataFrame from wide format to long format, optionally leaving identifier variables set.

    This function is useful to massage a DataFrame into a format where one or more columns are identifier variables
    (``id_vars``), while all other columns, considered measured variables (``value_vars``), are "unpivoted" to the row
    axis, leaving just two non-identifier columns, 'variable' and 'value'.

    Parameters
    ----------
    frame : DataFrame
    id_vars : tuple, list, or ndarray, optional
        Column(s) to use as identifier variables.
    value_vars : tuple, list, or ndarray, optional
        Column(s) to unpivot. If not specified, uses all columns that
        are not set as `id_vars`.
    var_name : scalar
        Name to use for the 'variable' column. If None it uses
        ``frame.columns.name`` or 'variable'.
    value_name : scalar, default 'value'
        Name to use for the 'value' column.
    col_level : int or string, optional
        If columns are a MultiIndex then use this level to melt.

    Returns
    -------
    DataFrame
        Unpivoted DataFrame.

    See Also
    --------
    pandas.DataFrame.melt
    zdataframe.convert-stringFmelt)r>   id_vars
value_varsvar_name
value_name	col_levelr?   N)daskconfigsetr
   r   rJ   r   )framerK   rL   rM   rN   rO   r#   r#   r$   rJ   =  s    )rJ   )NNNr0   )NNNrI   N) 
__future__r   numpyrF   Zpandasr(   Zpandas.api.typesr   r   rP   Zdask.dataframer   Zdask.dataframe._compatr   Zdask.dataframe.corer   r   r	   r
   Zdask.dataframe.utilsr   Zdask.typingr   Z
dask.utilsr   r   boolZuint8Z_get_dummies_dtype_defaultr)   rH   rJ   r#   r#   r#   r$   <module>   s8   
 
      