a
    tDf8?                  
   @  s\  d Z ddlmZ ddlZddlmZ ddlZddlm	Z
 ddlmZ ddlmZ ddlmZ zddlZW n. ey Z zedeW Y dZ[n
dZ[0 0 dd	lmZ dd
lmZ eddZeddZeddZeddZeddZeddZeddZeddZdddddddZg dZ eej!dDddZ"eej!dEdd Z#eej!dFd!d"Z$dGd#d$Z%eej!dHd%d&Z&eej!dId'd(Z'eej!dJd)d*Z(eej!dKd+d,Z)eej!dLd-d.Z*eej!dMd/d0Z+eej!d1d2 Z,eej!dNd3d4Z-eej.ed5Z/eej0ed5Z1d6d7 Z2d8d9 Z3d:d; Z4d<d= Z5dOd>d?Z6dPd@dAZ7dQdBdCZ8dS )Rzu
Statistical functions and tests, following scipy.stats.

Some differences

- We don't handle missing values at all

    )annotationsN)
namedtuple)delayed)wrap_elemwise)derived_fromz4`dask.array.stats` requires `scipy` to be installed.)special)distributionsF_onewayResult)Z	statisticZpvalueKurtosistestResultNormaltestResultPower_divergenceResultSkewtestResultTtest_1sampResultTtest_indResultTtest_relResult   g      gUUUUUU?)pearsonzlog-likelihoodzfreeman-tukeyzmod-log-likelihoodZneymanzcressie-read)	ttest_indttest_1samp	ttest_rel	chisquarepower_divergenceskewskewtestkurtosiskurtosistest
normaltestf_onewaymomentTc                 C  s   t j| |dd}t j||dd}| j| }|j| }|rLt||||\}}	nt||||\}}	tt | |t |||	|}
ttdd|
 S )Nr   ddof   Znout)	davarshape_equal_var_ttest_denom_unequal_var_ttest_denom_ttest_ind_from_statsmeanr   r   )abaxisZ	equal_varv1v2n1n2dfdenomres r6   ]/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/dask/array/stats.pyr   Y   s    

r   	propagatec                 C  s   |dkrt d| j| }|d }t| || }tj| |dd}t|t| }tjddd t	||}	W d    n1 s0    Y  t
||	\}	}
ttdd|	|
S 	Nr8   >`nan_policy` other than 'propagate' have not been implemented.r   r!   ignoredivideinvalidr#   r$   )NotImplementedErrorr'   r%   r+   r&   sqrtfloatnperrstater=   _ttest_finishr   r   )r,   Zpopmeanr.   
nan_policynr3   dvr4   tprobr6   r6   r7   r   j   s    
*r   c                 C  s   |dkrt d| j| }t|d }| | tj}tj||dd}t||}t	|t| }	tj
ddd t||	}
W d    n1 s0    Y  t||
\}
}ttdd|
|S r9   )r?   r'   rA   ZastyperB   float64r%   r&   r+   r@   rC   r=   rD   r   r   )r,   r-   r.   rE   rF   r3   rG   rH   dmr4   rI   rJ   r6   r6   r7   r   }   s    
*r   c                 C  s   t | |||ddS )a3  Calculate a one-way chi-square test.

    Please see the docstring for :py:func:`scipy.stats.chisquare` for
    complete information including notes, references, and examples.

    Some inconsistencies with the Dask version may exist.

    The chi-square test tests the null hypothesis that the categorical
    data has the given frequencies.

    Parameters
    ----------
    f_obs : array_like
        Observed frequencies in each category.
    f_exp : array_like, optional
        Expected frequencies in each category.  By default the categories are
        assumed to be equally likely.
    ddof : int, optional
        "Delta degrees of freedom": adjustment to the degrees of freedom
        for the p-value.  The p-value is computed using a chi-squared
        distribution with ``k - 1 - ddof`` degrees of freedom, where `k`
        is the number of observed frequencies.  The default value of `ddof`
        is 0.
    axis : int or None, optional
        The axis of the broadcast result of `f_obs` and `f_exp` along which to
        apply the test.  If axis is None, all values in `f_obs` are treated
        as a single data set.  Default is 0.

    Returns
    -------
    res: Delayed Power_divergenceResult
        An object containing attributes:

        chisq : float or ndarray
            The chi-squared test statistic.  The value is a float if `axis` is
            None or `f_obs` and `f_exp` are 1-D.
        pvalue : float or ndarray
            The p-value of the test.  The value is a float if `ddof` and the
            return value `chisq` are scalars.

    r   )f_expr"   r.   lambda_)r   )f_obsrM   r"   r.   r6   r6   r7   r      s    *r   c           
      C  s*  t |trH|tvr>ttt dd }td|d| t| }n|d u rTd}|d ur^n| j|dd}|dkr| | d | }n`|dkrd	t| | |  }nD|dkrd	t|||   }n(| | | | d  }|d
| |d   }|j	|d}t
||d}ttjj||d | }	ttdd||	S )Nr   r   zinvalid string for lambda_: z. Valid strings are T)r.   Zkeepdimsr#   r          @      ?r.   r$   )
isinstancestr_power_div_lambda_namesreprlistkeys
ValueErrorr+   _xlogysum_countr   r   chi2sfr   )
rO   rM   r"   r.   rN   namesZtermsstatZnum_obspr6   r6   r7   r      s4    

r   c           	      C  st   |dkrt d| j| }t| d|}t| d|}|dk}t| ||d  d}|s^t d|jdkrp| S |S )	Nr8   r:   r#      r   g      ?g        bias=False is not implemented.)r?   r'   r    r%   wherendimmin)	r,   r.   biasrE   rF   m2Zm3zerovalsr6   r6   r7   r      s    

r   c                 C  sL  |dkrt dt| |}t| j| }|dk r@tdt| |t|d |d  d|d    }d	|d d
|  d  |d  |d  |d |d  |d  |d   }dtd|d   }dtdt|  }td|d  }	t	
|dkd|}|t	||	 t	||	 d d   }
ttdd|
dtjt	|
 S )Nr8   r:      zFskewtest is not valid with less than 8 samples; %i samples were given.r   rb         @r#         @   F   rP         	   r   rQ   r   r$   )r?   r   rA   r'   rY   intmathr@   logrB   rd   r   r   r   normr^   abs)r,   r.   rE   b2rF   yZbeta2ZW2deltaalphaZr6   r6   r7   r     s:    
&(r   c              
   C  s   |dkrt d| j| }t| d|}t| d|}|dk}tjdd}	z*t|d||d  }
W tjf i |	 ntjf i |	 0 |st d	|r|
d
 S |
jdkr|
 S |
S d S )Nr8   r:   r#      r   r;   )allrP   rc   rb   )	r?   r'   r    rB   Zseterrr%   rd   re   rf   )r,   r.   fisherrg   rE   rF   rh   Zm4ri   Zolderrrj   r6   r6   r7   r   &  s&    
$
r   c              	   C  s  |dkrt dt| j| }t| |dd}d|d  |d  }d| |d  |d	  |d |d
  |d	  |d   }|| t| }d|| d|  d  |d |d   td|d	  |d  ||d  |d	    }dd| d| tdd|d      }	ddd|	   }
d|td|	d    }t|dk d|}t|dk |
tdd|	  | d}|
| tdd|	   }t|dkd|}|jdkr|d }t	t
dd|dtjt| S )Nr8   r:   F)r   rm   r   g      8@r#   rb         ?rp   rl   rq   rr   g       @rP   g      @g      "@r   c   gUUUUUU?r6   r$   )r?   rA   r'   r   rB   r@   rd   powerre   r   r
   r   rv   r^   rw   )r,   r.   rE   rF   rx   EZvarb2xZ	sqrtbeta1AZterm1r4   Zterm2r|   r6   r6   r7   r   D  s8    6,*&r   c                 C  s\   |dkrt dt| |\}}t| |\}}|| ||  }ttdd|ttjj|dS )Nr8   r:   r#   r$   )r?   r   r   r   r   r   r]   r^   )r,   r.   rE   s_kZk2r6   r6   r7   r   i  s    r   c                  G  s   t | }t| }t |}| }||8 }t|t|t|  }d}| D ] }|t|| tt | 7 }qJ|t|t| 8 }|| }|d }	|| }
|t|	 }|t|
 }|| }t|	|
|}tt	dd||S )Nr   r   r#   r$   )
lenr%   Zconcatenater+   _sum_of_squares_square_of_sumsrA   _fdtrcr   r	   )argsZ
num_groupsZalldataZbignoffsetZsstotZssbnr,   ZsswnZdfbnZdfwnZmsbZmswfrJ   r6   r6   r7   r   v  s$    
r   c                 C  s    |dkrt dtj| ||dS )Nr8   r:   rR   )r?   r%   r    )r,   r    r.   rE   r6   r6   r7   r      s
    r    )sourcec                 C  sJ   || d }|d |  |d |  | }t |d| d|   }||fS )NrP   r   r   )r%   r@   )r/   r1   r0   r2   r3   Zsvarr4   r6   r6   r7   r(     s    r(   c                 C  s   | | }|| }t jddd< || d |d |d  |d |d    }W d    n1 s`0    Y  tt|d|}t|| }||fS )Nr;   r<   r#   r   )rB   rC   r%   rd   isnanr@   )r/   r1   r0   r2   Zvn1Zvn2r3   r4   r6   r6   r7   r)     s    Jr)   c                 C  sX   | | }t jddd t||}W d    n1 s80    Y  t||\}}||fS )Nr;   r<   )rB   rC   r%   r=   rD   )Zmean1Zmean2r4   r3   rG   rI   rJ   r6   r6   r7   r*     s
    *r*   c                 C  s6   t tjjt|| d }|jdkr.|d }||fS )z+Common code between all 3 t-test functions.r#   r   r6   )r   r   rI   r^   r%   absolutere   )r3   rI   rJ   r6   r6   r7   rD     s
    
rD   c                 C  s   |d u r| j S | j| S d S )N)sizer'   )r   r.   r6   r6   r7   r\     s    r\   c                 C  s   t | |  |S )a  
    Squares each element of the input array, and returns the sum(s) of that.
    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis along which to calculate. Default is 0. If None, compute over
        the whole array `a`.
    Returns
    -------
    sum_of_squares : ndarray
        The sum along the given axis for (a**2).
    See also
    --------
    _square_of_sums : The square(s) of the sum(s) (the opposite of
    `_sum_of_squares`).
    r%   r[   )r,   r.   r6   r6   r7   r     s    r   c                 C  s   t | |}|| S )a  
    Sums elements of the input array, and returns the square(s) of that sum.
    Parameters
    ----------
    a : array_like
        Input array.
    axis : int or None, optional
        Axis along which to calculate. Default is 0. If None, compute over
        the whole array `a`.
    Returns
    -------
    square_of_sums : float or ndarray
        The square of the sum over `axis`.
    See also
    --------
    _sum_of_squares : The sum of squares (the opposite of `square_of_sums`).
    r   )r,   r.   r   r6   r6   r7   r     s    r   )r   T)r   r8   )r   r8   )Nr   r   )Nr   r   N)r   Tr8   )r   r8   )r   TTr8   )r   r8   )r   r8   )r   r   r8   )N)r   )r   )9__doc__
__future__r   rt   collectionsr   numpyrB   Z
dask.arrayarrayr%   Zdaskr   Zdask.array.ufuncr   Z
dask.utilsr   Zscipy.statsZscipyImportErrorer   r   r	   r
   r   r   r   r   r   r   rU   __all__statsr   r   r   r   r   r   r   r   r   r   r   r    ZxlogyrZ   Zfdtrcr   r(   r)   r*   rD   r\   r   r   r6   r6   r6   r7   <module>   s|    







	
-,$
"	

