a
    öDf*                     @  s  d dl mZ ddlmZ ddlmZmZ edd Zeedd	d Z	eed
d^dd
Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zedd Zeejddd d!d_d$d%Zeejd&d'd(d`d)d*Zed+d, Zed-d. Zed/d0 Zeejddd d!dad1d2Zeejd3d'd(dbd4d5Zed6d7 Zeed8dcd9d8Zed:d; Zejed<ddd=d>Z ee!d?ded@d?Z"edAdB Z#ee!dCdfdDdCZ$edEdEdEdFdGdHZ%edgdEdEdEdEdIdJdKZ&edEdEdLdMdNZ'edEdEdEdOdPdQZ(dEdRdSdTZ)dEdRdUdVZ*dWdX Z+dYdZ Z,edhdEd[d\d]Z-d"S )i    )annotations   )jit   )coremathc                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :param div: Block
    r    )xdivr   r   e/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/triton/language/standard.pycdiv   s    
r   sigmoidc                 C  s   ddt |    S Nr   )r   expr	   r   r   r   r      s    softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumZfdiv)r	   Zieee_roundingznumZdenr   r   r   r      s    

c                 C  s   t | | jgS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )r   viewnumelr   r   r   r   ravel'   s    r   c                 C  sR   | | | }|| }|| }|| }t || |}|||  }	|| | }
|	|
fS )a  
    Transforms indices of a row-major size_i*size_j matrix into those
    of one where indices are row major for each group of size_j rows.
    For example, for size_i = size_j = 4 and size_g = 2, it will transform
    [[0 , 1 , 2 , 3 ],
     [4 , 5 , 6 , 7 ],
     [8 , 9 , 10, 11],
     [12, 13, 14, 15]]
    into
    [[0, 2,  4 , 6 ],
     [1, 3,  5 , 7 ],
     [8, 10, 12, 14],
     [9, 11, 13, 15]]
    )minimum)ijZsize_iZsize_jZsize_gZijZsize_gjZgroup_idZoff_iZnew_iZnew_jr   r   r   	swizzle2d2   s    r   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zerosS   s    
r    c                 C  s   t | j| jS N)r    r   r   )inputr   r   r   
zeros_like`   s    r#   c                 C  s   t | |S )z
    Computes the element-wise minimum of :code:`x` and :code:`y`.

    :param input: the first input tensor
    :type input: Block
    :param other: the second input tensor
    :type other: Block
    )r   minr	   yr   r   r   r   e   s    
r   c                 C  s   t | |S )z
    Computes the element-wise maximum of :code:`x` and :code:`y`.

    :param input: the first input tensor
    :type input: Block
    :param other: the second input tensor
    :type other: Block
    )r   r   r%   r   r   r   maximumr   s    
r'   c           	      C  sJ   |r| |ko||k }nd}| |kp$|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtZv_retZi_retr   r   r   _argmax_combine   s    r2   c                 C  s   t | |||dS NTr2   r+   r,   r-   r.   r   r   r   _argmax_combine_tie_break_left   s    r6   c                 C  s   t | |||dS r(   r4   r5   r   r   r   _argmax_combine_tie_break_fast   s    r7   return_indicesreturn_indices_tie_break_left)Zreturn_indices_argtie_break_argNTc                 C  s   t | } |r0|r t | |tS t | |tS n^t | jjt dk rt | j rf| 	t j
} n| j stJ | 	t j} t | |tS d S N    )r   _promote_reduction_input_reduce_with_indicesr6   r7   	constexprr   primitive_bitwidthis_floatingtofloat32is_integer_typeint32reducer'   r"   axisr8   r9   r   r   r   r      s    
r   zmaximum indexr/   )r:   c                 C  s   t | |d|d\}}|S NT)r8   r9   )r   r"   rH   r/   _retr   r   r   argmax   s    rM   c           	      C  sJ   |r| |ko||k }nd}| |k p$|}t || |}t |||}||fS r(   r)   )	r+   r,   r-   r.   r/   r0   ltZ	value_retZ	index_retr   r   r   _argmin_combine   s    rO   c                 C  s   t | |||dS r3   rO   r5   r   r   r   _argmin_combine_tie_break_left   s    rQ   c                 C  s   t | |||dS r(   rP   r5   r   r   r   _argmin_combine_tie_break_fast   s    rR   c                 C  s   t | } |r0|r t | |tS t | |tS nXt | jjdk rzt | j r`| 	t j
} n| j snJ | 	t j} t | |tS d S r;   )r   r=   r>   rQ   rR   r?   r   r@   rA   rB   rC   rD   rE   rF   r   rG   r   r   r   r$      s    
r$   zminimum indexc                 C  s   t | |d|d\}}|S rI   )r$   rJ   r   r   r   argmin   s    rS   c                 C  s   | | S r!   r   abr   r   r   _sum_combine   s    rW   r   c                 C  s   t | } t | |tS r!   )r   r=   rF   rW   r"   rH   r   r   r   r      s    
c                 C  s   | |A S r!   r   rT   r   r   r   _xor_combine   s    rY   zxor sumc                 C  s:   | j j}| stdtj| |d} tj| |t||dS )Nz#xor_sum only supported for integers)_builder)rZ   
_generator)typeZscalarZis_int
ValueErrorr   r=   rF   rY   )r"   rH   rZ   r[   Z	scalar_tyr   r   r   xor_sum   s
    r^   cumsumc                 C  s   t | } t | |tS r!   )r   r=   associative_scanrW   rX   r   r   r   r_     s    
c                 C  s   | | S r!   r   rT   r   r   r   _prod_combine  s    ra   cumprodc                 C  s   t | } t | |tS r!   )r   r=   r`   ra   rX   r   r   r   rb     s    
zcore.constexpr)n_dimsidxposc                 C  sr   t || k  t |dkp |dk t dd}|dkr@d| }t d| D ] }|| d | krLt ||}qL|S )Nr   r   r   )r   static_assertZarangestatic_rangeexpand_dims)rc   rd   re   r&   nr   r   r   
_indicator'  s    rj   )rc   rd   re   keep_dimc                 C  s:   t | t||| |d | }|r6t||d | }|S r   )r   rj   r   rh   )r	   rc   rd   re   rk   r&   r   r   r   _take_slice5  s    rl   )rc   rd   c                 C  s   t | ||d}t | ||d}| }|}|}| j rt| jjdkrLtj}	n<t| jjdkrftj}	n"t| jjdkrtj}	nt	d| j
|	dd}|j
|	dd}|j
|	dd}|
|j}t|}
|t||k|A ||A |
A }|j
| jdd}|S )	Nr   r      r<   @   zUnsupported dtypeT)Zbitcast)rl   r   rA   r   r?   r@   int16rE   int64r]   rB   r#   r*   )r	   	desc_maskrc   rd   lrZx_intZl_intZr_intZ	dtype_intZzeror&   r   r   r   _compare_and_swap>  s*    
rt   )rc   active_dims
order_typec                 C  sR   t ||k |dkr$t||d}n|}t |D ]}t| |||d | } q2| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   )r   rf   rj   rg   rt   )r	   rc   ru   rv   rq   r   r   r   r   _bitonic_mergeY  s    rw   )r   c                 C  s.   d}| j }|dkr$|dL }|d7 }q
t|S )Nr   r   valuer   r?   )r   log2ri   r   r   r   _log2m  s    
r{   c                 C  s$   | j }t||d @ dko |dkS )Nr   r   rx   )r   ri   r   r   r   _is_power_of_twov  s    r|   c                 C  s   t | tjr| jS | S r!   )
isinstancer   r?   ry   )or   r   r   _unwrap_if_constexpr{  s    r   c                 C  sF   t | } t |}| d u r$t|d } | t|d ks<J dt| S )Nr   z4Currently only support sorting on the last dimension)r   lenr   r?   )dimr   r   r   r   _get_sort_dim  s    r   )
descendingc                 C  s   t t| jt|| j  t t| j t | dgt| j }t dt| jt|| j d D ]6}t	|t| j||t| jt|| j kr|nd}qft || j} | S )Nr   r   )
r   rf   r|   r   r   r   Zreshaper{   rg   rw   )r	   r   r   r&   r   r   r   r   sort  s    &r   )F)NFT)T)NFT)T)N)NNN)r   )r   )T)Nr   ).
__future__r   Zruntime.jitr    r   r   r   Z_add_math_1arg_docstrr   r   r   r   r    r#   r   r'   r2   r6   r7   Z_add_reduction_docstrr   rM   rO   rQ   rR   r$   rS   rW   r   rY   builtinr^   Z_add_scan_docstrr_   ra   rb   rj   rl   rt   rw   r{   r|   r   r   r   r   r   r   r   <module>   s   



 











	
			