a
    öDfU                     @  s  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ d	d
lmZ ddlmZ edZ G dd dej!Z"ddddZ#G dd dZ$G dd dZ%G dd dee  Z&G dd de&e  Z'edddddZ(edddddd d!d!d"d#d$dZ(d/dddddd%d d!d!d&d'd(dZ(G d)d* d*Z)G d+d, d,Z*d-d. Z+dS )0    )annotationsdivisionN)defaultdict
namedtuple)cached_property)	CallableGenericIterableListOptionalTypeVarUnioncastoverload   )TMAInfos)get_backendget_cuda_version_key   )InterpretedFunction)driverTc                      s>   e Zd ZdZdd fddZdd Zdd	 Zd
d Z  ZS )DependenciesFinderz
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.
    Nonereturnc                   s*   t    t|d | _|| _d S )Nutf-8)super__init__hashlibsha1encode	hexdigestretglobals)selfr$   src	__class__ _/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/triton/runtime/jit.pyr       s    
zDependenciesFinder.__init__c                 C  s   | j |jd S N)r$   getid)r%   noder)   r)   r*   
visit_Name%   s    zDependenciesFinder.visit_Namec                 C  s`   |  |j}t|tjr&|  |j}q|d u sPt|dddksPt|dddrTd S t||jS )N__name__ Ztritonz.triton)visitvalue
isinstanceast	Attributegetattrendswithattr)r%   r.   lhsr)   r)   r*   visit_Attribute(   s    z"DependenciesFinder.visit_Attributec                 C  s   |  |j}|d u rd S t|r&d S |jrF|jdsBd|jv rFd S t|tsbJ d|j d|j	}t
t|dd}| j| | d| _t| j | _d S )Nztriton.z.triton.z
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisnoinlineFr   )r2   funcinspect	isbuiltin
__module__
startswithr4   JITFunctionr0   	cache_keystrr7   r#   r!   r   r    r"   )r%   r.   r=   Zfunc_cache_keyr<   r)   r)   r*   
visit_Call1   s     
zDependenciesFinder.visit_Call)	r0   r@   __qualname____doc__r   r/   r;   rE   __classcell__r)   r)   r'   r*   r      s
   	r   rD   r   c                 C  s&   t | tr| jS t | tr| S t| S r+   )r4   typer0   rD   repr)tyr)   r)   r*   _normalize_tyG   s
    

rL   c                   @  s^   e Zd ZdZddddddZedd	 Zed
d Zedd Ze	dd Z
e	dd ZdS )KernelParamzRepresents a parameter to a @jit'ed function.

    A parameter is just the name plus metadata; a parameter plus a value is a
    KernelArg.
    intzinspect.Parameterbool)numparamdo_not_specializec                 C  s   || _ || _|| _d S r+   )rP   _paramrR   )r%   rP   rQ   rR   r)   r)   r*   r   V   s    zKernelParam.__init__c                 C  s   | j jS r+   )rS   namer%   r)   r)   r*   rT   [   s    zKernelParam.namec                 C  s(   | j jr| j jtjjkrdS t| j jS Nr1   )rS   
annotationr>   	ParameteremptyrL   rU   r)   r)   r*   rW   _   s    zKernelParam.annotationc                 C  s
   d| j v S )NZ	constexpr)rW   rU   r)   r)   r*   is_constexpre   s    zKernelParam.is_constexprc                 C  s   | j jS r+   )rS   defaultrU   r)   r)   r*   r[   i   s    zKernelParam.defaultc                 C  s   | j jtjjkS r+   )rS   r[   r>   rX   rY   rU   r)   r)   r*   has_defaultm   s    zKernelParam.has_defaultN)r0   r@   rF   rG   r   r   rT   rW   rZ   propertyr[   r\   r)   r)   r)   r*   rM   O   s   



rM   c                   @  s4   e Zd ZdZdd Zedd Zdd Zdd	 Zd
S )	KernelArgz`Represents an argument to a @jit'ed function.

    An argument is a parameter plus a value.
    c                 C  s   || _ || _d S r+   )r3   rQ   )r%   r3   rQ   r)   r)   r*   r   x   s    zKernelArg.__init__c                 C  s   | j jS r+   )rQ   rT   rU   r)   r)   r*   rT   |   s    zKernelArg.namec                 C  s@   | j j}d|v r| jjS |dkr$dS |dkr0dS t| jS d S )NZTensorrO   i1floatfp32)rQ   rW   r3   dtyperB   _key_of)r%   rW   r)   r)   r*   signature_key   s    zKernelArg.signature_keyc                 C  sp   | j jrJ z| j tj dkfW S  ty6   Y n0 t| jtrl| jtj dk| jtj	 dk| jdkfS dS )Nr   r   )F)
rQ   rR   r3   data_ptrrB   divisibilityAttributeErrorr4   rN   divisibility_8rU   r)   r)   r*   specialization_key   s    zKernelArg.specialization_keyN)	r0   r@   rF   rG   r   r]   rT   rd   ri   r)   r)   r)   r*   r^   r   s   
r^   c                   @  s$   e Zd ZU ded< ddddZdS )KernelInterfacer   runr   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)rk   )argskwargsrm   r%   r)   r*   <lambda>       z-KernelInterface.__getitem__.<locals>.<lambda>r)   )r%   rm   r)   rq   r*   __getitem__   s    zKernelInterface.__getitem__N)r0   r@   rF   __annotations__rt   r)   r)   r)   r*   rj      s   
rj   c                      s   e Zd ZdZdZdZedd Zedd Zedd	 Z	ed
d Z
dd Zedd Zdd Zdd Zdd Zd$ddZedd Zdd Zdd Zdd Z fd d!Zd"d# Z  ZS )%rB   N      c                 C  s   t | dr| jS t| trdS t| trVd| kr<| dkr<dS d| krP| dkrPdS d	S n2t| trdd
S | d u rpd S tdt|  d|  d S )Nrb   r_   i   ii32l            l    u64i64ra   zUnsupported type z for )hasattrrb   r4   rO   rN   r`   	TypeErrorrI   argr)   r)   r*   rc      s    



zJITFunction._key_ofc                 C  s$   z
| j jW S  ty   Y dS 0 d S rV   )devicerI   rg   r}   r)   r)   r*   
_device_of   s    
zJITFunction._device_ofc              	   C  s(   z
|   W S  ttfy"   Y dS 0 d S )NF)	is_pinnedrg   r|   r}   r)   r)   r*   _pinned_memory_of   s    
zJITFunction._pinned_memory_ofc                 C  sD   t | dr|  tj dkS t| tr:| d dk| dkfS | d u fS )Nre   r   rv   r   r{   re   rB   rf   r4   rN   r}   r)   r)   r*   _spec_of   s
    

zJITFunction._spec_ofc                   s   ddl m} dd  dd  fddt| j|D }fd	dt| j|D }d
d t| j|D }dd t| j|D }||B }|t|t|t|t|S )Nr   )AttrsDescriptorc                 S  sD   t | dr|  tj dkS t| tr4| tj dkS | d u r@dS dS )Nre   r   TFr   xr)   r)   r*   is_divisible_by_16   s    

z3JITFunction._get_config.<locals>.is_divisible_by_16c                 S  s(   t | tr| tj dkS | d u r$dS dS )Nr   TF)r4   rN   rB   rh   r   r)   r)   r*   is_divisible_by_8   s
    
z2JITFunction._get_config.<locals>.is_divisible_by_8c                   s$   h | ]\}} |r|j s|jqS r)   rR   rP   .0rQ   r~   )r   r)   r*   	<setcomp>   s   z*JITFunction._get_config.<locals>.<setcomp>c                   s$   h | ]\}} |r|j s|jqS r)   r   r   )r   r)   r*   r      s   c                 S  s8   h | ]0\}}t |trt |ts|d kr|js|jqS )r   )r4   rN   rO   rR   rP   r   r)   r)   r*   r      s   "c                 S  s$   h | ]\}}|d u r|j s|jqS r+   r   r   r)   r)   r*   r     rs   )compilerr   zipparamstuple)r%   ro   r   Zdivisible_by_16Zdivisible_by_8
equal_to_1Z	none_argsZids_of_folded_argsr)   )r   r   r*   _get_config   s"    	




zJITFunction._get_configc                 C  s   | d u rdS t | dd }dddddddd	d
ddddddddddd}t| D ]}|||< qVt| t rr| S d||  S )Nz*i8.r_   Zfp8e4nvZfp8e5Zfp8e4b15Z
fp8e4b15x4Zfp16Zbf16ra   Zfp64i8Zi16rx   rz   u8u16u32ry   )rO   Z
float8e4nvZfloat8e5Zfloat8e4b15Zfloat8e4b15x4Zfloat8_e4m3fnZfloat8_e5m2float16Zbfloat16float32float64Zint8int16int32int64Zuint8Zuint16Zuint32Zuint64*)rD   splitlistvaluesr4   )keyZ	dtype_strZtysvr)   r)   r*   _type_of  s4    
zJITFunction._type_ofc                 C  s   t t| j|}|S r+   )dictr   
constexprs)r%   constexpr_key	constantsr)   r)   r*   _make_constants+  s    zJITFunction._make_constantsc                 C  s   t jd u rdS | jj}| jj}ddd t| j|d D }| d| d| d| d	| d
|	 d| d}t|}G dd d}t	||||||||	|
|d
}t j|||||d|i|dddS )NFz, c                 S  s    g | ]\}}|j  d | qS )z: rT   )r   rQ   rK   r)   r)   r*   
<listcomp>B  rs   z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_warp_specialization=z, enable_fp_fusion=]()c                   @  s   e Zd Zdd ZdS )z.JITFunction._call_hook.<locals>.LegacyCompilerc                 S  s   || _ || _d S r+   )modulerT   )r%   r   rT   r)   r)   r*   r   H  s    z7JITFunction._call_hook.<locals>.LegacyCompiler.__init__N)r0   r@   rF   r   r)   r)   r)   r*   LegacyCompilerF  s   r   )
	signaturer   r   	num_warpsnum_ctas
num_stagesenable_warp_specializationenable_fp_fusionextern_libsconfigsr   )r   rJ   fncompileZis_manual_warmupZalready_compiled)
rB   
cache_hookr   r0   r@   joinr   r   rD   r   )r%   r   r   r   r   r   r   r   r   r   r   r   rT   r   Z	arg_reprsrJ   r   rp   r)   r)   r*   
_call_hook/  s8    
 .
zJITFunction._call_hookc                  s  ddl m}m}m} ddlm} d|vs0J dd|vs@J dd|vsPJ d	t }	t|	}
t	 }||}j
|d
< ||fdd| D }jj|i |}|  t|jtjksJ |d usJ t|r|t|j}t|}|d }|dkr|d nd}|dkr(|d nd}dd t|j jD }tdd |D }tdd |D }tdd |D }t |||f}|j|	 vrbjdd |D  f  fdd|D }| D ]$\}}t|rtd| dq·fdd|D }|||	|jjjjj j! r6d S ||| d }|||j"dj|	 |< j|	 | }|sdd |D }|j#||||j|j|j$d |j$d |j$d |j%|
|j&|j'|j(|gt)|j*d |R   |S )Nr   )CompiledKernelr   	ASTSource)CUDABackendZdevice_typez=device_type option is deprecated; current target will be usedr   z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be useddebugc                   s    i | ]\}}| j vr||qS r)   )__dict__)r   kr   )optionsr)   r*   
<dictcomp>r  rs   z#JITFunction.run.<locals>.<dictcomp>r   r   c                 S  s   g | ]\\}}}t ||qS r)   )r^   )r   _	arg_valuerQ   r)   r)   r*   r     rs   z#JITFunction.run.<locals>.<listcomp>c                 s  s   | ]}|j js| V  qd S r+   )rQ   rZ   rd   r   r~   r)   r)   r*   	<genexpr>  rs   z"JITFunction.run.<locals>.<genexpr>c                 s  s   | ]}|j js| V  qd S r+   )rQ   rR   ri   r   r)   r)   r*   r     rs   c                 s  s   | ]}|j jr|jV  qd S r+   rQ   rZ   r3   r   r)   r)   r*   r     rs   c                 S  s   g | ]
}|j qS r)   )r3   r   r)   r)   r*   r     rs   c                   s<   i | ]4}|j js,|j j d  jv s,|jdu r|j j|jqS )r   N)rQ   rZ   rP   r   r3   r   )r   r)   r*   r     s   $zCallable constexpr at index z is not supportedc                   s,   i | ]$}|j js|j j  |jqS r)   )rQ   rZ   rP   r   rc   r3   r   rU   r)   r*   r     s   )targetr   c                 S  s   g | ]}|j js|jqS r)   r   r   r)   r)   r*   r     rs   tensormaps_info)+r   r   r   r   Zcompiler.backends.cudar   r   Zget_current_deviceZget_current_streamZget_current_targetr   Zparse_optionsitemsr   bindapply_defaultslen	argumentsr   callabler   r   r   r   cacher   r|   r   r   r   r   r   r   r   r   rk   Zcluster_dimsZsharedfunctionZlaunch_enter_hookZlaunch_exit_hookZassemble_tensormap_to_argmetadata)r%   rm   rn   ro   rp   r   r   r   r   r   r   r   backendZ
bound_argsZ	grid_sizeZgrid_0Zgrid_1Zgrid_2Zsig_keyZspec_keyr   r   r   ir~   r   r&   kernelr)   )r   r   r%   r*   rk   c  sx    





zJITFunction.runc           	      C  s>  |r|ng }|| _ |j| _|| _t|| _|| _t|d | _g | _	t
| jj D ]2\}}|ot||v pt|j|v }| j	t||| qXtt|| _| j| jdd  | _tt| _d | _d | _tjdddkrdn|| _|| _t | _ dd | j	D | _!d	d | j	D | _"|j#| _#|j$| _$|j%| _%|j| _d S )
Nr   defZTRITON_DEBUG01Tc                 S  s   g | ]
}|j qS r)   r   r   pr)   r)   r*   r     rs   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS r)   )rZ   rP   r   r)   r)   r*   r     rs   )&r   r@   r   versionr>   r   rR   getsourcelinesstarting_line_numberr   	enumerate
parametersr   rT   appendrM   textwrapdedent	getsourcer&   findr   r   r   hashr   osenvironr,   r   r<   r   r   	arg_namesr   rG   r0   __globals__)	r%   r   r   rR   r   r<   r   rQ   Zdnsr)   r)   r*   r     s2    
zJITFunction.__init__c                 C  s@   | j d u r:t| j| jd}||   |jt| j | _ | j S )N)r$   r&   )	r   r   r   r&   r2   parser#   rD   r   )r%   Zdependencies_finderr)   r)   r*   rC     s
    
zJITFunction.cache_keyc                O  s   | j ttj||dd|S )NTrl   )rk   map
MockTensor
wrap_dtype)r%   rm   ro   rp   r)   r)   r*   rn     s    zJITFunction.warmupc                 C  sH   t | j}t|t jsJ t|jdks.J t|jd t jsDJ |S )Nr   r   )r5   r   r&   r4   Moduler   bodyFunctionDef)r%   treer)   r)   r*   r     s
    zJITFunction.parsec                 O  s   t dd S )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)RuntimeError)r%   ro   rp   r)   r)   r*   __call__  s    zJITFunction.__call__c                   s$   t t| || |dkr d | _d S )Nr&   )r   rB   __setattr__r   )r%   rT   r3   r'   r)   r*   r     s    zJITFunction.__setattr__c                 C  s   d| j  d| jj dS )NzJITFunction(:r   )r   r   r0   rU   r)   r)   r*   __repr__  s    zJITFunction.__repr__)NNNN)r0   r@   rF   r   rf   rh   staticmethodrc   r   r   r   r   r   r   r   rk   r   r]   rC   rn   r   r   r   r   rH   r)   r)   r'   r*   rB      s2   



+
4M
)
rB   JITFunction[T]r   r   c                 C  s   d S r+   r)   r   r)   r)   r*   jit  s    r   r   rR   r   r<   zOptional[Iterable[int]]zOptional[bool]zCallable[[T], JITFunction[T]])rR   r   r<   r   c                 C  s   d S r+   r)   r  r)   r)   r*   r     s    zOptional[T]z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])r   rR   r   r<   r   c                  s2   ddd fdd}| dur*|| S |S dS )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r   r   r   c                   s:   t | sJ tdddkr$t| S t|  dS d S )NZTRITON_INTERPRETr   r   r  )r   r   getenvr   rB   r   r   rR   r<   r   r)   r*   	decorator,  s    zjit.<locals>.decoratorNr)   )r   r   rR   r   r<   r  r)   r  r*   r     s    c                   @  s0   e Zd ZdZedd Zdd Zedd ZdS )	r   zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nrb   Ztorch)r(   r0   r@   r   r}   r)   r)   r*   r   K  s    zMockTensor.wrap_dtypec                 C  s
   || _ d S r+   )rb   )r%   rb   r)   r)   r*   r   Q  s    zMockTensor.__init__c                   C  s   dS )Nr   r)   r)   r)   r)   r*   re   T  s    zMockTensor.data_ptrN)r0   r@   rF   rG   r   r   r   re   r)   r)   r)   r*   r   E  s   
r   c                   @  s:   e Zd Zdd Zdd Zdd Zddd	d
Zdd ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r+   )rb   baseZis_cudar   shape)r%   r  rb   r)   r)   r*   r   [  s
    zTensorWrapper.__init__c                 C  s
   | j  S r+   )r  re   rU   r)   r)   r*   re   b  s    zTensorWrapper.data_ptrc                 C  s   | j |S r+   )r  stride)r%   r   r)   r)   r*   r  e  s    zTensorWrapper.striderD   r   c                 C  s   d| j  d| j dS )NzTensorWrapper[r   r   )rb   r  rU   r)   r)   r*   __str__h  s    zTensorWrapper.__str__c                 C  s
   | j  S r+   )r  element_sizerU   r)   r)   r*   r
  k  s    zTensorWrapper.element_sizeN)r0   r@   rF   r   re   r  r	  r
  r)   r)   r)   r*   r  Y  s
   r  c                 C  sV   t | tr*|| jjkr| jS t| j|S n(t| dr>t| |S tdt|  dd S )Nre   zCannot reinterpret a r   )r4   r  r  rb   r{   r|   rI   )Ztensorrb   r)   r)   r*   reinterpreto  s    


r  )N),
__future__r   r   r5   	functoolsr   r>   r   r   collectionsr   r   r   typingr   r   r	   r
   r   r   r   r   r   Z_C.libtriton.tritonr   Zcommon.backendr   r   interpreterr   Zruntime.driverr   r   NodeVisitorr   rL   rM   r^   rj   rB   r   r   r  r  r)   r)   r)   r*   <module>   sP   ,.#,  Y 3