a
    öDfV+                     @  s  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ d dlmZ ddlmZ d dlmZ d dlZeG dd dZdZdZeeedZdZdZeeedZdd ZddddZ G dd dZ!G dd dZ"d#dd Z#G d!d" d"Z$dS )$    )annotationsN   )get_env_varsir)OutOfResources)get_cache_manager)driver   InfoFromBackendForTensorMap)CUDABackend)	dataclass)ast_to_ttir)Pathc                   @  sN   e Zd ZU dZded< dZded< dZded< dZded< dd Zd	d
 Z	dS )AttrsDescriptorNsetdivisible_by_16
equal_to_1ids_of_folded_argsdivisible_by_8c                 C  sL   | j d u rt | _ | jd u r$t | _| jd u r6t | _| jd u rHt | _d S N)r   r   r   r   r   self r   e/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/triton/compiler/compiler.py__post_init__   s    



zAttrsDescriptor.__post_init__c                 C  s,   t dd | j D }t|d S )Nc                 S  s   g | ]}t |qS r   )sorted).0xr   r   r   
<listcomp>&       z(AttrsDescriptor.hash.<locals>.<listcomp>utf-8)str__dict__valueshashlibmd5encode	hexdigestr   keyr   r   r   hash%   s    zAttrsDescriptor.hash)
__name__
__module____qualname__r   __annotations__r   r   r   r   r+   r   r   r   r   r      s   

r   z^\s*tt\.func\s+(?:public\s+)?(@\w+)(\((?:%\w+: [\S\s]+(?: \{\S+ = \S+ : \S+\})?(?:, )?)*\))\s*(attributes \{[\S\s]+\})?\s+\{\s*$z=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\))ttirttgirZptxz%\w+: ((?:[^,\s<]+|<[^>]+>)+),?z\.param\s+\.(\w+)c                 C  s*   t d| }|d ur&dt|d S | S )Nz!tt\.ptr<([^,]+)*r	   )researchconvert_type_reprgroup)r   matchr   r   r   r5   C   s    r5   r"   )srcc                 C  st   d}t || }t|dks$J dt|d }t d| }t|dks\t|dks\J d|rp|t|d 9 }|S )Nz&"triton_gpu.num-warps"\s?=\s?(\d+)\s?:r	   z(Expected exactly one match for num_warpsr   z4"triton_gpu.num-warp-groups-per-cta"\s?=\s?(\d+)\s?:zLExpected triton_gpu.num-warp-groups-per-cta attribute to appear 0 or 1 times)r3   findalllenint)r8   Zttgir_num_warps_patternZnum_warps_matches	num_warpsZnum_warp_groups_matchesr   r   r   _get_num_warps_from_ir_strL   s    r=   c                   @  s<   e Zd ZdddddZdd Zdd	 Zd
d Zdd ZdS )	ASTSourceNNone)returnc                 C  sv   || _ d| _|j| _|| _|| _|| _t| jtrNdd t	| j
dD | _| jd u r`t | _| jd u rrt | _d S )Nr0   c                 S  s   i | ]\}}||  qS r   )strip)r   kvr   r   r   
<dictcomp>j   r    z&ASTSource.__init__.<locals>.<dictcomp>,)fnextr,   name	signature	constantsattrs
isinstancer"   	enumeratesplitdictr   )r   rF   rI   rJ   rK   r   r   r   __init__b   s    

zASTSource.__init__c                 C  s@   | j j d| j  d| j  d| j }t|	d
 S )N-r!   )rF   	cache_keyrK   r+   rI   r$   rJ   r%   r&   r'   r(   r)   r   r   r   r+   p   s    ,zASTSource.hashc                 C  s   t | j| |dS )N)options)r   rF   )r   rS   r   r   r   make_irt   s    zASTSource.make_irc                 C  s   dt dd | jjD iS )Nr   c                 S  s   g | ]}t |qS r   )r;   )r   rB   r   r   r   r   y   r    z&ASTSource.metadata.<locals>.<listcomp>)tuplerK   r   r   r   r   r   metadataw   s    zASTSource.metadatac                 C  s   t  S r   rO   r   r   r   r   parse_options{   s    zASTSource.parse_options)NNr,   r-   r.   rP   r+   rT   rV   rX   r   r   r   r   r>   `   s
   r>   c                   @  s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )IRSourcec                 C  s   || _ t|}|jdd  | _| | _tt| j | jtj	}|
d| _|
d}tt| j |}dd t|D | _d S )Nr	   r   c                 S  s   i | ]\}}|t |qS r   )r5   )r   rB   tyr   r   r   rD      r    z%IRSource.__init__.<locals>.<dictcomp>)pathr   suffixrG   	read_textr8   r3   r4   prototype_pattern	MULTILINEr6   rH   r9   arg_type_patternrM   rI   )r   r\   r7   rI   typesr   r   r   rP      s    

zIRSource.__init__c                 C  s   t | jd S )Nr!   )r%   r&   r8   r'   r(   r   r   r   r   r+      s    zIRSource.hashc                 C  s    t  }t | j|}||_|S r   )r   contextZparse_mlir_moduler\   )r   rS   rc   moduler   r   r   rT      s    zIRSource.make_irc                 C  s   t  S r   rW   r   r   r   r   rV      s    zIRSource.metadatac                 C  s   | j dkrdt| jiS t S )Nr1   r<   )rG   r=   r8   rO   r   r   r   r   rX      s    
zIRSource.parse_optionsNrY   r   r   r   r   rZ      s
   rZ   c              
   C  s  |d u rt  }t|}t| ts<t| ts4J dt| } |  }|t|pRt fi |}| 	  d|	  d|	  dt
tt   }t|d }t|}| j d}||pi }	|	|}
|
d urtt|
 }|| |}t||
S d|i|jt |  }t }||| t|  !| j"}| #|}t| |d  D ]@\}}|||}|$|| j d| |	| j d| < |}qf|j$tj%|t&d|dd	|	|< |'||	 || |}t||	|S )
Nz'source must be either AST or a filepathrQ   r!   .jsontarget.)defaultF)binary)(r   Zget_current_targetr   rL   r>   r"   rZ   rX   rO   r+   	frozensetr   r   itemsr%   r&   r'   r(   r   rH   Z	get_groupgetjsonloadsr   r^   Zmake_launcher_stubCompiledKernelr#   rV   Z
add_stageslistkeysindexrG   rT   putdumpsvarsZ	put_group)r8   rf   rS   backendZextra_optionsr*   r+   Zfn_cache_managerZmetadata_filenameZmetadata_groupmetadata_pathrV   so_pathZstagesZfirst_stagerd   rG   Z
compile_irZnext_moduler   r   r   compile   sP    
4




(
ry   c                      s<   e Zd ZdZdZdd Zdd Z fddZdd	 Z  Z	S )
ro   Nc                 C  s  t |}dd l}|jd|}|j|}|j| t|d| _t	
| | _d| jv rpdd | jd D ng | jd< t| jd D ]"\}}t| jd | jd | _q| j D ]\}}	t| ||	 qdd |j|j d	D }
d
d |
D | _| jtj | _d | _d | _d S )Nr   Z__triton_launcherZlaunchtensormaps_infoc                 S  s   g | ]}t |qS r   r
   )r   er   r   r   r      r    z+CompiledKernel.__init__.<locals>.<listcomp>r   c                 S  s   g | ]}|j d kr|qS )re   )r]   r   filer   r   r   r      r    z.*c                 S  s<   i | ]4}|j d d |j d d tjkr0| n| qS )r	   N)r]   r   
binary_ext
read_bytesr^   r|   r   r   r   rD      s   z+CompiledKernel.__init__.<locals>.<dictcomp>)r   importlib.utilutilspec_from_file_locationmodule_from_specloaderexec_modulegetattrrunrm   rn   r^   rV   rM   rU   r   rk   setattrparentglobstemZasmr   r~   kernelrd   function)r   rx   rw   	importlibspecmodi_r*   valZ	asm_filesr   r   r   rP      s,    zCompiledKernel.__init__c                 C  sj   | j d urd S t }tj|d }| j|kr>t| j|dtj| j| j	| j|\| _ | _
| _| _d S )NZmax_shared_memzshared memory)rd   r   get_current_deviceutilsZget_device_propertiessharedr   Zload_binaryrH   r   r   Zn_regsZn_spills)r   deviceZ
max_sharedr   r   r   _init_handles   s    

zCompiledKernel._init_handlesc                   s   |dkr|    t |S )Nr   )r   super__getattribute__)r   rH   	__class__r   r   r      s    zCompiledKernel.__getattribute__c                   s       d d fdd
}|S )N)streamc                   s   t j|}| d u r(t  }t |} j d  d  d jjjd jd jd j	| j
tjtjg|R   d S )Nr   r	   r   )r   Zassemble_tensormap_to_argrz   r   Zget_current_streamr   r<   Znum_ctasZcluster_dimsr   r   ro   launch_enter_hooklaunch_exit_hook)r   argsZargs_expandr   gridr   r   r   runner  s    
&
z*CompiledKernel.__getitem__.<locals>.runner)r   )r   r   r   r   r   r   __getitem__  s    	zCompiledKernel.__getitem__)
r,   r-   r.   r   r   rP   r   r   r   __classcell__r   r   r   r   ro      s   ro   )NN)%
__future__r   r%   rm   Z_C.libtriton.tritonr   r   Zruntime.autotunerr   Zruntime.cacher   Zruntime.driverr   r   r   Zbackends.cudar   Zdataclassesr   Zcode_generatorr   pathlibr   r3   r   Zmlir_prototype_patternZptx_prototype_patternr_   Zmlir_arg_type_patternZptx_arg_type_patternra   r5   r=   r>   rZ   ry   ro   r   r   r   r   <module>   s@   	
/