a
    DfA                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ d dlmZmZmZ d dl m!Z! G dd dejZ"e #de j$Z%G dd deZ&G dd deZ'G dd deZ(dS )    N)cached_property)ir)cgutilsconfig	debuginfoitanium_manglertypestypingutils)
Dispatcher)BaseContext)BaseCallConvMinimalCallConv)	cmathdecl)	datamodel   )nvvm)codegen	nvvmutilsufuncs)cuda_data_managerc                       s$   e Zd Zdd Z fddZ  ZS )CUDATypingContextc                 C   s   ddl m}m}m}m} ddlm}m} | |j	 | |j	 | |j	 | t
j	 | |j	 | |j	 | |j d S )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl
cffi_utils) r   r   r   r   numba.core.typingr   r   install_registryregistryr   Ztyping_registry)selfr   r   r   r   r   r    r#   ^/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/numba/cuda/target.pyload_additional_registries   s    z,CUDATypingContext.load_additional_registriesc                    s   ddl m} t|trt||sz
|j}W nh ty   |jsFtd|j	 }d|d< |
dd|d< |
dd|d< ||j|}||_|}Y n0 tt| |S )	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTZdevicedebugFopt)Znumba.cuda.dispatcherr&   
isinstancer   Z_CUDATypingContext__dispatcherAttributeErrorZ_can_compile
ValueErrortargetoptionscopygetZpy_funcsuperr   resolve_value_type)r"   valr&   r,   Zdisp	__class__r#   r$   r0   #   s"    



z$CUDATypingContext.resolve_value_type)__name__
__module____qualname__r%   r0   __classcell__r#   r#   r2   r$   r      s   r   z	[^a-z0-9]c                       s   e Zd ZdZdZd* fdd	Zedd Zedd Zd	d
 Z	dd Z
dd Zdd Zedd Zedd Zedd ZdddddZd+ddZdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Z  ZS ),CUDATargetContextTcudac                    s    t  || ttj| _d S N)r/   __init__r   chainr   Zdefault_managerdata_model_manager)r"   Z	typingctxtargetr2   r#   r$   r;   G   s    zCUDATargetContext.__init__c                 C   s   t jS r:   )r   	DIBuilderr"   r#   r#   r$   r?   M   s    zCUDATargetContext.DIBuilderc                 C   s   dS )NFr#   r@   r#   r#   r$   enable_boundscheckQ   s    z$CUDATargetContext.enable_boundscheckc                 C   s   | j |S r:   )_internal_codegenZ_create_empty_module)r"   namer#   r#   r$   create_moduleW   s    zCUDATargetContext.create_modulec                 C   s   t d| _d | _d S )Nznumba.cuda.jit)r   ZJITCUDACodegenrB   _target_datar@   r#   r#   r$   initZ   s    zCUDATargetContext.initc                 C   s   ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddlm} ddlm} d	d
lm}m}m}m}m} ddlm} | |j | |
j | |j | |j | |	j | |j | |j d S )Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)cffiimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )ndarray)Znumba.cpythonrG   rH   rI   rJ   rK   rL   rM   rN   rO   Z
numba.miscrP   Znumba.nprQ   rR   r   rS   rT   rU   rV   r   Znumba.np.unsaferW   r    r!   Zimpl_registry)r"   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   r   rW   r#   r#   r$   r%   ^   s     z,CUDATargetContext.load_additional_registriesc                 C   s   | j S r:   )rB   r@   r#   r#   r$   r   v   s    zCUDATargetContext.codegenc                 C   s"   | j d u rtt j| _ | j S r:   )rE   llZcreate_target_datar   ZNVVMZdata_layoutr@   r#   r#   r$   target_datay   s    
zCUDATargetContext.target_datac                    s*   ddl m  d}t fdd|D }|S )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   r9   )Z	threadIdxZblockDimZblockIdxZgridDimZlaneidZwarpsizec                    s   g | ]}t  |fqS r#   )r   Module).0ZncrZ   r#   r$   
<listcomp>   s   z;CUDATargetContext.nonconst_module_attrs.<locals>.<listcomp>)Znumbar9   tuple)r"   Z	nonconstsZnonconsts_with_modr#   rZ   r$   nonconst_module_attrs   s    z'CUDATargetContext.nonconst_module_attrsc                 C   s   t | S r:   )CUDACallConvr@   r#   r#   r$   	call_conv   s    zCUDATargetContext.call_convr#   Nabi_tagsuidc                C   s   t j||||dS )Nrb   )r   Zmangle)r"   rC   argtypesrc   rd   r#   r#   r$   mangler   s    
zCUDATargetContext.manglerc	              	   C   sV   t j|jdd}	|  j|j d|	||d}
|
| | |
||	||||}|
|fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        lineinfo:      Whether to emit line info.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapynsZ_kernel_)Z
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   Zcreate_libraryrC   Zadd_linking_librarygenerate_kernel_wrapper)r"   Zcodelibfndescr'   lineinforj   filenamelinenumrk   kernel_namelibrarywrapperr#   r#   r$   prepare_cuda_kernel   s    

z%CUDATargetContext.prepare_cuda_kernelc           !   	      s  |j }| |}	t|	j}
tt |
}| dttd| j	
tjg|
 }t||j}tj|jdd}t|| t d}|s|r|o| }| j|| |d}| ||j|| |||  fdd}|d	}g }g }d
D ](}||d|  ||d|  q|	| j}| j	||tj||\}}|r|t||j |  W d   n1 s0    Y  | |!|j" t#|j$j%d}|&|||j'dd}|(|d}t)*|}| |h t+d
|D ] \}}|,|} |-| | qt+d
|D ] \}}|.|} |-| | qW d   n1 sR0    Y  W d   n1 sr0    Y  |  t/0  |1 |s|r|2  |2  t3j4rt56| |7 jS )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    rg   rh   r   )modulefilepathZcgctxdirectives_onlyc                    s4    j |  }ttd|}t|jjd |_|S )Nrw   )	rC   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)ZpostfixrC   gvZwrapfnZwrapper_moduler#   r$   define_error_gv   s    
zBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gvZ__errcode__Zxyzz	__tid%s__z__ctaid%s__N	monotonicr   )8re   Zget_arg_packerlistargument_typesr   FunctionTypeZVoidTyperD   r|   ra   get_return_typer   ZpyobjectZFunctionrm   r   rl   rC   Z	IRBuilderZappend_basic_blockr?   Zmark_subprogramargsZmark_locationappendZfrom_argumentscall_functionvoidr   Z	if_likelyZis_okZret_voidZif_thennot_Zis_python_excr}   r~   r   ZcmpxchgcodeZextract_valuer   ZSRegBuilderziptidstoreZctaidr   Zset_cuda_kernelZadd_ir_modulefinalizer   Z	DUMP_LLVMr
   Z	dump_llvmZget_function)!r"   rt   ro   rs   r'   rp   rq   rr   re   arginfoargtysZwrapfntyfntyfuncprefixedbuilderrz   r   r   Zgv_excZgv_tidZgv_ctaidiZcallargsstatus_oldZxchgchangedZsregZdimptrr1   r#   r   r$   rn      s|    



(


P

z)CUDATargetContext.generate_kernel_wrapperc              	      s   |j } fddt|jddD }ttdt|}t||}tj	}t
j||jd|d}	d|	_d	|	_||	_ |j}
 |
}d
|d   |	_ttd}||	|d} | |} fdd|jD } fdd|jD } j||||jj|||j|jdd | S )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        c                    s   g | ]}  tj|qS r#   )get_constantr   byte)r\   r   r@   r#   r$   r]     s   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>A)order   Z_cudapy_cmem	addrspaceinternalT   r   genericc                    s   g | ]}  tj|qS r#   r   r   Zintpr\   sr@   r#   r$   r]   6      c                    s   g | ]}  tj|qS r#   r   r   r@   r#   r$   r]   7  r   N)datashapestridesitemsizeparentZmeminfo) rx   itertobytesr   	ArrayTyper|   lenr}   r   ADDRSPACE_CONSTANTr   r{   r~   linkageglobal_constantr   Zget_data_typeZdtypeZget_abi_sizeof
bit_lengthalignPointerTypeaddrspacecastZ
make_arrayr   r   Zpopulate_arraybitcastr   r   r   Z	_getvalue)r"   r   ZarytyZarrlmodZ	constvalsZ
constarytyZconstaryr   r   Zlldtyper   ZptrtyZgenptrZaryZkshapeZkstridesr#   r@   r$   make_constant_array  s8    

z%CUDATargetContext.make_constant_arrayc                 C   s   t |dd }ddt|g}|j|}|du rdt j||j	|t
jd}d|_d|_||_|j	jj}||t
jS )	r   zutf-8    $Z__conststring__Nr   r   T)r   Zmake_bytearrayencodejoinr   Zmangle_identifierglobalsr.   r{   r~   r   r   r   r   r   r   elementr   Z
as_pointer)r"   modstringtextrC   r   Zchartyr#   r#   r$   insert_const_string@  s    
z%CUDATargetContext.insert_const_stringc                 C   s0   |j }| ||}ttd}|||dS )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        r   r   )rx   r   r   r   r|   r   )r"   r   r   r   r   Z	charptrtyr#   r#   r$   insert_string_const_addrspaceV  s    z/CUDATargetContext.insert_string_const_addrspacec                 C   s   dS )zRun O1 function passes
        Nr#   r"   r   r#   r#   r$   optimize_functionb  s    z#CUDATargetContext.optimize_functionc                 C   s
   t |S r:   )r   get_ufunc_info)r"   Z	ufunc_keyr#   r#   r$   r   o  s    z CUDATargetContext.get_ufunc_info)r9   )N)r4   r5   r6   Zimplement_powi_as_math_callZstrict_alignmentr;   propertyr?   rA   rD   rF   r%   r   rY   r   r_   ra   rf   rv   rn   r   r   r   r   r   r7   r#   r#   r2   r$   r8   C   s4   




 
$\+r8   c                   @   s   e Zd ZdS )r`   N)r4   r5   r6   r#   r#   r#   r$   r`   s  s   r`   c                   @   s\   e Zd ZdZdd Zdd ZdddZd	d
 Zdd ZdddZ	dd Z
dd Zdd ZdS )CUDACABICallConvz
    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
    function signature is:

        <Python return type> (<Python arguments>)

    Exceptions are unsupported in this convention.
    c                 C   s   d S r:   r#   )r"   r   r#   r#   r$   _make_call_helper  s    z"CUDACABICallConv._make_call_helperc                 C   s
   | |S r:   )ret)r"   r   retvalr#   r#   r$   return_value  s    zCUDACABICallConv.return_valueNc                 C   s   d}t |d S )Nz7Python exceptions are unsupported in the CUDA C/C++ ABINotImplementedError)r"   r   excZexc_argsloc	func_namemsgr#   r#   r$   return_user_exc  s    z CUDACABICallConv.return_user_excc                 C   s   d}t |d S )Nz2Return status is unsupported in the CUDA C/C++ ABIr   )r"   r   r   r   r#   r#   r$   return_status_propagate  s    z(CUDACABICallConv.return_status_propagatec                 C   s*   |  |}t|j}t| ||}|S )zM
        Get the LLVM IR Function type for *restype* and *argtypes*.
        )_get_arg_packerr   r   r   r   r   )r"   restypere   r   r   r#   r#   r$   get_function_type  s    

z"CUDACABICallConv.get_function_typeFc                 C   s2   |rJ |  |}|| |dd |D  dS )zA
        Set names and attributes of function arguments.
        c                 S   s   g | ]}d | qS )zarg.r#   )r\   ar#   r#   r$   r]     r   z6CUDACABICallConv.decorate_function.<locals>.<listcomp>N)r   Zassign_namesget_arguments)r"   fnr   Zfe_argtypesZnoaliasr   r#   r#   r$   decorate_function  s
    
z"CUDACABICallConv.decorate_functionc                 C   s   |j S )z@
        Get the Python-level arguments of LLVM *func*.
        )r   r   r#   r#   r$   r     s    zCUDACABICallConv.get_argumentsc                 C   s>   |  |}|||}|||}d}	| j|||}
|	|
fS )z3
        Call the Numba-compiled *callee*.
        N)r   Zas_argumentscallcontextZget_returned_value)r"   r   ZcalleeZrestyr   r   r   Zrealargsr   r   outr#   r#   r$   r     s    
zCUDACABICallConv.call_functionc                 C   s   | j j|  S r:   )r   r=   r   )r"   tyr#   r#   r$   r     s    z CUDACABICallConv.get_return_type)NNN)F)r4   r5   r6   __doc__r   r   r   r   r   r   r   r   r   r#   r#   r#   r$   r   w  s   	  
	
	r   ))re	functoolsr   Zllvmlite.bindingZbindingrX   Zllvmliter   Z
numba.corer   r   r   r   r   r	   r
   Znumba.core.dispatcherr   Znumba.core.baser   Znumba.core.callconvr   r   r   r   r   Zcudadrvr   Z
numba.cudar   r   r   Znumba.cuda.modelsr   r   compileIZVALID_CHARSr8   r`   r   r#   r#   r#   r$   <module>   s&   $*  2