a
    Df1;                     @  s$  d Z ddlmZ ddlZe ZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZ ddlZddlZddlZddlZddlmZ ddlmZ ddlmZ dadada G dd	 d	Z!e! Z"d
hZ#G dd de$Z%dPddZ&e$dd dD Z'dQddZ(dd e'd d< dd e'd d< dd Z)dd e'd d< dd e'd
 d< dd e'd  d< d!d e'd" d< dRd#d$Z*d%d e'd d&< d'd e'd d&< d(d e'd d&< d)d* Z+d+d e'd
 d&< d,d e'd  d&< d-d e'd" d&< e$d.d dD Z,d/d e,d d< d0d e,d d< d1d2 Z-d3d e,d d< d4d e,d
 d< d5d e,d" d< d6d e,d  d< d7d e,d d&< d8d e,d d&< d9d e,d d&< d:d e,d
 d&< d;d e,d  d&< d<d e,d" d&< dSd?d@Z.dAdB Z/dCa0e1dDe1dEdFfdGdHZ2dIdJ Z3dKdL Z4dMdN Z5e6dOkr e7e5ej8 dS )Taw  
Simple test of read and write times for columnar data formats:
  python filetimes.py <filepath> [pandas|dask [hdf5base [xcolumn [ycolumn] [categories...]]]]

Test files may be generated starting from any file format supported by Pandas:
  python -c "import filetimes ; filetimes.base='<hdf5base>' ; filetimes.categories=['<cat1>','<cat2>']; filetimes.timed_write('<file>')"
    )annotationsN)export_image)transfer_functions)distributedFc                   @  sD   e Zd Zd\ZZZdZg ZdZdZ	dZ
dZedddd	Ze ZdS )

Parameters)dataxypandasiOߑ   Ng   Ј BZhiveF)Zfile_schemeZ	has_nullsZwrite_index)__name__
__module____qualname__baser   r	   dftype
categories	chunksizeZ	cat_widthcolumns	cachesizedict	parq_optsmultiprocessing	cpu_count	n_workers r   r   j/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/datashader/examples/filetimes.pyr   $   s   
r   parqc                   @  s   e Zd ZdZdS )KwargszWUsed to distinguish between dictionary argument values, and
    keyword-arguments.
    N)r   r   r   __doc__r   r   r   r   r   5   s   r   c                 C  sF  t |}i }|r0t|d tr0| }|| trddd |D }ddd | D }td	| j
||rxd| nddd	 t }| |i |}	|d
ur2|tvri }
tjdkrd|
d< tjD ]}|	| jdi |
|	|< qtjdkr2tr2td
urt|	}	t|	 ntr*tddd	 |	 }	t }|| |	fS )zBenchmark when "fn" function gets called on "args" tuple.
    "args" may have a Kwargs instance at the end.
    If "filetype" is provided, it may be used to convert columns to
    categorical dtypes after reading (the "loading" is assumed).
    z, c                 S  s*   g | ]"}t |d rt| nt|qS )head)hasattrstrr    ).0Zposargr   r   r   
<listcomp>I       zbenchmark.<locals>.<listcomp>c                 S  s   g | ]\}}d  ||qS )z{}={})format)r#   kvr   r   r   r$   J   r%   zDEBUG: {}({}{}) TflushNr
   Fcopycategorydaskz#DEBUG: Force-loading Dask dataframe)r-   )list
isinstancer   popupdateDEBUGjoinitemsprintr&   r   timefiletypes_storing_categoriespr   r   astypeDD_FORCE_LOADDASK_CLIENTpersistr   wait)fnargsfiletypeZposargskwargsZlastargZprintable_posargsZprintable_kwargsstartresoptscendr   r   r   	benchmark;   s6    
$




rH   c                 C  s   g | ]}|t  fqS r   r   r#   fr   r   r   r$   m   r%   r$   )r   snappy.parqgz.parqfeatherh5csvc                 C  s4   t j| rtj| |dS | dd}tj||dS )Nusecols.csv*.csv)ospathisfileddread_csvreplace)filepathrR   filepath_exprr   r   r   read_csv_dasko   s    r]   c                 C  s   t t| t|jdf|S NrQ   )rH   r]   r   r   r[   r9   rA   r   r   r   <lambda>v   r%   r`   rP   r.   c                 C  s"   t tj| |jt|j|jdf|S )N)r   r   )rH   rX   read_hdfr   r   r   r   r_   r   r   r   r`   w   r%   rO   c                 C  s    t j| tjd}tj|tjdS )Nr   )Znpartitions)rN   read_dataframer9   r   rX   Zfrom_pandasr   r[   dfr   r   r   read_feather_daskx   s    rf   c                 C  s   t t| f|S N)rH   rf   r_   r   r   r   r`   {   r%   rN   c                 C  s   t tj| td|jdf|S NF)indexr   rH   rX   Zread_parquetr   r   r_   r   r   r   r`   |   r%   c                 C  s   t tj| td|jdf|S rh   rj   r_   r   r   r   r`   }   r%   rM   c                 C  s   t tj| td|jdf|S rh   rj   r_   r   r   r   r`   ~   r%   rL   c                   sH   t j| rtj|  dS | dd}t|}t fdd|D S )NrQ   rS   rT   c                 3  s   | ]}t j| d V  qdS )rQ   N)pdrY   rJ   rQ   r   r   	<genexpr>   r%   z"read_csv_pandas.<locals>.<genexpr>)rU   rV   rW   rk   rY   rZ   globconcat)r[   rR   r\   Z	filepathsr   rQ   r   read_csv_pandas   s
    
ro   c                 C  s   t t| t|jdf|S r^   )rH   ro   r   r   r_   r   r   r   r`      r%   r
   c                 C  s   t tj| |jt|jdf|S )Nrb   )rH   rk   ra   r   r   r   r_   r   r   r   r`      r%   c                 C  s   t tj| f|S rg   )rH   rN   rc   r_   r   r   r   r`      r%   c                 C  s   t |  S rg   )fpZParquetFileZ	to_pandas)r[   r   r   r   read_parq_pandas   s    rq   c                 C  s   t t| f|S rg   rH   rq   r_   r   r   r   r`      r%   c                 C  s   t t| f|S rg   rr   r_   r   r   r   r`      r%   c                 C  s   t t| f|S rg   rr   r_   r   r   r   r`      r%   c                 C  s   g | ]}|t  fqS r   rI   rJ   r   r   r   r$      r%   c                 C  s   t | j|ddtddfS )NrS   rT   Fri   )rH   to_csvrZ   r   re   r[   r9   r   r   r   r`      r%   c                 C  s   t | j||jfS rg   )rH   to_hdfr   ru   r   r   r   r`      r%   c                 C  s   t | | S rg   )rN   write_dataframecomputerd   r   r   r   write_feather_dask   s    ry   c                 C  s   t t|| fS rg   )rH   ry   ru   r   r   r   r`      r%   c                 C  s   t tj|| fS rg   )rH   rX   
to_parquetru   r   r   r   r`      r%   c                 C  s   t tj|| tddfS )NSNAPPYcompressionrH   rX   rz   r   ru   r   r   r   r`      r%   c                 C  s   t tj|| tddfS )NGZIPr|   r~   ru   r   r   r   r`      r%   c                 C  s   t | j|tddfS )NFrs   )rH   rt   r   ru   r   r   r   r`      r%   c                 C  s   t | j|t|jddfS )Ntable)keyr&   )rH   rv   r   r   ru   r   r   r   r`      r%   c                 C  s   t tj| |fS rg   )rH   rN   rw   ru   r   r   r   r`      r%   c                 C  s   t tj|| tf i |jfS rg   rH   rp   writer   r   ru   r   r   r   r`      r%   c                 C  s"   t tj|| tf ddi|jfS )Nr}   r   r   ru   r   r   r   r`      r%   c                 C  s"   t tj|| tf ddi|jfS )Nr}   r{   r   ru   r   r   r   r`      r%   doubletimesc                 C  s  |dv sJ |t _t| |\}}t D ]r}tj| \}}tj|\}	}
|tjj	 |	 d | }tj
|rtd|dd q(|dd }|tvrt jD ]B}|dkr|| jdkr|| jd	||< q|| t||< q|d
kr|jD ]&}|| jdkr|| tj||< qt| |d}|du rNtd||dd n&|||t \}}td|||dd |tvr(t jD ]}|| d||< qq(dS )ztAccepts any file with a dataframe readable by the given dataframe type, and writes it out as a variety of file types)singler   .z{:28} (keeping existing)Tr*   r   r   objectutf8r   float64Nz"{:28} {:7} Operation not supportedz{:28} {:7} {:05.2f}r-   )r9   r   
timed_readr   keysrU   rV   splitsplitextsepexistsr6   r&   r8   r   dtyper"   encoder:   r   npfloat32get)r[   r   fsizeZoutput_directoryre   durationext	directoryfilenamebasename	extensionfnamerA   rF   colnamecoderD   r   r   r   timed_write   s6    




r   c                 C  sv   t j| \}}|dd  }|dd }t| |d }|d u rFdS tjgtjg tj	 t_
|| t|\}}||fS )Nr   r   r   )Nr   )rU   rV   r   r   readr   r9   r   r	   r   r   )r[   r   r   r   rA   r   r   re   r   r   r   r      s    r   )NNi  g     h@Tc           
      C  sl   t   }tj||td td d}|| tjtj}t   }|rL|j|j	fat
t||dd}	|	|| fS )Nr   r   )x_rangey_ranger   )Zexport_path)r7   dsZCanvasCACHED_RANGESZpointsr9   r   r	   r   r   r   tfZshade)
re   r[   Z
plot_widthZplot_heightcache_rangesrC   ZcvsZaggrG   imgr   r   r   	timed_agg   s    r   c                 C  s   d}t j| rt j| S | drRt| ddD ]}|t j|7 }q8|S t | D ]2\}}}|D ]"}t j||}|t j|7 }qjq\|S )Nr   rP   rS   rT   )	rU   rV   rW   getsizeendswithrm   rZ   walkr4   )rV   totalZ	csv_fpathdirpathdirnames	filenamesrK   rp   r   r   r   get_size   s    
r   c                   C  s   t t jjd S )N    .A)resourceZ	getrusageZRUSAGE_SELFZ	ru_maxrssr   r   r   r   get_proc_mem   s    r   c                 C  sf  t jtt jd}|d |d |d |d |d |jddd	 |jd
ddd |jddd dttjd |jdddd |jdddd |	| dd  }|j
d u r|jrtddd nV|j
dkrddlm} |tj}|  n|j
dkrda|jr$td|j
dd |jdkrd|jrdtjtjdd }t|a|jrtd! n,|jdkr|jrtd"|jrtd# |j}tj|\}}|jt_|jt_|jt_|jt_|jt_|ja t rtd$t! dd t"|tj\}	}
|	d u r,|
d%kr(td&|tjdd dS t rFtd't! dd t#|	|d(d(|j$ d)\}}t r|	j%dd*}tjdkr|& }td+|dd |' }td,|d- dd |	j(D ]}td.||	| j) qtd/t! dd t#|	||j$ d)\}}t rtd0t! dd t*|}t*|d1 }t++ }td2|tj|
| |
|||||t, 	dd dS )3N)epilogformatter_classr[   r   r   r   r	   r   +)nargsz--debug
store_truez-Enable increased verbosity and DEBUG messages)actionhelpz--cache)r=   cacheyzEnable caching: "persist" causes Dask dataframes to force loading into memory; "cachey" uses dask.cache.Cache with a cachesize of {}. Caching is disabled by default)choicesdefaultr   z--distributedzOEnable the distributed scheduler instead of the threaded, which is the default.z--recalc-rangeszdTell datashader to recalculate the ranges on each aggregation, instead of caching them (by default).r   zDEBUG: Cache disabledTr*   r   r   )Cacher=   zDEBUG: Cache "{}" mode enabledr.   )r   Zthreads_per_workerz)DEBUG: "distributed" scheduler is enabledzR--distributed argument is only available with the dask dataframe type (not pandas)z&DEBUG: "threaded" scheduler is enabledz(DEBUG: Memory usage (before read):	{} MBr   z#{:28} {:6}  Operation not supportedz'DEBUG: Memory usage (after read):	{} MB   )r   )deepzDEBUG:zDEBUG: DataFrame size:			{} MBr   zDEBUG: column "{}" dtype: {}z'DEBUG: Memory usage (after agg1):	{} MBz'DEBUG: Memory usage (after agg2):	{} MBz.pngzq{:28} {:6}  Aggregate1:{:06.2f} ({:06.2f}+{:06.2f})  Aggregate2:{:06.2f}  In:{:011d}  Out:{:011d}  Total:{:06.2f})-argparseArgumentParserr   RawTextHelpFormatteradd_argumentr&   intr9   r   
parse_argscachedebugr6   Z
dask.cacher   registerr;   r   r   ZLocalClusterr   ZClientr<   
ValueErrorr[   rU   rV   r   r   r   r	   r   r3   r   r   r   Zrecalc_rangesZmemory_usagerx   sumr   r   r   r7   global_start)argvparserr@   r   r   Zlocal_clusterr[   r   r   re   Zloadtimer   Zaggtime1Z	mem_usageZmem_usage_totalr   Zaggtime2Zin_sizeZout_sizeZ
global_endr   r   r   main  s    













r   __main__)N)N)N)r   r   )9r   
__future__r   r7   r   rU   os.pathsysrm   r   r   r   r
   rk   Zdask.dataframeZ	dataframerX   numpyr   Z
datashaderr   rN   Zfastparquetrp   Zdatashader.utilsr   r   r   r.   r   r3   r;   r<   r   r9   r8   r   r   rH   r   r]   rf   ro   rq   r   ry   r   r   r   r   r   r   r   r   r   exitr   r   r   r   r   <module>   sz   8
2


)X
