a
    lDfOS                     @   sr   d dl Z d dlZd dlZddlmZ eedZdd Zdd Z	G d	d
 d
e
ZG dd dZG dd dZdS )    N   )build_re c                 C   s   t dd| S )Nz([.?*+^$[\]\\(){}|-])z\\\1)resub)string r   \/nfs/NAS7/SABIOD/METHODE/ermites/ermites_venv/lib/python3.9/site-packages/linkify_it/main.py
_escape_re   s    r
   c                 C   s*   z|  |}W n ty$   d}Y n0 |S )N)index
ValueError)textZsearch_valueresultr   r   r	   	_index_of   s
    
r   c                       s    e Zd ZdZ fddZ  ZS )SchemaErrorzLinkify schema errorc                    s   d ||}t | d S )Nz%(LinkifyIt) Invalid schema '{}': '{}')formatsuper__init__)selfnamevalmessage	__class__r   r	   r      s    zSchemaError.__init__)__name__
__module____qualname____doc__r   __classcell__r   r   r   r	   r      s   r   c                   @   s    e Zd ZdZdd Zdd ZdS )Matcha  Match result.

    Attributes:
        schema (str): Prefix (protocol) for matched string.
        index (int): First position of matched string.
        last_index (int): Next position after matched string.
        raw (str): Matched string.
        text (str): Notmalized text of matched string.
        url (str): Normalized url of matched string.

    Args:
        linkifyit (:class:`linkify_it.main.LinkifyIt`) LinkifyIt object
        shift (int): text searh position
    c                 C   s   d | jj| jj| jS )Nz{}.{}({!r}))r   r   r   r   __dict__r   r   r   r	   __repr__0   s    zMatch.__repr__c                 C   sP   |j }|j}|j|| }|j | _|| | _|| | _|| _|| _	|| _
d S N)_index_last_index_text_cache_schemalowerschemar   
last_indexrawr   url)r   Z	linkifyitshiftstartendr   r   r   r	   r   5   s    

zMatch.__init__N)r   r   r   r   r#   r   r   r   r   r	   r        s   r    c                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
d*ddZdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd+d$d%Zd&d' Zd(d) ZdS ),	LinkifyIta  Creates new linkifier instance with optional additional schemas.

    By default understands:

    - ``http(s)://...`` , ``ftp://...``, ``mailto:...`` & ``//...`` links
    - "fuzzy" links and emails (example.com, foo@bar.com).

    ``schemas`` is an dict where each key/value describes protocol/rule:

    - **key** - link prefix (usually, protocol name with ``:`` at the end, ``skype:``
      for example). `linkify-it` makes shure that prefix is not preceeded with
      alphanumeric char. Only whitespaces and punctuation allowed.

    - **value** - rule to check tail after link prefix

      - *str* - just alias to existing rule
      - *dict*

        - *validate* - either a ``re.Pattern``, ``re str`` (start with ``^``, and don't
          include the link prefix itself), or a validator ``function`` which, given
          arguments *self*, *text* and *pos* returns the length of a match in *text*
          starting at index *pos*. *pos* is the index right after the link prefix.
        - *normalize* - optional function to normalize text & url of matched
          result (for example, for @twitter mentions).

    ``options`` is an dict:

    - **fuzzyLink** - recognige URL-s without ``http(s):`` prefix. Default ``True``.
    - **fuzzyIP** - allow IPs in fuzzy links above. Can conflict with some texts
      like version numbers. Default ``False``.
    - **fuzzyEmail** - recognize emails without ``mailto:`` prefix.
    - **---** - set `True` to terminate link with `---` (if it's considered as long
      dash).

    Args:
        schemas (dict): Optional. Additional schemas to validate (prefix/validator)
        options (dict): { fuzzy_link | fuzzy_email | fuzzy_ip: True | False }.
            Default: {"fuzzy_link": True, "fuzzy_email": True, "fuzzy_ip": False}.
    c                 C   sl   ||d  }| j ds@d| j d  | j d  | j d  | j d< t j| j d |t jd}|rht| S dS )Nhttpz^\/\/src_authZsrc_host_port_strictsrc_pathflagsr   r   getsearch
IGNORECASElengroupr   r   postailfoundsr   r   r	   _validate_httpk   s    zLinkifyIt._validate_httpc                 C   s   ||d  }| j dsjd| j d  d | j d  d | j d  d | j d	  | j d
  | j d  | j d< t j| j d |t jd}|r|dkr||d  dkrdS |dkr||d  dkrdS t|dS dS )NZnot_http^r3   z(?:localhost|(?:(?:Z
src_domainz)\.)+Zsrc_domain_root)Zsrc_portZsrc_host_terminatorr4   r5      :r   /r7   r=   r   r   r	   _validate_double_slash}   s<    	z LinkifyIt._validate_double_slashc                 C   sh   ||d  }| j ds:d| j d  d | j d  | j d< t j| j d |t jd}|rdt|dS dS )NmailtorB   Zsrc_email_name@Zsrc_host_strictr5   r   r7   r=   r   r   r	   _validate_mailto   s    zLinkifyIt._validate_mailtoc                 C   s   d| _ d| _d S )Nr   r   )r%   r'   r"   r   r   r	   _reset_scan_cache   s    zLinkifyIt._reset_scan_cachec                    s    fdd}|S )Nc                    sL   | |d  }t  tr*tj |tjd}nt |}|rHt|dS dS )Nr5   r   )
isinstancestrr   r9   r:   r;   r<   )r   r>   r?   r@   regexr   r	   func   s    
z)LinkifyIt._create_validator.<locals>.funcr   )r   rO   rP   r   rN   r	   _create_validator   s    zLinkifyIt._create_validatorc                    s    fdd}|S )Nc                    s     |  d S r$   )	normalize)matchr"   r   r	   rP      s    z*LinkifyIt._create_normalizer.<locals>.funcr   )r   rP   r   r"   r	   _create_normalizer   s    zLinkifyIt._create_normalizerc                 C   s"   t | |}| j|j d | |S )NrR   )r    	_compiledr*   )r   r.   rS   r   r   r	   _create_match   s    
zLinkifyIt._create_matchNc                 C   s   dddd| _ d| jiddd| jid| jid| _d| _dd	| _|r^| j | | j | _	n| j | _	d
| _
d
| _d| _d| _|r| j| | j| _n| j| _i | _| j| _d| _i | _|   d S )NTF)
fuzzy_linkfuzzy_emailfuzzy_ipvalidatehttp:)r[   zhttps:zftp:z//mailto:aW  a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]uM   biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф|r   r   )Zdefault_optionsrA   rG   rJ   Zdefault_schemastlds_2ch_src_resplitZtlds_defaultupdate_optsr%   r&   r(   r'   _schemasrU   _tlds_tlds_replacedr   _compile)r   Zschemasoptionsr   r   r	   r      s>    	

zLinkifyIt.__init__c           
         sD  t  j _t j}    js2| j	 | jd  d
| jd<  fdd}| jd  jd< | jd  jd	< | jd
  jd< | jd  jd< g }i  _ j D ]\}}|du rqddd}| j|< t|tr"t|dtr |d|d< nt|dtrF |d|d< n`t|dtjrj|d|d< n<t|dtjrttd|d  j|d< n
t||t|dtjr|d|d< qt|dtjrttd|d  j|d< q|ds  |d< qt||qt|tr:|| qt||q|D ]\} j j|shqJ j j|  d  j| d<  j j|  d  j| d< qJd  d jd< d
dd  j D }d jd  d | d }	|	 jd< |	 jd< d jd   jd< d|	 d   jd  d!  jd"<    dS )#z Schemas compiler. Build regexps.Zsrc_xnr]   src_tldsc                    s   |  d jd S )Nz%TLDS%rg   )replacer   )Ztplr"   r   r	   untpl  s    z!LinkifyIt._compile.<locals>.untplZtpl_email_fuzzyemail_fuzzyZtpl_link_fuzzy
link_fuzzyZtpl_link_no_ip_fuzzylink_no_ip_fuzzyZtpl_host_fuzzy_testhost_fuzzy_testN)rZ   linkrZ   rP   rR   )rZ   rR   r   c                 S   s(   g | ] \}}t |d kr|rt|qS )r   )r;   r
   ).0r   r   r   r   r	   
<listcomp>^  s   z&LinkifyIt._compile.<locals>.<listcomp>u   (^|(?!_)(?:[><｜]|Zsrc_ZPCcz))(rC   schema_testschema_searchrB   schema_at_start(z)|(z)|@pretest)r   ra   r   copydeepcopyrc   _on_compilerd   appendr^   joinrU   rb   itemsrL   dictr8   RE_TYPErQ   rM   types
MethodTypeFunctionTypesetattrr1   rP   r   rT   rK   )
r   tldsri   aliasesr   r   ZcompiledaliasslistZre_schema_testr   r"   r	   re      s    




	

zLinkifyIt._compilec                 C   s   || j |< |   | S )a^  Add new rule definition. (chainable)

        See :class:`linkify_it.main.LinkifyIt` init description for details.
        ``schema`` is a link prefix (``skype:``, for example), and ``definition``
        is a ``str`` to alias to another schema, or an ``dict`` with ``validate`` and
        optionally `normalize` definitions. To disable an existing rule, use
        ``.add(<schema>, None)``.

        Args:
            schema (str): rule name (fixed pattern prefix)
            definition (`str` or `re.Pattern`): schema definition

        Return:
            :class:`linkify_it.main.LinkifyIt`
        )rb   re   )r   r*   Z
definitionr   r   r	   addv  s    
zLinkifyIt.addc                 C   s   | j | | S )a9  Override default options. (chainable)

        Missed properties will not be changed.

        Args:
            options (dict): ``keys``: [``fuzzy_link`` | ``fuzzy_email`` | ``fuzzy_ip``].
                ``values``: [``True`` | ``False``]

        Return:
            :class:`linkify_it.main.LinkifyIt`
        )ra   r`   )r   rf   r   r   r	   set  s    zLinkifyIt.setc                 C   s  || _ d| _t|sdS tj| jd |tjdr| jd }d}tj|||d tjd}|D ]}|d}| |	 d |	 d f}| 
||d	 |}|r\|d	 | _|dt|d  | _|dt|d  | | _ qq\| jd
r| jdrtj| jd |tjd}|r(|d}	nd}	|	dkr| jdk sN|	| jk r| jdrh| jd }
n
| jd }
tj|
|tjd}|r|dt|	 d  }| jdk s|| jk rd| _|| _|dt|  | _| jdr| jdrt|d}|dkrtj| jd |tjd}|r|dt|	 d  }|dt|  }| jdk s|| jk s|| jkr|| jkrd| _|| _|| _| jdkS )a  Searches linkifiable pattern and returns ``True`` on success or ``False``
        on fail.

        Args:
            text (str): text to search

        Returns:
            bool: ``True`` if a linkable pattern was found, otherwise it is ``False``.
        r   Frq   r5   rr   r   Nr      rW   r[   rm   rY   rk   rl   r   rX   r\   rI   rj   )r'   r%   r;   r   r9   r:   finditerr0   r<   groupstest_schema_atr(   r/   r&   ra   r8   rU   r   )r   r   rO   r+   Zmatched_itermatchedmlengthZmatched_tldZtld_pospatternmlr.   Zat_posmeZ
next_shiftr   r   r	   test  sn    







zLinkifyIt.testc                 C   s    t j| j d |t jdrdS dS )ab  Very quick check, that can give false positives.

        Returns true if link MAY BE can exists. Can be used for speed optimization,
        when you need to check that link NOT exists.

        Args:
            text (str): text to search

        Returns:
            bool: ``True`` if a linkable pattern was found, otherwise it is ``False``.
        ru   r5   TF)r   r9   r:   )r   r   r   r   r	   ru     s    zLinkifyIt.pretestc                 C   s0   | j | sdS | j | d||S )ab  Similar to :meth:`linkify_it.main.LinkifyIt.test` but checks only
        specific protocol tail exactly at given position.

        Args:
            text (str): text to scan
            name (str): rule (schema) name
            position (int): length of found pattern (0 on fail).

        Returns:
            int: text (str): text to search
        r   rZ   )rU   r8   r)   )r   r   r   positionr   r   r	   r     s    zLinkifyIt.test_schema_atc                 C   s   d}g }| j dkr2| j|kr2|| | | j}|rB||d n|}| |rz|| | || jd }|| j7 }qFt|r|S dS )a  Returns ``list`` of found link descriptions or ``None`` on fail.

        We strongly recommend to use :meth:`linkify_it.main.LinkifyIt.test`
        first, for best speed.

        Args:
            text (str): text to search

        Returns:
            ``list`` or ``None``: Result match description:
                * **schema** - link schema, can be empty for fuzzy links, or ``//``
                  for protocol-neutral  links.
                * **index** - offset of matched text
                * **last_index** - offset of matched text
                * **raw** - offset of matched text
                * **text** - normalized text
                * **url** - link, generated from matched text
        r   N)r%   r'   ry   rV   r&   r   r;   )r   r   r.   r   r?   r   r   r	   rS   	  s    
zLinkifyIt.matchc                 C   s   || _ d| _t|sdS tj| jd |tjd}|s8dS | | d | d f}| ||d t|d }|sxdS |d | _	|
dt|d  | _|
dt|d  | | _| dS )zReturns fully-formed (not fuzzy) link if it starts at the beginning
        of the string, and null otherwise.

        Args:
            text (str): text to search

        Retuns:
            ``Match`` or ``None``
        r   Nrs   r5   r   r   r   )r'   r%   r;   r   r9   r:   r<   r   r   r(   r/   r&   rV   )r   r   r@   r   r   r   r   r	   match_at_start3  s    
zLinkifyIt.match_at_startFc                 C   s`   t |tr|n|g}|s0|| _d| _|   | S | j| ttt| jdd| _|   | S )u  Load (or merge) new tlds list. (chainable)

        Those are user for fuzzy links (without prefix) to avoid false positives.
        By default this algorythm used:

        * hostname with any 2-letter root zones are ok.
        * biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
          are ok.
        * encoded (`xn--...`) root zones are ok.

        If list is replaced, then exact match for 2-chars root zones will be checked.

        Args:
            list_tlds (list or str): ``list of tlds`` or ``tlds string``
            keep_old (bool): merge with current list if q`True`q (q`Falseq` by default)
        T)reverse)rL   listrc   rd   re   extendsortedr   )r   Z	list_tldsZkeep_oldZ_listr   r   r	   r   S  s    zLinkifyIt.tldsc                 C   s@   |j sd|j |_|j dkr<tjd|jtjds<d|j |_dS )zDefault normalizer (if schema does not define it's own).

        Args:
            match (:class:`linkify_it.main.Match`): Match result
        zhttp://r\   z^mailto:r5   N)r*   r-   r   r9   r:   )r   rS   r   r   r	   rR   r  s    
zLinkifyIt.normalizec                 C   s   dS )z"Override to modify basic RegExp-s.Nr   r"   r   r   r	   rx     s    zLinkifyIt._on_compile)NN)F)r   r   r   r   rA   rG   rJ   rK   rQ   rT   rV   r   re   r   r   r   ru   r   rS   r   r   rR   rx   r   r   r   r	   r1   B   s(   (
2{N* 
r1   )rv   r   r~   Zucrer   typecompiler}   r
   r   	Exceptionr   r    r1   r   r   r   r	   <module>   s   	"