
    9i*                       d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
d
d	 	 	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 ddZd
d
d	 	 	 	 	 	 	 	 	 ddZd
d
d	 	 	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 ddZd
d
d	 	 	 	 	 	 	 	 	 ddZ	 d	 	 	 	 	 	 	 	 	 ddZd
d	 	 	 	 	 	 	 ddZd
d	 	 	 	 	 	 	 ddZy
)    )annotations)CallableHashableSequence)conv_sequences)is_none)EditopsOpcodes)_block_similarity)editops)opcodes)
similarityN)	processorscore_cutoffc                   | ||       }  ||      }t        | |      \  } }t        |       t        |      z   }t        | |      }|d|z  z
  }|||k  r|S |dz   S )a  
    Calculates the minimum number of insertions and deletions
    required to change one sequence into the other. This is equivalent to the
    Levenshtein distance with a substitution weight of 2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the distance is bigger than score_cutoff,
        score_cutoff + 1 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2

    Examples
    --------
    Find the Indel distance between two strings:

    >>> from rapidfuzz.distance import Indel
    >>> Indel.distance("lewenstein", "levenshtein")
    3

    Setting a maximum distance allows the implementation to select
    a more efficient implementation:

    >>> Indel.distance("lewenstein", "levenshtein", score_cutoff=1)
    2

          )r   lenlcs_seq_similarity)s1s2r   r   maximumlcs_simdists          e/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/rapidfuzz/distance/Indel_py.pydistancer      sv    ^ r]r]B#FB"gBG R(GQ[ D (DL,@4W|VWGWW    c                v    t        |      t        |      z   }t        | ||      }|d|z  z
  }|||k  r|S |dz   S )Nr   r   )r   lcs_seq_block_similarity)blockr   r   r   r   r   r   s          r   _block_distancer!   K   sO     "gBG&ub"5GQ[ D (DL,@4W|VWGWWr   c                   | ||       }  ||      }t        | |      \  } }t        |       t        |      z   }t        | |      }||z
  }|||k\  r|S dS )a  
    Calculates the Indel similarity in the range [max, 0].

    This is calculated as ``(len1 + len2) - distance``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Maximum distance between s1 and s2, that is
        considered as a result. If the similarity is smaller than score_cutoff,
        0 is returned instead. Default is None, which deactivates
        this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    r   )r   r   r   )r   r   r   r   r   r   sims          r   r   r   W   sk    @ r]r]B#FB"gBGBD
D.C'3,+>3FQFr   c                   t        |       st        |      ry| ||       }  ||      }t        | |      \  } }t        |       t        |      z   }t        | |      }|r||z  nd}|||k  r|S dS )a8  
    Calculates a normalized levenshtein similarity in the range [1, 0].

    This is calculated as ``distance / (len1 + len2)``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is 1.0,
        which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0
          ?r   r   )r   r   r   r   )r   r   r   r   r   r   	norm_dists          r   normalized_distancer'      s    > r{gbkr]r]B#FB"gBGBD")wqI%-l1J9RQRRr   c                r    t        |      t        |      z   }t        | ||      }|r||z  nd}|||k  r|S dS )Nr   r   )r   r!   )r    r   r   r   r   r   r&   s          r   _block_normalized_distancer)      sI     "gBG5"b)D")wqI%-l1J9RQRRr   c                   t        |       st        |      ry| ||       }  ||      }t        | |      \  } }t        | |      }d|z
  }|||k\  r|S dS )a  
    Calculates a normalized indel similarity in the range [0, 1].

    This is calculated as ``1 - normalized_distance``

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is 0,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0

    Examples
    --------
    Find the normalized Indel similarity between two strings:

    >>> from rapidfuzz.distance import Indel
    >>> Indel.normalized_similarity("lewenstein", "levenshtein")
    0.85714285714285

    Setting a score_cutoff allows the implementation to select
    a more efficient implementation:

    >>> Indel.normalized_similarity("lewenstein", "levenshtein", score_cutoff=0.9)
    0.0

    When a different processor is used s1 and s2 do not have to be strings

    >>> Indel.normalized_similarity(["lewenstein"], ["levenshtein"], processor=lambda s: s[0])
    0.8571428571428572
    g        r%   r   )r   r   r'   )r   r   r   r   r&   norm_sims         r   normalized_similarityr,      si    d r{gbkr]r]B#FB#B+IYH$,L0H8PqPr   c                <    t        | ||      }d|z
  }|||k\  r|S dS )Nr%   r   )r)   )r    r   r   r   r&   r+   s         r   _block_normalized_similarityr.      s2     +5"b9IYH$,L0H8PqPr   r   c                   t        | ||      S )ua  
    Return Editops describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    editops : Editops
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Indel
    >>> for tag, src_pos, dest_pos in Indel.editops("qabxcd", "abycdf"):
    ...    print(("%7s s1[%d] s2[%d]" % (tag, src_pos, dest_pos)))
     delete s1[0] s2[0]
     delete s1[3] s2[2]
     insert s1[4] s2[2]
     insert s1[6] s2[5]
    r/   )lcs_seq_editopsr   r   r   s      r   r   r     s    X 2rY77r   c                   t        | ||      S )u  
    Return Opcodes describing how to turn s1 into s2.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor: callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. Default is None, which deactivates this behaviour.

    Returns
    -------
    opcodes : Opcodes
        edit operations required to turn s1 into s2

    Notes
    -----
    The alignment is calculated using an algorithm of Heikki Hyyrö, which is
    described [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.

    References
    ----------
    .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
           Stringology (2004).

    Examples
    --------
    >>> from rapidfuzz.distance import Indel

    >>> a = "qabxcd"
    >>> b = "abycdf"
    >>> for tag, i1, i2, j1, j2 in Indel.opcodes(a, b):
    ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
    ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
     delete a[0:1] (q) b[0:0] ()
      equal a[1:3] (ab) b[0:2] (ab)
     delete a[3:4] (x) b[2:2] ()
     insert a[4:4] () b[2:3] (y)
      equal a[4:6] (cd) b[3:5] (cd)
     insert a[6:6] () b[5:6] (f)
    r/   )lcs_seq_opcodesr2   s      r   r   r   4  s    d 2rY77r   )
r   Sequence[Hashable]r   r5   r   (Callable[..., Sequence[Hashable]] | Noner   
int | Nonereturnint)N)
r    dict[Hashable, int]r   r5   r   r5   r   r7   r8   r9   )
r   r5   r   r5   r   r6   r   float | Noner8   float)
r    r:   r   r5   r   r5   r   r;   r8   r<   )r   r5   r   r5   r   r6   r8   r	   )r   r5   r   r5   r   r6   r8   r
   )
__future__r   typingr   r   r   rapidfuzz._common_pyr   rapidfuzz._utilsr   !rapidfuzz.distance._initialize_pyr	   r
   rapidfuzz.distance.LCSseq_pyr   r   r   r1   r   r4   r   r   r   r!   r'   r)   r,   r.    r   r   <module>rD      sD   # / / / $ > V C C I ;?#7X7X7X 8	7X
 7X 	7X|  $		X	X	X 		X 		X
 		X  ;?#(G(G(G 8	(G
 (G 	(G^ ;?!%*S*S*S 8	*S
 *S *Sb "&		S	S	S 		S 		S
 	S  ;?!%<Q<Q<Q 8	<Q
 <Q <QF "&	QQQ 	Q 	Q
 Q ;?	,8,8,8 8	,8
 ,8f ;?	282828 8	28
 28r   