
    ǔ9i"/                     V   d Z ddlZddlZddlZddlmZmZ ddlZdee   dee   de	fdZ
dee   d	ee   fd
Zdee   d	ee   de	e	ffdZdededede	de	de	fdZ	 	 	 ddee   dede	de	deegee   f   dee	   fdZ	 	 	 ddeee      dee   de	de	deegee   f   deee	      fdZy)zLEPOR score implementation.    N)CallableList	reference
hypothesisreturnc                     t        |       }t        |      }||k(  ry||k  rt        j                  d||z  z
        S t        j                  d||z  z
        S )a  
    This function calculates the length penalty(LP) for the LEPOR metric, which is defined to embrace the penaltyvfor
    both longer and shorter hypothesis compared with the reference translations.
    Refer from Eq (2) on https://aclanthology.org/C12-2044

    :param reference: Reference sentence
    :type reference: str
    :param hypothesis: Hypothesis sentence
    :type hypothesis: str

    :return: Penalty of difference in length in reference and hypothesis sentence.
    :rtype: float
       )lenmathexp)r   r   ref_lenhyp_lens       ^/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/nltk/translate/lepor.pylength_penaltyr      s[     )nG*oG'	7	xxWw./00xxWw./00    
ref_tokens
hyp_tokensc                    g }t        |      }t        |       }t        |      D ]  \  }}| j                  |      dk(  r|j                  d       -| j                  |      dk(  r!|j                  | j	                  |             bt        |       D cg c]  \  }}||k(  s| }	}}g }
t        |	      D ]|  \  }}d|dz
  cxk  r|k  r+n n(d|dz
  cxk  r|k  rn n| |dz
     ||dz
     k(  rd|
|<   ?d|dz   cxk  r|k  r+n n(d|dz   cxk  r|k  rn n| |dz      ||dz      k(  rd|
|<   xd|
|<   ~ |
j                  d      dk(  r%|j                  |	|
j	                  d                J|
j                  d      dkD  rFd}d}t        |
|	      D ]   \  }}|s	t        ||z
        }||kD  s|}|}" |j                  |       d}d}|	D ]  }t        ||z
        }||kD  s|}|} |j                  |       |	D ]  }t        ||z
        }||kD  s|}|} |j                  |       
 |D cg c]  }|dk7  s	|dz    }}|S c c}}w c c}w )a  
    This function computes the context-dependent n-gram word alignment tasks that
    takes into account the surrounding context (neighbouring words) of the potential
    word to select a better matching pairs between the output and the reference.

    This alignment task is used to compute the ngram positional difference penalty
    component of the LEPOR score. Generally, the function finds the matching tokens
    between the reference and hypothesis, then find the indices of longest matching
    n-grams by checking the left and right unigram window of the matching tokens.

    :param ref_tokens: A list of tokens in reference sentence.
    :type ref_tokens: List[str]
    :param hyp_tokens: A list of tokens in hypothesis sentence.
    :type hyp_tokens: List[str]
    r   r	   TF)r
   	enumeratecountappendindexzipabs)r   r   
alignmentsr   r   	hyp_index	hyp_tokeni	ref_tokenref_indexes
is_matchedind	ref_indexmin_distance	min_indexmatchdistanceas                     r   	alignmentr*   ,   s     J *oG*oG )* 5 G-	9I&!+b!i(A-j..y9:
 '0
&;"ayI?UK  J"+K"8 ,Y 	A//IM3G3"9q=1Z	A5NN&*JsO 	A//IM3G3"9q=1Z	A5NN&*JsO ',JsO#,* %*!!+j.>.>t.D"EF !!$'!+ 	(+J(D 2$E9#&y9'<#=#l2+3L(1I2 !!),
  !	!, .I"9y#89H,.'/$-		.
 !!),!, .I"9y#89H,.'/$-		.
 !!),OG-T ",7AqBw!a%7J7C@ 8s   I	"I	3
I>Ic           
      (   t        | |      }t        |      }g }t        |      D ]=  \  }}|j                  t	        |dz   t        |      z  |t        |       z  z
               ? t        |      t        |      z  }t        j                  |       |fS )aS  
    This function calculates the n-gram position difference penalty (NPosPenal) described in the LEPOR paper.
    The NPosPenal is an exponential of the length normalized n-gram matches between the reference and the hypothesis.

    :param ref_tokens: A list of words in reference sentence.
    :type ref_tokens: List[str]
    :param hyp_tokens: A list of words in hypothesis sentence.
    :type hyp_tokens: List[str]

    :return: A tuple containing two elements:
             - NPosPenal: N-gram positional penalty.
             - match_count: Count of matched n-grams.
    :rtype: tuple
    r	   )r*   r
   r   r   r   sumr   r   )r   r   r   match_countpdr   r)   npds           r   ngram_positional_penaltyr0      s    $ :z2Jj/K
 
B*% H1
		#q1uJ/!c*o2EEFGH b'C
O
#C88SD>;&&r   r-   reference_lengthhypothesis_lengthalphabetac                 |    t         j                  j                  }| |z  }| |z  }||z   |||z   z  |||z   z  z   z  }|S )a  
    Function will calculate the precision and recall of matched words and calculate a final score on wighting
    using alpha and beta parameters.

    :param match_count: Number of words in hypothesis aligned with reference.
    :type match_count: int
    :param reference_length: Length of the reference sentence
    :type reference_length: int
    :param hypothesis_length: Length of the hypothesis sentence
    :type hypothesis_length: int
    :param alpha: A parameter to set weight fot recall.
    :type alpha: float
    :param beta: A parameter to set weight fot precision.
    :type beta: float

    :return: Harmonic mean.
    :rtype: float
    )sys
float_infoepsilon)	r-   r1   r2   r3   r4   r8   	precisionrecallharmonic_scores	            r   harmonicr<      s[    4 nn$$G//I++Fdl	&7"	#	G0C(DEN r   
references	tokenizerc                    t               }|r' ||      }t        |       D ]  \  }} ||      | |<    n@t        j                  |      }t        |       D ]  \  }}t        j                  |      | |<    | D ]|  }t	        |      dk(  st	        |      dk(  rt        d      t        ||      }t        ||      \  }	}
t        |
t	        |      t	        |      ||      }|j                  ||	z  |z         ~ |S )a  
    Calculate LEPOR score a sentence from Han, A. L.-F. (2017).
    LEPOR: An Augmented Machine Translation Evaluation Metric. https://arxiv.org/abs/1703.08748v2

    >>> hypothesis = 'a bird is on a stone.'

    >>> reference1 = 'a bird behind the stone.'
    >>> reference2 = 'a bird is on the rock.'

    >>> sentence_lepor([reference1, reference2], hypothesis)
    [0.7824248013113159, 0.7739937377760259]

    :param references: Reference sentences
    :type references: list(str)
    :param hypothesis: Hypothesis sentence
    :type hypothesis: str
    :param alpha: A parameter to set weight fot recall.
    :type alpha: float
    :param beta: A parameter to set weight fot precision.
    :type beta: float
    :param tokenizer: A callable tokenizer that will accept a string and returns a list of tokens.
    :type tokenizer: Callable[[str], List[str]]

    :return: The list of Lepor scores for a hypothesis with all references.
    :rtype: list(float)

    r   z#One of the sentence is empty. Exit.)
listr   nltkword_tokenizer
   
ValueErrorr   r0   r<   r   )r=   r   r3   r4   r>   lepor_scoresr   r   lpr/   r-   r;   s               r   sentence_leporrF      s   F 6L z*
 )* 5 	5E9 )) 4Ju	5 ''
3
 )* 5 	>E9 $ 2 29 =Ju	>   7	y>Q#j/Q"6BCC Iz2 4IzJ[!YZ%
 	BH~567  r   c           
         t        |       dk(  st        |      dk(  rt        d      t        |       t        |      k(  sJ d       t               }t        | |      D ]#  \  }}|j	                  t        |||||             % |S )aK  
    Calculate LEPOR score for list of sentences from Han, A. L.-F. (2017).
    LEPOR: An Augmented Machine Translation Evaluation Metric. https://arxiv.org/abs/1703.08748v2

    >>> hypothesis = ['a bird is on a stone.', 'scary crow was not bad.']

    >>> references = [['a bird behind the stone.', 'a bird is on the rock'],
    ...              ['scary cow was good.', 'scary crow was elegant.']]

    >>> corpus_lepor(references, hypothesis)
    [[0.7824248013113159, 0.7931427828105261], [0.5639427891892225, 0.7860963170056643]]


    :param references: Reference sentences
    :type references: list(list(str))
    :param hypothesis: Hypothesis sentences
    :type hypothesis: list(str)
    :param alpha: A parameter to set weight fot recall.
    :type alpha: float
    :param beta: A parameter to set weight fot precision.
    :type beta: float
    :param tokenizer: A callable tokenizer that will accept a string and returns a list of tokens.
    :type tokenizer: Callable[[str], List[str]]

    :return: The Lepor score. Returns a list for all sentences
    :rtype: list(list(float))

    r   zThere is an Empty list. Exit.zCThe number of hypothesis and their reference(s) should be the same )r
   rC   r@   r   r   rF   )r=   r   r3   r4   r>   rD   reference_senhypothesis_sens           r   corpus_leporrJ     s    H :!s:!3899z?c*o- P- 6L),Z)D 
%~=.%yQ	

 r   )      ?rK   N)__doc__r   rer6   typingr   r   rA   strfloatr   r*   r0   intr<   rF   rJ    r   r   <module>rS      s   "  	 
 ! 1d3i 1T#Y 15 14a$s) ac aH'S	''+Cy'U^'@### # 	#
 # #R ,0@S	@@ @ 	@
 tCy()@ 
%[@L ,03T#Y3S	3 3 	3
 tCy()3 
$u+3r   