
    ;i                       S r SSKJr  SSKrSSKrSSKrSSKJr  SSKJ	r	J
r
  SSKJrJrJrJrJrJr  SSKJr  SSKJr  SS	KJr   SS
KJrJrJrJr  SSKrSSK J!r!  SSK"J#r#J$r$J%r%J&r&  \RN                  " \(5      r) " S S\5      r* " S S\SS9r+ " S S\5      r, " S S\SS9r- " S S5      r.\\,\-\/4   r0 " S S\5      r1\\1\/4   r2 " S S\.5      r3  S'S jr4Sr5S(S jr6 " S  S!5      r7    S)S" jr8    S*S# jr9    S+S$ jr:    S,S% jr;\\\
\RL                     \
\RJ                     /\\,\-4   4   \\<\RL                     \<\RJ                     /\\,\-4   4   4   r=S-S& jr>g! \ a    SS
KJrJrJrJr   GN+f = f).z?This module contains the evaluator classes for evaluating runs.    )annotationsN)abstractmethod)	AwaitableSequence)AnyCallableLiteralOptionalUnioncast)	TypedDictrun_helpers)schemas)	BaseModelFieldValidationError	validator)wraps)
SCORE_TYPE
VALUE_TYPEExampleRunc                  0    \ rS rSr% SrS\S'    S\S'   Srg)	Category/   z$A category for categorical feedback.Optional[Union[float, int]]valuestrlabel N__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__r!       n/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/langsmith/evaluation/evaluator.pyr   r   /   s    .&&CJ&r*   r   c                  H    \ rS rSr% SrS\S'    S\S'    S\S'    S\S	'   S
rg)FeedbackConfig8   zaConfiguration to define a type of feedback.

Applied on on the first creation of a feedback_key.
z0Literal['continuous', 'categorical', 'freeform']typer   minmaxz%Optional[list[Union[Category, dict]]]
categoriesr!   Nr"   r!   r*   r+   r-   r-   8   s*    
 ;:	$$;	$$A55r*   r-   F)totalc                      \ rS rSr% SrS\S'    SrS\S'    SrS\S	'    SrS
\S'    Sr	S\S'    \
" \S9rS\S'    SrS\S'    SrS\S'    SrS\S'    SrS\S'     " S S5      r\" S	SS9S 5       rSrg)EvaluationResultG   zEvaluation result.r   keyNr   scorer   r   zOptional[str]commentzOptional[dict]
correction)default_factorydictevaluator_infoz%Optional[Union[FeedbackConfig, dict]]feedback_configOptional[Union[uuid.UUID, str]]source_run_idtarget_run_idextrac                      \ rS rSrSrSrSrg)EvaluationResult.Configb   zPydantic model configuration.Fr!   N)r#   r$   r%   r&   r'   allow_extrar)   r!   r*   r+   ConfigrD   b   s
    +r*   rG   T)prec                    SU;  d  US   c3  [        U[        [        45      (       a  [        R	                  SU 35        U$ )z$Check that the value is not numeric.r8   zJNumeric values should be provided in the 'score' field, not 'value'. Got: )
isinstanceintfloatloggerwarning)clsvvaluess      r+   check_value_non_numeric(EvaluationResult.check_value_non_numericg   sH    
 & F7O$;!c5\**C!
 r*   r!   )r#   r$   r%   r&   r'   r(   r8   r   r9   r:   r   r<   r=   r>   r@   rA   rB   rG   r   rR   r)   r!   r*   r+   r5   r5   G   s    	H@E:0E:8!G]!2!%J%: 6ND65=AO:A;59M29659M29 !E> ) 
 wD! "r*   r5   c                  $    \ rS rSr% SrS\S'   Srg)EvaluationResultsv   zeBatch evaluation results.

This makes it easy for your evaluator to return multiple
metrics at once.
zlist[EvaluationResult]resultsr!   Nr"   r!   r*   r+   rU   rU   v   s     $#!r*   rU   c                  b    \ rS rSrSr\  S       SS jj5       r  S       SS jjrSrg)	RunEvaluator   zEvaluator interface class.Nc                    g)zEvaluate an example.Nr!   )selfrunexampleevaluator_run_ids       r+   evaluate_runRunEvaluator.evaluate_run   s    r*   c                   ^ ^^^^#    [         R                  " 5       mUUUUU 4S jn[        R                  " 5       R	                  SU5      I Sh  vN $  N7f)z#Evaluate an example asynchronously.c                    > [         R                  " S0 T D6   TR                  TTT5      sS S S 5        $ ! , (       d  f       g = f)Nr!   )rhtracing_contextr`   )current_contextr_   r^   r]   r\   s   r+   _run_with_context5RunEvaluator.aevaluate_run.<locals>._run_with_context   s3    ##6o6((g7GH 766s	   5
AN)rd   get_tracing_contextasyncioget_running_looprun_in_executor)r\   r]   r^   r_   rg   rf   s   ```` @r+   aevaluate_runRunEvaluator.aevaluate_run   sG      002	I 	I --/??FWXXXXs   AAAAr!   NNr]   r   r^   Optional[Example]r_   Optional[uuid.UUID]return*Union[EvaluationResult, EvaluationResults])	r#   r$   r%   r&   r'   r   r`   rm   r)   r!   r*   r+   rY   rY      s    $ &*04	## ## .	#
 
4# # &*04	YY #Y .	Y
 
4Y Yr*   rY   c                  P    \ rS rSr% SrS\S'    S\S'    SrS\S	'    SrS
\S'   Srg)ComparisonEvaluationResult   zFeedback scores for the results of comparative evaluations.

These are generated by functions that compare two or more runs,
returning a ranking or other feedback.
r   r7   z'dict[Union[uuid.UUID, str], SCORE_TYPE]scoresNr?   r@   z6Optional[Union[str, dict[Union[uuid.UUID, str], str]]]r9   r!   )	r#   r$   r%   r&   r'   r(   r@   r9   r)   r!   r*   r+   rv   rv      s8     
H@33459M296FJGCJ:r*   rv   c                     ^  \ rS rSrSr S   SS jjr S       SS jjr      SS jr      SS jr\	SS j5       r
  S       SS jjr  S     SU 4S	 jjjr S     SS
 jjrSS jrSrU =r$ )DynamicRunEvaluator   a  A dynamic evaluator that wraps a function and transforms it into a `RunEvaluator`.

This class is designed to be used with the `@run_evaluator` decorator, allowing
functions that take a `Run` and an optional `Example` as arguments, and return
an `EvaluationResult` or `EvaluationResults`, to be used as instances of `RunEvaluator`.

Attributes:
    func (Callable): The function that is wrapped by this evaluator.
c                  ^ [        U5      u  nmU(       a  [        U5      u  nmS	U4S jjn[        U5      " U 5        SSKJn  Ub&  UR	                  X#S9U l        [        USS5      U l        [        R                  " U5      (       a5  Ub  [        S5      eUR	                  XS9U l        [        USS5      U l        gUR	                  [        [        [        [        [           /[         4   U5      US9U l        [        USS5      U l        g)
zInitialize the DynamicRunEvaluator with a given function.

Args:
    func (Callable): A function that takes a `Run` and an optional `Example` as
    arguments, and returns a dict or `ComparisonEvaluationResult`.
c                f   > Tc  U $ T" U R                  S5      U R                  S5      5      u    pU$ )Nr]   r^   getinputs_traced_inputsprepare_inputss      r+   process_inputs4DynamicRunEvaluator.__init__.<locals>.process_inputs   s<    %$2

5!6::i#8%!Q ! r*   r   r   Nr   r#   rz   Func was provided as a coroutine function, but afunc was also provided. If providing both, func should be a regular function to avoid ambiguity.r   r<   rs   r<   )_normalize_evaluator_funcr   	langsmithr   ensure_traceableafuncgetattr_nameinspectiscoroutinefunction	TypeErrorr   r   r   r
   r   _RUNNABLE_OUTPUTfuncr\   r   r   r   r   r   s        @r+   __init__DynamicRunEvaluator.__init__   s   ( ";4!@~&?&F#UN	! 	dD)$55 6 DJ !
4IJDJ&&t,, 3 
 %55 6 DJ !z3HIDJ#44XsHW$568HHI4P- 5 DI !z3HIDJr*   c                v  ^ [        T[        5      (       a  TR                  (       d  UTl        T$  T(       d  [        ST 35      eST;  a  U(       a  U R                  TS'   [        U4S jS 5       5      (       a  [        ST 35      e[        S0 SU0TED6$ ! [         a  n[        ST 35      UeS nAff = f)	NziExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got empty result: r7   c              3  ,   >#    U  H	  oT;  v   M     g 7fNr!   ).0kresults     r+   	<genexpr>@DynamicRunEvaluator._coerce_evaluation_result.<locals>.<genexpr>  s     J,IqF?,I   )r8   r   r9   zrExpected an EvaluationResult object, or dict with a metric 'key' and optional 'score' or categorical 'value'; got r@   z[Expected an EvaluationResult object, or dict with a metric 'key' and optional 'score'; got r!   )rJ   r5   r@   
ValueErrorr   allr   )r\   r   r@   allow_no_keyes    `   r+   _coerce_evaluation_result-DynamicRunEvaluator._coerce_evaluation_result   s     f.//'''4$M	 FFLXO  F"| $

uJ,IJJJ OOUhX  $Q&P&PQQ 	44:8= 	s   A'B 
B8$B33B8c                    SU;   aA  UR                  5       nUS    Vs/ s H  nU R                  XBS9PM     snUS'   [        S0 UD6$ U R                  [        [        U5      USS9$ s  snf )NrW   )r@   T)r@   r   r!   )copyr   rU   r   r<   )r\   rW   r@   cprs        r+   _coerce_evaluation_results.DynamicRunEvaluator._coerce_evaluation_results  s    
 B !++A ..q.N+ByM %*r**--w}4 . 
 	
s   A'c                    [        U[        5      (       a  UR                  (       d  X!l        U$ [        U5      nU R	                  X5      $ r   )rJ   r5   r@   _format_evaluator_resultr   )r\   r   r@   s      r+   _format_result"DynamicRunEvaluator._format_result)  sA     f.//'''4$M)&1..vEEr*   c                    [        U S5      $ zCheck if the evaluator function is asynchronous.

Returns:
    bool: True if the evaluator function is asynchronous, False otherwise.
r   hasattrr\   s    r+   is_asyncDynamicRunEvaluator.is_async7       tW%%r*   c                   [        U S5      (       dU  [        R                  " 5       nUR                  5       (       a  [	        S5      eUR                  U R                  X5      5      $ Uc  [        R                  " 5       nSUR                  0n[        USS5      (       a  [        UR                  5      US'   U R                  UUX5S.S9nU R                  Xc5      $ )	aU  Evaluate a run using the wrapped function.

This method directly invokes the wrapped function with the provided arguments.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
r   tCannot call `evaluate_run` on an async run evaluator from within an running event loop. Use `aevaluate_run` instead.NrA   
session_id
experimentrun_idmetadatalangsmith_extra)r   rj   get_event_loop
is_runningRuntimeErrorrun_until_completerm   uuiduuid4idr   r   r   r   r   )r\   r]   r^   r_   running_loopr   r   s          r+   r`    DynamicRunEvaluator.evaluate_run@  s    " tV$$"113L&&(("R 
 $66t7I7I#7WXX##zz|$3SVV#<3d++%(%8H\"'7N  

 ""6<<r*   c                \  >#    [        U S5      (       d  [        TU ]	  X5      I Sh  vN $ Uc  [        R                  " 5       nSUR
                  0n[        USS5      (       a  [        UR                  5      US'   U R                  UUX4S.S9I Sh  vN nU R                  XS5      $  N N7f)a|  Evaluate a run asynchronously using the wrapped async function.

This method directly invokes the wrapped async function with the
    provided arguments.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used
        in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
r   NrA   r   r   r   r   )r   superrm   r   r   r   r   r   r   r   r   )r\   r]   r^   r_   r   r   	__class__s         r+   rm   !DynamicRunEvaluator.aevaluate_runf  s     & tW%%.s<<<##zz|$3SVV#<3d++%(%8H\"zz'7N " 
 

 ""6<< =
s"   $B,B(A*B,B*B,*B,c                $    U R                  X5      $ )a  Make the evaluator callable, allowing it to be used like a function.

This method enables the evaluator instance to be called directly, forwarding the
call to `evaluate_run`.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    Union[EvaluationResult, EvaluationResults]: The result of the evaluation.
)r`   )r\   r]   r^   s      r+   __call__DynamicRunEvaluator.__call__  s       ..r*   c                "    SU R                    S3$ ))Represent the DynamicRunEvaluator object.z<DynamicRunEvaluator >r   r   s    r+   __repr__DynamicRunEvaluator.__repr__  s    &tzzl!44r*   r   r   r   r   )r   XCallable[[Run, Optional[Example]], Union[_RUNNABLE_OUTPUT, Awaitable[_RUNNABLE_OUTPUT]]]r   zIOptional[Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]])F)r   zUnion[EvaluationResult, dict]r@   	uuid.UUIDr   boolrs   r5   )rW   zUnion[dict, EvaluationResults]r@   r   rs   rt   )r   zMUnion[EvaluationResult, EvaluationResults, dict, str, int, bool, float, list]r@   r   rs   rt   rs   r   ro   rp   )r]   r   r^   rq   r_   rr   )r]   r   r^   rq   rs   rt   rs   r   )r#   r$   r%   r&   r'   r   r   r   r   propertyr   r`   rm   r   r   r)   __classcell__)r   s   @r+   rz   rz      sE   , 8J
8J
8J| #	- ! 	
 
<
/
 !
 
4	
"F
F
 !F 
4F & & &*04	$=$= #$= .	$=
 
4$=R &*04	== #= .	= =D 6://!2/	3/"5 5r*   rz   c                    [        U 5      $ )zeCreate a run evaluator from a function.

Decorator that transforms a function into a `RunEvaluator`.
)rz   r   s    r+   run_evaluatorr     s     t$$r*   i'  c                `    [        U 5      n[        U5      [        :  a  US [        S-
   S-   nU$ )N   z...))reprlen_MAXSIZE)objss     r+   _maxsize_reprr     s1    S	A
1vn1&Hr*   c                      \ rS rSrSr S   SS jjr\SS j5       r S     SS jjr S     SS jjr	 S     SS jjr
SS	 jr\SS
 j5       r        SS jrSrg)DynamicComparisonRunEvaluatori  z4Compare predictions (as traces) from 2 or more runs.Nc                (  ^ [        U5      u  nmU(       a  [        U5      u  nmS	U4S jjn[        U5      " U 5        SSKJn  Ub&  UR	                  X#S9U l        [        USS5      U l        [        R                  " U5      (       a5  Ub  [        S5      eUR	                  XS9U l        [        USS5      U l        gUR	                  [        [        [        [           [        [            /["        4   U5      US9U l        [        USS5      U l        g)
zInitialize the DynamicRunEvaluator with a given function.

Args:
    func (Callable): A function that takes a `Run` and an optional `Example` as
    arguments, and returns an `EvaluationResult` or `EvaluationResults`.
c                f   > Tc  U $ T" U R                  S5      U R                  S5      5      u    pU$ )Nrunsr^   r~   r   s      r+   r   >DynamicComparisonRunEvaluator.__init__.<locals>.process_inputs  s<    %$2

6"FJJy$9%!Q ! r*   r   r   Nr   r#   rz   r   r   )$_normalize_comparison_evaluator_funcr   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   _COMPARISON_OUTPUTr   r   s        @r+   r   &DynamicComparisonRunEvaluator.__init__  s%   ( "Fd!K~&J5&Q#UN	! 	dD)$55 6 DJ !
4IJDJ&&t,, 3 
 %55 6 DJ !z3HIDJ#44!#(9:*,   . 5 	DI !z3HIDJr*   c                    [        U S5      $ r   r   r   s    r+   r   &DynamicComparisonRunEvaluator.is_async  r   r*   c                d   [        U S5      (       dU  [        R                  " 5       nUR                  5       (       a  [	        S5      eUR                  U R                  X5      5      $ [        R                  " 5       nU R                  U5      nU R                  UUXES.S9nU R                  XdU5      $ )zCompare runs to score preferences.

Args:
    runs: A list of runs to compare.
    example: An optional example to be used in the evaluation.

r   r   r   tagsr   )r   rj   r   r   r   r   acompare_runsr   r   	_get_tagsr   _format_results)r\   r   r^   r   r@   r   r   s          r+   compare_runs*DynamicComparisonRunEvaluator.compare_runs  s     tV$$"113L&&(("R 
 $66&&t5  

~~d#'4C  

 ##F4@@r*   c                   #    [        U S5      (       d  U R                  X5      $ [        R                  " 5       nU R	                  U5      nU R                  UUX4S.S9I Sh  vN nU R                  XSU5      $  N7f)an  Evaluate a run asynchronously using the wrapped async function.

This method directly invokes the wrapped async function with the
    provided arguments.

Args:
    runs (Run): The runs to be evaluated.
    example (Optional[Example]): An optional example to be used
        in the evaluation.

Returns:
    ComparisonEvaluationResult: The result of the evaluation.
r   r   r   N)r   r   r   r   r   r   r   )r\   r   r^   r@   r   r   s         r+   r   +DynamicComparisonRunEvaluator.acompare_runs  s|       tW%%$$T33

~~d#zz'4C " 
 

 ##F4@@
s   AA:!A8"A:c                $    U R                  X5      $ )ay  Make the evaluator callable, allowing it to be used like a function.

This method enables the evaluator instance to be called directly, forwarding the
call to `evaluate_run`.

Args:
    run (Run): The run to be evaluated.
    example (Optional[Example]): An optional example to be used in the evaluation.

Returns:
    ComparisonEvaluationResult: The result of the evaluation.
)r   )r\   r   r^   s      r+   r   &DynamicComparisonRunEvaluator.__call__:  s       //r*   c                "    SU R                    S3$ )r   z<DynamicComparisonRunEvaluator r   r   r   s    r+   r   &DynamicComparisonRunEvaluator.__repr__K  s    0A>>r*   c                    / nU  He  nUR                  S[        UR                  5      -   5        [        USS5      (       d  M>  UR                  S[        UR                  5      -   5        Mg     U$ )zExtract tags from runs.zrun:r   Nzexperiment:)appendr   r   r   r   )r   r   r]   s      r+   r   'DynamicComparisonRunEvaluator._get_tagsO  s\     CKKSVV,-sL$//MC,??@  r*   c                   [        U[        5      (       a  UR                  (       d  X!l        U$ [        U[        5      (       a:  [	        X15       VVs0 s H  u  pEUR
                  U_M     snnU R                  US.nO<[        U[        5      (       a  SU;  a  U R                  US'   OSU< 3n[        U5      e [        S0 SU0UED6$ s  snnf ! [         a  n[        SU 35      UeS nAff = f)N)rx   r7   r@   r7   zXExpected 'dict', 'list' or 'ComparisonEvaluationResult' result object. Received: result=r@   zExpected a dictionary with a 'key' and dictionary of scores mappingrun IDs to numeric scores, or ComparisonEvaluationResult object, got r!   )
rJ   rv   r@   listzipr   r   r<   r   r   )r\   r   r@   r   r]   r8   msgr   s           r+   r   -DynamicComparisonRunEvaluator._format_resultsZ  s	    f899'''4$M%%;>t;LM;LZS3665=;LMzz!.F
 %%F" $

u-%+I/  S/!		- "M<V<  N"  	x! 		s   C
;C 
C.C))C.r   r   )r   fCallable[[Sequence[Run], Optional[Example]], Union[_COMPARISON_OUTPUT, Awaitable[_COMPARISON_OUTPUT]]]r   zUOptional[Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]]r   )r   Sequence[Run]r^   rq   rs   rv   r   )r   r  rs   z	list[str])r   z-Union[dict, list, ComparisonEvaluationResult]r@   r   r   r  rs   rv   )r#   r$   r%   r&   r'   r   r   r   r   r   r   r   staticmethodr   r   r)   r!   r*   r+   r   r     s    > >J
>J
>J@ & & AEA!A,=A	#A@ AEA!A,=A	#A: AE0!0,=0	#0"?  "=" !" 	"
 
$"r*   r   c                    [        U 5      $ )z.Create a comaprison evaluator from a function.)r   r   s    r+   comparison_evaluatorr    s     )..r*   c                
  ^ ^^	^
^ Sm[         R                  " T 5      m
T
R                  R                  5        VVs/ s H#  u  pUR                  UR
                  :w  d  M!  UPM%     nnnT
R                  R                  5        VVs/ s H0  u  pUR                  [         R                  R                  Ld  M.  UPM2     snnm	U(       aB  [        U	U4S jU 5       5      (       d8  [        U Vs/ s H  oDT	;  d  M
  UPM     sn5      S:w  a  ST S3n[        U5      e[        U	U4S jU 5       5      (       a  USS/:X  a  T S 4$ [         R                  " T 5      (       aO        SU
4S	 jjm      SUU 4S
 jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$       SU
4S jjmSUU 4S jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$ s  snnf s  snnf s  snf )N)r]   r^   r   outputsreference_outputsattachmentsc              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r!   r   pnameargs_with_defaultssupported_argss     r+   r   ,_normalize_evaluator_func.<locals>.<genexpr>  &      
PXu^#Bu0B'BBPX       UInvalid evaluator function. Must have at least one argument. Supported arguments are . Please see https://docs.smith.langchain.com/evaluation/how_to_guides/evaluation/evaluate_llm_application#use-custom-evaluatorsc              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r!   r  s     r+   r   r    %      LT5>5,>#>>Hr  r]   r^   c                  > U UU(       a  UR                   O0 U R                  =(       d    0 U(       a  UR                  =(       d    0 O0 U(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nTR                  R	                  5        He  u  pgXb;   d  M  UR
                  UR                  UR                  4;   a  UR                  X&   5        OX&   X6'   US;   a  [        X&   5      OX&   XV'   Mg     XCU4$ N)r]   r^   r   r  r  r  )r]   r^   
r   r  r  
parametersitemskindPOSITIONAL_OR_KEYWORDPOSITIONAL_ONLYr  r   	r]   r^   arg_mapkwargsargsr   
param_nameparamsigs	           r+   _prepare_inputs2_normalize_evaluator_func.<locals>._prepare_inputs       &07gnnR"{{0b@G7#6#6#<"RBI)>Br  "),)=)=)?%J!, ::!77!11*  !KK(;<181DF.  *-?? *'*=>!(!4 &1 *@ ]22r*   c                F   >#    T" X5      u  p#nT" U0 UD6I S h  vN $  N7fr   r!   r]   r^   r-  r,  r   r1  r   s        r+   awrapper+_normalize_evaluator_func.<locals>.awrapper  s-      %4C$A!q!4262222   !!r#   c                  > U UU(       a  UR                   O0 U R                  =(       d    0 U(       a  UR                  =(       d    0 O0 U(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nTR                  R	                  5        He  u  pgXb;   d  M  UR
                  UR                  UR                  4;   a  UR                  X&   5        OX&   X6'   US;   a  [        X&   5      OX&   XV'   Mg     XCU4$ r#  r$  r*  s	           r+   r1  r2    r3  r*   c                *   > T" X5      u  p#nT" U0 UD6$ r   r!   r5  s        r+   wrapper*_normalize_evaluator_func.<locals>.wrapper  s"    $3C$A!qT,V,,r*   )r]   r   r^   rq   rs   tuple[list, dict, dict])r]   r   r^   rq   rs   r   r   	signaturer%  r&  r'  VAR_KEYWORDdefault	Parameteremptyr   r   r   r   r   r   r#   r   r  pall_argsar
  r6  r;  r1  r  r0  r  s   `       @@@@r+   r   r     s9   N 

D
!C&)nn&:&:&<X&<(%!--@W&<HX ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@ AFG 	 o  LT  	 
 Tz&&t,,33#43(3>33#43!3 3 4,, j)&& 
 o..33#43(3>- - 4,, j)%% 
 _--k Y Es#    G4G4-G:6G:+	H 8H c                  ^ ^^	^
^ Sm[         R                  " T 5      m
T
R                  R                  5        VVs/ s H#  u  pUR                  UR
                  :w  d  M!  UPM%     nnnT
R                  R                  5        VVs/ s H0  u  pUR                  [         R                  R                  Ld  M.  UPM2     snnm	U(       aB  [        U	U4S jU 5       5      (       d8  [        U Vs/ s H  oDT	;  d  M
  UPM     sn5      S:w  a  ST S3n[        U5      e[        U	U4S jU 5       5      (       a  USS/:X  a  T S 4$ [         R                  " T 5      (       aO        SU
4S	 jjm      SUU 4S
 jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$       SU
4S jjm      SUU 4S jjn[        T S5      (       a  [        T S5      OUR                  Ul        UT4$ s  snnf s  snnf s  snf )Nr   r^   r   r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r!   r  s     r+   r   7_normalize_comparison_evaluator_func.<locals>.<genexpr>%  r  r  r  r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r!   r  s     r+   r   rK  3  r!  r  r   r^   c                  > U UU(       a  UR                   O0 U  Vs/ s H  o"R                  =(       d    0 PM     snU(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nT	R                  R                  5        He  u  pxXs;   d  M  UR                  UR
                  UR                  4;   a  UR                  X7   5        OX7   XG'   US;   a  [        X7   5      OX7   Xg'   Mg     XTU4$ s  snf NrI  )r   r^   	r   r  r%  r&  r'  r(  r)  r  r   
r   r^   r]   r+  r,  r-  r   r.  r/  r0  s
            r+   r1  =_normalize_comparison_evaluator_func.<locals>._prepare_inputs=       !&07gnnR=ABTc 1r 1TBBI)>Br  "),)=)=)?%J!, ::!77!11*  !KK(;<181DF.  *-@@ *'*=>!(!4 &1 *@ ]22+  C   C)
c                F   >#    T" X5      u  p#nT" U0 UD6I S h  vN $  N7fr   r!   r   r^   r-  r,  r   r1  r   s        r+   r6  6_normalize_comparison_evaluator_func.<locals>.awrapper[  s-      %4D$B!q!4262222r8  r#   c                  > U UU(       a  UR                   O0 U  Vs/ s H  o"R                  =(       d    0 PM     snU(       a  UR                  =(       d    0 O0 S.n0 n/ n0 nT	R                  R                  5        He  u  pxXs;   d  M  UR                  UR
                  UR                  4;   a  UR                  X7   5        OX7   XG'   US;   a  [        X7   5      OX7   Xg'   Mg     XTU4$ s  snf rN  rO  rP  s
            r+   r1  rQ  j  rR  rS  c                *   > T" X5      u  p#nT" U0 UD6$ r   r!   rU  s        r+   r;  5_normalize_comparison_evaluator_func.<locals>.wrapper  s$     %4D$B!qT,V,,r*   )r   r  r^   rq   rs   r=  )r   r  r^   rq   rs   r   r>  rD  s   `       @@@@r+   r   r     sP    SN


D
!C&)nn&:&:&<X&<(%!--@W&<HX ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@ AFG 	 o  LT  	 
 Tz&&t,,3#3.?3(3<3#3.?3#3 3 4,, j)&& 
 _,,3#3.?3(3<-#-.?-#- - 4,, j)%% 
 O++k Y Es#    G:G:-H 6H +	H8Hc                |   [        U [        [        [        45      (       a  SU 0n U $ U (       d  [	        SU  35      e[        U [
        5      (       a,  [        S U  5       5      (       d  [	        SU  S35      eSU 0n U $ [        U [        5      (       a  SU 0n U $ [        U [        5      (       a   U $ [	        SU  35      e)	Nr8   zdExpected a non-empty dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )rJ   r<   )r   xs     r+   r   +_format_evaluator_result.<locals>.<genexpr>  s     71:a&&s   z8Expected a list of dicts or EvaluationResults. Received .rW   r   zZExpected a dict, str, bool, int, float, list, EvaluationResult, or EvaluationResults. Got )	rJ   r   rL   rK   r   r  r   r   r<   )r   s    r+   r   r     s     &4,--6"* M) ;;A(D
 	
 
FD	!	!7777J6(RST  V$ M 
FC	 	 6" M 
FD	!	! M	 &&,X/
 	
r*   c                  ^ ^^^	 Sm	[         R                  " T 5      mTR                  R                  5        VVs/ s H  u  pUPM	     nnnTR                  R                  5        VVs/ s H0  u  pUR                  [         R
                  R                  Ld  M.  UPM2     snnmU(       aB  [        UU	4S jU 5       5      (       dH  [        U Vs/ s H  oDT;  d  M
  UPM     sn5      S:w  a!  ST	 S3nU(       a	  USU S3-  n[        U5      e[        U	4S jU 5       5      (       a  USS	/:X  a  T $       SU U4S
 jjn[        T S5      (       a  [        T S5      Ul        U$ UR                  Ul        U$ s  snnf s  snnf s  snf )Nr   examplesr   r  r  c              3  D   >#    U  H  oT;   =(       d    UT;   v   M     g 7fr   r!   r  s     r+   r   /_normalize_summary_evaluator.<locals>.<genexpr>  r  r  r  r  r^  z Received arguments c              3  ,   >#    U  H	  oT;   v   M     g 7fr   r!   )r   r  r  s     r+   r   rc    s     ?hU.(hr   r   ra  c           	     $  > U UU Vs/ s H  o"R                   PM     snU  Vs/ s H  o3R                  =(       d    0 PM     snU Vs/ s H  o"R                  =(       d    0 PM     snS.n0 n/ nTR                  R                  5        HM  u  pxXt;   d  M  UR                  UR
                  UR                  4;   a  UR                  XG   5        MG  XG   XW'   MO     T
" U0 UD6n	[        U	[        5      (       a  U	$ [        U	5      $ s  snf s  snf s  snf )Nr`  )r   r  r%  r&  r'  r(  r)  r  rJ   r5   r   )r   ra  r^   r]   r+  r,  r-  r.  r/  r   r   r0  s             r+   r;  -_normalize_summary_evaluator.<locals>.wrapper  s    $9ABg>>B9=>#KK-2->KS%T8oo&;&;8%TG FD%(^^%9%9%;!
(zz33--&  G$78-4-@* &< 4*6*F&"233+F33) C>%Ts   DD
Dr#   )r   zSequence[schemas.Run]ra  zSequence[schemas.Example]rs   rt   )r   r?  r%  r&  rA  rB  rC  r   r   r   r   r   r#   )
r   r  rE  rF  rG  r
  r;  r  r0  r  s
   `      @@@r+   _normalize_summary_evaluatorrg    s   SN


D
!C&)nn&:&:&<=&<(%&<H= ,,..HE99G--333 	.
  
PX
 
 
 HDHq1C(CHDEJ11?0@C 	 )(155Co ?h???8P D 	4'	43L	47	4 	4: *1z)B)BGD*% 	  IPHXHX 	 w > Es   E7(-E=E=	FF)r   r   )r   r   )r   r  rs   r   )r   r   rs   ztuple[Union[Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]]], Optional[Callable[..., dict]]])r   r   rs   ztuple[Union[Callable[[Sequence[Run], Optional[Example]], _COMPARISON_OUTPUT], Callable[[Sequence[Run], Optional[Example]], Awaitable[_COMPARISON_OUTPUT]]], Optional[Callable[..., dict]]])r   z;Union[EvaluationResults, dict, str, int, bool, float, list]rs   zUnion[EvaluationResults, dict])r   r   rs   SUMMARY_EVALUATOR_T)?r'   
__future__r   rj   r   r   abcr   collections.abcr   r   typingr   r   r	   r
   r   r   typing_extensionsr   r   r   rd   r   pydantic.v1r   r   r   r   ImportErrorpydanticlogging	functoolsr   langsmith.schemasr   r   r   r   	getLoggerr#   rM   r   r-   r5   rU   rY   r<   r   rv   r   rz   r   r   r   r   r  r   r   r   r  rh  rg  r!   r*   r+   <module>ru     s   E "     /  ( '     B B			8	$'y '6Ye 6,y ,^"	 "Y Y8 )+<dBC : :$ 5t;< e5, e5P	%	% I IX//
 #/G.
G.G.T@,
@,@,FG#8 	'++	 9: 112	4 	gkk	D12 112	4		 >   s   E( (E>=E>