
    ;i                         S r SSKrSSKrSSKrSSKrSSKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SSKJr  SSKJr  SSKJr  S\S	\4S
 jrS\R2                  S\S\\   4S jr\SSSSS.S\\R2                     S\S\\   S\\   S\S\S\R<                  4S jj5       rS\S\S\\R2                     4S jr \	" S5      r!\	" S5      r"S\\!   S\\"   S\\#\!\"4      4S jr$\SSS .S\S!\S"\\%   S\\   SS4
S# jj5       r&g)$zfBeta utility functions to assist in common eval workflows.

These functions may change in the future.
    N)Sequence)OptionalTypeVar)
evaluation)	warn_beta)Clientrun_dictid_mapc                    U S   nUR                  5        H)  u  p4UR                  [        U5      [        U5      5      nM+     X S'   U R                  S5      (       a
  XS      U S'   U R                  S5      (       d  0 U S'   U $ )zConvert the IDs in the run dictionary using the provided ID map.

Parameters:
- run_dict (dict): The dictionary representing a run.
- id_map (dict): The dictionary mapping old IDs to new IDs.

Returns:
- dict: The updated run dictionary.
dotted_orderparent_run_idextra)itemsreplacestrget)r	   r
   dokvs        e/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/langsmith/beta/_evals.py_convert_idsr      s     
.	!BZZAA' !^||O$$$*O+D$E!<<  O    rootrun_to_example_mapreturnc                    U /n[         R                  " 5       nU R                  U0n/ nU(       a  UR                  5       nUR	                  1 SkS9nUR                  US   [         R                  " 5       5      XGS   '   XGS      US'   XGS      US'   UR                  (       a  UR                  UR                  5        UR                  U5        U(       a  M  U Vs/ s H  n[        X5      PM     n	nXR                     U	S   S'   U	$ s  snf )a  Convert the root run and its child runs to a list of dictionaries.

Parameters:
- root (ls_schemas.Run): The root run to convert.
- run_to_example_map (dict): The dictionary mapping run IDs to example IDs.

Returns:
- List[dict]: The list of converted run dictionaries.
>   
session_idchild_run_idsparent_run_ids)excludeidtrace_idr   reference_example_id)uuiduuid4r"   popdictr   
child_runsextendappendr   r!   )
r   r   runs_r"   r
   resultssrcsrc_dictrresults
             r   _convert_root_runr1   *   s     FEzz|HmmX&FG
iik88$U8V!'HTNDJJL!I~/%z&:;>>LL(x  % 077w!l1%wF7(:77(CF1I$%M 8s   D	F)test_project_nameclientload_child_runsinclude_outputsrunsdataset_namer2   r3   r4   r5   c                
   U (       d  [        SU  35      eU=(       d    [        R                  " 5       nUR                  US9nU(       a  U  Vs/ s H  owR                  PM     snOSnUR                  U  Vs/ s H  owR                  PM     snUU  Vs/ s H  owR                  PM     snUR                  S9  U(       d  U n	O)U  Vs/ s H  osR                  UR                  US9PM     n	nU=(       d%    S[        R                  " 5       R                  SS  3n[        UR                  US95      n
U
 Vs0 s H  oR                  UR                  _M     nnU
S   R                  (       a  U
S   R                  OU
S   R                   nU	 VVs/ s H  n[#        X5        H  nUPM     M     nnnUR%                  UUR                  S	UR'                  5       S
.S9nU Hg  nUS   US   -
  n[(        R(                  R+                  [(        R,                  R.                  S9US'   US   U-   US'   UR0                  " S0 UDSU0D6  Mi     UR3                  UR                  5      nU$ s  snf s  snf s  snf s  snf s  snf s  snnf )a  Convert the following runs to a dataset + test.

This makes it easy to sample prod runs into a new regression testing
workflow and compare against a candidate system.

Internally, this function does the following:
    1. Create a dataset from the provided production run inputs.
    2. Create a new test project.
    3. Clone the production runs and re-upload against the dataset.

Parameters:
- runs (Sequence[ls_schemas.Run]): A sequence of runs to be executed as a test.
- dataset_name (str): The name of the dataset to associate with the test runs.
- client (Optional[Client]): An optional LangSmith client instance. If not provided,
    a new client will be created.
- load_child_runs (bool): Whether to load child runs when copying runs.
    Defaults to False.

Returns:
- ls_schemas.TracerSession: The project containing the cloned runs.

Examples:
--------
.. code-block:: python

    import langsmith
    import random

    client = langsmith.Client()

    # Randomly sample 100 runs from a prod project
    runs = list(client.list_runs(project_name="My Project", execution_order=1))
    sampled_runs = random.sample(runs, min(len(runs), 100))

    runs_as_test(runs, dataset_name="Random Runs")

    # Select runs named "extractor" whose root traces received good feedback
    runs = client.list_runs(
        project_name="<your_project>",
        filter='eq(name, "extractor")',
        trace_filter='and(eq(feedback_key, "user_score"), eq(feedback_score, 1))',
    )
    runs_as_test(runs, dataset_name="Extraction Good")
z1Expected a non-empty sequence of runs. Received: )r7   N)inputsoutputssource_run_ids
dataset_id)r4   zprod-baseline-   r   zprod-baseline)whichdataset_version)project_namereference_dataset_idmetadataend_time
start_time)tzr@    )
ValueErrorrtget_cached_clientcreate_datasetr:   create_examplesr9   r!   read_runr$   r%   hexlistlist_examplessource_run_idmodified_at
created_atr1   create_project	isoformatdatetimenowtimezoneutc
create_runupdate_project)r6   r7   r2   r3   r4   r5   dsr/   r:   runs_to_copyexampleser   r?   root_runr	   	to_createprojectnew_runlatency_s                        r   convert_runs_to_testre   F   s~   l NtfWXX-r++-F			L		9B+:$'$Qyy$'G
"&'$Q$'&*+dd+55	    MQ
LPqOOADD/OBD 	 
 *T~djjl>N>NrPQ>R=S-TF((l(CDH9ABA//144/B#+A;#:#:@V@V  %$H)(GH 	G 	$   ##&UU$.88:
 $ G *%(== ( 1 1 5 59J9J9N9N 5 O%l3g=
DGD2CD	  	

	A N_ ('+
 C
s$   I&7I+I0#I55 I:I?r@   c                 \   UR                  U S9n[        R                  " [        5      n/ n0 nU HM  nUR                  b  X6R                     R                  U5        OUR                  U5        XeUR                  '   MO     UR                  5        H  u  px[        US S9XW   l	        M     U$ )N)r@   c                     U R                   $ N)r   )r/   s    r   <lambda>%_load_nested_traces.<locals>.<lambda>   s    q~~r   )key)
	list_runscollectionsdefaultdictrN   r   r*   r!   r   sortedr(   )	r@   r3   r6   treemapr,   all_runsrunrun_idr(   s	            r   _load_nested_tracesrt      s    6D%  GH(%%&--c2NN3  &mmo&,Z=U&V# .Nr   TUlist1list2c                 @    [        [        R                  " X5      5      $ rh   )rN   	itertoolsproduct)rw   rx   s     r   _outer_productr|      s    	!!%/00r   
   )max_concurrencyr3   
evaluatorsr~   c          
         SSK Jn  / nU H  n[        U[        R                  5      (       a  UR                  U5        M5  [        U5      (       a'  UR                  [        R                  " U5      5        Ml  [        S[        U5       35      e   U=(       d    [        R                  " 5       n[        X5      nU" US9 nUR                  " UR                  /[        [!        Xu5      6 Q76 n	SSS5        W	 H  n
M     g! , (       d  f       N= f)a  Compute test metrics for a given test name using a list of evaluators.

Args:
    project_name (str): The name of the test project to evaluate.
    evaluators (list): A list of evaluators to compute metrics with.
    max_concurrency (Optional[int], optional): The maximum number of concurrent
        evaluations. Defaults to 10.
    client (Optional[Client], optional): The client to use for evaluations.
        Defaults to None.

Returns:
    None: This function does not return any value.
r   )ContextThreadPoolExecutorz5Evaluation not yet implemented for evaluator of type )max_workersN)	langsmithr   
isinstancels_evalRunEvaluatorr*   callablerun_evaluatorNotImplementedErrortyperH   rI   rt   mapevaluate_runzipr|   )r@   r   r~   r3   r   evaluators_functracesexecutorr,   rd   s              r   compute_test_metricsr      s    * 4.0KdG0011t$d^^w44T:;%GT
|T   -r++-F 6F	"	?8,,
"%~f'J"K
 
@  	 
@	?s   ?-C>>
D)'__doc__rm   rU   rz   r$   collections.abcr   typingr   r   langsmith.run_trees	run_treesrH   langsmith.schemasschemas
ls_schemasr   r   r   #langsmith._internal._beta_decoratorr   langsmith.clientr   r'   r   RunrN   r1   r   boolTracerSessionre   rt   ru   rv   tupler|   intr   rF   r   r   <module>r      s  
     $ $   & + 9 #4  ,JNN  d 8 
 (,#!!h
:>>
"h h  }	h
 Vh h h h hVc 6 d:>>>R $ CLCL1$q' 1$q' 1d5A;6G 1 
 &(#'' ' c]	'
 V' 
' 'r   