
    i*<                     <   S SK r S SKrS SKrS SKJr  S SKJrJrJrJ	r	   " S S\5      r
 S SKJr  S SKJrJrJr  SrSS
 jr SSS	SS.S\\\\\\	\\\/\4   4   4         4S jjjr\R0                  " \5      r\" 5        " S S5      5       rg! \ a    S	r N]f = f)    N)Enum)AnyCallableOptionalUnionc                   4    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rg)RequestStatus   z5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailed N)__name__
__module____qualname____firstlineno____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED__static_attributes__r       j/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/transformers/utils/metrics.pyr	   r	      s*    ?GJ)7HHFr    r	   )metrics)Status
StatusCode
get_tracerTFc                 0   ^  [         (       d  S $ U 4S jnU$ )a  
Decorator that attaches a tracer to a class.

This decorator should be applied to classes that need OpenTelemetry tracing.
It adds a tracer attribute to the class instance that can be used by the traced decorator.

Args:
    tracer_name_template: Optional template string for the tracer name.
        If provided, it should contain {module} which will be replaced with the class's full module path
        and {class_name} for the class name.
        If None, a default naming scheme will be used where:
          - If the module already starts with "transformers.", it will use that directly
          - Otherwise, it will prepend "transformers." to the module name

Returns:
    Class decorator function
c                     U $ Nr   )clss    r!   <lambda>attach_tracer.<locals>.<lambda>0   s    3r    c                 t   >^ ^ T R                   m[        R                  " T5      U UU4S j5       nUT l         T $ )Nc                    > T" U /UQ70 UD6  TR                   nTR                  nTc'  UR                  S5      (       a  U SU 3nOSU SU 3nOTR                  X4S9n[	        U5      U l        g )Ntransformers..)module
class_name)r   r   
startswithformatr%   tracer)	selfargskwargsmodule_namer1   tracer_namer)   original_inittracer_name_templates	         r!   init_with_tracer:attach_tracer.<locals>.decorator.<locals>.init_with_tracer5   s    $000..K))J#+))/::%0M:,"?K$1+a
|"LK2999d$[1DKr    )__init__	functoolswraps)r)   r<   r:   r;   s   ` @r!   	decorator attach_tracer.<locals>.decorator2   s5    		'	2 
(	2  (
r    )_has_opentelemetry)r;   rA   s   ` r!   attach_tracerrD      s    $ . r    )	span_name
standaloneadditional_attributesrG   c                2   ^^^ UUU4S jnU c  U$ U" U 5      $ )a^  
Decorator to trace function calls with OpenTelemetry.

Can be used as @traced or @traced(span_name="custom_name")

Args:
    func: The function to trace
    span_name: Optional custom name for the span (defaults to function name)
    standalone: If True, creates a parentless span
    additional_attributes: Optional list of additional attributes to set on the span.
      Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
      where:
        - instance_attribute_name: Name of the attribute to get from the class instance
        - span_attribute_key: Key to use when setting the attribute on the span
        - value_or_transform_function: Either a raw value to use directly, or a function to transform
          the attribute value before setting it on the span

Returns:
    Decorated function with tracing
c                 h   >^  [         (       d  T $ [        R                  " T 5      UU UU4S j5       nU$ )Nc            
        > U (       a#  [        TS5      (       a  TR                  b  U S   OS nUS LnU(       a  [        US5      (       a  UR                  nO%[        STR                   STR
                   35      nT=(       d    TR
                  nT(       a  UR                  OUR                  nU" U5       nUR                  STR
                  5        UR                  STR                  5        UR                  SU5        U (       a  [        U 5       Ht  u  p[        U	[        [        [        [        45      (       d  U	c   UR                  S	U 3[        U	5      5        MM  UR                  S	U 3[        [        U	5      5      5        Mv     U(       a  UR!                  5        Ht  u  p[        U[        [        [        [        45      (       d  Uc   UR                  S
U
 3[        U5      5        MM  UR                  S
U
 3[        [        U5      5      5        Mv     T(       a_  U(       aX  T HR  nUu  pn[        X-5      (       d  M  [#        X-5      n[%        U5      (       a	  U" U5      nOUnUR                  UU5        MT      T" U 0 UD6nUsS S S 5        $ ! [&         a?  nUR)                  [+        [,        R.                  5      5        UR1                  U5        e S nAff = f! , (       d  f       g = f)N__self__r   r4   r.   r/   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrrK   r4   r%   r   r   
start_spanstart_as_current_spanset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	Exception
set_statusr#   r$   ERRORrecord_exception)r6   r7   instance	is_methodr4   namespan_fnspaniargkeyvalueattr_configinstance_attribute_namespan_attribute_keyvalue_or_transform_functionattribute_valuetransformed_valueresulterG   funcrE   rF   s                       r!   wrapper*traced.<locals>.decorator.<locals>.wrapperl   sa   "&GD*,E,E$--JctAwjnH ,IWXx88!#mDOO3DAdmm_$UV-D+5f''6;W;WG$""?DMMB""#4dooF""#7C"+D/%cCeT+BCCs{ ..qc{CHE ..qc{CS	NK	 #2
 &,lln
%ec3t-DEE ..E
K ..DK@PQ	 '5 )Y'<cn`/E`"8EE.5h.XO'(CDD4OP_4` 14O 1 ../ACTU (=!4262F!? @ ! OOF:+;+;$<=))!,A s1   8F
K"<K"	J
K :KKK""
K0)rC   r?   r@   )ro   rp   rG   rE   rF   s   ` r!   rA   traced.<locals>.decoratorh   s1    !!K		.	 
.	` r    r   )ro   rE   rF   rG   rA   s    ``` r!   tracedrs   L   s     85n |T?r    c                       \ rS rSrSrS\4S jrS r\S\	S\
SS	4S
 j5       r\S\SS	4S j5       r\SS j5       r\S\S\SS	4S j5       r\S\	S\
SS	4S j5       rSrg	)ContinuousBatchProcessorMetrics   z0Metrics collection for ContinuousBatchProcessor.max_batch_tokensc                 0    Xl         U R                  5         g)ztInitialize metrics for continuous batch processor.

Args:
    max_batch_tokens: Maximum number of tokens in a batch
N)rw   _setup_metrics)r5   rw   s     r!   r>   (ContinuousBatchProcessorMetrics.__init__   s     !1r    c                    [         (       d  [        R                  S5        g[        R                  " S5      U l        / SQnU R
                  R                  SSSUS9U l        U R
                  R                  S	S
SS9U l	        U R
                  R                  SSSS9U l
        / SQnU R
                  R                  SSSUS9U l        U R
                  R                  SSSS9U l        U R
                  R                  SSSS9U l        U R
                  R                  SSSS9U l        / SQnU R
                  R                  SSSUS9U l        U R
                  R                  SSS S9U l        U R
                  R                  S!S"S S9U l        g)#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  ttft_millisecondsz#Time to first token in millisecondsms)r`   descriptionunit#explicit_bucket_boundaries_advisoryactive_requests_countz3Number of active requests currently being processedrequests)r`   r   r   waiting_requests_countz*Number of requests waiting to be processed)r~   r      r   r   r   r   r   i N  i0u  i`  request_latency_millisecondsz9End-to-end latency for completed requests in millisecondsdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioprefill_tokens_processedz"Number of prefill tokens processedtokensdecode_tokens_processedz!Number of decode tokens processed)   r|         (   r~   <   F   P   Z   _   b   r   batch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbyteskv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)rC   loggerinfor"   	get_metermetercreate_histogramttft_histogramcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugecreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r5   ttft_bucketslatency_bucketsbatch_fill_bucketss       r!   ry   .ContinuousBatchProcessorMetrics._setup_metrics   s    "!KKcd&&'[\
 _"jj99$=0<	 : 
 &*ZZ%<%<(M &= &
" '+jj&=&=)D '> '
# \)-)D)D/S0?	 *E *
& +/***A*A'M +B +
' '+jj&?&?+< '@ '
# &*ZZ%>%>*; &? &
" R/3zz/J/J(O0B	 0K 0
, +/***A*A-I +B +
' &*ZZ%<%<(J &= &
"r    created_time
request_idreturnNc                 "   [         (       d  g[        R                  " 5       U-
  S-  n U R                  R                  U5        [        R                  SU SUS S35        g! [         a"  n[        R                  SU 35         SnAgSnAff = f)zRecord Time to First Token (TTFT).

Args:
    created_time: The time the request was created
    request_id: The ID of the request
N     @@zRecorded TTFT for request : .2fr   zFailed to record TTFT metric: )rC   timer   recordr   debugrZ   warning)r5   r   r   ttft_msrn   s        r!   record_ttft_metric2ContinuousBatchProcessorMetrics.record_ttft_metric  s     "!99;-7	A&&w/LL5j\GC=PRST 	ANN;A3?@@	A   8A" "
B,B		Brequests_in_batchc                    [         (       a  U(       d  gSnSnU Hp  nUR                  [        R                  :X  a  US-  nM(  UR                  [        R                  [        R
                  4;   d  MX  U[        UR                  5      -  nMr     X#-   n US:  a  U R                  R                  U5        US:  a  U R                  R                  U5        US:  a  X#-  nU R                  R                  U5        XPR                  -  S-  nU R                  R                  U5        [         R#                  SU SU SUS S	U S
U R                   S35        g! [$         a"  n[         R'                  SU 35         SnAgSnAff = f)zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

Args:
    requests_in_batch: List of request states in the current batch
Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )rC   statusr	   r   r   r   len
prompt_idsr   addr   r   setrw   r   r   r   r   rZ   r   )	r5   r   decode_tokensprefill_tokensstatetotal_batch_tokensr   fill_percentagern   s	            r!   record_batch_metrics4ContinuousBatchProcessorMetrics.record_batch_metrics  sx    "!):&E||}555"-":":M<Z<Z![[#e&6&6"77	 ' +;	C!++//?q **..}=!%6//33E:14I4IIURO0077HLL!-0@@P Q.s337I6J!DLaLaKbbce  	CNN=aSABB	Cs   CE 
F!E>>Fc                 h   [         (       d  g UR                  UR                  -  nX!R                  R                  -  nS[        UR                  5      -  UR                  -  U-  nUR                  5       nUR                  U-
  nXd-  nXT-  nU R                  R                  U5        U R                  R                  U5        [        R                  SUS-  S SU SUR                   SXaR                  -  S	-  S
 S3	5        g! [         a"  n	[        R!                  SU	 35         Sn	A	gSn	A	ff = f)zRecord memory usage of the PagedAttentionCache without GPU synchronization.

This calculates the theoretical memory usage based on cache configuration
and the number of blocks currently in use.

Args:
    cache: The PagedAttentionCache object to measure
N   zKV Cache memory: i   r   zMB, Used blocks: r   z (r   z.1fz%)z*Failed to record KV cache memory metrics: )rC   head_dimnum_key_value_headsdtypeitemsizer   	key_cache
block_sizeget_num_free_blocks
num_blocksr   r   r   r   r   rZ   r   )
r5   cache	page_sizepage_mem_in_bytesblock_mem_in_bytesfree_blocksused_blocksused_memory_bytesfree_memory_bytesrn   s
             r!   record_kv_cache_memory_metrics>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metricsB  sD    "!	M)B)BBI )KK,@,@ @ "#S%9!9E<L<L!LO`!`  335K**[8K !, @ + @ &&**+<=++//0ABLL#$5$Ec#J K  +}Ae.>.>-? @"2"22S8=RA
  	MNNGsKLL	Ms   C6D 
D1D,,D1active_requestswaiting_requestsc                     [         (       d  g U R                  R                  U5        U R                  R                  U5        [        R                  SU SU S35        g! [         a"  n[        R                  SU 35         SnAgSnAff = f)zRecord metrics about active and waiting requests.

Args:
    active_requests: Number of active requests
    waiting_requests: Number of waiting requests
NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )rC   r   r   r   r   r   rZ   r   )r5   r   r   rn   s       r!   record_queue_metrics4ContinuousBatchProcessorMetrics.record_queue_metricsj  s     "!	C&&**?;''++,<=LL??*;;MN^M__pqr 	CNN=aSABB	Cs   AA! !
B+BBc                 "   [         (       d  g[        R                  " 5       U-
  S-  n U R                  R                  U5        [        R                  SU SUS S35        g! [         a"  n[        R                  SU 35         SnAgSnAff = f)zRecord metrics about a completed request.

Args:
    created_time: The time the request was created
    request_id: The ID of the request
Nr   z Recorded request completion for r   r   r   z,Failed to record request completion metric: )rC   r   r   r   r   r   rZ   r   )r5   r   r   
latency_msrn   s        r!   record_request_completion9ContinuousBatchProcessorMetrics.record_request_completion|  s     "!iikL0F:
	O**11*=LL;J<r*UXIYY[\] 	ONNI!MNN	Or   )r   r   r   r   r   r   rw   r   r   r   r   r   )r   N)r   r   r   r   r   rS   r>   ry   rs   rT   rR   r   listr   r   r   r   r   r   r    r!   ru   ru      s    : O
b Au A# A$ A A$ &Cd &Ct &C &CP %M %MN CC C3 CSW C C" Oe O OQU O Or    ru   r(   )r?   loggingr   enumr   typingr   r   r   r   r	   opentelemetryr"   opentelemetry.tracer#   r$   r%   rC   ImportErrorrD   r   tuplerR   rs   	getLoggerr   r   ru   r   r    r!   <module>r     s        1 1	D 	%BB
,` 
U _cU
 $DsCsHcUTWZDX?X9Y/Y)Z$[\Up 
		8	$ fO fO fO_  s   B BB