
    i.-                     ,   S SK Jr  S SKJr  S SKrS SKJr  SSKJr  SSK	J
r
JrJrJr  SSKJr  SSKJr  SS	KJrJrJrJr  \R.                  " \5      r " S
 S\R4                  5      r\ " S S5      5       r\ " S S5      5       r\ " S S5      5       rg)    )partial)OptionalN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   0   ^  \ rS rSrSrSrU 4S jrSrU =r$ )GradientCheckpointingLayer#   a  Base class for layers with gradient checkpointing.

This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
(`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

Important:

    When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
    must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

    Example:

        ```python
        >>> # Correct - hidden_states passed as positional arg
        >>> out = self.layer(hidden_states, attention_mask=attention_mask)

        >>> # Incorrect - hidden_states passed as keyword arg
        >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
        ```
Fc                   > U R                   (       a  U R                  (       a  SnU R                  R                  nSU S3nSU;   a  US   (       a  SUS'   US-  nSnSU;   a  US   b  S US'   US-  nSnS	U;   a  US	   b  S US	'   US
-  nSnSU;   a  US   b  S US'   US-  nSnU(       a)  UR	                  S5      S-   n[
        R                  U5        U R                  " [        [        TU ](  40 UD6/UQ76 $ [        TU ](  " U0 UD6$ )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_once_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         l/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/transformers/modeling_layers.pyr$   #GradientCheckpointingLayer.__call__<   sK   &&4==G00JOPZ|[deGf$)<&+{#00  6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00 !..-3##G,44WUW=M5XQW5X`[_``w000     )	r   
__module____qualname____firstlineno____doc__r   r$   __static_attributes____classcell__r   s   @r+   r   r   #   s    , #"1 "1r-   r   c                     ^  \ rS rSrSrU 4S jr\\       SS\\	R                     S\\	R                     S\\	R                     S\\   S\\	R                     S	\\	R                     S
\\   S\\   S\4S jj5       5       rSrU =r$ ) GenericForSequenceClassificationa   modelc                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        R                  " UR                  U R                  SS9U l
        U R                  5         g )NF)bias)r#   __init__
num_labelssetattrbase_model_prefixr   from_confignnLinearhidden_sizescore	post_initr%   configr   s     r+   r<   )GenericForSequenceClassification.__init__e   sb      ++,,i.C.CF.KLYYv114??O
 	r-   	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r'   returnc           	         [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      nUb  UR                  S   nOUR                  S   nU R
                  R                  c  US:w  a  [        S5      eU R
                  R                  c  SnOUb  XR
                  R                  :g  R                  UR                  [        R                  5      n[        R                  " UR                  S   UR                  [        R                  S9nX-  R                  S5      nO.Sn[        R                  U R                   R"                   S35        U[        R                  " XR                  S9U4   nS nUb  U R%                  XUU R
                  S	9n['        UUU	R(                  U	R*                  U	R,                  S
9$ )NrJ   rK   r   rL   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rR   )logitsrM   pooled_logitsrG   )lossrT   r   hidden_states
attentions)getattrr?   last_hidden_staterD   shaperG   pad_token_id
ValueErrortorR   torchint32arangeargmaxr    r!   r   r   loss_functionr	   r   rW   rX   )r%   rI   rJ   rK   r   rL   rM   r   r'   transformer_outputsrW   rT   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrU   rV   s                     r+   forward(GenericForSequenceClassification.forwardo   s    8?tE[E[7\8
)%+'8
 8
 ,==M* "+J&,,Q/J;;##+
a\]];;##+!#"%)A)AAEEfmmUZU`U`aL!LL)<V]]Z_ZeZefM"/">!F!Fr!J!#>>**+ ,Z Z
 u||J}}MOaab%%VR_hlhshs%tD/ /??-;;*55
 	
r-   )r=   rD   NNNNNNN)r   r/   r0   r1   r?   r<   r   r   r   r_   
LongTensorTensorr   FloatTensorboolr   r   r	   ri   r3   r4   r5   s   @r+   r7   r7   a   s      151537+/59-1$(8
E,,-8
 !.8
 u//0	8

 "%8
   1 128
 ))*8
 D>8
 +,8
 
*8
  8
r-   r7   c                   4  ^  \ rS rSrSrU 4S jrS rS r\\	       SS\
\R                     S\
\R                     S\
\R                     S	\
\   S
\
\R                     S\
\R                     S\
\R                     S\\   S\4S jj5       5       rSrU =r$ )GenericForQuestionAnswering   r9   c                    > [         TU ]  U5        [        X R                  [        R
                  " U5      5        [        R                  " UR                  S5      U l	        U R                  5         g )N   )r#   r<   r>   r?   r   r@   rA   rB   rC   
qa_outputsrE   rF   s     r+   r<   $GenericForQuestionAnswering.__init__   sO     ,,i.C.CF.KL))F$6$6: 	r-   c                 @    [        X R                  5      R                  $ NrY   r?   embed_tokens)r%   s    r+   get_input_embeddings0GenericForQuestionAnswering.get_input_embeddings   s    t334AAAr-   c                 8    U[        X R                  5      l        g rx   ry   )r%   values     r+   set_input_embeddings0GenericForQuestionAnswering.set_input_embeddings   s    =B,,-:r-   rI   rJ   rK   r   rL   start_positionsend_positionsr'   rN   c                    [        X R                  5      " U4UUUUS.UD6n	U	R                  n
U R                  U
5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  U R                  " XXg40 UD6n[        UUUU	R                  U	R                  S9$ )N)rJ   rK   r   rL   r   rQ   )dim)rV   start_logits
end_logitsrW   rX   )rY   r?   rZ   ru   splitsqueeze
contiguousrc   r   rW   rX   )r%   rI   rJ   rK   r   rL   r   r   r'   outputssequence_outputrT   r   r   rV   s                  r+   ri   #GenericForQuestionAnswering.forward   s     ,349O9O+P,
)%+',
 ,
 "331#)<<r<#: #++B/::<''+668
&=+D%%libhiD+%!!//))
 	
r-   )ru   rk   )r   r/   r0   r1   r?   r<   r{   r   r   r   r   r_   rl   rm   r   rn   r   r   r   ri   r3   r4   r5   s   @r+   rq   rq      s    BC  151537+/596:48%
E,,-%
 !.%
 u//0	%

 "%%
   1 12%
 "%"2"23%
   0 01%
 +,%
 
&%
  %
r-   rq   c                     ^  \ rS rSrSrU 4S jr\\       SS\\	R                     S\\	R                     S\\	R                     S\\   S\\	R                     S	\\	R                     S
\\   S\\   S\4S jj5       5       rSrU =r$ )GenericForTokenClassification   r9   c                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        USS 5      b  UR                  nO[        USS 5      b  UR                  nOSn[        R                  " U5      U l        [        R                  " UR                  UR                  5      U l        U R!                  5         g )Nclassifier_dropouthidden_dropoutg?)r#   r<   r=   r>   r?   r   r@   rY   r   r   rA   DropoutdropoutrB   rC   rD   rE   )r%   rG   r   r   s      r+   r<   &GenericForTokenClassification.__init__   s      ++,,i.C.CF.KL6/6B!'!:!:V-t4@!'!6!6!$zz"45YYv1163D3DE
 	r-   rI   rJ   rK   r   rL   rM   r   r'   rN   c           	      $   [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      n
U R	                  U
5      nS nUb  U R                  XU R                  5      n[        UUU	R                  U	R                  S9$ )NrP   )rV   rT   rW   rX   )
rY   r?   rZ   r   rD   rc   rG   r
   rW   rX   )r%   rI   rJ   rK   r   rL   rM   r   r'   r   r   rT   rV   s                r+   ri   %GenericForTokenClassification.forward   s     ,349O9O+P,
)%+',
 ,
 "33,,7O,%%fdkkBD$!//))	
 	
r-   )r   r=   rD   rk   )r   r/   r0   r1   r?   r<   r   r   r   r_   rl   rm   r   rn   ro   r   r   r
   ri   r3   r4   r5   s   @r+   r   r      s    "  151537+/59-1$(!
E,,-!
 !.!
 u//0	!

 "%!
   1 12!
 ))*!
 D>!
 +,!
 
!
  !
r-   r   )	functoolsr   typingr   r_   torch.nnrA   cache_utilsr   modeling_outputsr   r   r	   r
   models.autor   processing_utilsr   utilsr   r   r   r   
get_loggerr   r    Moduler   r7   rq   r   r.   r-   r+   <module>r      s          # $ P P 
		H	%;1 ;1| G
 G
 G
T 9
 9
 9
x 7
 7
 7
r-   