
    itT                    .   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	J
r
  SSKJr  SSKJrJrJrJrJr  SSKrSSKrSSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJ r J!r!  SSK"J#r#  SSK$J%r%J&r&  \!" 5       (       a  SSKJ'r'  SSK(J)r)J*r*J+r+J,r,J-r-  SSK.J/r/J0r0J1r1J2r2J3r3J4r4J5r5J6r6J7r7J8r8J9r9J:r:J;r;J<r<J=r=J>r>  SSK?J@r@  \<" 5       (       a  SSKAJBrB  \>R                  " \D5      rE\" SSS9rF\8" \" \G5      R                  5      rISSSSS.rJ\R                  S:  a  \R                  rLO\R                  rL " S S\SS9rM " S  S!\SS9rN " S" S#\SS9rO " S$ S%\SS9rP " S& S'\SS9rQ " S( S)\SS9rR " S* S+\SS9rS " S, S-\SS9rT " S. S/\T\SSS9rU " S0 S1\SS9rV\
 " S2 S35      5       rW " S4 S\45      rX\7" \XR                  5      \XlY        \XR                  R                   b5  \XR                  R                   R                  S5S6S7S89\XR                  l         gg)9z8
Processing saving/loading class for common processors.
    N)	dataclass)Path)AnyOptional	TypedDictTypeVarUnion)EntryNotFoundError   )
AudioInput
load_audio)custom_object_save)BatchFeature)ChannelDimension
ImageInputis_vision_available)render_jinja_template)
VideoInputVideoMetadata)PILImageResampling)PaddingStrategyPreTokenizedInputPreTrainedTokenizerBase	TextInputTruncationStrategy)AUDIO_TOKENIZER_NAMECHAT_TEMPLATE_DIRCHAT_TEMPLATE_FILE#LEGACY_PROCESSOR_CHAT_TEMPLATE_FILEPROCESSOR_NAMEPushToHubMixin
TensorTypecached_file	copy_funcdirect_transformers_importdownload_urlis_offline_modeis_remote_urlis_torch_availablelist_repo_templateslogging)deprecate_kwarg)PreTrainedAudioTokenizerBaseSpecificProcessorTypeProcessorMixin)boundr   FeatureExtractionMixinImageProcessingMixinBaseVideoProcessor)AutoTokenizerAutoFeatureExtractorAutoImageProcessorAutoVideoProcessor)      c                      \ rS rSr% Sr\\\\\	\   \	\   4      \
S'   \\\\	\   \	\   4   \
S'   \\\\\	\   \	\   4      \
S'   \\   \
S'   \\\\4   \
S'   \\\\4   \
S'   \\   \
S	'   \\   \
S
'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   \\   \
S'   Srg)
TextKwargsd   a  
Keyword arguments for text processing. For extended documentation, check out tokenization_utils_base methods and
docstrings associated.

Attributes:
    add_special_tokens (`bool`, *optional*)
        Whether or not to add special tokens when encoding the sequences.
    padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*)
        Activates and controls padding.
    truncation (`bool`, `str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*):
        Activates and controls truncation.
    max_length (`int`, *optional*):
        Controls the maximum length to use by one of the truncation/padding parameters.
    stride (`int`, *optional*):
        If set, the overflowing tokens will contain some tokens from the end of the truncated sequence.
    is_split_into_words (`bool`, *optional*):
        Whether or not the input is already pre-tokenized.
    pad_to_multiple_of (`int`, *optional*):
        If set, will pad the sequence to a multiple of the provided value.
    return_token_type_ids (`bool`, *optional*):
        Whether to return token type IDs.
    return_attention_mask (`bool`, *optional*):
        Whether to return the attention mask.
    return_overflowing_tokens (`bool`, *optional*):
        Whether or not to return overflowing token sequences.
    return_special_tokens_mask (`bool`, *optional*):
        Whether or not to return special tokens mask information.
    return_offsets_mapping (`bool`, *optional*):
        Whether or not to return `(char_start, char_end)` for each token.
    return_length (`bool`, *optional*):
        Whether or not to return the lengths of the encoded inputs.
    verbose (`bool`, *optional*):
        Whether or not to print more information and warnings.
    padding_side (`str`, *optional*):
        The side on which padding will be applied.
    return_mm_token_type_ids (`bool`, *optional*):
        Whether to return multimodal token type ids indicating mm placeholder token positions.
	text_pairtext_targettext_pair_targetadd_special_tokenspadding
truncation
max_lengthstrideis_split_into_wordspad_to_multiple_ofreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbosepadding_sidereturn_mm_token_type_ids N)__name__
__module____qualname____firstlineno____doc__r   r	   r   r   list__annotations__boolstrr   r   int__static_attributes__rP       m/home/dmtnaga/Documents/work/airagagent/rag_env/lib/python3.13/site-packages/transformers/processing_utils.pyr;   r;   d   s#   %N i):DOTRcMddeffy"3T)_dK\F]]^^uY0A4	?TXYjTk%klmm &4o-..dC!3344SM!$' %#D>)#D>)'~- (.$TN*D>!d^3-&tn,r\   r;   F)totalc                   \   \ rS rSr% Sr\\   \S'   \\\	\
4      \S'   \\\	\
4      \S'   \\S\
4      \S'   \\   \S'   \\   \S	'   \\   \S
'   \\\\\   4      \S'   \\\\\   4      \S'   \\   \S'   \\\	\
4      \S'   \\   \S'   \\   \S'   \\\	\4      \S'   \\	   \S'   Srg)ImagesKwargs   a  
Keyword arguments for image processing. For extended documentation, check the appropriate ImageProcessor
class methods and docstrings.

Attributes:
    do_resize (`bool`, *optional*):
        Whether to resize the image.
    size (`dict[str, int]`, *optional*):
        Resize the shorter side of the input to `size["shortest_edge"]`.
    crop_size (`dict[str, int]`, *optional*):
        Desired output size when applying center-cropping.
    resample (`PILImageResampling`, *optional*):
        Resampling filter to use if resizing the image.
    do_rescale (`bool`, *optional*):
        Whether to rescale the image by the specified scale `rescale_factor`.
    rescale_factor (`int` or `float`, *optional*):
        Scale factor to use if rescaling the image.
    do_normalize (`bool`, *optional*):
        Whether to normalize the image.
    image_mean (`float` or `list[float]`, *optional*):
        Mean to use if normalizing the image.
    image_std (`float` or `list[float]`, *optional*):
        Standard deviation to use if normalizing the image.
    do_pad (`bool`, *optional*):
        Whether to pad the image to the `(max_height, max_width)` of the images in the batch.
    pad_size (`dict[str, int]`, *optional*):
        The size `{"height": int, "width" int}` to pad the images to.
    do_center_crop (`bool`, *optional*):
        Whether to center crop the image.
    data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the output image.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input image.
    device (`str`, *optional*):
        The device to use for processing (e.g. "cpu", "cuda"), only relevant for fast image processing.
	do_resizesize	crop_sizer   resample
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padpad_sizedo_center_cropdata_formatinput_data_formatdevicerP   N)rQ   rR   rS   rT   rU   r   rX   rW   dictrY   rZ   r	   floatrV   r   r[   rP   r\   r]   r`   r`      s    #J ~
4S>
""S#X''u136788UO#4. ud5k1233eT%[0122TNtCH~&&TN"*++c+;&; <==SMr\   r`   c                      \ rS rSr% Sr\\   \S'   \\   \S'   \\\	\
4      \S'   \\   \S'   \S   \S'   \\   \S	'   \\   \S
'   \\   \S'   \\\\\   4      \S'   \\\\\   4      \S'   \\   \S'   \\\	\
4      \S'   \\   \S'   \\\	\4      \S'   \\	   \S'   \\   \S'   \\\\4      \S'   \\\
\4      \S'   \\
   \S'   \\   \S'   Srg)VideosKwargs   ak  
Keyword arguments for video processing.

Attributes:
    do_convert_rgb (`bool`):
        Whether to convert the video to RGB format.
    do_resize (`bool`):
        Whether to resize the video.
    size (`dict[str, int]`, *optional*):
        Resize the shorter side of the input to `size["shortest_edge"]`.
    default_to_square (`bool`, *optional*, defaults to `self.default_to_square`):
        Whether to default to a square when resizing, if size is an int.
    resample (`PILImageResampling`, *optional*):
        Resampling filter to use if resizing the video.
    do_rescale (`bool`, *optional*):
        Whether to rescale the video by the specified scale `rescale_factor`.
    rescale_factor (`int` or `float`, *optional*):
        Scale factor to use if rescaling the video.
    do_normalize (`bool`, *optional*):
        Whether to normalize the video.
    image_mean (`float` or `list[float]`, *optional*):
        Mean to use if normalizing the video.
    image_std (`float` or `list[float]`, *optional*):
        Standard deviation to use if normalizing the video.
    do_center_crop (`bool`, *optional*):
        Whether to center crop the video.
    do_sample_frames (`bool`, *optional*):
        Whether to sample frames from the video before processing or to process the whole video.
    video_metadata (`Union[VideoMetadata, dict]`, *optional*):
        Metadata of the video containing information about total duration, fps and total number of frames.
    num_frames (`int`, *optional*):
        Maximum number of frames to sample when `do_sample_frames=True`.
    fps (`int` or `float`, *optional*):
        Target frames to sample per second when `do_sample_frames=True`.
    crop_size (`dict[str, int]`, *optional*):
        Desired output size when applying center-cropping.
    data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the output video.
    input_data_format (`ChannelDimension` or `str`, *optional*):
        The channel dimension format for the input video.
    return_metadata (`ChannelDimension` or `str`, *optional*):
        Whether to return video metadata or not.
do_convert_rgbrb   rc   default_to_squarer   re   rf   rg   rh   ri   rj   rm   rd   rn   ro   rp   do_sample_framesvideo_metadatafps
num_framesreturn_metadatarP   N)rQ   rR   rS   rT   rU   r   rX   rW   rq   rY   rZ   rr   r	   rV   r   r   r[   rP   r\   r]   rt   rt      s'   *X TN"~
4S>
""~%+,,UO#4. ud5k1233eT%[0122TN"S#X''*++c+;&; <==SMtn$U=$#6788	%U
#	$$d^#r\   rt   c                       \ rS rSr% Sr\\   \S'   \\\	R                  \\   \\	R                     \\\      4      \S'   \\\\\4      \S'   \\   \S'   \\   \S'   \\   \S'   \\   \S	'   S
rg)AudioKwargsi  a;  
Keyword arguments for audio processing.

Attributes:
    sampling_rate (`int`, *optional*):
        The sampling rate at which the `raw_speech` input was sampled.
    raw_speech (`np.ndarray`, `list[float]`, `list[np.ndarray]`, `list[list[float]]`):
        The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float
        values, a list of numpy arrays or a list of list of float values. Must be mono channel audio, not
        stereo, i.e. single float per timestep.
    padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*):
        Select a strategy to pad the returned sequences (according to the model's padding side and padding
        index) among:

        - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
            sequence if provided).
        - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
            acceptable input length for the model if that argument is not provided.
        - `False` or `'do_not_pad'`
    max_length (`int`, *optional*):
        Maximum length of the returned list and optionally padding length (see above).
    truncation (`bool`, *optional*):
        Activates truncation to cut input sequences longer than *max_length* to *max_length*.
    pad_to_multiple_of (`int`, *optional*):
        If set, will pad the sequence to a multiple of the provided value.
    return_attention_mask (`bool`, *optional*):
        Whether or not [`~ASTFeatureExtractor.__call__`] should return `attention_mask`.
sampling_rate
raw_speechrA   rC   rB   rF   rH   rP   N)rQ   rR   rS   rT   rU   r   rZ   rW   r	   npndarrayrV   rr   rX   rY   r   r[   rP   r\   r]   r~   r~     s    : C= rzz4;RZZ8H$tTY{J[[\]]eD#6788 %#D>)r\   r~   c                   0    \ rS rSr% \\\\4      \S'   Sr	g)CommonKwargsiB  return_tensorsrP   N)
rQ   rR   rS   rT   r   r	   rY   r"   rW   r[   rP   r\   r]   r   r   B  s    U3
?344r\   r   c                       \ rS rSr% Sr0 r0 \R                  Er\\S'   0 \	R                  Er
\	\S'   0 \R                  Er\\S'   0 \R                  Er\\S'   0 \R                  Er\\S'   Srg	)
ProcessingKwargsiF  a"  
Base class for kwargs passing to processors.
In case a model has specific kwargs that are not present in the base class or default values for existing keys,
it should have its own `ModelProcessorKwargs` class that inherits from `ProcessingKwargs` to provide:
    1) Additional typed keys and that this model requires to process inputs.
    2) Default values for existing keys under a `_defaults` attribute.
New keys have to be defined as follows to ensure type hinting is done correctly.

```python
# adding a new image kwarg for this model
class ModelImagesKwargs(ImagesKwargs, total=False):
    new_image_kwarg: Optional[bool]

class ModelProcessorKwargs(ProcessingKwargs, total=False):
    images_kwargs: ModelImagesKwargs
    _defaults = {
        "images_kwargs: {
            "new_image_kwarg": False,
        }
        "text_kwargs": {
            "padding": "max_length",
        },
    }

```

For Python 3.8 compatibility, when inheriting from this class and overriding one of the kwargs,
you need to manually update the __annotations__ dictionary. This can be done as follows:

```python
class CustomProcessorKwargs(ProcessingKwargs, total=False):
    images_kwargs: CustomImagesKwargs

CustomProcessorKwargs.__annotations__["images_kwargs"] = CustomImagesKwargs  # python 3.8 compatibility
```python

common_kwargstext_kwargsimages_kwargsvideos_kwargsaudio_kwargsrP   N)rQ   rR   rS   rT   rU   	_defaultsr   rW   r   r;   r   r`   r   rt   r   r~   r   r[   rP   r\   r]   r   r   F  s    $L I#

&
&#M< 

$
$K #

&
&#M< #

&
&#M< !

%
%!L+ r\   r   c                       \ rS rSr% SrSr\\\      \	S'   Sr
\\\\\4         \	S'   Sr\\   \	S'   Sr\\   \	S'   Sr\\   \	S	'   S
rg)TokenizerChatTemplateKwargsi  a  
Keyword arguments for tokenizer's `apply_chat_template`, when it is called from within a processor.

tools (`list[Dict]`, *optional*):
    A list of tools (callable functions) that will be accessible to the model. If the template does not
    support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
    giving the name, description and argument types for the tool. See our
    [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
    for more information.
documents (`list[dict[str, str]]`, *optional*):
    A list of dicts representing documents that will be accessible to the model if it is performing RAG
    (retrieval-augmented generation). If the template does not support RAG, this argument will have no
    effect. We recommend that each document should be a dict containing "title" and "text" keys. Please
    see the RAG section of the [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG)
    for examples of passing documents with chat templates.
add_generation_prompt (bool, *optional*):
    If this is set, a prompt with the token(s) that indicate
    the start of an assistant message will be appended to the formatted output. This is useful when you want to generate a response from the model.
    Note that this argument will be passed to the chat template, and so it must be supported in the
    template for this argument to have any effect.
continue_final_message (bool, *optional*):
    If this is set, the chat will be formatted so that the final
    message in the chat is open-ended, without any EOS tokens. The model will continue this message
    rather than starting a new one. This allows you to "prefill" part of
    the model's response for it. Cannot be used at the same time as `add_generation_prompt`.
return_assistant_tokens_mask (`bool`, defaults to `False`):
    Whether to return a mask of the assistant generated tokens. For tokens generated by the assistant,
    the mask will contain 1. For user and system tokens, the mask will contain 0.
    This functionality is only available for chat templates that support it via the `{% generation %}` keyword.
Ntools	documentsFadd_generation_promptcontinue_final_messagereturn_assistant_tokens_maskrP   )rQ   rR   rS   rT   rU   r   r   rV   rq   rW   r   rY   r   rX   r   r   r[   rP   r\   r]   r   r     se    > #'E8DJ&04IxT#s(^,-4,18D>1-2HTN238 (4.8r\   r   c                   B    \ rS rSr% SrSr\\   \S'   Sr	\\
   \S'   Srg)	ChatTemplateLoadKwargsi  a  
Keyword arguments used to load multimodal data in processor chat templates.

num_frames (`int`, *optional*):
    Number of frames to sample uniformly. If not passed, the whole video is loaded.
load_audio_from_video (`bool`, *optional*):
        Whether to use the audio track of input video. If `True` the audio track will be loaded and passed to the
        processor. This flag has no effect if the model doesn't support audio modality.
i>  r   Fload_audio_from_videorP   N)rQ   rR   rS   rT   rU   r   r   rZ   rW   r   rX   r[   rP   r\   r]   r   r     s&     $*M8C=),18D>1r\   r   c                   B    \ rS rSr% SrSr\\   \S'   Sr	\\   \S'   Sr
g)ProcessorChatTemplateKwargsi  a"  
Keyword arguments for processor's `apply_chat_template`.

tokenize (`bool`, *optional*, defaults to `False`):
    Whether to tokenize the output or not.
return_dict (`bool`, defaults to `False`):
    Whether to return a dictionary with named outputs. Has no effect if tokenize is `False`.
Ftokenizereturn_dictrP   N)rQ   rR   rS   rT   rU   r   r   rX   rW   r   r[   rP   r\   r]   r   r     s%      %Hhtn$"'K$'r\   r   c                   4    \ rS rSr% \\S'   \\S'   \\S'   Srg)AllKwargsForChatTemplatei  processor_kwargsmm_load_kwargstemplate_kwargsrP   N)	rQ   rR   rS   rT   r   rW   r   r   r[   rP   r\   r]   r   r     s    &&**00r\   r   c                       \ rS rSr% SrSr\\\      \	S'   Sr
\\\      \	S'   Sr\\\      \	S'   Sr\\\      \	S'   S rS	 rS
rg)MultiModalDatai  aq  
Dataclass that holds extra useful data for processing
multimodal data. Processors currently cannot return keys,
unless it is used in model's forward. Thus we have helper
methods that calculate and return useful data from processing
input multimodals (images/videos).
Note that this dataclass is aimed to be used only in vLLM
and we might change its API in the future.
Nnum_image_tokensnum_video_tokensnum_audio_tokensnum_image_patchesc                 @    [        X5      =(       a    [        X5      S L$ N)hasattrgetattrselfkeys     r]   __contains__MultiModalData.__contains__  s    t!Dgd&8&DDr\   c                     [        X5      (       a  [        X5      $ [        U R                  R                   SU 35      e)Nz has no attribute )r   r   AttributeError	__class__rQ   r   s     r]   __getitem__MultiModalData.__getitem__  s:    44%% 7 788J3%PQQr\   rP   )rQ   rR   rS   rT   rU   r   r   rV   rZ   rW   r   r   r   r   r   r[   rP   r\   r]   r   r     se     -1htCy)0,0htCy)0,0htCy)0-1xS	*1ERr\   r   c                      \ rS rSr% SrSS/rSS/r/ r\\	   \
S'   SrSrSr\rS	 r    S?S
\\   S\\\\\\   \\   4      S\\   S\\   S\\   4
S jjrS rS@S\\	\4   4S jjrS@S\	4S jjrS@S\\	\R@                  4   4S jjr!S r"SAS\#S\#4S jjr$\%S\\	\R@                  4   S\&\\	\4   \\	\4   4   4S j5       r'\%S\\	\4   4S j5       r( SBS\S\\   S\\	\4   4S  jjr)\%     SCS!\*\+   S\\	\R@                  4   S"\\\	\R@                  4      S#\#S$\#S%\\\	\#4      S&\	S\+4S' jj5       r,\%SDS( j5       r-\%S) 5       r.\/S* 5       r0S+ r1S, r2\3S- 5       r4\/S. 5       r5\6" S/S0S1S29\6" S3S4S5S69 SBS7\\\\	\	4      \\\\	\	4         4   S\\	   S\\7   S\	4S8 jj5       5       r8S@S9 jr9S\\	   S:S;S<\\	   4S= jr:S>r;g)Er/   i  zY
This is a mixin used to provide saving/loading functionality for all processor classes.
feature_extractor	tokenizerchat_templateaudio_tokenizeroptional_call_argsNc           
         U R                    Hr  nUR                  US 5      n[        XU5        US:X  d  M)  Uc  M.  U R                  X45      n[	        5       (       a  [        U[        5      (       a  Me  [        SU S35      e   U H!  nX`R                  ;  d  M  [        SU S35      e   [        XR                  5       H  u  pxX;   a  [        SU S35      eXrU'   M     [        U5      [        U R                  5      :w  aJ  [        S[        U R                  5       SS	R                  U R                  5       S
[        U5       S35      eUR                  5        H"  u  pU R                  X5        [        XU5        M$     g )Nr   zTried to use `zW` for audio tokenization. However, this class is not registered for audio tokenization.zUnexpected keyword argument .z!Got multiple values for argument zThis processor requires z arguments: , z. Got z arguments instead.)optional_attributespopsetattrcheck_argument_for_proper_classr)   
isinstancer-   
ValueError
attributes	TypeErrorziplenjoinitems)	r   argskwargsoptional_attributeoptional_attribute_valueproper_classr   argattribute_names	            r]   __init__ProcessorMixin.__init__  s~    #'":":'-zz2Dd'K$D.FG "%66;S;_#CCDVq*,,<TVr1s1s$( 7> >  #; C//)">se1 EFF  $'t__#=C'"CNCSST UVV),~&	 $> v;#doo..*3t+?*@TYYW[WfWfMgLhhnt9+02  $*<<>N00ED#. $2r\   imagestextvideosaudior   c                    Uc+  Uc(  Uc%  Uc"  [        SU R                  R                   35      eU R                  " U R                  4S[        U S5      (       a  U R                  R                  O0 0UD6nUS4US4US4US4S	.n0 nU R                   H;  n[        XS5      n	Xh   u  pU
c  M  U	c  M  U	" U
40 X[   D6nUR                  U5        M=     [        U5      $ )
ao  
Main method to prepare for model inputs. This method forwards the each modality argument to its own processor
along with `kwargs`. Please refer to the docstring of the each processor attributes for more information.

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`, `list[torch.Tensor]`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. Both channels-first and channels-last formats are supported.
    text (`TextInput`, `PreTokenizedInput`, `list[TextInput]`, `list[PreTokenizedInput]`, *optional*):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
    videos (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`):
        The video or batch of videos to be prepared. Each video can be a 4D NumPy array or PyTorch
        tensor, or a nested list of 3D frames. Both channels-first and channels-last formats are supported.
    audio (`np.ndarray`, `torch.Tensor`, `list[np.ndarray]`, `list[torch.Tensor]`):
        The audio or batch of audio to be prepared. Each audio can be a NumPy array or PyTorch
        tensor.
    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors of a particular framework. Acceptable values are:

        - `'tf'`: Return TensorFlow `tf.constant` objects.
        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return NumPy `np.ndarray` objects.
        - `'jax'`: Return JAX `jnp.ndarray` objects.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] object with processed inputs in a dict format.
Nz/You need to provide at least one input to call tokenizer_init_kwargsr   r   r   r   r   )r   image_processorvideo_processorr   )r   r   rQ   _merge_kwargsvalid_processor_kwargsr   r   init_kwargsr   r   updater   )r   r   r   r   r   r   attribute_to_kwargsoutputsr   	attribute
input_datainput_kwargsattribute_outputs                r]   __call__ProcessorMixin.__call__  s   J >dlv~%-Nt~~OfOfNghii##''
@Gk@Z@Z$.."<"<`b
 
 . &8 &8"'!8	
 "ooNd;I':'J$J%)*?#,Z#P6;O#P /0 . G$$r\   c           	      8  ^  [        T U S35      n[        R                  X35      n[        U[        5      (       a  [	        U 4S jU 5       5      nOT R                  U5      n[        X$5      (       d(  [        S[        U5      R                   SU SU S35      eU$ )z
Checks the passed argument's class against the expected transformers class. In case of an unexpected
mismatch between expected and actual class, an error is raise. Otherwise, the proper retrieved class
is returned.
_classc              3   N   >#    U  H  oc  M  TR                  U5      v   M     g 7fr   get_possibly_dynamic_module).0nr   s     r]   	<genexpr>AProcessorMixin.check_argument_for_proper_class.<locals>.<genexpr>a  s"      jj!D!A!A!!D!Djs   %%zReceived a z for argument z, but a z was expected.)	r   AUTO_TO_BASE_CLASS_MAPPINGgetr   tupler   r   typerQ   )r   argument_nameargument
class_namer   s   `    r]   r   .ProcessorMixin.check_argument_for_proper_classW  s     Tm_F#;<
/33JK
j%((  jj jjL;;JGL(11d8n556n]OS[\f[gguv  r\   returnc           	        ^	 [         R                  " U R                  5      n[        R                  " U R
                  5      n[        UR                  5      nUS/-  nU(       a-  U Vs/ s H   oUU R                  R                  ;  d  M  UPM"     nnSU;   a  US	 SU;   a  US	 SU;   a  US	 SU;   a  US	 U	4S jm	UR                  5        VVs0 s Hu  u  pgXd;   d  M  UR                  R                  S:w  d  M(  U(       a  [        U[        5      (       a	  U(       a  MM  U[        U[        5      (       a  UR                  5       OU_Mw     nnnT	" U5      nU(       dA  SU;   a;  U R                  R                  R                  U R                  R                   S	.nXS'   U R                  R                  US
'   U$ s  snf s  snnf )z
Serializes this instance to a Python dictionary.

Returns:
    `dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
auto_mapr   qformer_tokenizerprotein_tokenizerr   c                    > U R                  5        HY  u  p[        U[        R                  5      (       a  UR	                  5       X'   M8  [        U[
        5      (       d  MO  T" U5      X'   M[     U $ )z
Numpy arrays are not serialiazable but can be in pre-processing dicts.
This function casts arrays to list, recusring through the nested configs as well.
)r   r   r   r   tolistrq   )
dictionaryr   valuecast_array_to_lists      r]   r  2ProcessorMixin.to_dict.<locals>.cast_array_to_list  sY    
 )..0
eRZZ00&+llnJOt,,&8&?JO	 1
 r\   BeamSearchDecoderCTCr   audio_tokenizer_classaudio_tokenizer_name_or_pathprocessor_class)copydeepcopy__dict__inspect	signaturer   rV   
parametersr   r   r   rQ   r   r!   to_dictr   name_or_path)
r   legacy_serializationoutputsigattrs_to_savexkvaudio_tokenizer_dictr  s
            @r]   r  ProcessorMixin.to_dictl  s    t}}- .S^^,*%(5\1$..B[B[9[QM\& {#&(*+&(*+f$'
	 

&" C KK((,BB	 C **Q2O2OXl CAjN;;qyy{B& 	 

 $F+ $(9V(C)-)=)=)G)G)P)P040D0D0Q0Q$ 
 )=$%$(NN$;$; !a ]0

s$   (F8	F8F=F=9!F=,F=c                 R    U R                  US9n[        R                  " USSS9S-   $ )z
Serializes this instance to a JSON string.

Returns:
    `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
r     Tindent	sort_keys
)r  jsondumps)r   r  r   s      r]   to_json_stringProcessorMixin.to_json_string  s-     \\7K\L
zz*Q$?$FFr\   json_file_pathc                     [        USSS9 nUR                  U R                  US95        SSS5        g! , (       d  f       g= f)z
Save this instance to a JSON file.

Args:
    json_file_path (`str` or `os.PathLike`):
        Path to the JSON file in which this processor instance's parameters will be saved.
wutf-8encodingr  N)openwriter"  )r   r$  r  writers       r]   to_json_fileProcessorMixin.to_json_file  s9     .#8FLL,,BV,WX 988s	   4
Ac                     U R                    Vs/ s H  nSU S[        [        X5      5       3PM     nnSR                  U5      nU R                  R
                   SU SU R                  5        3$ s  snf )Nz- z: r  z:
z

)r   reprr   r   r   rQ   r"  )r   nameattributes_reprs      r]   __repr__ProcessorMixin.__repr__  sw    PTP_P_`P_RvRWT-@(A'BCP_`))O4..))*#o->d4CVCVCXBYZZ as   $A2push_to_hubr  c           	         UR                  SS5      nUb<  [        R                  " S[        5        UR	                  S5      b  [        S5      eXTS'   [        R                  " USS9  U(       ar  UR                  SS5      nUR                  S	UR                  [        R                  R                  5      S
   5      nU R                  " U40 UD6nU R                  U5      nU R                  bs  U R                   V	s/ s H  n	[        X	5      PM     n
n	U
 Vs/ s H&  n[!        U["        5      (       a  UR$                  OUPM(     nnUR'                  U 5        [)        XUS9  UR	                  SS5      nU R                   H  n	U	S:X  aR  [        X	5      n[+        US5      (       a%  UR-                  U R.                  R0                  5        UR3                  XS9  M[  U(       d  Md  [        X	5      n[+        US5      (       a%  UR-                  U R.                  R0                  5        UR3                  U5        M     U R                  bB  U R                   H2  n	[        X	5      n[!        U["        5      (       d  M%  UR$                  S	 M4     [        R                  R5                  U[6        5      n[        R                  R5                  U[8        5      n[        R                  R5                  U[:        5      n[        R                  R5                  U[<        5      nU R>                  Gb  UR	                  SS5      n[!        U R>                  [@        5      nU(       aP  U(       aI  [C        USSS9 nURE                  U R>                  5        SSS5        [F        RI                  SU 35        GO|U(       a  U(       d  U R>                  RK                  5        H  u  nnUS:X  aL  [C        USSS9 nURE                  U R>                  S   5        SSS5        [F        RI                  SU 35        MX  [        R                  " USS9  [        R                  R5                  UU S35      n[C        USSS9 nURE                  U5        SSS5        [F        RI                  SU 35        M     OU(       ac  [L        RN                  " SU R>                  0SSS9S-   n[C        USSS9 nURE                  U5        SSS5        [F        RI                  SU 35        OU R>                  b  [        S5      eU(       Ga'  [        R                  R5                  U[P        5      nU RS                  5       n[U        URW                  5       5      S1:w  a)  U RY                  U5        [F        RI                  SU 35        [U        URW                  5       5      S1:X  a  / nOU/nU RZ                  bz  U RZ                  R.                  R0                  nU RZ                  R\                  nUUS.n[L        RN                  " USSS9S-   n [C        USSS9 nURE                  U 5        SSS5        O,O+U RY                  USS 9  [F        RI                  SU 35        U/nU(       a"  U R_                  UWWWUR	                  S5      S!9  U$ s  sn	f s  snf ! , (       d  f       GN4= f! , (       d  f       GN= f! , (       d  f       GNa= f! , (       d  f       GN= f! , (       d  f       N= f)"a  
Saves the attributes of this processor (feature extractor, tokenizer...) in the specified directory so that it
can be reloaded using the [`~ProcessorMixin.from_pretrained`] method.

<Tip>

This class method is simply calling [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] and
[`~tokenization_utils_base.PreTrainedTokenizerBase.save_pretrained`]. Please refer to the docstrings of the
methods above for more information.

</Tip>

Args:
    save_directory (`str` or `os.PathLike`):
        Directory where the feature extractor JSON file and the tokenizer files will be saved (directory will
        be created if it does not exist).
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
        repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
        namespace).
    legacy_serialization (`bool`, *optional*, defaults to `True`):
        Whether or not to save processor attributes in separate config files (legacy) or in processor's config
        file as a nested dict. Saving all attributes in a single dict will become the default in future versions.
        Set to `legacy_serialization=True` until then.
    kwargs (`dict[str, Any]`, *optional*):
        Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.tokenV`token` and `use_auth_token` are both specified. Please set only the argument `token`.T)exist_okcommit_messagerepo_id)configsave_jinja_filesr   _set_processor_class)r@  r   r&  r'  r(  zchat template saved in default.jinjar   r  r  r  zMultiple chat templates are not supported in the legacy format. Please save them as separate files using the `save_jinja_files` argument.r  zprocessor saved in r  Fr  )r<  r9  )0r   warningswarnFutureWarningr   r   osmakedirssplitpathsep_create_repo_get_files_timestamps_auto_classr   r   r   r   r   appendr   r   rA  r   rQ   save_pretrainedr   r    r   r   r   r   rY   r*  r+  loggerinfor   r   r!  r   r  setkeysr-  r   r  _upload_modified_files)!r   save_directoryr5  r  r   r7  r<  r=  files_timestampsr   attrsaconfigsr@  r   output_processor_fileoutput_chat_template_file_jinja output_chat_template_file_legacychat_template_diris_single_templateftemplate_nametemplatetemplate_filepathchat_template_json_stringr,  output_audio_tokenizer_fileprocessor_dictreturn_filesr  r  r  audio_tokenizer_jsons!                                    r]   rP  ProcessorMixin.save_pretrained  s"   8  $4d;%MM E zz'". l  -7O
NT2#ZZ(8$?NjjN,@,@,Mb,QRG'':6:G#99.I 'IMY~WT2EYafgaf\]A7N)O)OUVVafGgNN4 tGD!::&8$?"ooN,#D9	9&<==224>>3J3JK )).)\%%#D9	9&<==224>>3J3JK)).9 .  '"&//#D9	i)@AA!--j9 #2 !#^^ L*,'',,~GY*Z'+-77<<?,
( GGLL9JK )%zz*<dC!+D,>,>!D$693QUVGGD../ R56U5VWX!*< 04/A/A/G/G/I+M8$	1!"A3QXY]^GGD$6$6y$AB Z&=>]=^$_`$5E,.GGLL9J}o]cLd,e)!"3S7KqGGH- L&=>O=P$QR 0J $ JJ1C1CDQZ^_bff * :C'RV\LL!:; S56V5WXY##/ !L 
  *,'',,~G[*\'!\\^N >&&().?-@@!!"7812G1HIJ>&&().?-@@! 56##/(,(<(<(F(F(O(O%/3/C/C/P/P,-B4P($ (,zz2Fq\`'adh'h$5sWMQWLL!56 NM 0 3%PKK-.C-DEF12L'' -jj) (  u ZgZ RQ ZY LK SRD NMsH   ;Z)-Z.*Z3'[+[[)7[;3
[
[	
[&	)
[8;
\	pretrained_model_name_or_pathc                    [         R                  " U5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      n	UR                  S	S5      n
UR                  S
S5      nUR                  SS5      nUR                  SS5      nSUS.nUb  XS'   [        5       (       a  U	(       d  [        R                  S5        Sn	[        U5      n[        R                  R                  U5      n[        R                  R                  U5      (       a$  [        R                  R                  U[        5      n0 n0 n[        R                  R                  U5      (       a  UnSnSnSnSnGOM[        U5      (       a  Un[        U5      nSnSnSnGO(U(       ab  [        U[         5      nUR#                  5       (       a<  UR%                  S5       H'  nUR&                  n[          SUR(                   3UU'   M)     O$ [+        UU	U
UUS9 H  n[          SU S3UU'   M     [        n [/        UUUUUUU	UUU
USS9n[/        U[0        UUUUU	UUU
USS9n[/        U[2        UUUUU	UUU
USS9nUR5                  5        VVs0 s H  u  nnU[/        UUUUUUU	UUU
USS9_M     nnn[/        U[6        UUUUU	UUU
USS9nUbR  [=        USS9 n[>        R@                  " URC                  5       5      nSUS   0nU(       a  [E        S 5      e SSS5        OgUR5                  5        VVs0 s H   u  nnU[=        US!SS9RC                  5       _M"     nnnUb'  [=        US!SS9 nURC                  5       US'   SSS5        [G        W[H        5      (       a  SU;   a  [K        U5      S":X  a  US   nU(       a  UUS'   Uc  0 nO: [=        USS9 nURC                  5       nSSS5        [>        R@                  " W5      nU(       a  [        R                  S%U 35        O[        R                  S%W S&U 35        SU;   a  US   b  [        RO                  S'5        SU;   a  UR                  S5      US'   Uc  S(U;   ai  Ub2  [=        US!SS9nURC                  5       n [>        R@                  " U 5      n OUS(   n U RQ                  U S)   5      n!U S*   n"U!RR                  " U"40 UD6US('   U RT                   H  n#UR                  U#S5        M     UU4$ ! [,         a     GNf = fs  snnf ! [8         a    e [:         a    [9        SU SU S[         S35      ef = f! , (       d  f       GN= fs  snnf ! , (       d  f       GN= f! , (       d  f       GN= f! [>        RL                   a    [9        S#U S$35      ef = f)+a  
From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
processor of type [`~processing_utils.ProcessingMixin`] using `from_args_and_dict`.

Parameters:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
    subfolder (`str`, *optional*, defaults to `""`):
        In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
        specify the folder name here.

Returns:
    `tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the processor object.
	cache_dirNforce_downloadFresume_downloadproxiesr9  local_files_onlyrevision	subfolder _from_pipeline
_from_auto	processor)	file_typefrom_auto_classusing_pipelinez+Offline mode: forcing local_files_only=TrueTz*.jinja/)rp  rq  rl  r9  rC  )
rl  rm  ro  rn  rp  r9  
user_agentrq  rr  %_raise_exceptions_for_missing_entrieszCan't load processor for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z filer'  r(  rB  r   a  Cannot load chat template due to conflicting files - this checkpoint combines a legacy chat_template.json file with separate template files, which is not supported. To resolve this error, replace the legacy chat_template.json file with a modern chat_template.jinja file.rr   z"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at zChat templates should be in a 'chat_template.jinja' file but found key='chat_template' in the processor's config. Make sure to move your template to its own file.r   r  r  )+r  r	  r   r'   rQ  rR  rY   rG  rJ  isdirr   r    isfiler(   r&   r   r   is_dirglobstemr1  r*   r
   r#   r   r   r   r   OSError	Exceptionr*  r   loadsreadr   r   rq   r   JSONDecodeErrorwarning_oncer   from_pretrainedr   )$clsrj  r   audio_tokenizer_kwargsrl  rm  rn  ro  r9  rp  rq  rr  from_pipelinerx  r{  is_localprocessor_fileadditional_chat_template_files'resolved_additional_chat_template_filesresolved_processor_fileresolved_chat_template_fileresolved_raw_chat_template_fileresolved_audio_tokenizer_filetemplate_dirtemplate_filera  rb  readerchat_template_jsonchat_templatesrf  r   r  r  audio_tokenizer_pathr   s$                                       r]   get_processor_dict!ProcessorMixin.get_processor_dictz  s   & "&v!6JJ{D1	$4e< **%6=**Y-

7D)!::&8%@::j$/JJ{B/	

#3T: **\59#.?S
$+8'(%5KKEF#(+,I(J%77==!>?77==677WW\\*GXN)+&24/77>>788&C#*.'.2+,0)H899:N&23P&Q#*.'.2+,0)#$ACTU&&(()5):):9)E(5(:(:K\J]]^_l_q_q^rHs6}E *F
$75)9!)"+#% GXEXXYZbYcciCj6x@% ,N\*51"'#1#$3%5)%':?+'" /:17'#1#$3%5)%':?/+ 3>1&'#1#$3%5)%':?3/< 9W8\8\8^; 9_4} ";5%"+'5 '(7)9##-!)"+>C$  9_ 8 ;$ 1<1('#1#$3%5)%':?1-8 '21GD%)ZZ%>""+-?-P!Q:$B  ; ED 5\4a4a4c4c0M= tM3INNPP4c   /:93QU[06N9- Rnd++	^0KPSTbPcghPh+I6N&4F?# #*N	1GD!;;=D E!%D!1 KK56M5NOPKK5n5E_UlTmnon,1P1\^
 f$.4jj.IN?+ )48I^8[,8;S7S'-{{}$'+zz2F'G$'56G'H$$'$C$CDXYpDq$r!#78V#W 0E0U0U$1(>1N,-
 Iy$/ ( v%%e * j;@    01N0O P99V8W X//=.>eE  ED
 RQ$ ED '' 89P8QQlm ss   9#T? #AU :"UU >V'VV5
W ?V1W ?
UUU .V
V
V.1
W ;W $W'rf  c                    UR                  5       nUR                  SS5      nSU;   a  US	 SU;   a  US	 UR                  U5        U R                  R                  R
                  SU R                  R                  R                   SS nU R                  X%S9u  pg[        U5       VV	s0 s H.  u  pX;   d  M  U[        U5      :  d  M  XR                  U	5      _M0     n
nn	[        U5       VV	s/ s H  u  pU
R                  X5      PM     nnn	U " U0 UD6n[        R                  SU 35        U(       a  X4$ U$ s  sn	nf s  sn	nf )	aT  
Instantiates a type of [`~processing_utils.ProcessingMixin`] from a Python dictionary of parameters.

Args:
    processor_dict (`dict[str, Any]`):
        Dictionary that will be used to instantiate the processor object. Such a dictionary can be
        retrieved from a pretrained checkpoint by leveraging the
        [`~processing_utils.ProcessingMixin.to_dict`] method.
    kwargs (`dict[str, Any]`):
        Additional parameters from which to initialize the processor object.

Returns:
    [`~processing_utils.ProcessingMixin`]: The processor object instantiated from those
    parameters.
return_unused_kwargsFr  r   Nr   )processor_configvalid_kwargsz
Processor )r  r   r   r   __code__co_varnamesco_argcountvalidate_init_kwargs	enumerater   r   rQ  rR  )r  r   rf  r   r  accepted_args_and_kwargsunused_kwargsr  ir   args_to_updaterv  s               r]   from_args_and_dict!ProcessorMixin.from_args_and_dict}  sf   " (,,.%zz*@%H .01'z* 	f% $'<<#8#8#D#DEhs||G\G\GhGh#ijkjl#m  '*&>&>+ '? '
# $$<=
=# %()CI %A$$= 	 

 :C4Ivq""1*I ..	j,-++

 Js   )D;8D;	D;/EModelProcessorKwargsr   c           	      \  ^ 0 0 0 0 0 S.n0 0 0 0 0 S.m1 Skn[        5       nT H  nUR                  R                  U0 5      R                  5       TU'   UR                  U   R                   HL  nUc  M  X;   d  M  [        U R                  U5      (       a  [        U R                  U5      OX(   n	U	TU   U'   MN     M     UR                  T5        [        U5      [        U5      -
  n
UR                  5        H  u  p{UR                  U   R                   H  nXs;   a2  X7   R                  US5      nUS:w  a  X;   a  [        SU SU S35      eOX;   a  UR                  US5      nOSn[        U[        5      (       a  US:w  d  Mq  XU'   UR                  U5        M     M     [        U4S jU 5       5      (       aZ  UR                  5        HE  u  p}UT;   d  M  UR                  5        H$  u  pX;  d  M  XU   U'   UR                  U5        M&     MG     OjUR                  5        HV  u  nnUU;  d  M  UUR                  S   R                  ;   a
  UUS   U'   M5  UU;  d  M=  [         R#                  S	U S
35        MX     UR%                  5        H  nUR                  US   5        M     U$ )a  
Method to merge dictionaries of kwargs cleanly separated by modality within a Processor instance.
The order of operations is as follows:
    1) kwargs passed as before have highest priority to preserve BC.
        ```python
        high_priority_kwargs = {"crop_size" = {"height": 222, "width": 222}, "padding" = "max_length"}
        processor(..., **high_priority_kwargs)
        ```
    2) kwargs passed as modality-specific kwargs have second priority. This is the recommended API.
        ```python
        processor(..., text_kwargs={"padding": "max_length"}, images_kwargs={"crop_size": {"height": 222, "width": 222}}})
        ```
    3) kwargs passed during instantiation of a modality processor have fourth priority.
        ```python
        tokenizer = tokenizer_class(..., {"padding": "max_length"})
        image_processor = image_processor_class(...)
        processor(tokenizer, image_processor) # will pass max_length unless overridden by kwargs at call
        ```
    4) defaults kwargs specified at processor level have lowest priority.
        ```python
        class MyProcessingKwargs(ProcessingKwargs, CommonKwargs, TextKwargs, ImagesKwargs, total=False):
            _defaults = {
                "text_kwargs": {
                    "padding": "max_length",
                    "max_length": 64,
                },
            }
        ```
Args:
    ModelProcessorKwargs (`ProcessingKwargs`):
        Typed dictionary of kwargs specifically required by the model passed.
    tokenizer_init_kwargs (`Dict`, *optional*):
        Dictionary of kwargs the tokenizer was instantiated with and need to take precedence over defaults.

Returns:
    output_kwargs (`Dict`):
        Dictionary of per-modality kwargs to be passed to each modality-specific processor.

)r   r   r   r   r   >   r   r   r   r   	__empty__zKeyword argument z+ was passed two times:
in a dictionary for z and as a **kwarg.c              3   ,   >#    U  H	  oT;   v   M     g 7fr   rP   )r   r   default_kwargss     r]   r   /ProcessorMixin._merge_kwargs.<locals>.<genexpr>  s     7n$s   r   zKeyword argument `zA` is not a valid argument for this processor and will be ignored.)rS  r   r   r  rW   r   r   r   r   r   r   r   r   rY   addanyrQ  r  values)r   r  r   r   output_kwargspossible_modality_keywords	used_keysmodalitymodality_keyr   non_modality_kwargsoutput_kwargkwarg_valuesubdictsubkeysubvaluer   kwargr  s                     @r]   r   ProcessorMixin._merge_kwargs  s   ^ 
 
 &K"E	 'H';'E'E'I'I(TV'W'\'\'^N8$ 4 D DX N ^ ^(49^ #4>><@@  =2@ 
 >CN8,\: !_ ' 	^, "&kC,>>&3&9&9&;"H 4 D DX N ^ ^%"("2"6"6|["QK"k1l6Y(/~ >33;*<NP  "+ #)**\;"GK"-K!+s33{k7Q1<.MM,/% !_ '<, 7777%+\\^!~-,3MMO(!2>F(3F;%MM&1 -< &4 %lln
Ui'2BB?Sccc>Co6s;$>>++05vw - #))+ELL78 ,r\   r  rl  rm  rp  r9  rq  c                 &   X'S'   X7S'   XGS'   XgS'   UR                  SS5      nUb+  [        R                  " S[        5        Ub  [	        S5      eUnUb  XWS	'   U R
                  " U40 UD6n	U R                  " U40 UD6u  pU R                  " X40 UD6$ )
a  
Instantiate a processor associated with a pretrained model.

<Tip>

This class method is simply calling the feature extractor
[`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`], image processor
[`~image_processing_utils.ImageProcessingMixin`] and the tokenizer
[`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`] methods. Please refer to the docstrings of the
methods above for more information.

</Tip>

Args:
    pretrained_model_name_or_path (`str` or `os.PathLike`):
        This can be either:

        - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
          huggingface.co.
        - a path to a *directory* containing a feature extractor file saved using the
          [`~SequenceFeatureExtractor.save_pretrained`] method, e.g., `./my_model_directory/`.
        - a path or url to a saved feature extractor JSON *file*, e.g.,
          `./my_model_directory/preprocessor_config.json`.
    **kwargs
        Additional keyword arguments passed along to both
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] and
        [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
rl  rm  rp  rq  r7  Nr8  r:  r9  )r   rD  rE  rF  r   _get_arguments_from_pretrainedr  r  )r  rj  rl  rm  rp  r9  rq  r   r7  r   rf  s              r]   r  ProcessorMixin.from_pretrained7  s    N ({#1 %5!"%z$4d;%MM E   l  #E#7O112OZSYZ!$!7!78U!`Y_!`%%dEfEEr\   c                     [        U[        5      (       d  UR                  nSSKJs  Jn  [        X!5      (       d  [        U S35      eXl        g)aC  
Register this class with a given auto class. This should only be used for custom feature extractors as the ones
in the library are already mapped with `AutoProcessor`.



Args:
    auto_class (`str` or `type`, *optional*, defaults to `"AutoProcessor"`):
        The auto class to register this new feature extractor with.
r   Nz is not a valid auto class.)	r   rY   rQ   transformers.models.automodelsautor   r   rN  )r  
auto_classauto_modules      r]   register_for_auto_class&ProcessorMixin.register_for_auto_classv  sE     *c**#,,J66{//
|+FGHH$r\   c                   ^  / nT R                    H  n[        T U S35      n[        U[        5      (       ap  [        U 4S jU 5       5      nUS:X  a*  UR	                  S5      nUc  [
        R                  S5        OUR	                  SS5      nU(       a  US   b  US   nOUS   nOT R                  U5      nUR                  UR                  " U40 UD65        M     U$ )	aU  
Identify and instantiate the subcomponents of Processor classes, like image processors and
tokenizers. This method uses the Processor attributes like `tokenizer_class` to figure out what class those
subcomponents should be. Note that any subcomponents must either be library classes that are accessible in
the `transformers` root, or they must be custom code that has been registered with the relevant autoclass,
via methods like `AutoTokenizer.register()`. If neither of these conditions are fulfilled, this method
will be unable to find the relevant subcomponent class and will raise an error.
r   c              3   N   >#    U  H  ob  TR                  U5      OS v   M     g 7fr   r   )r   r   r  s     r]   r   @ProcessorMixin._get_arguments_from_pretrained.<locals>.<genexpr>  s'     rgqbcm ? ? BY] ]gqs   "%r   use_fastaC  Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.Tr   r   )
r   r   r   r   r   rQ  r  r   rO  r  )	r  rj  r   r   r   r   classesr  attribute_classs	   `        r]   r  -ProcessorMixin._get_arguments_from_pretrained  s     !nnN (8&?@J*e,,rgqrr!%66%zz*5H'++T  &zz*d;H
 6&-ajO&-ajO"%"A"A*"MKK778U`Y_`a- -0 r\   c                    [        [        U 5      (       a  [        [        U 5      $ [        R                  [        R                  [        R
                  [        R                  [        R                  /nU H|  nUR                  R                  5        H[  n[        U[        5      (       a(  U H   nUc  M  UR                  U :X  d  M  Us  s  s  $    M@  Uc  ME  UR                  U :X  d  MW  Us  s  $    M~     [        SU  S35      e)NzCould not find module z in `transformers`. If this is a custom class, it should be registered using the relevant `AutoClass.register()` function so that other functions can find it!)r   transformers_moduler   IMAGE_PROCESSOR_MAPPINGVIDEO_PROCESSOR_MAPPINGTOKENIZER_MAPPINGFEATURE_EXTRACTOR_MAPPING$MODEL_FOR_AUDIO_TOKENIZATION_MAPPING_extra_contentr  r   r   rQ   r   )module_namelookup_locationslookup_locationcustom_classcustom_subclasss        r]   r   *ProcessorMixin.get_possibly_dynamic_module  s    &44.<<77771199DD
  0O / > > E E GlE22+7*6?;S;SWb;b#22 ,8 "-,2G2G;2V'' !H  0 $[M 2+ ,
 	
r\   c                     [        U S5      (       d#  [        SU R                  R                   S35      eU R                  R
                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
r   zCannot batch decode text:  has no tokenizer.)r   r   r   rQ   r   batch_decoder   r   r   s      r]   r  ProcessorMixin.batch_decode  sL    
 t[))9$..:Q:Q9RRdeff~~**D;F;;r\   c                     [        U S5      (       d#  [        SU R                  R                   S35      eU R                  R
                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
r   zCannot decode text: r  )r   r   r   rQ   r   decoder  s      r]   r  ProcessorMixin.decode  sL    
 t[))3DNN4K4K3LL^_``~~$$d5f55r\   c                     / nU R                    H,  n[        XS 5      n[        US5      nUR                  U5        M.     U$ )Nmodel_input_names)r   r   extend)r   r  r   r   attr_input_namess        r]   r   ProcessorMixin.model_input_names  sG    "ooNd;I&y2EF$$%56 . ! r\   c                     [        U R                  5       5      n[        U5      nX#-
  nX#-  nU(       a  U Vs0 s H  ofX   _M	     snO0 nU(       a  U Vs0 s H  ofX   _M	     snO0 nXq4$ s  snf s  snf r   )rS  rT  )r  r  kwargs_from_configvalid_kwargs_setunused_keys
valid_keysr  r  s           r]   r  #ProcessorMixin.validate_init_kwargs  s     !1!6!6!89|,(;':
ITEA,//EZ\GQ
C
1+..
CWY** FCs   A,A1	video_fpsz4.58rz   )versionnew_namevideo_load_backendz4.59zd. This function will use `torchcodec` by default, or `torchvision` if `torchcodec` is not installed.)r  additional_messageconversationc           
         Uc  [        U R                  [        5      (       a   SU R                  ;   a  U R                  S   nO[        U R                  [        5      (       a5  [        SSR	                  U R                  R                  5       5       35      eU R                  b  U R                  nOJ[        S5      e[        U R                  [        5      (       a  X R                  ;   a  U R                  U   nO [        U S5      =(       a/    U R                  R                  R                  R                  S5      nUR                  SS	5      (       aD  UR                  S
S	5      (       a  [        S5      eUR                  SS	5      (       a  [        S5      eUR                  SS	5      (       a  U(       d  [        S5      eSUS'   0 0 S.nU Hz  n[        R                  U   R                   HV  n[        R                  U   n[        XS5      n	UR                  Xy5      n
U
c  M8  [        U
[        5      (       a  MO  XU   U'   MX     M|     UR                  SS5        US   R!                  U5        [        U["        [$        45      (       a7  [        US   ["        [$        45      (       d  [        US   S5      (       a  SnUnOS	nU/nUS   R                  SS	5      nUS   R                  SS	5      nUS   nU(       Gaz  / / nn/ nU GHm  n/ / nnU GH<  nUS    Vs/ s H  nUS   S;   d  M  UPM     nnUS    VVs/ s H%  nS  H  nUU;   d  M  US   S:X  d  M  UU   PM     M'     nnnU VVs/ s H%  nS  H  nUU;   d  M  US   S:X  d  M  UU   PM     M'     nnnUR'                  U5        U VVs/ s H%  nS  H  nUU;   d  M  US   S :X  d  M  UU   PM     M'     nnnUR'                  U5        US!   (       d(  U H  nUR)                  [+        UUS"   S#95        M!     GM  U H  nUR)                  [+        UUS"   S#95        M!     GM?     UR)                  U5        UR)                  U5        GMp     [-        S2UUS$.US   DU R                  R.                  D6u  nnU(       d  US   nU(       Ga  U(       a  US   OUnU R                  R0                  b/  UR3                  U R                  R0                  5      (       a  S	US%'   S&U;  a)  UR                  S'5      c  UR                  S(5      b  SUS&'   [5        S) W 5       5      n [5        S* W 5       5      n!U " S2UU (       a  UOSU!(       a  UOSW(       a  UOSS+.UD6n"U(       Ga?  US   R                  SS	5      (       Ga"  / n#U"R                  S,5      n$U"S-   n%[7        [9        U%5      5       H  n&S/[9        U%U&   5      -  n'U$U&   n(U( V)V*s/ s H  u  n)n*U)PM
     n+n)n*UU&    H  u  n,n-[:        R<                  " U+U,5      n.[:        R<                  " U+U-5      n/U.S:  a  U(U.   S   U,s=::  a  U(U.   S.   :  d  O  MW  [7        U.U/(       a  U/O[9        U%U&   5      5       H  n0S.U'U0'   M
     M     U#R)                  U'5        M     U#U"S/'   U"R?                  UR                  S05      S19  U"$ U"S-   $ U$ s  snf s  snnf s  snnf s  snnf s  sn*n)f )3a;  
Similar to the `apply_chat_template` method on tokenizers, this method applies a Jinja template to input
conversations to turn them into a single tokenizable string.

The input is expected to be in the following format, where each message content is a list consisting of text and
optionally image or video inputs. One can also provide an image, video, URL or local path which will be used to form
`pixel_values` when `return_dict=True`. If not provided, one will get only the formatted text, optionally tokenized text.

conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"},
            {"type": "text", "text": "Please describe this image in detail."},
        ],
    },
]

Args:
    conversation (`Union[list[Dict, [str, str]], list[list[dict[str, str]]]]`):
        The conversation to format.
    chat_template (`Optional[str]`, *optional*):
        The Jinja template to use for formatting the conversation. If not provided, the tokenizer's
        chat template is used.
NrB  zThe processor has multiple chat templates but none of them are named "default". You need to specify which one to use by passing the `chat_template` argument. Available templates are: r   zTCannot use apply_chat_template because this processor does not have a chat template.r   Fastr   Fr   a  continue_final_message and add_generation_prompt are not compatible. Use continue_final_message when you want the model to continue the final message, and add_generation_prompt when you want to add a header that will prompt it to start a new assistant message instead.r   zKcontinue_final_message is not compatible with return_assistant_tokens_mask.z`return_assistant_tokens_mask` is not possible with slow tokenizers. Make sure you have `tokenizers` installed. If the error persists, open an issue to support a Fast tokenizer for your model.TrK   )r   r   r  r   r   contentr   r   r   r   )imagevideo)r   urlrJ  r   )r  r   rJ  base64r  )r  r   rJ  r  r   r   )r   )conversationsr   r@   rx   rz   r{   c              3   :   #    U  H  o  H  o"S Lv   M
     M     g 7fr   rP   )r   im_listims      r]   r   5ProcessorMixin.apply_chat_template.<locals>.<genexpr>  s     ^|GV]PR$V]|   c              3   :   #    U  H  o  H  o"S Lv   M
     M     g 7fr   rP   )r   vid_listvids      r]   r   r    s     bXYaRU4Yar  )r   r   r   r   offset_mapping	input_idsr   assistant_masksr   )tensor_typerP   ) r   r   rq   r   r   rT  r   r   r   rQ   endswithr   r   rW   r   r   r   rV   r   r  rO  r   r   special_tokens_map	bos_token
startswithr  ranger   bisectbisect_leftconvert_to_tensors)1r   r  r   r   is_tokenizers_fastprocessed_kwargs
kwarg_typer   kwarg_type_defaultsdefault_valuer   
is_batchedr  r   r   r   batch_imagesbatch_videosbatch_audiosr   r   messager  visualsaudio_fnamesvision_infoimage_fnamesvideo_fnamesfnamepromptgeneration_indicessingle_promptimages_existvideos_existoutr  r  r  r  current_maskoffsetsstartendoffset_startsassistant_start_charassistant_end_char	start_posend_postoken_ids1                                                    r]   apply_chat_template"ProcessorMixin.apply_chat_template  s_   J  $,,d33	TEWEW8W $ 2 29 =D..55 kyy!3!3!8!8!:;<> 
 ##/ $ 2 2 j  $,,d33I[I[8[ $ 2 2= A $T;7nDNN<T<T<]<]<f<fgm<n::.66zz1599  c  zz8%@@ !noo::4e<<% g 
 48/0 !!

 +J/??
K[[&>&N&Nz&Z# '(;$ G

36$Zt-D-D8=Z05 \ + 	

'. 	*+226:lT5M22|Au66',q/S\:]:]J(MJ)NM#$56:::uM&'89==mUS)*:;)+R,LL -!#R+G6=i6Hr6H7GTZO_qLqw6HGr (/y'9$'9G#;C'> % /6fo.H %#; %'9 ! $ ,3$+2K#EC+- ) 3>f2E2P )C(#E )+2 ! $ MM,/ ,3$+2K#;C+- ) 3>f2E2P )C(#; )+2 ! $ MM,/ **AB%1E(//
5P^_nPo0pq &2 &2E(//
5P^_nPo0pq &29  ,B ##F+##F+I !.L &; &
''&
 01&
 nn//	&
"" AYF *4F1IM~~''38P8PQUQ_Q_QiQi8j8j/4+, "/

5!-L1I1U-1)*^|^^LbbbL '3|'3|&2l	
 C #$56::;Y[`aa&(O%(WW-=%>N #K 0I"3y>2()sS1->'>"0"3AH(I:5#(IHZ[\H]D02D(.(:(:=J^(_I&,&8&8HZ&[G !*Q$+I$6q$9=Q$iT[\eTfghTi$i !),1)WUXYbcdYeUf,g9:X 6 -h I^ (..|<! 3" .=C)***vzzBR7S*T
;''I s$$$F )JsH   [-[=[
[
[
1[
[
[
6[%

[%
[%
4[+c                 @    U R                   R                  " U4SU0UD6$ )ad  
Post-process the output of a vlm to decode the text.

Args:
    generated_outputs (`torch.Tensor` or `np.ndarray`):
        The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
        or `(sequence_length,)`.
    skip_special_tokens (`bool`, *optional*, defaults to `True`):
        Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
    **kwargs:
        Additional arguments to be passed to the tokenizer's `batch_decode method`.

Returns:
    `list[str]`: The decoded text.
skip_special_tokens)r   r  )r   generated_outputsr:  r   s       r]   post_process_image_text_to_text.ProcessorMixin.post_process_image_text_to_text  s&      ~~**+<pRepioppr\   text_inputsr   
modalitiesc           
      0   U H  n[        X S35      n[        X S35      nUS    Vs/ s H  n[        U5      R                  U5      PM     nnU V	s/ s H  oR                  U5      PM     n
n	X:w  d  Ms  [        SU SU SU
 S35      e   gs  snf s  sn	f )	z
Checks that number of special tokens in text and processed text is same. The count can be different
if tokenized text was truncated, leading to issues in model code.
_token	_token_idr  zMismatch in `z4` token count between text and `input_ids`. Got ids=z
 and text=z^. Likely due to `truncation='max_length'`. Please disable truncation or increase `max_length`.N)r   rV   countr   )r   r   r>  r?  r  	token_strr6  ids	ids_countsample
text_counts              r]   _check_special_mm_tokens'ProcessorMixin._check_special_mm_tokens  s    
 #H
&&9:Itz%;<H>I+>VW>Vsc2>VIW@DEf,,y1JE& #H:-abkallv  xB  wC Cs s  # XEs   $BBrP   )NNNN)T)FTr   )NFFNmain)AutoProcessor)<rQ   rR   rS   rT   rU   r   r   r   rV   rY   rW   feature_extractor_classtokenizer_classrN  r   r   r   r   r   r	   r   r   r   r   Unpackr   r   rq   r   r  r"  rG  PathLiker-  r3  rX   rP  classmethodr   r  r  r   r   r.   r  r  r  staticmethodr   r  r  propertyr  r  r,   r   r7  r<  rI  r[   rP   r\   r]   r/   r/     s    &{3J*,=>$&S	&"OK-$/P (,hl'+&*<%$<% uY(94	?DQbLccde<% $	<%
 
#<% )*<%|*BDcN BH	G3 	G	Y5bkk1A+B 	Y[
m4 m_c m^ @&,1#r{{2B,C@&	tCH~tCH~-	.@& @&D 5d38n 5 5t 15@.@  (~@
 
c4i@D  8<$!&,0<F'(<F',S"++-='><F E#r{{"234<F 	<F
 <F c4i()<F <F 
<F <F| % %* " "H 
 
0<6 ! ! 
+ 
+ [&5A B (,TDc3h0$tDcN7K2LLMT  }T 12	T
 
T BTlq$T#Y ^ aefiaj r\   rv  rL  zprocessor files)objectobject_classobject_files)[rU   r  r  r  r   rG  systypingrD  dataclassesr   pathlibr   r   r   r   r   r	   numpyr   typing_extensionshuggingface_hub.errorsr
   audio_utilsr   r   dynamic_module_utilsr   feature_extraction_utilsr   image_utilsr   r   r   utils.chat_template_utilsr   video_utilsr   r   r   tokenization_utils_baser   r   r   r   r   utilsr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   utils.deprecationr,   modeling_utilsr-   
get_loggerrQ   rQ  r.   __file__parentr  r   version_inforO  r;   r`   rt   r~   r   r   r   r   r   r   r   r/   r5  formatrP   r\   r]   <module>rm     s)       	 
   !  ; ;   5 / 4 2 J J < 2 /     $ / < 
		H	%   7?OP  1h1F1FG  /40.	  w]]F%%F:-% :-z49E 4n@$9E @$F$*)5 $*N59E 57y 7t$9)5 $9N2Ye 2("8:U]b (1y 1 R R R2K^ K\( '~'A'AB %%1)7)C)C)K)K)R)RGX *S *N& 2r\   