
    9i              	          d dl mZ esdev rddlmZ nd dlZ	 d dlZd Z
d Zd Zd	 Z G d
 de      Z G d de      Z ej$                  e        G d de      Z ej(                  e        G d de      Z ej,                  e        G d de      Z ej0                  e       d Zd Z G d de      Z ej8                  e        G d de      Z ej<                  e       d Zd dl Z d dl!Z!d dl Z d dl"Z"d dl#Z$d dl%m&Z& d dl%m'Z' d Z(d Z)ejT                  Z+ejT                  Z, e-edej\                          e-edej\                         ej^                  e_0        ejb                  e_2        d D ]  Z3 e)ee3         e(e        e(e        e(e       eZ4eZ5dd!l6m7Z7  ee"jp                  js                   e:e$jv                  jy                  d"            d#              G d$ d%e      Z=y# e	$ r d dlZY w xY w)&    )version_info.   )_sentencepieceNc                     	 d| j                   j                         z   }d| j                  j
                  d| j                  j                  d|dS # t        j                  $ r d}Y Kw xY w)Nz	proxy of  <r   z; z >)this__repr____builtin__	Exception	__class__
__module____name__)selfstrthiss     `/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/sentencepiece/__init__.py
_swig_reprr      s_    		 2 2 44 "^^668O8OQXZZ    s   A A+*A+c                       fd}|S )Nc                     |dk(  r | ||       y |dk(  r| j                   j                  |       y t        | |      r.t        t	        t        |       |      t              r | ||       y t        d| z        )Nr
   thisownz(You cannot add instance attributes to %s)r
   ownhasattr
isinstancegetattrtypepropertyAttributeError)r   namevaluesets      r   set_instance_attrzE_swig_setattr_nondynamic_instance_variable.<locals>.set_instance_attr   si    6>dE"YIIMM% T4 ZT
D0I8%TdE" !Kd!RSS     )r!   r"   s   ` r   *_swig_setattr_nondynamic_instance_variabler%      s    T r#   c                       fd}|S )Nc                     t        | |      r%t        t        | |      t              s | ||       y t	        d| z        )Nz%You cannot add class attributes to %s)r   r   r   r   r   )clsr   r    r!   s      r   set_class_attrz?_swig_setattr_nondynamic_class_variable.<locals>.set_class_attr)   s:    3jd1CX&NT5! !H3!NOOr#   r$   )r!   r)   s   ` r   '_swig_setattr_nondynamic_class_variabler*   (   s    P
 r#   c                       fd}|S )zlClass decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclassc                 p     | j                   | j                  | j                  j                               S N)r   	__bases____dict__copy)r(   	metaclasss    r   wrapperz$_swig_add_metaclass.<locals>.wrapper3   s'    s}}cll6G6G6IJJr#   r$   )r1   r2   s   ` r   _swig_add_metaclassr3   1   s    KNr#   c                   4    e Zd ZdZ eej                        Zy)_SwigNonDynamicMetazKMeta class to enforce nondynamic attributes (no new attributes) for a classN)r   r   __qualname____doc__r*   r   __setattr__r$   r#   r   r5   r5   8   s    U9$:J:JKKr#   r5   c                       e Zd Z ed d d      ZeZd Zej                  Z
d Zd Zd Zd	 Zd
 Zd Zd Z ee      Z ee      Z ee      Z ee      Z ee      Z ee      Z ee      Zd Zd Zd ZeZy)1ImmutableSentencePieceText_ImmutableSentencePiecec                 6    | j                   j                         S r-   r
   r   xs    r   <lambda>z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>>        r#   c                 8    | j                   j                  |      S r-   r<   r>   vs     r   r?   z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>>       AFFJJqM r#   The membership flagdocc                 T    t        j                  | t        j                                y r-   )r   :ImmutableSentencePieceText_ImmutableSentencePiece_swiginit5new_ImmutableSentencePieceText_ImmutableSentencePiecer   s    r   __init__z:ImmutableSentencePieceText_ImmutableSentencePiece.__init__A   s,    QQRVXf  Y]  Y]  Y_  	`r#   c                 ,    t        j                  |       S r-   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__piecerK   s    r   _piecez8ImmutableSentencePieceText_ImmutableSentencePiece._pieceE       VVW[\\r#   c                 ,    t        j                  |       S r-   )r   :ImmutableSentencePieceText_ImmutableSentencePiece__surfacerK   s    r   _surfacez:ImmutableSentencePieceText_ImmutableSentencePiece._surfaceH   s    XXY]^^r#   c                 ,    t        j                  |       S r-   )r   5ImmutableSentencePieceText_ImmutableSentencePiece__idrK   s    r   _idz5ImmutableSentencePieceText_ImmutableSentencePiece._idK   s    SSTXYYr#   c                 ,    t        j                  |       S r-   )r   8ImmutableSentencePieceText_ImmutableSentencePiece__beginrK   s    r   _beginz8ImmutableSentencePieceText_ImmutableSentencePiece._beginN   rP   r#   c                 ,    t        j                  |       S r-   )r   6ImmutableSentencePieceText_ImmutableSentencePiece__endrK   s    r   _endz6ImmutableSentencePieceText_ImmutableSentencePiece._endQ   s    TTUYZZr#   c                 ,    t        j                  |       S r-   )r   CImmutableSentencePieceText_ImmutableSentencePiece__surface_as_bytesrK   s    r   _surface_as_byteszCImmutableSentencePieceText_ImmutableSentencePiece._surface_as_bytesT   s    aabfggr#   c                 ,    t        j                  |       S r-   )r   AImmutableSentencePieceText_ImmutableSentencePiece__piece_as_bytesrK   s    r   _piece_as_byteszAImmutableSentencePieceText_ImmutableSentencePiece._piece_as_bytesW   s    __`deer#   c                     dj                  | j                  | j                  | j                  | j                  | j
                        S )Nz3piece: "{}"
id: {}
surface: "{}"
begin: {}
end: {}
)formatpieceidsurfacebeginendrK   s    r   __str__z9ImmutableSentencePieceText_ImmutableSentencePiece.__str__b   s7     "6$**dggt||"&**dhh8	8r#   c                    | j                   |j                   k(  xrj | j                  |j                  k(  xrO | j                  |j                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S r-   )re   rf   rg   rh   ri   r   others     r   __eq__z8ImmutableSentencePieceText_ImmutableSentencePiece.__eq__j   s    ZZ5;;&  Y477ehh+>  Y4<<SXS`S`C`  Yeieoeosxs~s~e~  Y  DH  DL  DL  PU  PY  PY  DY  Yr#   c                 *    t        t        |             S r-   )hashstrrK   s    r   __hash__z:ImmutableSentencePieceText_ImmutableSentencePiece.__hash__m   s    #d)_r#   N)r   r   r6   r   r   r   r   rL   r   8delete_ImmutableSentencePieceText_ImmutableSentencePiece__swig_destroy__rO   rS   rV   rY   r\   r_   rb   re   piece_as_bytesrg   surface_as_bytesrf   rh   ri   rj   rn   rr   r$   r#   r   r:   r:   =   s    -/IOdeGH`%^^]_Z][hf VEo.Nx G 12	#BVE
4.C8Y Hr#   r:   c                       e Zd Z ed d d      ZeZd Zej                  Z
d Zd Zd Zd	 Zd
 Zd Z ee      Z ee      Z ee      Z G d d      Zed        Zd Zd Zd ZeZy)ImmutableSentencePieceTextc                 6    | j                   j                         S r-   r<   r=   s    r   r?   z#ImmutableSentencePieceText.<lambda>v   r@   r#   c                 8    | j                   j                  |      S r-   r<   rB   s     r   r?   z#ImmutableSentencePieceText.<lambda>v   rD   r#   rE   rF   c                 T    t        j                  | t        j                                y r-   )r   #ImmutableSentencePieceText_swiginitnew_ImmutableSentencePieceTextrK   s    r   rL   z#ImmutableSentencePieceText.__init__y   s    ::4AnAnApqr#   c                 ,    t        j                  |       S r-   )r   'ImmutableSentencePieceText__pieces_sizerK   s    r   _pieces_sizez'ImmutableSentencePieceText._pieces_size}   s    EEdKKr#   c                 .    t        j                  | |      S r-   )r   "ImmutableSentencePieceText__piecesr   indexs     r   _piecesz"ImmutableSentencePieceText._pieces   s    @@uMMr#   c                 ,    t        j                  |       S r-   )r    ImmutableSentencePieceText__textrK   s    r   _textz ImmutableSentencePieceText._text   s    >>tDDr#   c                 ,    t        j                  |       S r-   )r   !ImmutableSentencePieceText__scorerK   s    r   _scorez!ImmutableSentencePieceText._score   s    ??EEr#   c                 ,    t        j                  |       S r-   )r   ,ImmutableSentencePieceText_SerializeAsStringrK   s    r   SerializeAsStringz,ImmutableSentencePieceText.SerializeAsString       JJ4PPr#   c                 ,    t        j                  |       S r-   )r   )ImmutableSentencePieceText__text_as_bytesrK   s    r   _text_as_bytesz)ImmutableSentencePieceText._text_as_bytes   s    GGMMr#   c                   (    e Zd Zd Zd Zd Zd ZeZy)9ImmutableSentencePieceText.ImmutableSentencePieceIteratorc                 P    || _         | j                   j                         | _        y r-   )protor   lenr   r   s     r   rL   zBImmutableSentencePieceText.ImmutableSentencePieceIterator.__init__       
::**,r#   c                     | j                   S r-   r   rK   s    r   __len__zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__len__       xxr#   c                    t        |t              r`t        | j                        D cg c]  }| j                  j                  |       c}|j                  |j                  |j                     S |dk  r|| j                  z   }|dk  s|| j                  k\  rt        d      | j                  j                  |      S c c}w )Nr   zpiece index is out of range)
r   sliceranger   r   r   startstopstep
IndexErrorr   r   is      r   __getitem__zEImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__   s    eU#16txxAA$**$$Q'A%++ejjY^YcYcBcd
d19$(("%19)89
9zz!!%(( B   "B?c           
      z    dj                  | D cg c]  }dj                  t        |             c}      S c c}w )N
pieces {{
{}}}joinrd   rq   r   r>   s     r   rj   zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__   /    yyDIq+223q6:IJJI   !8Nr   r   r6   rL   r   r   rj   r   r$   r#   r   ImmutableSentencePieceIteratorr          -)K hr#   r   c                 ,    t         j                  |       S r-   )rx   r   rK   s    r   piecesz!ImmutableSentencePieceText.pieces   s    'FFtLLr#   c                 D    | j                         |j                         k(  S r-   r   rl   s     r   rn   z!ImmutableSentencePieceText.__eq__       ##%)@)@)BBBr#   c                 4    t        | j                               S r-   rp   r   rK   s    r   rr   z#ImmutableSentencePieceText.__hash__       $((*++r#   c                     dj                  | j                  | j                  dj                  | j                  D cg c]  }dj                  t        |             c}            S c c}w )Nztext: "{}"
score: {}
{}r   r   )rd   textscorer   r   rq   r   s     r   rj   z"ImmutableSentencePieceText.__str__   sX    F499djj99PTP[P[%\1&7&>&>s1v&F%\]__ &]s   !A'N)r   r   r6   r   r   r   r   rL   r   !delete_ImmutableSentencePieceTextrt   r   r   r   r   r   r   r   text_as_bytesr   r   r   rn   rr   rj   r$   r#   r   rx   rx   u   s    -/IOdeGHr%GGLNEFQN E?D^,MVE , M MC,_ Hr#   rx   c                       e Zd Z ed d d      ZeZd Zej                  Z
d Zd Zd Z G d	 d
      Zed        Zd Zd Zd ZeZy)ImmutableNBestSentencePieceTextc                 6    | j                   j                         S r-   r<   r=   s    r   r?   z(ImmutableNBestSentencePieceText.<lambda>   r@   r#   c                 8    | j                   j                  |      S r-   r<   rB   s     r   r?   z(ImmutableNBestSentencePieceText.<lambda>   rD   r#   rE   rF   c                 T    t        j                  | t        j                                y r-   )r   (ImmutableNBestSentencePieceText_swiginit#new_ImmutableNBestSentencePieceTextrK   s    r   rL   z(ImmutableNBestSentencePieceText.__init__   s    ??nFxFxFz{r#   c                 ,    t        j                  |       S r-   )r   ,ImmutableNBestSentencePieceText__nbests_sizerK   s    r   _nbests_sizez,ImmutableNBestSentencePieceText._nbests_size   r   r#   c                 .    t        j                  | |      S r-   )r   'ImmutableNBestSentencePieceText__nbestsr   s     r   _nbestsz'ImmutableNBestSentencePieceText._nbests   s    EEdERRr#   c                 ,    t        j                  |       S r-   )r   1ImmutableNBestSentencePieceText_SerializeAsStringrK   s    r   r   z1ImmutableNBestSentencePieceText.SerializeAsString   s    OOPTUUr#   c                   (    e Zd Zd Zd Zd Zd ZeZy)BImmutableNBestSentencePieceText.ImmutableSentencePieceTextIteratorc                 P    || _         | j                   j                         | _        y r-   )r   r   r   r   s     r   rL   zKImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__init__   r   r#   c                     | j                   S r-   r   rK   s    r   r   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__len__   r   r#   c                    t        |t              r`t        | j                        D cg c]  }| j                  j                  |       c}|j                  |j                  |j                     S |dk  r|| j                  z   }|dk  s|| j                  k\  rt        d      | j                  j                  |      S c c}w )Nr   znbests index is out of range)
r   r   r   r   r   r   r   r   r   r   r   s      r   r   zNImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__   s    eU#16txxAA$**$$Q'A%++ejjY^YcYcBcd
d19$(("%19)9:
:zz!!%(( Br   c           
      z    dj                  | D cg c]  }dj                  t        |             c}      S c c}w Nr   znbests {{
{}}}r   r   s     r   rj   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__   r   r   Nr   r$   r#   r   "ImmutableSentencePieceTextIteratorr      r   r#   r   c                 ,    t         j                  |       S r-   )r   r   rK   s    r   nbestsz&ImmutableNBestSentencePieceText.nbests   s    ,OOPTUUr#   c                 D    | j                         |j                         k(  S r-   r   rl   s     r   rn   z&ImmutableNBestSentencePieceText.__eq__   r   r#   c                 4    t        | j                               S r-   r   rK   s    r   rr   z(ImmutableNBestSentencePieceText.__hash__   r   r#   c           
          dj                  | j                  D cg c]  }dj                  t        |             c}      S c c}w r   )r   r   rd   rq   r   s     r   rj   z'ImmutableNBestSentencePieceText.__str__   s3    YY$++NQ)00Q8NOONs   !AN)r   r   r6   r   r   r   r   rL   r   &delete_ImmutableNBestSentencePieceTextrt   r   r   r   r   r   rn   rr   rj   r$   r#   r   r   r      st    -/IOdeGH|%LLQSV , V VC,P Hr#   r   c                      e Zd Z ed d d      ZeZd Zej                  Z
d Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$d  Z%d! Z&d" Z'd# Z(d$ Z)d% Z*d& Z+d' Z,d( Z-d) Z.d* Z/d+ Z0d, Z1d- Z2d. Z3d/ Z4d0 Z5d1 Z6d2 Z7d3 Z8d4 Z9d5 Z:d6 Z;d7 Z<d8 Z=d9 Z>d: Z?d; Z@d< ZAd= ZBd>d>eCd?d?d?d?d?d@dAd@fdBZD	 	 	 	 	 	 	 	 	 dgdCZEdD ZFdE ZGdF ZHdG ZIdhdHZJdhdIZKdhdJZLdhdKZM	 	 	 	 	 	 didLZNdjdMZOdjdNZPdjdOZQdjdPZR	 	 	 	 	 	 	 	 	 dgdQZSdhdRZTdhdSZUdhdTZVdhdUZWeXd>fdVZYeXfdWZZeXfdXZ[dkdYZ\dkdZZ]dld[Z^dld\Z_djd]Zdjd^Z`d_ Zad` Zbda Zcdb Zddc Zedd Zfde ZgdhdfZhy>)mSentencePieceProcessorc                 6    | j                   j                         S r-   r<   r=   s    r   r?   zSentencePieceProcessor.<lambda>   r@   r#   c                 8    | j                   j                  |      S r-   r<   rB   s     r   r?   zSentencePieceProcessor.<lambda>   rD   r#   rE   rF   c                 T    t        j                  | t        j                                y r-   )r   SentencePieceProcessor_swiginitnew_SentencePieceProcessorrK   s    r   rL   zSentencePieceProcessor.__init__   s    66t^=f=f=hir#   c                 .    t        j                  | |      S r-   )r   .SentencePieceProcessor_LoadFromSerializedProtor   
serializeds     r   LoadFromSerializedProtoz.SentencePieceProcessor.LoadFromSerializedProto   s    LLTS]^^r#   c                 .    t        j                  | |      S r-   )r   ,SentencePieceProcessor_SetEncodeExtraOptionsr   extra_options     r   SetEncodeExtraOptionsz,SentencePieceProcessor.SetEncodeExtraOptions      JJ4Q]^^r#   c                 .    t        j                  | |      S r-   )r   ,SentencePieceProcessor_SetDecodeExtraOptionsr   s     r   SetDecodeExtraOptionsz,SentencePieceProcessor.SetDecodeExtraOptions  r   r#   c                 .    t        j                  | |      S r-   )r   $SentencePieceProcessor_SetVocabulary)r   valid_vocabs     r   SetVocabularyz$SentencePieceProcessor.SetVocabulary  s    BB4UUr#   c                 ,    t        j                  |       S r-   )r   &SentencePieceProcessor_ResetVocabularyrK   s    r   ResetVocabularyz&SentencePieceProcessor.ResetVocabulary  s    DDTJJr#   c                 0    t        j                  | ||      S r-   )r   %SentencePieceProcessor_LoadVocabulary)r   filename	thresholds      r   LoadVocabularyz%SentencePieceProcessor.LoadVocabulary  s    CCD(T]^^r#   c                 .    t        j                  | g| S r-   )r   'SentencePieceProcessor_CalculateEntropyr   argss     r   CalculateEntropyz'SentencePieceProcessor.CalculateEntropy  s    EEdRTRRr#   c                 ,    t        j                  |       S r-   )r   #SentencePieceProcessor_GetPieceSizerK   s    r   GetPieceSizez#SentencePieceProcessor.GetPieceSize  s    AA$GGr#   c                 .    t        j                  | |      S r-   )r    SentencePieceProcessor_PieceToIdr   re   s     r   	PieceToIdz SentencePieceProcessor.PieceToId  s    >>tUKKr#   c                 .    t        j                  | |      S r-   )r    SentencePieceProcessor_IdToPiecer   rf   s     r   	IdToPiecez SentencePieceProcessor.IdToPiece      >>tRHHr#   c                 .    t        j                  | |      S r-   )r   SentencePieceProcessor_GetScorer  s     r   GetScorezSentencePieceProcessor.GetScore      ==dBGGr#   c                 .    t        j                  | |      S r-   )r    SentencePieceProcessor_IsUnknownr  s     r   	IsUnknownz SentencePieceProcessor.IsUnknown   r  r#   c                 .    t        j                  | |      S r-   )r    SentencePieceProcessor_IsControlr  s     r   	IsControlz SentencePieceProcessor.IsControl#  r  r#   c                 .    t        j                  | |      S r-   )r   SentencePieceProcessor_IsUnusedr  s     r   IsUnusedzSentencePieceProcessor.IsUnused&  r  r#   c                 .    t        j                  | |      S r-   )r   SentencePieceProcessor_IsByter  s     r   IsBytezSentencePieceProcessor.IsByte)  s    ;;D"EEr#   c                 ,    t        j                  |       S r-   )r   SentencePieceProcessor_unk_idrK   s    r   unk_idzSentencePieceProcessor.unk_id,      ;;DAAr#   c                 ,    t        j                  |       S r-   )r   SentencePieceProcessor_bos_idrK   s    r   bos_idzSentencePieceProcessor.bos_id/  r#  r#   c                 ,    t        j                  |       S r-   )r   SentencePieceProcessor_eos_idrK   s    r   eos_idzSentencePieceProcessor.eos_id2  r#  r#   c                 ,    t        j                  |       S r-   )r   SentencePieceProcessor_pad_idrK   s    r   pad_idzSentencePieceProcessor.pad_id5  r#  r#   c                 ,    t        j                  |       S r-   )r   -SentencePieceProcessor_serialized_model_protorK   s    r   serialized_model_protoz-SentencePieceProcessor.serialized_model_proto8  s    KKDQQr#   c                 .    t        j                  | |      S r-   )r   #SentencePieceProcessor_LoadFromFiler   args     r   LoadFromFilez#SentencePieceProcessor.LoadFromFile;  s    AA$LLr#   c	                 <    t        j                  | ||||||||	      S r-   )r   #SentencePieceProcessor__EncodeAsIds	r   r   enable_sampling
nbest_sizealphaadd_bosadd_eosreverseemit_unk_pieces	            r   _EncodeAsIdsz#SentencePieceProcessor._EncodeAsIds>  s;    AA$o_ikpry  |C  EL  N\  ]  	]r#   c	                 <    t        j                  | ||||||||	      S r-   )r   &SentencePieceProcessor__EncodeAsPiecesr7  s	            r   _EncodeAsPiecesz&SentencePieceProcessor._EncodeAsPiecesA  s<    DDT4Q`blnsu|  F  HO  Q_  `  	`r#   c	                 <    t        j                  | ||||||||	      S r-   )r   /SentencePieceProcessor__EncodeAsSerializedProtor7  s	            r   _EncodeAsSerializedProtoz/SentencePieceProcessor._EncodeAsSerializedProtoD  sA    MMdTXZikuw|  F  HO  QX  Zh  i  	ir#   c	                 <    t        j                  | ||||||||	      S r-   )r   .SentencePieceProcessor__EncodeAsImmutableProtor7  s	            r   _EncodeAsImmutableProtoz.SentencePieceProcessor._EncodeAsImmutableProtoG  sA    LLTSWYhjtv{  ~E  GN  PW  Yg  h  	hr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   (SentencePieceProcessor__EncodeAsIdsBatch
r   insnum_threadsr8  r9  r:  r;  r<  r=  r>  s
             r   _EncodeAsIdsBatchz(SentencePieceProcessor._EncodeAsIdsBatchJ  sG    FFtSR]_npz  }B  DK  MT  V]  _m  n  	nr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   +SentencePieceProcessor__EncodeAsPiecesBatchrK  s
             r   _EncodeAsPiecesBatchz+SentencePieceProcessor._EncodeAsPiecesBatchM  sH    II$PSU`bqs}  @E  GN  PW  Y`  bp  q  	qr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   4SentencePieceProcessor__EncodeAsSerializedProtoBatchrK  s
             r   _EncodeAsSerializedProtoBatchz4SentencePieceProcessor._EncodeAsSerializedProtoBatchP  sM    RRSWY\^ikz  }G  IN  PW  Y`  bi  ky  z  	zr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   3SentencePieceProcessor__EncodeAsImmutableProtoBatchrK  s
             r   _EncodeAsImmutableProtoBatchz3SentencePieceProcessor._EncodeAsImmutableProtoBatchS  sM    QQRVX[]hjy  |F  HM  OV  X_  ah  jx  y  	yr#   c                 .    t        j                  | |      S r-   )r   !SentencePieceProcessor__DecodeIdsr   idss     r   
_DecodeIdsz!SentencePieceProcessor._DecodeIdsV  s    ??cJJr#   c                 .    t        j                  | |      S r-   )r   (SentencePieceProcessor__DecodeIdsAsBytesrZ  s     r   _DecodeIdsAsBytesz(SentencePieceProcessor._DecodeIdsAsBytesY  s    FFtSQQr#   c                 .    t        j                  | |      S r-   )r   $SentencePieceProcessor__DecodePiecesr   r   s     r   _DecodePiecesz$SentencePieceProcessor._DecodePieces\  s    BB4PPr#   c                 .    t        j                  | |      S r-   )r   2SentencePieceProcessor__DecodeIdsAsSerializedProtorZ  s     r   _DecodeIdsAsSerializedProtoz2SentencePieceProcessor._DecodeIdsAsSerializedProto_  s    PPQUWZ[[r#   c                 .    t        j                  | |      S r-   )r   5SentencePieceProcessor__DecodePiecesAsSerializedProtorb  s     r   _DecodePiecesAsSerializedProtoz5SentencePieceProcessor._DecodePiecesAsSerializedProtob  s    SSTXZ`aar#   c                 .    t        j                  | |      S r-   )r   1SentencePieceProcessor__DecodeIdsAsImmutableProtorZ  s     r   _DecodeIdsAsImmutableProtoz1SentencePieceProcessor._DecodeIdsAsImmutableProtoe  s    OOPTVYZZr#   c                 .    t        j                  | |      S r-   )r   4SentencePieceProcessor__DecodePiecesAsImmutableProtorb  s     r   _DecodePiecesAsImmutableProtoz4SentencePieceProcessor._DecodePiecesAsImmutableProtoh  s    RRSWY_``r#   c                 0    t        j                  | ||      S r-   )r   &SentencePieceProcessor__DecodeIdsBatchr   rL  rM  s      r   _DecodeIdsBatchz&SentencePieceProcessor._DecodeIdsBatchk  s    DDT3P[\\r#   c                 0    t        j                  | ||      S r-   )r   -SentencePieceProcessor__DecodeIdsAsBytesBatchrr  s      r   _DecodeIdsAsBytesBatchz-SentencePieceProcessor._DecodeIdsAsBytesBatchn  s    KKDRUWbccr#   c                 0    t        j                  | ||      S r-   )r   7SentencePieceProcessor__DecodeIdsAsSerializedProtoBatchrr  s      r    _DecodeIdsAsSerializedProtoBatchz7SentencePieceProcessor._DecodeIdsAsSerializedProtoBatchq  s    UUVZ\_almmr#   c                 0    t        j                  | ||      S r-   )r   6SentencePieceProcessor__DecodeIdsAsImmutableProtoBatchrr  s      r   _DecodeIdsAsImmutableProtoBatchz6SentencePieceProcessor._DecodeIdsAsImmutableProtoBatcht  s    TTUY[^`kllr#   c                 0    t        j                  | ||      S r-   )r   )SentencePieceProcessor__DecodePiecesBatchrr  s      r   _DecodePiecesBatchz)SentencePieceProcessor._DecodePiecesBatchw  s    GGcS^__r#   c                 0    t        j                  | ||      S r-   )r   :SentencePieceProcessor__DecodePiecesAsSerializedProtoBatchrr  s      r   #_DecodePiecesAsSerializedProtoBatchz:SentencePieceProcessor._DecodePiecesAsSerializedProtoBatchz  s    XXY]_bdoppr#   c                 0    t        j                  | ||      S r-   )r   9SentencePieceProcessor__DecodePiecesAsImmutableProtoBatchrr  s      r   "_DecodePiecesAsImmutableProtoBatchz9SentencePieceProcessor._DecodePiecesAsImmutableProtoBatch}  s    WWX\^acnoor#   c           	      8    t        j                  | ||||||      S r-   )r   (SentencePieceProcessor__NBestEncodeAsIdsr   r   r9  r;  r<  r=  r>  s          r   _NBestEncodeAsIdsz(SentencePieceProcessor._NBestEncodeAsIds  s.    FFtTS]_fhoqx  {I  J  	Jr#   c           	      8    t        j                  | ||||||      S r-   )r   +SentencePieceProcessor__NBestEncodeAsPiecesr  s          r   _NBestEncodeAsPiecesz+SentencePieceProcessor._NBestEncodeAsPieces  s/    II$PTV`bikrt{  ~L  M  	Mr#   c           	      8    t        j                  | ||||||      S r-   )r   4SentencePieceProcessor__NBestEncodeAsSerializedProtor  s          r   _NBestEncodeAsSerializedProtoz4SentencePieceProcessor._NBestEncodeAsSerializedProto  s4    RRSWY]_ikrt{  ~E  GU  V  	Vr#   c           	      8    t        j                  | ||||||      S r-   )r   3SentencePieceProcessor__NBestEncodeAsImmutableProtor  s          r   _NBestEncodeAsImmutableProtoz3SentencePieceProcessor._NBestEncodeAsImmutableProto  s4    QQRVX\^hjqsz  }D  FT  U  	Ur#   c
                 >    t        j                  | |||||||||	
      S r-   )r   1SentencePieceProcessor__SampleEncodeAndScoreAsIds
r   r   num_samplesr:  worinclude_bestr;  r<  r=  r>  s
             r   _SampleEncodeAndScoreAsIdsz1SentencePieceProcessor._SampleEncodeAndScoreAsIds  sI    OOPTVZ\ginps  vB  DK  MT  V]  _m  n  	nr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   4SentencePieceProcessor__SampleEncodeAndScoreAsPiecesr  s
             r   _SampleEncodeAndScoreAsPiecesz4SentencePieceProcessor._SampleEncodeAndScoreAsPieces  sI    RRSWY]_jlqsv  yE  GN  PW  Y`  bp  q  	qr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   =SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProtor  s
             r   &_SampleEncodeAndScoreAsSerializedProtoz=SentencePieceProcessor._SampleEncodeAndScoreAsSerializedProto  sI    [[\`bfhsuz|  BN  PW  Y`  bi  ky  z  	zr#   c
                 >    t        j                  | |||||||||	
      S r-   )r   <SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProtor  s
             r   %_SampleEncodeAndScoreAsImmutableProtoz<SentencePieceProcessor._SampleEncodeAndScoreAsImmutableProto  sI    ZZ[_aegrty{~  AM  OV  X_  ah  jx  y  	yr#   c                 .    t        j                  | |      S r-   )r   !SentencePieceProcessor__Normalizer   r   s     r   
_Normalizez!SentencePieceProcessor._Normalize  s    ??dKKr#   c                 .    t        j                  | |      S r-   )r   ,SentencePieceProcessor__NormalizeWithOffsetsr  s     r   _NormalizeWithOffsetsz,SentencePieceProcessor._NormalizeWithOffsets  s    JJ4QUVVr#   c                 0    t        j                  | ||      S r-   )r   (SentencePieceProcessor__CalculateEntropy)r   r   r:  s      r   _CalculateEntropyz(SentencePieceProcessor._CalculateEntropy  s    FFtTSXYYr#   c                 2    t        j                  | |||      S r-   )r   -SentencePieceProcessor__CalculateEntropyBatch)r   rL  r:  rM  s       r   _CalculateEntropyBatchz-SentencePieceProcessor._CalculateEntropyBatch  s    KKDRUW\^ijjr#   c                 .    t        j                  | |      S r-   )r   .SentencePieceProcessor__OverrideNormalizerSpecr  s     r   _OverrideNormalizerSpecz.SentencePieceProcessor._OverrideNormalizerSpec  s    LLTSWXXr#   NFg?c                     t        |        || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _	        |s|r| j                  ||       yy)a  Initialzie sentencepieceProcessor.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
          reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                      from the all hypothesis (lattice) using
                      forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and dropout probability of
               merge operations for BPE-dropout.
        num_threads: number of threads in batch processing (Default = -1, auto-detected)
      )
model_filemodel_protoN)$_sentencepiece_processor_init_native	_out_type_add_bos_add_eos_reverse_emit_unk_piece_enable_sampling_nbest_size_alpha_num_threadsLoad)r   r  r  out_typer;  r<  r=  r>  r8  r9  r:  rM  s               r   InitzSentencePieceProcessor.Init  sj    D +40dndmdmdm+d-d#ddk%d	{		Z[	A #r#   c                    || j                   }|| j                  }|| j                  }|| j                  }|| j                  }|| j
                  }|| j                  }|	| j                  }	|
| j                  }
|dk(  r||dk(  s|dk(  s|	t        d      |
t        |
      t        urt        d      t        |      t        u r|t        u r| j                  ||
|||	||||	      S |t        u r| j                  ||
|||	||||	      S |dk(  s|dk(  r| j!                  ||
|||	||||	      S |dk(  r| j#                  ||
|||	||||	      S |t        u r| j%                  ||||	||||      S |t        u r| j'                  ||||	||||      S |dk(  s|dk(  r| j)                  ||||	||||      S |dk(  r| j+                  ||||	||||      S t        d	j-                  |            )
a~  Encode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
                 reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                    from the all hypothesis (lattice) using
                    forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and merge probability for
               BPE-dropout (probablity 'p' in BPE-dropout paper).
        num_threads: the number of threads used in the batch processing (Default = -1).
      Tr   r   a  When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. when "nbest_size = -1" , this method samples from all candidates on the lattice instead of nbest segmentations.num_threads must be intserialized_protor   immutable_protozunknown out_type={})r  r  r  r  r  r  r  r  r  RuntimeErrorr   intlistrN  rq   rQ  rT  rW  r?  rB  rE  rH  rd   )r   inputr  r;  r<  r=  r>  r8  r9  r:  rM  s              r   EncodezSentencePieceProcessor.Encode  s   @ 
	>>	--	--	--		--		 //		%%
			''	D	 j&8J!O&0Ao.
 	
 
	[ 1 <455	e	s?''{OZ(-w.Z Zs?**5+PZ+0'7G^] ]))X-@33E;Yc497GWVdf f((225+Xb38'7GUce e 
S  !&'>S 	S	S##E?J$)7GWnV 	V	'	'8w+>,,UOZ-2GWg~_ 	_	&	&++E?J,17GWn^ 	^ .55h?@@r#   c                 4     | j                   d|t        d|S Nr  r  r$   r  rq   r   r  kwargss      r   EncodeAsPiecesz%SentencePieceProcessor.EncodeAsPieces5      T[[=us=f==r#   c                 4     | j                   d|t        d|S r  r  r  r  s      r   EncodeAsIdsz"SentencePieceProcessor.EncodeAsIds9  r  r#   c                 ,     | j                   d|dd|S )Nr  r  r$   r  r  s      r   EncodeAsSerializedProtoz.SentencePieceProcessor.EncodeAsSerializedProto=  s    T[[Lu/ALVLLr#   c                 ,     | j                   d|dd|S )Nr  r  r$   r  r  s      r   EncodeAsImmutableProtoz-SentencePieceProcessor.EncodeAsImmutableProtoA  s    T[[Ku/@KFKKr#   c           	      :     | j                   d|||t        dd|S NTr  r9  r:  r  r8  r$   r  r   r  r9  r:  r  s        r   SampleEncodeAsPiecesz+SentencePieceProcessor.SampleEncodeAsPiecesE  1    T[[ Gu5"%tG?EG Gr#   c           	      :     | j                   d|||t        dd|S r  r  r  s        r   SampleEncodeAsIdsz(SentencePieceProcessor.SampleEncodeAsIdsJ  r  r#   c           	      2     | j                   d|||ddd|S )Nr  Tr  r$   r  r  s        r   SampleEncodeAsSerializedProtoz4SentencePieceProcessor.SampleEncodeAsSerializedProtoO  s1    T[[ Vu5"4dVNTV Vr#   c           	      2     | j                   d|||ddd|S )Nr  Tr  r$   r  r  s        r   SampleEncodeAsImmutableProtoz3SentencePieceProcessor.SampleEncodeAsImmutableProtoT  s1    T[[ Uu5"3TUMSU Ur#   c                 D     j                    j                   j                   j                   j                   j
                  dk  rd fd}t        |      t        u r|D 	cg c]
  }	 ||	       c}	S  ||      S c c}	w )a  NBestEncode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: nbest size
      r   r   c                    t         u rj                  |       S t        u rj                  |       S dk(  sdk(  rj	                  |       S dk(  rj                  |       S t        d      )Nr  r   r  zunknown out_type)r  r  rq   r  r  r  r  )r   r;  r<  r>  r9  r  r=  r   s    r   _encodez3SentencePieceProcessor.NBestEncode.<locals>._encode}  s    s?''j(/'>S Ss?**4+2GWnV V))X-@33D*4;Wg~_ _((2243:GWn^ ^ -..r#   )r  r  r  r  r  r  r   r  )
r   r  r  r;  r<  r=  r>  r9  r  ns
   ` ``````  r   NBestEncodez"SentencePieceProcessor.NBestEncodeY  s    ( 
	>>	--	--	--		--		%%
	q
/ /  
e	$)*q
**U^ +s   Bc                 6     | j                   d||t        d|S Nr  r9  r  r$   )r  rq   r   r  r9  r  s       r   NBestEncodeAsPiecesz*SentencePieceProcessor.NBestEncodeAsPieces  +    T 6Ej'*6.46 6r#   c                 6     | j                   d||t        d|S r  )r  r  r  s       r   NBestEncodeAsIdsz'SentencePieceProcessor.NBestEncodeAsIds  r  r#   c                 .     | j                   d||dd|S )Nr  r  r$   r  r  s       r   NBestEncodeAsSerializedProtoz3SentencePieceProcessor.NBestEncodeAsSerializedProto  s/    T EEj'9E=CE Er#   c                 .     | j                   d||dd|S )Nr  r  r$   r  r  s       r   NBestEncodeAsImmutableProtoz2SentencePieceProcessor.NBestEncodeAsImmutableProto  s/    T DEj'8D<BD Dr#   c           
         	
  j                    j                   j                   j                   j                  dd	d	
d
dk  rt        d      
r	st        d      
 	f
d}t        |      t        u r|D cg c]
  } ||       c}S  ||      S c c}w )a   SampleEncodeAndScore text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        num_samples: How many samples to return (Default = 1)
        alpha: inverse temperature for sampling
        wor: whether to sample without replacement (Default = false)
        include_best: whether to include the best tokenization, requires wor=True (Default = false)
      r   g      ?Fr   znum_examples must be positivez8When include_best is True, We must specify "wor = True".c                    
 t         u r	j                  | 
	      S t        u r	j                  | 
	      S dk(  sdk(  r	j	                  | 
	      S dk(  r	j                  | 
	      S t        d      )Nr  r   r  zunknown output type)r  r  rq   r  r  r  r  )r   r;  r<  r:  r>  r  r  r  r=  r   r  s    r   r  z<SentencePieceProcessor.SampleEncodeAndScore.<locals>._encode  s    s?00{E3P\18'7N\ \s?33D+ucS_4;Wg~_ _ ))X-@<<T;PUWZ\h=DgwXfh h ((;;D+uVY[g<CWgWeg g 011r#   )r  r  r  r  r  r  r   r  )r   r  r  r;  r<  r=  r>  r  r:  r  r  r  r  s   ` `````````  r   SampleEncodeAndScorez+SentencePieceProcessor.SampleEncodeAndScore  s    4 
	>>	--	--	--		--								:;;	cUVV2 2& 
e	$)*q
**U^ +s   #B=c                 8     | j                   d|||t        d|S Nr  r  r:  r  r$   )r  rq   r   r  r  r:  r  s        r   SampleEncodeAndScoreAsPiecesz3SentencePieceProcessor.SampleEncodeAndScoreAsPieces  .    &T&& ?USX03?7=? ?r#   c                 8     | j                   d|||t        d|S r  )r  r  r  s        r   SampleEncodeAndScoreAsIdsz0SentencePieceProcessor.SampleEncodeAndScoreAsIds  r  r#   c                 0     | j                   d|||dd|S )Nr  r   r$   r  r  s        r   %SampleEncodeAndScoreAsSerializedProtoz<SentencePieceProcessor.SampleEncodeAndScoreAsSerializedProto  s2    &T&& NUSX0BNFLN Nr#   c                 0     | j                   d|||dd|S )Nr  r   r$   r  r  s        r   $SampleEncodeAndScoreAsImmutableProtoz;SentencePieceProcessor.SampleEncodeAndScoreAsImmutableProto  s2    &T&& MUSX0AMEKM Mr#   c                 	   || j                   }|t        |      t        urt        d      |sy|t        u r&t        |      t        u r| j                  |g      S t        |      t        u r| j                  |g      S t        |      t        u rt        |      dk(  st        |d         t        u r| j                  |      S t        |d         t        u r| j                  |      S t        |d         t        u rct        |d         dk(  st        |d   d         t        u r| j                  ||      S t        |d   d         t        u r| j                  ||      S |t        u r&t        |      t        u r| j                  |g      S t        |      t        u r| j                  |g      S t        |      t        u rt        |      dk(  st        |d         t        u r| j                  |      S t        |d         t        u r| j                  |      S t        |d         t        u rct        |d         dk(  st        |d   d         t        u r| j                  ||      S t        |d   d         t        u r| j                  ||      S |dk(  r&t        |      t        u r| j                  |g      S t        |      t        u r| j                  |g      S t        |      t        u rt        |      dk(  st        |d         t        u r| j                  |      S t        |d         t        u r| j                  |      S t        |d         t        u rct        |d         dk(  st        |d   d         t        u r| j!                  ||      S t        |d   d         t        u r| j#                  ||      S |dk(  r&t        |      t        u r| j%                  |g      S t        |      t        u r| j'                  |g      S t        |      t        u rt        |      dk(  st        |d         t        u r| j%                  |      S t        |d         t        u r| j'                  |      S t        |d         t        u rct        |d         dk(  st        |d   d         t        u r| j)                  ||      S t        |d   d         t        u r| j+                  ||      S t        d      )zDecode processed id or token sequences.

      Args:
        out_type: output type. str, bytes or 'serialized_proto' or 'immutable_proto' (Default = str)
        num_threads: the number of threads used in the batch processing (Default = -1).
      r  r   r   r  r  zunknown output or input type)r  r   r  r  rq   r\  rc  r  r   rs  r  bytesr_  rv  rf  ri  ry  r  rl  ro  r|  r  )r   r  r  rM  s       r   DecodezSentencePieceProcessor.Decode  sz    
	''		[ 1 <455	S;#%)
);###UG,
,;$Z1_U1X# 5??5))%(^s"%%e,,%(^t#58}!T%(1+%6#%=((<<E!HQK C'++E;??	U	;#''0
0;###UG,
,;$Z1_U1X# 5))%00%(^s"%%e,,%(^t#58}!T%(1+%6#%=//{CCE!HQK C'++E;??	'	';#115':
:;#44eW=
=;$Z1_U1X# 533E::%(^s"66u==%(^t#58}!T%(1+%6#%=99%MME!HQK C'<<UKPP 
&	&;#00%9
9;#33UG<
<;$Z1_U1X# 522599%(^s"55e<<%(^t#58}!T%(1+%6#%=88LLE!HQK C';;E;OO 788r#   c                 ,     | j                   d||d|S r  r  r   r  r  r  s       r   DecodePiecesz#SentencePieceProcessor.DecodePiecesg      T[[BuxB6BBr#   c                 ,     | j                   d||d|S r  r  r  s       r   	DecodeIdsz SentencePieceProcessor.DecodeIdsk  r  r#   c                 ,     | j                   d||d|S r  r  r  s       r   DecodePiecesAsSerializedProtoz4SentencePieceProcessor.DecodePiecesAsSerializedProtoo  r  r#   c                 ,     | j                   d||d|S r  r  r  s       r   DecodeIdsAsSerializedProtoz1SentencePieceProcessor.DecodeIdsAsSerializedProtos  r  r#   c                 ,     | j                   d||d|S r  r  r  s       r   DecodePiecesAsImmutableProtoz3SentencePieceProcessor.DecodePiecesAsImmutableProtow  r  r#   c                 ,     | j                   d||d|S r  r  r  s       r   DecodeIdsAsImmutableProtoz0SentencePieceProcessor.DecodeIdsAsImmutableProto{  r  r#   c                     t        |      t        u r?|| j                  }|t        |      t        urt	        d      | j                  |||      S | j                  ||      S )zCalculate sentence entropyr  )r   r  r  r  r  r  r  )r   r  r:  rM  s       r   r  z'SentencePieceProcessor.CalculateEntropy  se    	e	))+${"33">67
7**5%EE##E511r#   c                 z      fd}t        |      t        u r|D cg c]
  } ||       c}S  ||      S c c}w )Nc                 L    rj                  |       S j                  |       S r-   r  r  r   r   with_offsetss    r   
_normalizez4SentencePieceProcessor.Normalize.<locals>._normalize  &    ++D1
1t$$r#   r   r  r   r  r"  r#  r>   s   ` `  r   	Normalizez SentencePieceProcessor.Normalize  ;    %
 
e	',-!
1-- .   8c                 t    i }|j                         D ]  \  }}t        |      ||<    | j                  |      S r-   )itemsrq   r  )r   r  
new_kwargskeyr    s        r   OverrideNormalizerSpecz-SentencePieceProcessor.OverrideNormalizerSpec  s>    j %*#ue*
3%))*55r#   c                 "    | j                         S r-   r  rK   s    r   
piece_sizez!SentencePieceProcessor.piece_size        r#   c                 "    | j                         S r-   r0  rK   s    r   
vocab_sizez!SentencePieceProcessor.vocab_size  r2  r#   c                 "    | j                         S r-   r/  rK   s    r   __getstate__z#SentencePieceProcessor.__getstate__      ((**r#   c                 F    | j                          | j                  |       y r-   rL   r   r   r/  s     r   __setstate__z#SentencePieceProcessor.__setstate__      
mmo
""#9:r#   c                 "    | j                         S r-   r0  rK   s    r   r   zSentencePieceProcessor.__len__  r2  r#   c                 $    | j                  |      S r-   )r
  r	  s     r   r   z"SentencePieceProcessor.__getitem__  s    ^^E""r#   c                 h    |r|rt        d      |r| j                  |      S | j                  |      S )a  Overwride SentencePieceProcessor.Load to support both model_file and model_proto.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto. Either `model_file`
          or `model_proto` must be set.
      z-model_file and model_proto must be exclusive.)r  r   r4  )r   r  r  s      r   r  zSentencePieceProcessor.Load  s:     
JKK	++K88z**r#   )	NNNNNNNNNNN)NNNNNNr-   )r  )r  )ir   r   r6   r   r   r   r   rL   r   delete_SentencePieceProcessorrt   r   r   r   r   r   r   r  r  r
  r  r  r  r  r  r  r"  r&  r)  r,  r/  r4  r?  rB  rE  rH  rN  rQ  rT  rW  r\  r_  rc  rf  ri  rl  ro  rs  rv  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  rq   r  r  r  r  r  r  r  r'  r.  r1  r4  r7  r<  r   r   r  r$   r#   r   r   r      s   -/IOdeGHj%CC___VK_SHLIHIIHFBBBBRM]`ihnqzyKRQ\b[a]dnm`qpJMVUnqzyLWZkY !"-Bd "#[|>>MLG
G
V
U "   #'#7t6
6
E
D '+%)%)%),0)-#'!%*.JZ?
?
N
M
 &)d \~ ,/ C ), CCCCC	26!!+;
!#+r#   r   c                 ,    t        j                  |       S r-   )r   SetRandomGeneratorSeed)seeds    r   rD  rD    s    0066r#   c                 ,    t        j                  |       S r-   )r   SetMinLogLevel)rC   s    r   rG  rG    s    ((++r#   c                       e Zd Z ed d d      Zd ZeZed        Z	ed        Z
ed        Zed	        Zed
        Zedd       Zedd       Zy)SentencePieceTrainerc                 6    | j                   j                         S r-   r<   r=   s    r   r?   zSentencePieceTrainer.<lambda>  r@   r#   c                 8    | j                   j                  |      S r-   r<   rB   s     r   r?   zSentencePieceTrainer.<lambda>  rD   r#   rE   rF   c                     t        d      )NzNo constructor defined)r   )r   r  r  s      r   rL   zSentencePieceTrainer.__init__  s    566r#   c                 ,    t        j                  |       S r-   )r   %SentencePieceTrainer__TrainFromString)r3  s    r   _TrainFromStringz%SentencePieceTrainer._TrainFromString  s    CCCHHr#   c                 ,    t        j                  |       S r-   )r   "SentencePieceTrainer__TrainFromMapr  s    r   _TrainFromMapz"SentencePieceTrainer._TrainFromMap  s    @@FFr#   c                 .    t        j                  | |      S r-   )r   #SentencePieceTrainer__TrainFromMap2r  iters     r   _TrainFromMap2z#SentencePieceTrainer._TrainFromMap2      AA$MMr#   c                 ,    t        j                  |       S r-   )r   #SentencePieceTrainer__TrainFromMap3rR  s    r   _TrainFromMap3z#SentencePieceTrainer._TrainFromMap3  s    AA$GGr#   c                 .    t        j                  | |      S r-   )r   #SentencePieceTrainer__TrainFromMap4rV  s     r   _TrainFromMap4z#SentencePieceTrainer._TrainFromMap4  rY  r#   Nc                    | &t        |       t        u rt        j                  |       S d }d}d}i }|j	                         D ]  \  }}|dv r|}|dv r|} ||      ||<     |r@|rt        j                  ||      }nt        j                  |      }|j                  |       y|rt        j                  ||      S t        j                  |      S )zDTrain Sentencepiece model. Accept both kwargs and legacy string arg.Nc                 8   t        |       t        u rzt        j                  d   dk(  rt	               }n
t               }t        j                  |d      }|j                  | D cg c]  }t        |       c}       |j                         S t        |       S c c}w )zEncode value to CSV..r      r   )lineterminator)r   r  sysr   StringIOBytesIOcsvwriterwriterowrq   getvalue)r    frh  rC   s       r   r  z,SentencePieceTrainer._Train.<locals>._encode  su    ;$a A%
A	A::a3&
//51a3q61
2
U
 2s   #B)sentence_iteratorsentence_reader)model_writer)
r   rq   rI  rO  r+  r_  r\  writerX  rS  )	r3  r  r  rl  rn  r,  r-  r    r  s	            r   _TrainzSentencePieceTrainer._Train  s     
T#Y#-#44S99 lj +*#u::#
$$,#EN*S/+ 
,;;J;LN+ -;;JG+;'  %44ZARS
S%33J?
?r#   c                 t    t        |      5  t        j                  dd| i| d d d        y # 1 sw Y   y xY w)N)ostreamr3  r$   )
_LogStreamrI  rp  )r3  	logstreamr  s      r   TrainzSentencePieceTrainer.Train  s5    i( 7##66v67 7 7s   .7r-   rA  )r   r   r6   r   r   rL   r   r   staticmethodrO  rS  rX  r\  r_  rp  ru  r$   r#   r   rI  rI    s    -/IOdeG7HI I G G N N H H N N * *X 7 7r#   rI  c                       e Zd Z ed d d      ZeZd Zej                  Z
d Zd Zd Zd	 Zd
 Zd Zd Zd Z	 	 	 	 	 	 	 ddZddZd Zd Zy)SentencePieceNormalizerc                 6    | j                   j                         S r-   r<   r=   s    r   r?   z SentencePieceNormalizer.<lambda>  r@   r#   c                 8    | j                   j                  |      S r-   r<   rB   s     r   r?   z SentencePieceNormalizer.<lambda>  rD   r#   rE   rF   c                 T    t        j                  | t        j                                y r-   )r    SentencePieceNormalizer_swiginitnew_SentencePieceNormalizerrK   s    r   rL   z SentencePieceNormalizer.__init__   s    77n>h>h>jkr#   c                 .    t        j                  | |      S r-   )r   /SentencePieceNormalizer_LoadFromSerializedProtor   s     r   r   z/SentencePieceNormalizer.LoadFromSerializedProto$  s    MMdT^__r#   c                 .    t        j                  | |      S r-   )r   'SentencePieceNormalizer_LoadFromRuleTSV)r   r   s     r   LoadFromRuleTSVz'SentencePieceNormalizer.LoadFromRuleTSV'  s    EEdHUUr#   c                 .    t        j                  | |      S r-   )r   (SentencePieceNormalizer_LoadFromRuleName)r   r   s     r   LoadFromRuleNamez(SentencePieceNormalizer.LoadFromRuleName*  s    FFtTRRr#   c                 ,    t        j                  |       S r-   )r   .SentencePieceNormalizer_serialized_model_protorK   s    r   r/  z.SentencePieceNormalizer.serialized_model_proto-  s    LLTRRr#   c                 .    t        j                  | |      S r-   )r   $SentencePieceNormalizer_LoadFromFiler2  s     r   r4  z$SentencePieceNormalizer.LoadFromFile0  s    BB4MMr#   c                 .    t        j                  | |      S r-   )r   "SentencePieceNormalizer__Normalizer  s     r   r  z"SentencePieceNormalizer._Normalize3  s    @@tLLr#   c                 .    t        j                  | |      S r-   )r   -SentencePieceNormalizer__NormalizeWithOffsetsr  s     r   r  z-SentencePieceNormalizer._NormalizeWithOffsets6  s    KKDRVWWr#   c                 0    t        j                  | ||      S r-   )r   &SentencePieceNormalizer__SetProtoField)r   r   r    s      r   _SetProtoFieldz&SentencePieceNormalizer._SetProtoField9  s    DDT4QVWWr#   Nc                 B   t        |        |r| j                  |      }nG|r| j                  |      }n3|r| j                  |      }n|r| j	                  |      }nt        d      |r7| j                  d|       | j                  d|       | j                  d|       yy)a  Initialzie sentencePieceNormalizer.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        rule_tsv: The normalization rule file in TSV format.
        rule_name: Pre-defined normalization name.
        add_dummy_prefix: add dummy prefix.
        escape_whitespaces: escape whitespaces.
        remove_extra_whitespaces: remove extra whitespaces.
      zno model is specifiedadd_dummy_prefixescape_whitespacesremove_extra_whitespacesN)%_sentencepiece_normalizer_init_nativer4  r   r  r  r  r  )	r   r  r  rule_tsv	rule_namer  r  r  statuss	            r   r  zSentencePieceNormalizer.Init<  s    ( ,D1	"":.--k:%%h/&&y1233	.0@A02DE68PQ 
r#   c                 z      fd}t        |      t        u r|D cg c]
  } ||       c}S  ||      S c c}w )Nc                 L    rj                  |       S j                  |       S r-   r   r!  s    r   r#  z5SentencePieceNormalizer.Normalize.<locals>._normalizec  r$  r#   r%  r&  s   ` `  r   r'  z!SentencePieceNormalizer.Normalizeb  r(  r)  c                 "    | j                         S r-   r6  rK   s    r   r7  z$SentencePieceNormalizer.__getstate__m  r8  r#   c                 F    | j                          | j                  |       y r-   r:  r;  s     r   r<  z$SentencePieceNormalizer.__setstate__q  r=  r#   )NNNNFFFr-   )r   r   r6   r   r   r   r   rL   r   delete_SentencePieceNormalizerrt   r   r  r  r/  r4  r  r  r  r  r'  r7  r<  r$   r#   r   rx  rx    s    -/IOdeGHl%DD`VSSNMXX # %&+$RL+;r#   rx  c                 ,    t        j                  |       S r-   )r   
SetDataDir)data_dirs    r   r  r  y  s    $$X..r#   )re  )rf  c                 8   i }| j                   j                         D ]V  \  }}t        j                  d|      st        j                  dd|      j                         j                  dd      }|||<   X |j                         D ]  \  }}t        | ||        y)z1Added snake_cased method from CammelCased method.z^[A-Z]+z(?<!^)(?=[A-Z])_n_bestnbestN)r/   r+  rematchsublowerreplacesetattr)	classname	snake_mapkrC   snakes        r   _add_snake_caser    s     )  &&( da	xx
Aff'' : i	
 oo daIq!r#   c                 P    t        | |d      fdfd}t        | ||       y)z4Enables batch request for the method classname.name.Nc                 ~    t        |      t        u r#|dk  s|| j                         k\  rt        d       | |      S )Nr   zpiece id is out of range.)r   r  r1  r   )rC   r  funcs     r   _funcz_batchnize.<locals>._func  s9    Aw#~1q5A$72331:r#   c                 p    t        |      t        u r|D cg c]  } | |       c}S  | |      S c c}w r-   r%  )r   r3  r  r  s      r   _batched_funcz!_batchnize.<locals>._batched_func  s8    CyD&)*eD!n**4 +s   3)r   r  )r  r   r  r  r  s      @@r   
_batchnizer    s)    	D$	'$
 
)T=)r#   rL   )r
  r  r  r  r  r  r  )__version__sentencepiecepackage_datac                        e Zd ZddZd Zd Zy)rs  Nc                 r    || _         | j                   $t        j                  j                         | _        y y r-   )rr  rd  stderrfilenoorig_stream_fileno)r   rr  s     r   rL   z_LogStream.__init__  s-    DL|| #

 1 1 3d  r#   c                     | j                   ]t        j                  | j                        | _        t        j
                  | j                   j                         | j                         y y r-   )rr  osdupr  orig_stream_dupdup2r  rK   s    r   	__enter__z_LogStream.__enter__  sI    ||VVD$;$;<dggdll!!#T%<%<=  r#   c                 "   | j                   t        j                  | j                         t        j                  | j
                  | j                         t        j                  | j
                         | j                   j                          y y r-   )rr  r  closer  r  r  )r   r   r    	tracebacks       r   __exit__z_LogStream.__exit__  s_    ||hht&&'ggd""D$;$;<hht##$
ll	  r#   r-   )r   r   r6   rL   r  r  r$   r#   r   rs  rs    s    4
>
r#   rs  )>rd  r   _swig_python_version_info__package__r   r   r   builtinsr   ImportErrorr   r%   r*   r3   r   r5   objectr:   >ImmutableSentencePieceText_ImmutableSentencePiece_swigregisterrx   'ImmutableSentencePieceText_swigregisterr   ,ImmutableNBestSentencePieceText_swigregisterr   #SentencePieceProcessor_swigregisterrD  rG  rI  !SentencePieceTrainer_swigregisterrx  $SentencePieceNormalizer_swigregisterr  r  rg  r  importlib.resources	importlibiore  rf  r  r  rL   r  r  r  r  r  Tokenizer  
Detokenizemset_random_generator_seedset_min_log_level_versionr  pathr   rq   	resourcesfilesrs  r$   r#   r   <module>r     s8   :#/ "[
L$ L
3 3n N M MN  AD DP 7 6 67Q R4f 4p < ; ;<[ \J+V J+\ 3 2 23I J7,K76 K7^ 1 0 01E FW;f W;v 4 3 34K L/ 
 
 
 	   
*" (>'F'F $(?(H(H % 
,B,G,G H -D-I-I J"8"?"?  $:$A$A  !
 (A #Q'	( & ' $ % ' (2 "  ! 
277<<I//55oFGX Y Y%  s   G2 2	G?>G?