
    9i                    j    d Z ddlmZ ddlmZ ddlmZ 	 ddlZdZ G d d	e      Z
y# e	$ r dZY w xY w)
zNLTK text splitter.    )annotations)Any)TextSplitterNTFc                  J     e Zd ZdZ	 	 ddd	 	 	 	 	 	 	 	 	 d fdZddZ xZS )	NLTKTextSplitterz"Splitting text using NLTK package.F)use_span_tokenizec               r   t        |   di | || _        || _        || _        | j                  r| j                  rd}t        |      t        sd}t        |      | j                  r/t        j                  j                  | j                        | _        yt        j                  j                  | _        y)zInitialize the NLTK splitter.z6When use_span_tokenize is True, separator should be ''zANLTK is not installed, please install it with `pip install nltk`.N )super__init__
_separator	_language_use_span_tokenize
ValueError	_HAS_NLTKImportErrornltktokenize_get_punkt_tokenizer
_tokenizersent_tokenize)self	separatorlanguager   kwargsmsg	__class__s         g/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/langchain_text_splitters/nltk.pyr   zNLTKTextSplitter.__init__   s     	"6"#!"3""tJCS/!UCc"""""mm@@PDO"mm99DO    c                j   | j                   rot        | j                  j                  |            }g }t	        |      D ]:  \  }\  }}|dkD  r||dz
     d   }||| ||| z   }n||| }|j                  |       < n| j                  || j                        }| j                  || j                        S )z&Split incoming text and return chunks.r      )r   )	r   listr   span_tokenize	enumerateappendr   _merge_splitsr   )	r   textspanssplitsistartendprev_endsentences	            r   
split_textzNLTKTextSplitter.split_text,   s     ""66t<=EF#,U#3 (<E3q5$QU|AH#HU3d5oEH#E#Hh'( __TDNN_CF!!&$//::r   )z

english)
r   strr   r1   r   boolr   r   returnNone)r'   r1   r3   z	list[str])__name__
__module____qualname____doc__r   r/   __classcell__)r   s   @r   r   r      sR    ,  !:
 #(:: :
  : : 
:0;r   r   )r8   
__future__r   typingr   langchain_text_splitters.baser   r   r   r   r   r
   r   r   <module>r=      s@     "  6I
*;| *;	  Is   ( 22