
    +Si<=                        d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
mZmZ ddlmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZmZmZmZm Z  dd	l!m"Z"m#Z#  ejH                  e%      Z& G d
 de      Z' G d d      Z( G d de(      Z) G d de(      Z* G d de*      Z+ G d de(      Z, G d de,      Z- G d de)      Z. G d de,      Z/ G d de)      Z0 G d de,      Z1 G d  d!      Z2 G d" d#ee         Z3y)$zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)IterableIteratorMutableMapping)AnyBinaryIOClassVarTextIOcast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     X/var/www/html/leadgen/airagagent/rag_env/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   &   s    r   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eez  e
z  ddfdZddZd
edee
   fdZy)CMapBaser   kwargsreturnNc                 .    |j                         | _        y N)copyattrsselfr"   s     r   __init__zCMapBase.__init__-   s    28++-
r   c                 @    | j                   j                  dd      dk7  S )NWModer   )r'   getr)   s    r   is_verticalzCMapBase.is_vertical0   s    zz~~gq)Q..r   kvc                 "    || j                   |<   y r%   )r'   )r)   r0   r1   s      r   set_attrzCMapBase.set_attr3   s    

1r   codecidc                      y r%   r   )r)   r4   r5   s      r   add_code2cidzCMapBase.add_code2cid6       r   c                      y r%   r   )r)   r5   r4   s      r   add_cid2unichrzCMapBase.add_cid2unichr9   r8   r   c                      y r%   r   )r)   cmaps     r   use_cmapzCMapBase.use_cmap<   r8   r   c                     t         r%   )NotImplementedError)r)   r4   s     r   decodezCMapBase.decode?   s    !!r   )r<   r!   r#   N)r   r   r   debugobjectr*   boolr/   strr3   intr7   r   bytesr:   r=   r   r@   r   r   r   r!   r!   *   s    E@ @D @/T /# & T  3 4 # Y->-D  "5 "Xc] "r   r!   c            	           e Zd Zdeez  ddfdZdefdZdeddfdZde	de
e   fd	Zej                  dd
fdedeeef   dz  deedf   ddfdZy)CMapr"   r#   Nc                 >    t        j                  | fi | i | _        y r%   )r!   r*   code2cidr(   s     r   r*   zCMap.__init__D   s    $)&)+-r   c                 V    dj                  | j                  j                  d            S )Nz
<CMap: {}>CMapNameformatr'   r-   r.   s    r   __repr__zCMap.__repr__H   s     ""4::>>*#=>>r   r<   c                     t        |t              sJ t        t        |                   dt        t
        t        f   dt        t
        t        f   dd ffd | j                  |j                         y )Ndstsrcr#   c                     |j                         D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r%   )items
isinstancedict)rQ   rR   r0   r1   dr&   s        r   r&   zCMap.use_cmap.<locals>.copyN   sF    		 1a&+-ACFAJCFr   )rU   rH   rD   typerV   rE   rB   rJ   )r)   r<   r&   s     @r   r=   zCMap.use_cmapK   s`    $%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r   r4   c              #     K   t         j                  d| |       | j                  }t        |      D ]V  }||v rD||   }t	        |t
              r| | j                  }-t        t        t
        t        f   |      }K| j                  }X y w)Nzdecode: %r, %r)	logrA   rJ   iterrU   rE   r
   rV   rB   )r)   r4   rW   ixs        r   r@   zCMap.decodeY   sy     		"D$/MMd 		"AAvaDa%GAT#v+.2AMM		"s   B	Br   outrJ   .c           	      $   || j                   }d}t        |j                               D ]d  \  }}g ||}t        |t              r|j                  d|d| d       6| j                  |t        t        t        t        f   |      |       f y )Nr   zcode z = cid 
)r^   rJ   r4   )
rJ   sortedrT   rU   rE   writedumpr
   rV   rB   )r)   r^   rJ   r4   r0   r1   cs          r   rc   z	CMap.dumpg   s     }}HD8>>+, 	PDAq$

A!S!		E!gaS34		cDc6k1BA,FQ	O	Pr   )r   r   r   rD   rE   r*   rO   r!   r=   rF   r   r@   sysstdoutr	   rV   rB   tuplerc   r   r   r   rH   rH   C   s    .s .t .?# ?+X +$ +"5 "Xc] "  jj-1 "	PP sF{#d*P CHo	P
 
Pr   rH   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapr4   r#   .c                 d    t        |      dz  }|r t        j                  d| d|d |dz         S y)N   >Hr   lenstructunpackr)   r4   ns      r   r@   zIdentityCMap.decodey   s8    IN==1QCq4!a%=99r   Nr   r   r   rF   rg   rE   r@   r   r   r   ri   ri   x       5 U38_ r   ri   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapByter4   r#   .c                 X    t        |      }|rt        j                  d| d|d |       S y)Nrl   Br   rn   rr   s      r   r@   zIdentityCMapByte.decode   s0    I==1QCq4844r   Nrt   r   r   r   rw   rw      ru   r   rw   c                   f    e Zd Zdeez  ddfdZdefdZdedefdZej                  fde
ddfd	Zy)

UnicodeMapr"   r#   Nc                 >    t        j                  | fi | i | _        y r%   )r!   r*   
cid2unichrr(   s     r   r*   zUnicodeMap.__init__   s    $)&)*,r   c                 V    dj                  | j                  j                  d            S )Nz<UnicodeMap: {}>rL   rM   r.   s    r   rO   zUnicodeMap.__repr__   s     !((
)CDDr   r5   c                 N    t         j                  d| |       | j                  |   S )Nget_unichr: %r, %r)rZ   rA   r}   r)   r5   s     r   
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r   r^   c                     t        | j                  j                               D ]  \  }}|j                  d| d|d        y )Nzcid z = unicode r`   )ra   r}   rT   rb   )r)   r^   r0   r1   s       r   rc   zUnicodeMap.dump   sB    4??0023 	4DAqIIQC{1%r23	4r   )r   r   r   rD   rE   r*   rO   r   re   rf   r	   rc   r   r   r   r{   r{      sV    -s -t -E# E$c $c $ "% 4 4 4r   r{   c                       e Zd ZdedefdZy)IdentityUnicodeMapr5   r#   c                 F    t         j                  d| |       t        |      S )z+Interpret character id as unicode codepointr   )rZ   rA   chrr   s     r   r   zIdentityUnicodeMap.get_unichr   s    		&c23xr   N)r   r   r   rE   rD   r   r   r   r   r   r      s    c c r   r   c                        e Zd ZdededdfdZy)FileCMapr4   r5   r#   Nc                 P   t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ];  }t        |      }||v r!t        t        t        t        f   ||         }3i }|||<   |}= t        |d         }|||<   y )N)	rU   rD   rE   rX   rJ   ordr
   rV   rB   )r)   r4   r5   rW   rd   cits          r   r7   zFileCMap.add_code2cid   s    $$C)= 	
s$Zc#@
 	
= MMcr 	AQBQwc6k*AbE2')"	 b]"r   )r   r   r   rD   rE   r7   r   r   r   r   r      s     3 4 r   r   c                   ,    e Zd Zdedeez  ez  ddfdZy)FileUnicodeMapr5   r4   r#   Nc                    t        |t              sJ t        t        |                   t        |t              r2t        |j
                  t              sJ t        |j
                        }nJt        |t              r|j                  dd      }n't        |t              rt        |      }nt        |      |dk(  r| j                  j                  |      dk(  ry || j                  |<   y )NzUTF-16BEignore     )rU   rE   rD   rX   r   namer   rF   r@   r   r   r}   r-   )r)   r5   r4   unichrs       r   r:   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3#dI&dii---!$)),Fe$[[X6Fc"YFt$$ X$//"5"5c":c"A%r   )r   r   r   rE   r   rF   r:   r   r   r   r   r      s&    &# &Y->-D & &r   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler#   Nc                     t         |   |       |j                  | _        |j                  rd| j
                  d<   y y N)rL      r,   )superr*   CODE2CIDrJ   IS_VERTICALr'   )r)   r   r   	__class__s      r   r*   zPyCMap.__init__   s:    $'"#DJJw r   )r   r   r   rD   r   r*   __classcell__r   s   @r   r   r      s"    $S $# $$ $ $r   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr#   Nc                     t         |   |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r   )r   r*   CID2UNICHR_Vr}   r'   CID2UNICHR_H)r)   r   r   r   r   s       r   r*   zPyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr   )r   r   r   rD   r   rC   r*   r   r   s   @r   r   r      s)    2S 2# 2 2$ 2 2r   r   c                       e Zd ZU i Zeeeef      ed<   i Z	eeee
e   f      ed<    G d de      Zededefd       Zededefd       Zedded	edefd
       Zy)CMapDB_cmap_cache_umap_cachec                       e Zd Zy)CMapDB.CMapNotFoundNr   r   r   r   CMapNotFoundr      s    r   r   r   r#   c           
      `   |j                  dd      }| d}t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }t        j                  j                  ||      }t        j                  j                  |      }t        j                  j                  |      }|j                  |t        j                  z         st        j                  j                  |      st        j                  |      5 }t!        t#        |      dt%        j&                  |j)                                     cd d d        c S  t*        j-                  |      # 1 sw Y   xY w)	N  z
.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/r<   r   )replacerZ   rA   osenvironr-   pathjoindirname__file__realpath
startswithsepexistsgzipopenrX   rD   pickleloadsreadr   r   )	clsr   filename
cmap_paths	directoryr   resolved_pathresolved_directorygzfiles	            r   
_load_datazCMapDB._load_data   s?   ||D"%V:&		-&JJNN;(>?GGLL2F;

 $ 
	LI77<<	84DGG,,T2M!#!1!1)!< ++,>,GHww~~m,YY}- LD	2v||FKKM/JKL L
	L !!$''L Ls   
7F##F-	c                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r,   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)ri   rw   r   KeyErrorr   r   )r   r   datar<   s       r   get_cmapzCMapDB.get_cmap   s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-dD'99s   A 	A! A!r   c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|       }dD cg c]  }t        |||       nc c}w c}| j                   |<   | j                   |   |   S )Nzto-unicode-)FT)r   r   r   r   )r   r   r   r   r1   s        r   get_unicode_mapzCMapDB.get_unicode_map
  s{    	??4(22 		~~D623FS TdD!!< T Tt$X..s    	  AN)F)r   r   r   r   r   rV   rD   r   __annotations__r   listr   r   r   classmethodr   r   r!   r   rC   r{   r   r   r   r   r   r      s    /1K$sF{+,1;=K$sD$6678=y  (c (c ( (* C H  " /3 /$ /: / /r   r   c                   L   e Zd ZdededdfdZddZ ed      Z ed      Z	 ed	      Z
 ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      ZdededdfdZdeddfdZy)
CMapParserr<   fpr#   Nc                 j    t        j                  | |       || _        d| _        t	               | _        y )NT)r   r*   r<   _in_cmapset	_warnings)r)   r<   r   s      r   r*   zCMapParser.__init__  s)    tR(	#&5r   c                     t        j                  t              5  | j                          d d d        y # 1 sw Y   y xY wr%   )
contextlibsuppressr   
nextobjectr.   s    r   runzCMapParser.run  s/      ' 	OO	 	 	s   4=s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 :   || j                   u rd| _        | j                          y|| j                  u rd| _        y| j                  sy|| j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y|| j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y|| j                  u r| j                          y|| j                   u r| j                          y|| j"                  u r| j                          y|| j$                  u rj| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]7  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt)        |t.              s| j-                  d	       nt1        |
      t1        |      k7  r| j-                  d
       |
dd }|dd }||k7  r| j-                  d       |
dd }|dd }t3        |      }t3        |      }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C : y|| j<                  u r| j                          y|| j>                  u ru| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]C  \  }}t)        |t*              st)        |t.              s(| j                  j;                  ||       E y|| j@                  u r| j                          y|| jB                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt1        |
      t1        |      k7  r| j-                  d       ut3        |
      }t3        |      }t)        |tD              rft1        |      ||z
  dz   k7  r| j-                  d       tG        t5        ||dz         |d      D ]!  \  }}| j                  j;                  ||       # t)        |t*              sJ |dd }t3        |      }|dd }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C  y|| jH                  u r| j                          y|| jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j;                  t3        |      |       N y|| jL                  u r| j                          y|| jN                  u r| j                          y| jQ                  ||f       y# t        $ r Y yw xY w# t        $ r Y yt        j                  $ r Y yw xY wc c}}w c c}}w c c}}w c c}}w )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrk   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.)strict))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr<   r3   r   r   KEYWORD_USECMAPr=   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rU   rF   
_warn_oncerE   ro   r   rangerp   packr:   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGEr   zipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r)   r   r   _r0   r1   cmapname__objobjs
start_byteend_byter5   start_prefix
end_prefixsvarevarstartendvlenr\   r]   r4   unicode_valuevarbaseprefixs                              r   
do_keywordzCMapParser.do_keyword2  s7   
 D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7-5a-> 9)
Hc!*e4OO$VW!(E2OO$TU!#s+OO$TUz?c(m3OO- )#2%cr]
:-OO: !"#}dm4ysU{Q/ 9A$v{{4'CTEF'KKAII,,S1Wa89;9@ D---KKMD+++)-7IRC7D7%a. 8	TdE*z#s/CII,,S$78 D---KKMD+++)-7IRC7D7.6q$.? ?*
Hd!*e4OO$EF!(E2OO$CDz?c(m3OO$TU
+h'dD)4yC%K!O3F /2eS1W-tE/ E*] 		00mDE
 &dE222rs)C"3<D!#2YFs8D"3;?3 ?"V[[tax%@$%HH		00A>?9?> D,,,KKMD***)-7IRC7D7%a. A	Tc5)ju.EII,,WS\4@A D111KKMD///KKM		3,E !  !   &&   8P 8 8N 8sC   ?Y (AY" 5ZZ/ZZ	YY"	Z-ZZmsgc                     || j                   vr6| j                   j                  |       d}t        j                  ||z          yy)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addrZ   warning)r)   r  base_msgs      r   r   zCMapParser._warn_once  sA    dnn$NNs#/ 
 KK3' %r   )r#   N)r   r   r   r!   r   r*   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rE   r   r  rD   r   r   r   r   r   r     s   )X )8 ) ) L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23W c W ) W  W r	(c 	(d 	(r   r   )4__doc__r   r   loggingr   os.pathr   rp   re   collections.abcr   r   r   typingr   r   r   r	   r
   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   rZ   r   r!   rH   ri   rw   r{   r   r   r   r   r   r   r   r   r   r   <module>r#     s   	    	    
 > >  - = 6 T T ,g!	 	" "22P8 2Pj8 | 4 4" t $&Z &*$T $2: 27/ 7/t(y) (r   