
    +Siܒ                     D   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	 d dl
mZmZmZmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lm Z  d dl!m"Z"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@  ej                  eB      ZC G d de)      ZD G d deE      ZF G d de"      ZG G d de"      ZH G d de"      ZI G d de"      ZJ G d deJ      ZK G d d eL      ZM G d! d"eL      ZN G d# d$eJ      ZO e8d%      ZP e8d&      ZQ e8d'      ZR G d( d)      ZS G d* d+eS      ZT G d, d-eT      ZU G d. d/eS      ZV G d0 d1      ZW G d2 d3eW      ZX G d4 d5eX      ZY G d6 d7      ZZ G d8 d9e       Z[y):    N)CallableIterableIteratorKeysViewSequence)md5sha256sha384sha512)AnyClassVarcast)default_backend)Cipher
algorithmsmodes)settings)Arcfour)safe_int)
NumberTree)PDFExceptionPDFKeyErrorPDFObjectNotFoundPDFTypeError)	PDFParserPDFStreamParserPDFSyntaxError)	DecipherCallable	PDFStreamdecipher_all
dict_value	int_value
list_value	str_valuestream_value
uint_value)PSEOF)KWDLITliteral_name)choplistdecode_textformat_int_alphaformat_int_romannunpack	unpad_aesc                       e Zd Zy)PDFNoValidXRefN__name__
__module____qualname__     ]/var/www/html/leadgen/airagagent/rag_env/lib/python3.12/site-packages/pdfminer/pdfdocument.pyr2   r2   4       r8   r2   c                       e Zd ZdZy)PDFNoValidXRefWarningztLegacy warning for missing xref.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr4   r5   r6   __doc__r7   r8   r9   r<   r<   8       r8   r<   c                       e Zd Zy)PDFNoOutlinesNr3   r7   r8   r9   rA   rA   ?   r:   r8   rA   c                       e Zd Zy)PDFNoPageLabelsNr3   r7   r8   r9   rC   rC   C   r:   r8   rC   c                       e Zd Zy)PDFDestinationNotFoundNr3   r7   r8   r9   rE   rE   G   r:   r8   rE   c                       e Zd Zy)PDFEncryptionErrorNr3   r7   r8   r9   rG   rG   K   r:   r8   rG   c                       e Zd Zy)PDFPasswordIncorrectNr3   r7   r8   r9   rI   rI   O   r:   r8   rI   c                       e Zd ZdZy)PDFEncryptionWarningzyLegacy warning for failed decryption.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr=   r7   r8   r9   rK   rK   S   r?   r8   rK   c                       e Zd ZdZy)"PDFTextExtractionNotAllowedWarningzLegacy warning for PDF that does not allow extraction.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr=   r7   r8   r9   rM   rM   Z   r?   r8   rM   c                       e Zd Zy)PDFTextExtractionNotAllowedNr3   r7   r8   r9   rO   rO   a   r:   r8   rO   ObjStmXRefCatalogc                   f    e Zd Zdeeef   fdZdee   fdZ	dede
edz  eef   fdZdeddfdZy)	PDFBaseXRefreturnc                     t         NNotImplementedErrorselfs    r9   get_trailerzPDFBaseXRef.get_trailerl       !!r8   c                     g S rW   r7   rZ   s    r9   
get_objidszPDFBaseXRef.get_objidso   s    	r8   objidNc                     t        |      rW   )r   r[   r`   s     r9   get_poszPDFBaseXRef.get_posu   s    %  r8   parserc                     t         rW   rX   )r[   rd   s     r9   loadzPDFBaseXRef.loadx   r]   r8   )r4   r5   r6   dictstrr   r\   r   intr_   tuplerc   r   rf   r7   r8   r9   rT   rT   k   s\    "T#s(^ "HSM !S !U3:sC+?%@ !"9 " "r8   rT   c                       e Zd ZddZdefdZdeddfdZdeddfdZde	ee
f   fdZdee   fd	Zd
edeedz  eef   fdZy)PDFXRefrU   Nc                      i | _         i | _        y rW   )offsetstrailerrZ   s    r9   __init__zPDFXRef.__init__}   s    ?A')r8   c                 >    d| j                   j                         dS )Nz<PDFXRef: offsets=>rn   keysrZ   s    r9   __repr__zPDFXRef.__repr__   s    #DLL$5$5$7#:!<<r8   rd   c           
         	 	 |j                         \  }}|j                         }|s(	 |j	                  d      r|j                  |       n
|j                  d      }t        |      dk7  rd|d|}t        |      	 t        t        |      \  }}t        |||z         D ]  }		 |j                         \  }
}|j                         }|j                  d      }t        |      dk7  rd	|d
|}t        |      |\  }}}|dk7  ret        |      }t        |      }||d ||f| j                  |	<   t        j                  d|	 d|d|d        Vt        j!                  d| j                         | j#                  |       y # t        $ r}t        d      |d }~ww xY w# t        $ r}d|d|}t        |      |d }~ww xY w# t        $ r}t        d      |d }~ww xY w)Nz Unexpected EOF - file corrupted?   trailer       zTrailer not found: z: line=zInvalid line:    zInvalid XRef format: z, line=   nzNot adding object z to xref because position z or generation number z cannot be parsed as an intzxref objects: %r)nextlinestripr'   r2   
startswithseeksplitlenmapri   
ValueErrorranger   rn   logwarningdebugload_trailer)r[   rd   poslineerrf	error_msgstartnobjsr`   _pos_bgenno_buse_bpos_igenno_is                   r9   rf   zPDFXRef.load   s"   R$oo/dzz|  z*C 

4 A1v{1&74(K	$Y//9!$S! ueem4 V & 1IQ::<D JJt$q6Q;"7z QI(33*+'D= "7+$)<+/*@DLL'KK,UG3MeY W007{:UW') X 			$dll3&!O  R$%GHcQR  9,VJgdXF	$Y/S89  V()KLRUUVsG   %F
 F' '#G
	F$FF$'	G	0GG		G&G!!G&c                    	 |j                         \  }}|t        d      u sJ t        |             |j                         \  }}| j                  j                  t        |             t        j                  d| j                         y # t        $ r* |j                  d      }|st        d      d |d   \  }}Y ww xY w)Nrw      zUnexpected EOF - file corruptedr   z
trailer=%r)	nexttokenr(   rh   
nextobjectr'   popr2   ro   updater!   r   r   )r[   rd   r   kwddicxs         r9   r   zPDFXRef.load_trailer   s    	'')HQ#j/)33s83)((*HQ 	JsO,		,-  	

1A$%FGTQtHQ		s   AB
 
0B=<B=c                     | j                   S rW   ro   rZ   s    r9   r\   zPDFXRef.get_trailer       ||r8   c                 6    | j                   j                         S rW   rs   rZ   s    r9   r_   zPDFXRef.get_objids   s    ||  ""r8   r`   c                      | j                   |   S rW   )rn   rb   s     r9   rc   zPDFXRef.get_pos   s    ||E""r8   rU   N)r4   r5   r6   rp   rh   ru   r   rf   r   rg   r   r\   r   ri   r_   rj   rc   r7   r8   r9   rl   rl   |   s    *=# =."9 ." ."`.9 . .T#s(^ #HSM ##S #U3:sC+?%@ #r8   rl   c                   L    e Zd ZdefdZ ej                  d      ZdeddfdZ	y)PDFXRefFallbackrU   c                 >    d| j                   j                         dS )Nz<PDFXRefFallback: offsets=rr   rs   rZ   s    r9   ru   zPDFXRefFallback.__repr__   s    +DLL,=,=,?+B!DDr8   z^(\d+)\s+(\d+)\s+obj\brd   Nc                 H   |j                  d       	 	 |j                         \  }}|j                  d      rC|j                  |       | j	                  |       t
        j                  d| j                         y |j                  d      }| j                  j                  |      }|s|j                         \  }}t        |      }t        |      }	d ||	f| j                  |<   |j                  |       |j                         \  }
}t        |t               rx|j#                  d      t$        u rat'        |      }	 |d   }t1        |j3                               }g }	 	 |j                         \  }
}|j5                  t7        t        |             3# t        $ r Y y w xY w# t(        $ r$ t*        j,                  rt/        d|      d d}Y w xY w# t        $ r Y nw xY wt9        |t;        |      dz        }t=        |      D ]  }||dz     }||df| j                  |<    )	Nr   rw   trailer: %rzlatin-1TypeNN is not defined: ry   )r   r|   r'   r~   r   r   r   ro   decode
PDFOBJ_CUEmatchgroupsri   rn   r   
isinstancer   getLITERAL_OBJSTMr%   KeyErrorr   STRICTr   r   get_dataappendr   minr   r   )r[   rd   r   
line_bytesr   mobjid_sgenno_sr`   gennor   objstreamnparser1objsindexobjid1s                     r9   rf   zPDFXRefFallback.load   s   A$*OO$5!j $$Z0C !!&)		-6$$Y/D%%d+A!"WgLELE#'e"4DLLKK((*HQ#y)cggfo.O%c*sA
 *&//*;<"$#*#5#5#7CDcN3 A   ,   ,/A&-LMSWWA  3t9>*"1X =E!%!)_F,15!+<DLL(=s5   F <F" 4G 	FF"*GG	GG)
r4   r5   r6   rh   ru   recompiler   r   rf   r7   r8   r9   r   r      s6    E# E 56J*=9 *= *=r8   r   c                   z    e Zd ZddZdefdZdeddfdZdeee	f   fdZ
dee   fdZd	edeedz  eef   fd
Zy)PDFXRefStreamrU   Nc                 X    d | _         d | _        d | _        d | _        d | _        g | _        y rW   )dataentlenfl1fl2fl3rangesrZ   s    r9   rp   zPDFXRefStream.__init__   s,    "&	"&###-/r8   c                 "    d| j                   dS )Nz<PDFXRefStream: ranges=rr   )r   rZ   s    r9   ru   zPDFXRefStream.__repr__  s    (q99r8   rd   c           	         |j                         \  }}|j                         \  }}|j                         \  }}|j                         \  }}t        |t              r|j	                  d      t
        urt        d      |d   }|j	                  dd|f      }t        |      dz  dk7  rt        d      | j                  j                  t        t        t        t        t        f      t        d|                   |d   \  | _        | _        | _        | j                   | j"                  | j$                  J |j'                         | _        | j                   | j"                  z   | j$                  z   | _        |j,                  | _        t0        j3                  d	d
j5                  t7        t8        | j                              | j                   | j"                  | j$                         y )Nr   zInvalid PDF stream spec.SizeIndexr   ry   zInvalid index numberWz&xref stream: objid=%s, fields=%d,%d,%dz, )r   r   r   r   r   LITERAL_XREFr2   r   r   r   extendr   r   rj   ri   r+   r   r   r   r   r   r   attrsro   r   r   joinr   repr)	r[   rd   r   _objid_genno_kwdr   sizeindex_arrays	            r9   rf   zPDFXRefStream.load
  s   &&(F&&(F$$&	D'')F&),

60B,0V !;<<f~jj1d)4{a1$ !7884sCx 98A{;STU)/&48TXxx#(<AUUUOO%	hh)DHH4||		4IIc$,-HHHHHH	
r8   c                     | j                   S rW   r   rZ   s    r9   r\   zPDFXRefStream.get_trailer#  r   r8   c              #   8  K   | j                   D ]  \  }}t        |      D ]t  }| j                  J | j                  J | j                  |z  }| j                  ||| j                  z    }t	        |d | j
                   d      }|dk(  s|dk(  sn||z    v  y w)Nr   ry   )r   r   r   r   r/   r   )r[   r   r   ioffsetentf1s          r9   r_   zPDFXRefStream.get_objids&  s      KK 	$LE55\ ${{...yy,,,qii$++)=>S488_a07bAg!)O$	$s   BBBr`   c                 n   d}| j                   D ]!  \  }}||k  r|||z   k  r
|||z
  z  } n||z  }# t        |      | j                  J | j                  J | j                  | j
                  | j                  J | j                  |z  }| j                  ||| j                  z    }t        |d | j                   d      }t        || j                  | j                  | j
                  z          }t        || j                  | j
                  z   d        }	|dk(  rd ||	fS |dk(  r||	dfS t        |      )Nr   r   ry   )r   r   r   r   r   r   r   r/   )
r[   r`   r   r   r   r   r   r   f2f3s
             r9   rc   zPDFXRefStream.get_pos1  sJ    KK 	%LE5~%%%-"7&	% e$${{&&&yy$$$xx#(<AUUUu$ii$++!56S488_a(SDHHtxx$789SDHH,./07"b>!1WA; e$$r8   r   )r4   r5   r6   rp   rh   ru   r   rf   rg   r   r\   r   ri   r_   rj   rc   r7   r8   r9   r   r      sm    0:# :
9 
 
2T#s(^ 	$HSM 	$%S %U3:sC+?%@ %r8   r   c                   L   e Zd ZU dZdZeedf   ed<   	 ddee	   de
eef   dedd	fd
ZddZddZddZdefdZdefdZdefdZde	de	fdZde	de	fdZdede	d	z  fdZde	de	d	z  fdZde	defdZde	de	d	z  fdZ	 d dedede	de
eef   d	z  de	f
dZdedede	de	fdZy	)!PDFStandardSecurityHandlers    (N^NuAd NV.. h>/dSiz)ry   rz   .supported_revisionsdocidparampasswordrU   Nc                 N    || _         || _        || _        | j                          y rW   )r   r   r   init)r[   r   r   r   s       r9   rp   z#PDFStandardSecurityHandler.__init__R  s"     

 		r8   c                     | j                          | j                  | j                  vrd| j                  }t	        |      | j                          y )NzUnsupported revision: param=)init_paramsrr   r   rG   init_key)r[   r   s     r9   r   zPDFStandardSecurityHandler.init]  sD    661116tzznEI$Y//r8   c                    t        | j                  j                  dd            | _        t        | j                  d         | _        t        | j                  d   d      | _        t        | j                  d         | _        t        | j                  d         | _	        t        | j                  j                  dd	            | _
        y )
NVr   RP    OULength(   )r"   r   r   vr   r&   pr$   oulengthrZ   s    r9   r   z&PDFStandardSecurityHandler.init_paramsd  s    4::>>#q124::c?+DJJsOR04::c?+4::c?+

x <=r8   c                 h    | j                  | j                        | _        | j                  t        y rW   )authenticater   keyrI   rZ   s    r9   r   z#PDFStandardSecurityHandler.init_keyl  s-    $$T]]388&& r8   c                 2    t        | j                  dz        S )N   boolr   rZ   s    r9   is_printablez'PDFStandardSecurityHandler.is_printableq      DFFQJr8   c                 2    t        | j                  dz        S )N   r  rZ   s    r9   is_modifiablez(PDFStandardSecurityHandler.is_modifiablet  r
  r8   c                 2    t        | j                  dz        S )N   r  rZ   s    r9   is_extractablez)PDFStandardSecurityHandler.is_extractablew  s    DFFRK  r8   r  c                    | j                   dk(  r$t        |      j                  | j                        S t	        | j                        }|j                  | j                  d          t        |      j                  |j                               }t        dd      D ]?  dj                  fdt        |      D              }t        |      j                  |      }A ||z  }|S )Nry   r   r      r8   c              3   <   K   | ]  }t        |z  f        y wrW   bytes.0cr   s     r9   	<genexpr>z7PDFStandardSecurityHandler.compute_u.<locals>.<genexpr>       @UAE8_@   )r   r   encryptPASSWORD_PADDINGr   r   r   digestr   r   iter)r[   r  hashresultkr   s        @r9   	compute_uz$PDFStandardSecurityHandler.compute_uz  s    66Q;3<''(=(=>> t,,-DKK

1&S\))$++-8F1b\ 4HH@d3i@@ ++F34 fFMr8   c                 B   || j                   z   d d }t        |      }|j                  | j                         |j                  t	        j
                  d| j                               |j                  | j                  d          | j                  dk\  r+t        t        |       j                  s|j                  d       |j                         }d}| j                  dk\  r;| j                  dz  }t        d	      D ]  }t        |d |       j                         }  |d | S )
Nr   <Lr   r  s      rz   r  2   )r  r   r   r   structpackr   r   r   r   PDFStandardSecurityHandlerV4encrypt_metadatar  r  r   )r[   r   r   r!  r   r   s         r9   compute_encryption_keyz1PDFStandardSecurityHandler.compute_encryption_key  s    t444cr:8}DFFFKKdff-.DJJqM"FFaK5t<MMKK+,66Q;q A2Y 2VBQZ//12bqzr8   c                 p    |j                  d      }| j                  |      }|| j                  |      }|S )Nlatin1)encodeauthenticate_user_passwordauthenticate_owner_password)r[   r   password_bytesr  s       r9   r  z'PDFStandardSecurityHandler.authenticate  s:    !2--n=;22>BC
r8   c                 L    | j                  |      }| j                  |      r|S y rW   )r,  verify_encryption_key)r[   r   r  s      r9   r0  z5PDFStandardSecurityHandler.authenticate_user_password  s(    ))(3%%c*Jr8   c                     | j                  |      }| j                  dk(  r|| j                  k(  S |d d | j                  d d k(  S )Nry   r  )r#  r   r   )r[   r  r   s      r9   r4  z0PDFStandardSecurityHandler.verify_encryption_key  sD    NN366Q;;"v$$r8   c                 T   || j                   z   d d }t        |      }| j                  dk\  r)t        d      D ]  }t        |j	                               } d}| j                  dk\  r| j
                  dz  }|j	                         d | }| j                  dk(  r%t        |      j                  | j                        }n[| j                  }t        ddd      D ]?  d	j                  fd
t        |      D              }t        |      j                  |      }A | j                  |      S )Nr   rz   r'  r&  r  ry      r8   c              3   <   K   | ]  }t        |z  f        y wrW   r  r  s     r9   r  zIPDFStandardSecurityHandler.authenticate_owner_password.<locals>.<genexpr>  r  r  )r  r   r   r   r  r  r   decryptr   r   r  r0  )	r[   r   r   r   r   r  user_passwordr"  r   s	           @r9   r1  z6PDFStandardSecurityHandler.authenticate_owner_password  s   t444cr:8}66Q;2Y *4;;=)*66Q;q AkkmBQ66Q;#CL008M FFM2r2& BHH@d3i@@ '
 2 2= AB ..}==r8   r`   r   r   r   c                 (    | j                  |||      S rW   )decrypt_rc4)r[   r`   r   r   r   s        r9   r:  z"PDFStandardSecurityHandler.decrypt  s     ud33r8   c                 4   | j                   J | j                   t        j                  d|      d d z   t        j                  d|      d d z   }t        |      }|j	                         d t        t        |      d       }t        |      j                  |      S )Nr%  rz   ry   r  )	r  r(  r)  r   r  r   r   r   r:  )r[   r`   r   r   r  r   s         r9   r=  z&PDFStandardSecurityHandler.decrypt_rc4  s    xx###hhT51"155D%8PQSRS8TT3xkkm/c#c(B/0s|##D))r8    r   rW   )r4   r5   r6   r  r   rj   ri   __annotations__r   r  rg   rh   r   rp   r   r   r   r  r	  r  r  r#  r,  r  r0  r4  r1  r:  r=  r7   r8   r9   r   r   L  st   U  ,2sCx1 			 CH~	 		
 
	>'
 d   t  ! !U u u  *S UT\ 5 UT\ % %4 %>E >edl >2 (,44 4 	4
 CH~$4 
4* *S * *% *r8   r   c                        e Zd ZU dZeedf   ed<   d fdZdede	eee
ge
f   dz  fdZ	 	 dd	ed
ede
deeef   dz  dedz  de
fdZd	ed
ede
de
fdZd	ed
ede
de
fdZ xZS )r*  )r  .r   rU   Nc                 (   t         |           d| _        t        | j                  j                  d            | _        t        | j                  d         | _        t        | j                  d         | _	        t        | j                  j                  dd            | _        | j                  | j                  k7  rd| j                  }t        |      i | _        | j                  j                         D ]M  \  }}| j                  t        |d               }|d	| j                  }t        |      || j                  |<   O | j                   | j                  d
<   | j                  | j                  vrd| j                  }t        |      y )N   CFStmFStrFEncryptMetadataTz Unsupported crypt filter: param=CFMz#Unknown crypt filter method: param=IdentityzUndefined crypt filter: param=)superr   r  r!   r   r   cfr*   stmfstrfr  r+  rG   cfmitemsget_cfmdecrypt_identity)r[   r   r"  r   r   	__class__s        r9   r   z(PDFStandardSecurityHandlerV4.init_params  sN   TZZ^^D12 F!34	 F!34	 $TZZ^^4Et%L M99		!:4::.II$Y//GGMMO 	DAq\!E(34AyA$**P	(33DHHQK	  $4499DHH$8GI$Y// %r8   namec                 H    |dk(  r| j                   S |dk(  r| j                  S y )NV2AESV2)r=  decrypt_aes128r[   rT  s     r9   rQ  z$PDFStandardSecurityHandlerV4.get_cfm  s+    4<###W_&&&r8   r`   r   r   r   c                     | j                   s%|#|j                  d      }|t        |      dk(  r|S || j                  } | j                  |   |||      S )Nr   Metadata)r+  r   r*   rN  rO  )r[   r`   r   r   r   rT  ts          r9   r:  z$PDFStandardSecurityHandlerV4.decrypt  s\     $$):		&!A}aJ!><99Dtxx~eUD11r8   c                     |S rW   r7   )r[   r`   r   r   s       r9   rR  z-PDFStandardSecurityHandlerV4.decrypt_identity	  s    r8   c                    | j                   J | j                   t        j                  d|      d d z   t        j                  d|      d d z   dz   }t        |      }|j	                         d t        t        |      d       }|d d }|dd  }t        t        j                  |      t        j                  |      t                     }|j                         j                  |      }	t        |	      S )Nr%  rz   ry   s   sAlTr  backend)r  r(  r)  r   r  r   r   r   r   AESr   CBCr   	decryptorr   r0   )
r[   r`   r   r   r  r   initialization_vector
ciphertextcipher	plaintexts
             r9   rX  z+PDFStandardSecurityHandlerV4.decrypt_aes128  s    xx###HHkk$&r*+kk$&r*+  	 3xkkm/c#c(B/0 $Sb	"#Y
NN3II+,#%

 $$&--j9	##r8   r   )NN)r4   r5   r6   r   rj   ri   rA  r   rh   r   r  rQ  rg   r   r:  rR  rX  __classcell__rS  s   @r9   r*  r*    s    +/sCx/0,C Hc3->-E$F$M  (,22 2 	2
 CH~$2 Dj2 
2 c # U u $C $ $5 $U $r8   r*  c            	            e Zd ZdZd fdZdedeeeegef   dz  fdZ	dededz  fdZ
dedefd	Z	 dded
ededz  defdZ	 dded
ededz  defdZ	 dded
ededz  defdZededefd       ZdedededefdZdedededefdZ xZS )PDFStandardSecurityHandlerV5)r&     rU   Nc                    t         |           d| _        t        | j                  d         | _        t        | j                  d         | _        | j                  d d | _        | j                  dd | _	        | j                  dd  | _
        | j                  d d | _        | j                  dd | _        | j                  dd  | _        y )N   OEUEr   r   )rK  r   r  r$   r   oeuer   o_hasho_validation_salt
o_key_saltr   u_hashu_validation_salt
u_key_salt)r[   rS  s    r9   r   z(PDFStandardSecurityHandlerV5.init_params$  s    DJJt,-DJJt,-ffSbk!%2&&+ffSbk!%2&&+r8   rT  c                 &    |dk(  r| j                   S y )NAESV3)decrypt_aes256rY  s     r9   rQ  z$PDFStandardSecurityHandlerV5.get_cfm0  s    7?&&&r8   r   c                    | j                  |      }| j                  || j                  | j                        }|| j                  k(  r| j                  || j
                  | j                        }t        t        j                  |      t        j                  d      t                     }|j                         j                  | j                        S | j                  || j                        }|| j                   k(  r| j                  || j"                        }t        t        j                  |      t        j                  d      t                     }|j                         j                  | j$                        S y )Ns                   r_  )_normalize_password_password_hashrt  r   rs  ru  r   r   ra  r   rb  r   rc  r   rq  rw  rv  rx  rr  )r[   r   
password_br   rf  s        r9   r  z)PDFStandardSecurityHandlerV5.authenticate6  s%   --h7
"":t/E/EtvvN4;;&&z4??DFFKDt$		*%')F
 ##%,,TWW55"":t/E/EF4;;&&z4??CDt$		*%')F
 ##%,,TWW55r8   c                 j    | j                   dk(  r|syddlm}  ||      }|j                  d      d d S )Nrl  r8   r   )saslprepzutf-8   )r   pdfminer._saslprepr  r/  )r[   r   r  s      r9   r}  z0PDFStandardSecurityHandlerV5._normalize_passwordL  s6    66Q;3)Hw'--r8   saltvectorc                 r    | j                   dk(  r| j                  |||      S | j                  ||dd |      S )z2Compute password hash depending on revision numberr&  r   r  )r   _r5_password_r6_password)r[   r   r  r  s       r9   r~  z+PDFStandardSecurityHandlerV5._password_hashV  s?     66Q;$$XtV<<  4!9f==r8   c                     t        |      }|j                  |       ||j                  |       |j                         S )z#Compute the password for revision 5)r	   r   r  )r[   r   r  r  r   s        r9   r  z)PDFStandardSecurityHandlerV5._r5_passworda  s8     hDKK{{}r8   c                    t        |      }|j                  |       ||j                  |       |j                         }t         t        t        f}dx}}|dk  s||dz
  kD  r{||z   |xs dz   dz  }	| j                  |dd |dd |	      }
|| j                  |
dd          } ||
      j                         }|
t        |
      dz
     }|dz  }|dk  rr||dz
  kD  r{|dd S )	z#Compute the password for revision 6Nr   @   r   r8   r  )r  ivr   r   )r	   r   r  r
   r   _aes_cbc_encrypt_bytes_mod_3r   )r[   r   r  r  initial_hashr"  hashesround_nolast_byte_valk1e	next_hashs               r9   r  z)PDFStandardSecurityHandlerV5._r6_passwordn  s
    h'D!'!&&)#$$=m}x"}<Q,&-C0B6B%%!CR&Qr"XB%GA t003B89I!##%Ac!fqjMMMH m}x"}< "vr8   input_bytesc                 ,    t        d | D              dz  S )Nc              3   &   K   | ]	  }|d z    yw)rz   Nr7   )r  bs     r9   r  z<PDFStandardSecurityHandlerV5._bytes_mod_3.<locals>.<genexpr>  s     .Q1q5.s   rz   )sum)r  s    r9   r  z)PDFStandardSecurityHandlerV5._bytes_mod_3  s     .+..22r8   r  r  r   c                     t        t        j                  |      t        j                  |            }|j                         }|j                  |      |j                         z   S rW   )r   r   ra  r   rb  	encryptorr   finalize)r[   r  r  r   rf  r  s         r9   r  z-PDFStandardSecurityHandlerV5._aes_cbc_encrypt  sL    
s+UYYr];$$&	%	(:(:(<<<r8   r`   r   c                    |d d }|dd  }| j                   J t        t        j                  | j                         t	        j
                  |      t                     }|j                         j                  |      }t        |      S )Nr  r_  )
r  r   r   ra  r   rb  r   rc  r   r0   )r[   r`   r   r   rd  re  rf  rg  s           r9   r{  z+PDFStandardSecurityHandlerV5.decrypt_aes256  s|     $Sb	"#Y
xx###NN488$II+,#%

 $$&--j9	##r8   r   rW   )r4   r5   r6   r   r   rh   r   ri   r  rQ  r  r}  r~  r  r  staticmethodr  r  r{  rh  ri  s   @r9   rk  rk  !  sa    
&C Hc3->-E$F$M S UT\ ,.C .E .  $		>	> 	> 		>
 
	>  $	  	
 
"  $	  	
 
2 3% 3C 3 3=E =u =E =e =

$C 
$ 
$5 
$U 
$r8   rk  c                   x   e Zd ZU dZeeeedZee	e
ee   f      ed<   	 	 	 d dededededd	f
d
Z ed      Zd!dedd	fdZdede
de
defdZdedeee   e
f   fdZde
de
defdZde
defdZeeeeeef   Zdee   fdZdee   fdZ dedee!z  defdZ"dee!z  defdZ#dede
fdZ$dede
dee%   dd	fdZ&y	)"PDFDocumentaP  PDFDocument object represents a PDF document.

    Since a PDF file can be very big, normally it is not loaded at
    once. So PDF document has to cooperate with a PDF parser in order to
    dynamically import the data as processing goes.

    Typical usage:
      doc = PDFDocument(parser, password)
      obj = doc.getobj(objid)

    )r   ry   r  r&  security_handler_registryrd   r   cachingfallbackrU   Nc                    || _         g | _        g | _        i | _        d| _        d| _        d| _        i | _        i | _        || _        | j                  j                  |        dx| _
        x| _        | _        	 | j                  |      }| j                  ||| j                         | j                  D ]  }|j)                         }|sd|v r:d|v rt+        |d         nd}	|	t-        |d         f| _        | j/                  |       d|v r'| j                  j'                  t-        |d                d|v st-        |d         | _         n t1        d      | j                  j3                  d	      t4        urt6        j8                  rt1        d
      yy# t        $ rC |r=d|_        t#               }|j%                  |       | j                  j'                  |       Y :w xY w)z1Set the document to use a given PDFParser object.NTEncryptID)r8   r8   InfoRootz(No /Root object! - Is this really a PDF?r   zCatalog not found!)r  xrefsinfocatalog
encryptiondecipher_parser_cached_objs_parsed_objsset_documentr	  r  r  	find_xrefread_xref_fromr2   r  r   rf   r   r\   r#   r!   _initialize_passwordr   r   LITERAL_CATALOGr   r   )
r[   rd   r   r  r  r   newxrefxrefro   id_values
             r9   rp   zPDFDocument.__init__  s    (*
	')2615;=AC!!$'GKKKD.1D	+..(CTZZ8 JJ 	MD&&(GG# 9=:gdm4Z#+Z	8J-K"L))(3 		  GFO!<= )'&/:#	M& !!KLL<<F#?:x !566 @O:7  	+"&)+V$

!!'*	+s   8.F AG! G!   objc                    | j                   J | j                   \  }}t        |j                  d            dk7  rt        d|      t	        |j                  dd            }| j
                  j                  |      }|t        d|       ||||      }|j                  | _        |j                         | _        |j                         | _	        |j                         | _
        | j                  J d| j                  _        y )NFilterStandardzUnknown filter: param=r   r   zUnknown algorithm: param=F)r  r*   r   rG   r"   r  r:  r  r	  r  r  r  r  )r[   r   r   r   r   factoryhandlers          r9   r  z PDFDocument._initialize_password  s    ***		(+,
:$'=eY%GHHeiiQ'(0044Q7?$'@	%JKK%1#002$224%446||''' %r8   r   r   r`   c                 ^   |j                   | j                  v r| j                  |j                      \  }}nI| j                  |      \  }}| j                  r)|j                   J ||f| j                  |j                   <   |dz  |z   }	 ||   }|S # t        $ r}t        d|      |d }~ww xY w)Nry   zindex too big: )r`   r  _get_objectsr  
IndexErrorr   )	r[   r   r   r`   r   r   r   r   r   s	            r9   _getobj_objstmzPDFDocument._getobj_objstm  s    <<4,,,))&,,7IT1))&1IT1||||///37)!!&,,/EEM	Gq'C 
  	G ?5)!<=3F	Gs   B 	B,B''B,c                    |j                  d      t        urt        j                  rt	        d|      	 t        t        |d         }t        |j                               }|j                  |        g }	 	 |j                         \  }}|j                  |       %# t        $ r$ t        j                  rt	        d|      d d}Y w xY w# t        $ r Y ||fS w xY w)Nr   zNot a stream object: r   r   r   )r   r   r   r   r   r   ri   r   r   r   r  r   r   r'   )r[   r   r   rd   r   r   r   s          r9   r  zPDFDocument._get_objects  s    ::f^3 #8
!CDD	S&+&A
 !!23D!	!,,.CC    	$'9&%DE4OA	  	ay	s#   B 7&C *C
	C
	CCr   c                 n   | j                   J | j                   j                  |       | j                   j                         \  }}| j                   j                         \  }}| j                   j                         \  }}||k7  r`g }|| j                  ur=| j                   j                         \  }}|j	                  |       || j                  ur=t        |      dk\  r|d   }||k7  rt        d|d|      |t        d      k7  rt        d|      | j                   j                         \  }}|S )Nry   zobjid mismatch: =r  zInvalid object spec: offset=)	r  r   r   KEYWORD_OBJr   r   r   r(   r   )	r[   r   r`   r   r   r   r   r   r   s	            r9   _getobj_parsezPDFDocument._getobj_parse!  s#   ||'''#ll,,.Fll,,.F<<))+C U?AT---<<113C T--- 1v{2U? #3F:Qui!HII#f+ #?w!GHH<<**,C
r8   c                    | j                   st        d      t        j                  d|       || j                  v r| j                  |   \  }}|S | j                   D ]  }	 |j                  |      \  }}}	 |.t        | j                  |            }| j                  |||      }n6| j                  ||      }| j                  rt        | j                  |||      }t        |t              r|j                  ||        n t%        |      t        j                  d||       | j&                  r||f| j                  |<   |S # t        $ r Y w xY w# t         t"        f$ r Y w xY w)zGet object from PDF

        :raises PDFException if PDFDocument is not initialized
        :raises PDFObjectNotFound if objid does not exist in PDF
        zPDFDocument is not initializedzgetobj: objid=%rzregister: objid=%r: %r)r  r   r   r   r  rc   r   r%   getobjr  r  r  r    r   r   	set_objidr'   r   r   r  )r[   r`   r   r   r  strmidr   r   s           r9   r  zPDFDocument.getobj=  sd    zz?@@		$e, D%%%,,U3LS%4 
1 

 /-1\\%-@*VUE)!-dkk&.A!B"11&%G"00>==".t}}eUC"PC!#y1eU3!/( (..II.s;||,/<!!%(
+    ~. s%    E6BE	EEE$#E$c                     d| j                   vrt        dt        dt        dt        t
        j                     ffd | j                   d   d      S )NOutlinesentrylevelrU   c              3   Z  K   t        |       } d| v r[d| v sd| v rSt        t        | d               }| j                  d      }| j                  d      }| j                  d      }|||||f d| v rd| v r | d   |dz         E d {    d| v r | d   |      E d {    y y 7 7 w)	NTitleADestSEFirstLastr   Next)r!   r,   r$   r   )r  r  titledestactionsesearchs         r9   r  z(PDFDocument.get_outlines.<locals>.searchk  s     u%E%SE\Vu_#IeGn$=>yy(3YYt_eT6266%FeO!%.%!)<<<!%-777  =7s$   BB+B'	B+ B)!B+)B+r   )r  rA   objectri   r   r  OutlineType)r[   r  s    @r9   get_outlineszPDFDocument.get_outlinesg  sR    T\\)	8& 	8 	8+:Q:Q1R 	8 dll:.22r8   c                     | j                   J 	 t        | j                   d         }|j
                  S # t        t        f$ r}t        |d}~ww xY w)zGenerate page label strings for the PDF document.

        If the document includes page labels, generates strings, one per page.
        If not, raises PDFNoPageLabels.

        The resulting iteration is unbounded.
        N
PageLabels)r  r  r   r   rC   labels)r[   page_labelsr   s      r9   get_page_labelszPDFDocument.get_page_labelsz  sY     ||'''	+$T\\,%?@K !!! h' 	+!s*	+s   4 AA

Acatr  c                     	 t        | j                  d         }t        |         }dt
        t        t        f   dt        ffd |      S # t        t        f$ r}t	        f      |d }~ww xY w)NNamesdrU   c           	      d   d| v rt        | d         \  }}|k  s|k  ry d| v rQt        | d         }t        t        t        t        t
        t        z  t        f      t        d|                  }|   S d| v r*t        | d         D ]  } 	t        |            }|s|c S  t        f      )NLimitsr  ry   Kids)r#   rg   r   r   rj   rh   r  r   r+   r!   r   )
r  r  k2r   namesr  r   r  r  lookups
          r9   r  z'PDFDocument.lookup_name.<locals>.lookup  s    1}%ak2R8rCx!|!!G*-%eS(8"9:HQ<MN Sz!{#AfI. !Az!}-A ! sCj))r8   )r!   r  r   r   r   rg   rh   r   )r[   r  r  r  r   d0r  s    ``   @r9   lookup_namezPDFDocument.lookup_name  sz    	3t||G45E c
#	*d38n 	* 	*$ bz/ h' 	3sCj)s2	3s   A A6#A11A6rT  c                     	 | j                  d|      }|S # t        $ rK d| j                  vrt        |      d t	        | j                  d         }||vrt        |      d ||   }Y |S w xY w)NDests)r  r   r  rE   r!   )r[   rT  r   r  s       r9   get_destzPDFDocument.get_dest  s    
	""7D1C 
  	dll*,T2<DLL12B2~,T2<T(C
	s    AA*)A*c                 R   d}|j                         D ]  }|j                         }t        j                  d|       |dk(  rVt        j                  d|       |j	                         st        d|      t        |      }|dk\  st        d|       |c S |s|} t        d      )	z0Internal function used to locate the first XRef.r8   zfind_xref: %rs	   startxrefzxref found: pos=%rzInvalid xref position: r   z Invalid negative xref position: Unexpected EOF)revreadlinesr}   r   r   isdigitr2   ri   )r[   rd   prevr   r   s        r9   r  zPDFDocument.find_xref  s     '') 	D::<DIIot,|#		.5||~(+B4()KLLD	z(+KE7)STT%	( -..r8   r   r  c                    |j                  |       |j                          	 |j                         \  }}t
        j                  d||       t        |t              r=|j                  |       |j                          t               }|j                  |       n9||j                  u r|j                          t               }|j                  |       |j                  |       |j                         }t
        j                  d|       d|v r!t!        |d         }| j#                  |||       d|v r"t!        |d         }| j#                  |||       yy# t        $ r}t	        d      |d}~ww xY w)z$Reads XRefs from the given location.r  Nz"read_xref_from: start=%d, token=%rr   XRefStmPrev)r   resetr   r'   r2   r   r   r   ri   r   rf   KEYWORD_XREFr|   rl   r   r\   r"   r  )	r[   rd   r   r  r   tokenr   r  ro   s	            r9   r  zPDFDocument.read_xref_from  s>    	E	<!++-LS% 			6uEeS!KKLLN -DIIf+++!9DIIfT""$		-)GI./CU3WGFO,CU3 )  	< !12;	<s   E 	E0E++E0)r@  TTr?  )'r4   r5   r6   r>   r   r*  rk  r  r   rg   ri   typerA  r   rh   r  rp   r(   r  r  r   r  r  rj   listr  r  r  r   r  r   r  r  r  r  r  r  rT   r  r7   r8   r9   r  r    s   
 &%''	XxS$7Q2R-R(ST  5757 57 	57
 57 
57n f+K&S &$ &"Y s 3 6 9 tF|S7H1I ( S V 8&C &F &P S#sC/0K3h{3 3&"# ""s u  8S5[ S /	 /c /6"4"4 "4 K 	"4
 
"4r8   r  c                   J    e Zd ZdZedee   fd       Zede	de
defd       Zy)r  zWPageLabels from the document catalog.

    See Section 8.3.1 in the PDF Reference.
    rU   c           	   #   X  K   | j                   }t        |      dk(  s|d   d   dk7  r/t        j                  rt	        d      |j                  ddi f       t        |d      D ]  \  }\  }}t        |      }|j                  d      }t        t        |j                  dd                  }t        |j                  dd            }|t        |      k(  rt        j                  |      }	n||   \  }
}|
|z
  }t        |||z         }	|	D ]  }| j                  ||      }||z      y w)Nr   z"PageLabels is missing page index 0r   Sr   r8   St)valuesr   r   r   r   insert	enumerater!   r   r,   r$   r"   	itertoolscountr   _format_page_label)r[   r   nextr   label_dict_unchecked
label_dictstyleprefixfirst_valuer  endr   range_lengthvaluelabels                  r9   r  zPageLabels.labels  s*     v;!vay|q0$%IJJ a!R)3<VQ3G 	%/D/5.#$89JNN3'E :>>#s+C!DEF#JNN4$;<Ks6{" )2(DQ"U{{K,,FG %//u=un$%	%s   D(D*r  r  c                 ~   |d}|S |t        d      u rt        |       }|S |t        d      u rt        |       j                         }|S |t        d      u rt        |       }|S |t        d      u rt	        |       j                         }|S |t        d      u rt	        |       }|S t
        j                  d|       d}|S )z+Format page label value in a specific styler@  Dr   r   r  azUnknown page label style: %r)r)   rh   r.   upperr-   r   r   )r  r  r  s      r9   r  zPageLabels._format_page_label  s     =E  c#hJE  c#h$U+113E  c#h$U+E  c#h$U+113E  c#h$U+E  KK6>Er8   N)r4   r5   r6   r>   propertyr   rh   r  r  ri   r   r  r7   r8   r9   r  r    sO    
 % % %< # c c  r8   r  )\r
  loggingr   r(  collections.abcr   r   r   r   r   hashlibr   r	   r
   r   typingr   r   r   cryptography.hazmat.backendsr   &cryptography.hazmat.primitives.ciphersr   r   r   pdfminerr   pdfminer.arcfourr   pdfminer.castingr   pdfminer.data_structuresr   pdfminer.pdfexceptionsr   r   r   r   pdfminer.pdfparserr   r   r   pdfminer.pdftypesr   r   r    r!   r"   r#   r$   r%   r&   pdfminer.psexceptionsr'   pdfminer.psparserr(   r)   r*   pdfminer.utilsr+   r,   r-   r.   r/   r0   	getLoggerr4   r   r2   SyntaxWarningr<   rA   rC   rE   rG   rI   UserWarningrK   rM   rO   r   r   r  rT   rl   r   r   r   r*  rk  r  r  r7   r8   r9   <module>r/     s     	  L L / /  9 L L  $ % /  J I
 
 
 ( 4 4  g!	^ 	M 	L 		l 		\ 		 		- 	;  	"4 	
 X6{i." ""L#k L#^0=g 0=fK%K K%\I* I*XF$#= F$Rz$#? z$zU4 U4p
7 7r8   