
    +Si9                        d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZ ddlZddlmZmZmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZm Z m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z'  ejP                           ejR                  e*      Z+ ejX                  d      Z-de.e/z  de.fdZ0d)dede1de.dz  ddfdZ2	 d*dedede3ddfdZ4	 	 d+dedede.dz  de3ddf
dZ5	 	 	 	 d,dede.dede	e6   de.de3de.dz  de.dz  ddfd Z7 e#d!      Z8 e#d"      Z9de.de.de.ddfd#Z:	 	 	 	 	 d-dede.de
e6   de	e6   de.de3de.dz  de.dz  de3ddfd$Z;defd%Z<d)d&e=e.   dz  ddfd'Z>e*d(k(  r e>        yy).z#Extract pdf structure in XML format    N)ArgumentParser)	ContainerIterable)AnyTextIOcast)PDFDocumentPDFNoOutlinesPDFXRefFallback)
PDFIOErrorPDFObjectNotFoundPDFTypeErrorPDFValueError)PDFPage)	PDFParser)	PDFObjRef	PDFStreamresolve1stream_value)LIT	PSKeyword	PSLiteral)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                 l    t        | t              rt        | d      n| }t        j	                  d |      S )Nzlatin-1c                 >    dt        | j                  d             dS )Nz&#r   ;)ordgroup)ms    ]/var/www/html/leadgen/airagagent/rag_env/lib/python3.12/site-packages/../../../bin/dumppdf.py<lambda>zescape.<locals>.<lambda>"   s    2c!''!*o%6a!8     )
isinstancebytesstrESC_PATsub)r   uss     r"   escaper+       s+    (E2Q		B;;8"==r$   outobjcodecc                    || j                  d       y t        |t              r| j                  dt        |       d       |j	                         D ]H  \  }}| j                  d| d       | j                  d       t        | |       | j                  d       J | j                  d       y t        |t              rT| j                  d	t        |       d       |D ]  }t        | |       | j                  d
       ! | j                  d       y t        |t        t        f      r+| j                  dt        |       dt        |       d       y t        |t              r|dk(  r | j                  |j                                y |dk(  r | j                  |j                                y | j                  d       t        | |j                         | j                  d       |dk(  r:|j                         }| j                  dt        |       dt        |       d       | j                  d       y t        |t              r | j                  d|j                   d       y t        |t               r | j                  d|j"                   d       y t        |t$              r | j                  d|j"                   d       y t'        |      r| j                  d| d       y t)        |      )Nz<null />z<dict size="">
z<key>z</key>
z<value>z	</value>
z</dict>z<list size="
z</list>z<string size="z">z	</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="z</data>
z	</stream>z	<ref id="z" />z	<keyword>z
</keyword>z	<literal>z
</literal>z<number>z	</number>)writer%   dictlenitemsdumpxmllistr'   r&   r+   r   get_rawdataget_dataattrsr   objidr   namer   r   r   )r,   r-   r.   kvdatas         r"   r9   r9   %   sw   
{		*#t		LS
$/0IIK 	$DAqIIaS)*IIi COIIl#		$
 			)#t		LS
$/0 	ACOIIdO	 			)#U|$		N3s8*Bvc{m9EF#y!E>IIcoo'( 	 hIIclln% 	 II+,C#IIn%||~		LT2fTl^9MNIIk"#y!		Icii[-.#y!		IchhZz23#y!		IchhZz23}		HSE+,
s
r$   docshow_fallback_xrefc                 6   |j                   D ]Q  }t        |t              r|s| j                  d       t	        | |j                                | j                  d       S t        d |j                   D              }|r|sd}t        j                  |       y y y )Nz
<trailer>
z
</trailer>

c              3   <   K   | ]  }t        |t                y wN)r%   r   ).0xrefs     r"   	<genexpr>zdumptrailers.<locals>.<genexpr>p   s     K:dO4Ks   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr%   r   r5   r9   get_trailerallloggerwarning)r,   rC   rD   rI   no_xrefsmsgs         r"   dumptrailersrR   f   s    
 		 *$04FIIm$C))+,II()	*
 KKKH*$ 	
 	s +xr$   c                    t               }| j                  d       |j                  D ]v  }|j                         D ]a  }||v r|j	                  |       	 |j                  |      }|.| j                  d| d       t        | ||       | j                  d       c x t        | ||       | j                  d       y # t        $ r}t        d|       Y d }~d }~ww xY w)Nz<pdf>z<object id="r0   r.   z
</object>

znot found: z</pdf>)
setr5   rK   
get_objidsaddgetobjr9   r   printrR   )	r,   rC   r.   rD   visitedrI   r>   r-   es	            r"   dumpallobjsr\   z   s     eGIIg		 +__& 	+EKK+jj';		Lt45S.		+,	++ c-.IIh % +A5)**+s   C )4C  	C!	CC!outfpfnameobjidspagenospassworddumpall
extractdirc           	         t        |d      5 }t        |      }	t        |	|      t        t	        j
                        d      D 
ci c]  \  }
}|j                  |
 }}
}dt        dt        ffd}	 j                         }| j                  d       |D ]  \  }}}}}d }
|r ||      }||d   j                     }
nc|ra|}t        |t              rO|j                  d      }|r<t        |      d	k(  r.|j                  d
      r ||d
         }||d   j                     }
t!        |      }| j                  d|d| d       |.| j                  d       t#        | |       | j                  d       |
| j                  d|
d       | j                  d       	 | j                  d       |	j'                          d d d        y c c}}
w # t$        $ r Y *w xY w# 1 sw Y   y xY w)Nrb   destr   c                 <   t        | t        t        f      rt        j	                  |             } n4t        | t
              r$t        j	                  | j                              } t        | t              r| d   } t        | t              r| j                         } | S )ND)
r%   r'   r&   r   get_destr   r?   r6   r   resolve)rg   rC   s    r"   resolve_destz!dumpoutline.<locals>.resolve_dest   sr    $e-T 23D),TYY 78$%Cy$	*||~Kr$   z<outlines>
r   Sz/'GoTo'ri   z<outline level="z	" title="r0   z<dest>z</dest>
z<pageno>z
</pageno>
z</outline>
z</outlines>
)openr   r	   	enumerater   create_pagespageidobjectr   get_outlinesr5   r>   r%   r6   getreprr+   r9   r
   close)r]   r^   r_   r`   ra   rb   r.   rc   fpparserpagenopagepagesrl   outlinesleveltitlerg   a_seactionsubtyper   rC   s                          @r"   dumpoutliner      s    
eT	 .b2&(+ #,G,@,@,Eq"I
 KK
 

		v 		# 			'')HKK'.6 ,*udAs'-D"47==1FF!&$/"(**S/"tG}	'AfjjQTo#/s#<D%*47==%9F5M.uiy4HI#KK)E4(KK,%KK(6*K @AN+),* KK( 	]. .
R  		Y. .sB   :G!GG!5D>G3G!G!	GG!GG!!G*FilespecEmbeddedFilec                   
 dt         dt        t        t        f   dd f
fd}t	        | d      5 }t        |      }t        ||      
t               }
j                  D ]p  }|j                         D ][  }
j                  |      }	||vst        |	t              s*|	j                  d      t        u sB|j                  |        |||	       ] r 	 d d d        y # 1 sw Y   y xY w)Nr>   r-   r   c                    t         j                  j                  |j                  d      xs- t	        t
        |j                  d            j                               }|d   j                  d      xs |d   j                  d      }j                  |j                        }t        |t              sd|d}t        |      |j                  d      t        urt        d|d      t         j                  j                  	| dd	|       }t         j                  j                  |      rt        d
|      t!        d|       t        j"                  t         j                  j%                  |      d       t'        |d      5 }|j)                  |j+                                d d d        y # 1 sw Y   y xY w)NUFFEFz%unable to process PDF: reference for z is not a PDFStreamTypez is not an EmbeddedFile06d-zfile exists: zextracting: T)exist_okwb)ospathbasenamert   r   r&   decoderX   r>   r%   r   r   LITERAL_EMBEDDEDFILEjoinexistsr   rY   makedirsdirnamern   r5   r<   )
r>   r-   filenamefilereffileobj	error_msgr   r,   rC   rc   s
           r"   extract1z!extractembedded.<locals>.extract1   sq   77##CGGDM$WT%5N5U5U5WXd)--%;Ts);**W]]+'9-7|CVW   	**;;v&::7| D) )  ww||J5+Qxj(AB77>>$}TH566TH%&
BGGOOD)D9$ 	*IIg&&()	* 	* 	*s    F55F>re   r   )intr6   r'   r   rn   r   r	   rU   rK   rV   rX   r%   rt   LITERAL_FILESPECrW   )r^   ra   rc   r   rw   rx   extracted_objidsrI   r>   r-   rC   s     `       @r"   extractembeddedr      s    * *$sCx. *T *, 
eT	 )b2&(+5II 		)D* )jj'!11"3-+;;$((/UC()		)	) ) )s   ACCC4CC%c	                    t        |d      5 }	t        |	      }
t        |
|      }|r&|D ]!  }|j                  |      }t	        | ||       # |rnt        t        j                  |            D ]M  \  }}||v s|r+|j                  D ]  }t        |      }t	        | ||        8t	        | |j                         O |rt        | |||       |s|s|st        | ||       d d d        |dvr| j                  d       y y # 1 sw Y    xY w)Nre   rT   )r2   r3   r1   )rn   r   r	   rX   r9   ro   r   rp   contentsr   r=   r\   rR   r5   )r]   r^   r_   r`   ra   rb   r.   rc   rD   rw   rx   rC   r>   r-   ry   rz   s                   r"   dumppdfr      s    
eT	 9b2&(+ 1jj's%01  )'*>*>s*C D 3W$#'== =C".s"3C#E3e<=  tzz23 sE+=>7%78'9( %%D &)9 9s   A*C?8A(C??Dc                     t        t        d      } | j                  dt        d dd       | j                  ddd	d
t        j
                          | j                  ddddd       | j                         }|j                  ddddd       |j                  ddt        d       | j                  dd      }|j                  dt        d dd       |j                  ddt        d        |j                  d!d"t        d#       |j                  d$d%ddd&       |j                  d'dd()       |j                  d*d+t        d,d-.       | j                  d/d0      }|j                  d1d2t        d3d4.       |j                         }|j                  d5d6ddd7       |j                  d8d9ddd:       |j                  d;d<ddd=       | S )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v)r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-P z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-or   zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)	r   __doc__add_argumentr'   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )rx   procedure_parserparse_paramsoutput_paramscodec_parsers        r"   create_parserr     s   $?F
.    !5!5 67	   '   ::<!!+ "  !!%	 "  ,,- - L ?   	   @	   B   7   ;   --4 . M    !==?L4   8   1   Mr$   argvc                 X   t               }|j                  |       }|j                  r1t        j                         j                  t        j                         |j                  r2|j                  j                  d      D cg c]  }t        |       c}ng }|j                  r|j                  D ch c]  }|dz
  	 }}nK|j                  r5|j                  j                  d      D ch c]  }t        |      dz
   }}n
t               }|j                  }|j                  rd}n |j                  rd}n|j                   rd}nd }|j"                  dk(  rt$        j&                  nt)        |j"                  d      5 }|j*                  D ]y  }	|j,                  rt/        ||	||||j0                  |d 	       -|j2                  rt5        |	||j2                  
       Rt7        ||	||||j0                  |d |j8                  	       { 	 d d d        y c c}w c c}w c c}w # 1 sw Y   y xY w)N)args,rf   r2   r3   r4   r   w)ra   rb   r.   rc   )ra   rc   )ra   rb   r.   rc   rD   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGobjectssplitr   page_numbersr`   rU   ra   
raw_streambinary_streamtext_streamoutfilesysstdoutrn   r   extract_tocr   rM   extract_embeddedr   r   rD   )
r   rx   r   xr_   r`   ra   r.   r]   r^   s
             r"   mainr     s   _F$'Dzz$$W]]3:>,,dll0056c!f6BF"&"3"34Q1q544	'+||'9'9#'>?!3q6A:??%}}H!						 ||s*T\\30G 5ZZ 	E% HH#	 &&H9N9N % HH#'+'>'>
#	 + 7 5?  s   H4H,H>B	H  H)__main__rG   )F)NF)r   FNN)r   FNNF)?r   r   os.pathr   rer   argparser   collections.abcr   r   typingr   r   r   r   pdfminer.pdfdocumentr	   r
   r   pdfminer.pdfexceptionsr   r   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rN   compiler(   r'   r&   r+   rr   r9   boolrR   r\   r   r   r   r   r   r   r   r:   r    r$   r"   <module>r      s   )   	 
 # / $ $  L L  % ( J J 7 7 #    			8	$
"**>
?>cEk >c >
> >f >S4Z >4 >H  %		  
	. $			 : 	
 
> !888 8 s^	8
 8 8 :8 d
8 
8v z? >* $)3 $)# $)3 $)4 $)X !$    SM  s^	 
     :  d
    
 Fx~ xv8tCy4 84 8v zF r$   