Examples of org.pdfbox.pdmodel.PDDocument

org.pdfbox.pdmodel.PDDocument
This is the in-memory representation of a PDF document. You must call close() on this object when you are done using it. @author Ben Litchfield @version $Revision: 1.43 $

     *
     * @throws IOException If there is an error parsing the document.
     */
    private static PDDocument parseDocument( InputStream input )throws IOException
    {
        PDDocument document = PDDocument.load( input );
        if( document.isEncrypted() )
        {
            try
            {
                document.decrypt( "" );
            }
            catch( InvalidPasswordException e )
            {
                System.err.println( "Error: The document is encrypted." );
            }

View Full Code Here

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    try
                    {
                        document.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: Document is encrypted with a password." );
                        System.exit( 1 );
                    }
                }
                PrintTextLocations printer = new PrintTextLocations();
                List allPages = document.getDocumentCatalog().getAllPages();
                for( int i=0; i<allPages.size(); i++ )
                {
                    PDPage page = (PDPage)allPages.get( i );
                    System.out.println( "Processing page: " + i );
                    printer.processStream( page, page.findResources(), page.getContents().getStream() );
                }
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }

View Full Code Here

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    System.err.println( "Error: Cannot add metadata to encrypted document." );
                    System.exit( 1 );
                }
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentInformation info = document.getDocumentInformation();
                
                //Right now, PDFBox does not have any XMP library, so we will
                //just construct the XML by hand.
                StringBuffer xmp= new StringBuffer();
                xmp.append(
                "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" + 
                "<?adobe-xap-filters esc=\"CRLF\"?>\n" + 
                "<x:xmpmeta>\n" + 
                "    <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" + 
                "        <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' " +
                                         "pdf:Keywords='" + fixNull( info.getKeywords() ) + "' " + 
                                         "pdf:Producer='" + fixNull( info.getProducer() ) + "'></rdf:Description>\n" + 
                "        <rdf:Description rdf:about='' xmlns:xap='http://ns.adobe.com/xap/1.0/' " + 
                                         "xap:ModifyDate='" + fixNull( info.getModificationDate() ) + "' " +
                                         "xap:CreateDate='" + fixNull( info.getCreationDate() ) + "' " + 
                                         "xap:CreatorTool='" + fixNull( info.getCreator() ) + "' " + 
                                         "xap:MetadataDate='" + fixNull( new GregorianCalendar() )+ "'>\n" + 
                "        </rdf:Description>\n" + 
                "        <rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/' " + 
                                         "dc:format='application/pdf'>\n" + 
                "            <dc:title>\n" + 
                "                <rdf:Alt>\n" + 
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getTitle() ) +"</rdf:li>\n" + 
                "                </rdf:Alt>\n" + 
                "            </dc:title>\n" + 
                "            <dc:creator>\n" + 
                "                <rdf:Seq>\n" + 
                "                    <rdf:li>PDFBox.org</rdf:li>\n" + 
                "                </rdf:Seq>\n" + 
                "            </dc:creator>\n" + 
                "            <dc:description>\n" + 
                "                <rdf:Alt>\n" + 
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getSubject() ) +"</rdf:li>\n" + 
                "                </rdf:Alt>\n" + 
                "            </dc:description>\n" + 
                "        </rdf:Description>\n" + 
                "    </rdf:RDF>\n" + 
                "</x:xmpmeta>\n" );
                
                //xmp spec says we should put padding, so that the metadata can be appended to 
                //in place
                xmp.append( PADDING );
                xmp.append( PADDING );
                xmp.append( PADDING );
                xmp.append( "\n<?xpacket end='w'?>" );
                ByteArrayInputStream mdInput = new ByteArrayInputStream( xmp.toString().getBytes() );
                PDMetadata metadataStream = new PDMetadata(document, mdInput, false );
                catalog.setMetadata( metadataStream );
                
                
                document.save( args[1] );
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }

View Full Code Here

     * 
     * @throws Exception If there is an error extracting the URLs.
     */
    public static void main(String[] args) throws Exception
    {
        PDDocument doc = null;
        try
        {
            if( args.length != 1 )
            {
                usage();
            }
            else
            {
                doc = PDDocument.load( args[0] );
                List allPages = doc.getDocumentCatalog().getAllPages();
                for( int i=0; i<allPages.size(); i++ )
                {
                    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                    PDPage page = (PDPage)allPages.get( i );
                    List annotations = page.getAnnotations();
                    //first setup text extraction regions
                    for( int j=0; j<annotations.size(); j++ )
                    {
                        PDAnnotation annot = (PDAnnotation)annotations.get( j );
                        if( annot instanceof PDAnnotationLink )
                        {
                            PDAnnotationLink link = (PDAnnotationLink)annot;
                            PDRectangle rect = link.getRectangle();
                            //need to reposition link rectangle to match text space
                            float x = rect.getLowerLeftX();
                            float y = rect.getUpperRightY();
                            float width = rect.getWidth();
                            float height = rect.getHeight();
                            int rotation = page.findRotation();
                            if( rotation == 0 )
                            {
                                PDRectangle pageSize = page.findMediaBox();
                                y = pageSize.getHeight() - y;
                            }
                            else if( rotation == 90 )
                            {
                                //do nothing
                            }
                            
                            Rectangle2D.Float awtRect = new Rectangle2D.Float( x,y,width,height );
                            stripper.addRegion( "" + j, awtRect );
                        }
                    }
                    
                    stripper.extractRegions( page );
                    
                    for( int j=0; j<annotations.size(); j++ )
                    {
                        PDAnnotation annot = (PDAnnotation)annotations.get( j );
                        if( annot instanceof PDAnnotationLink )
                        {
                            PDAnnotationLink link = (PDAnnotationLink)annot;
                            PDAction action = link.getAction();
                            String urlText = stripper.getTextForRegion( "" + j );
                            if( action instanceof PDActionURI )
                            {
                                PDActionURI uri = (PDActionURI)action;
                                System.out.println( "Page " + (i+1) +":'" + urlText + "'=" + uri.getURI() );
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }

View Full Code Here

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            FileInputStream file = null;
            try
            {
                file = new FileInputStream( args[0] );
                PDFParser parser = new PDFParser( file );
                parser.parse();
                document = parser.getPDDocument();
                if( document.isEncrypted() )
                {
                    try
                    {
                        document.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: Document is encrypted with a password." );
                        System.exit( 1 );
                    }
                }
                PrintDocumentMetaData meta = new PrintDocumentMetaData();
                meta.printMetadata( document );
            }
            finally
            {
                if( file != null )
                {
                    file.close();
                }
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }

View Full Code Here

     * 
     * @throws Exception If there is an error during the process.
     */
    public static void main(String[] args) throws Exception
    {
        PDDocument doc = null;
        try
        {
            if( args.length != 2 )
            {
                usage();
            }
            else
            {
                doc = PDDocument.load( args[0] );
                List allPages = doc.getDocumentCatalog().getAllPages();
                for( int i=0; i<allPages.size(); i++ )
                {
                    PDPage page = (PDPage)allPages.get( i );
                    List annotations = page.getAnnotations();
                    
                    for( int j=0; j<annotations.size(); j++ )
                    {
                        PDAnnotation annot = (PDAnnotation)annotations.get( j );
                        if( annot instanceof PDAnnotationLink )
                        {
                            PDAnnotationLink link = (PDAnnotationLink)annot;
                            PDAction action = link.getAction();
                            if( action instanceof PDActionURI )
                            {
                                PDActionURI uri = (PDActionURI)action;
                                String oldURI = uri.getURI();
                                String newURI = "http://www.pdfbox.org";
                                System.out.println( "Page " + (i+1) +": Replacing " + oldURI + " with " + newURI );
                                uri.setURI( newURI );
                            }
                        }
                    }
                }
                doc.save( args[1] );
            }
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }

View Full Code Here

     * @throws COSVisitorException If there is an error writing the PDF.
     */
    public void doIt( String file) throws IOException, COSVisitorException
    {
        // the document
        PDDocument doc = null;
        try
        {
            doc = new PDDocument();
            
            PDPage page = new PDPage();
            doc.addPage( page );
            PDFont font = PDType1Font.HELVETICA_BOLD;
            
            PDPageContentStream contentStream = new PDPageContentStream(doc, page);
            contentStream.beginText();
            contentStream.setFont( font, 12 );
            contentStream.moveTextPositionByAmount( 100, 700 );
            contentStream.drawString( "Go to Document->File Attachments to View Embedded Files" );
            contentStream.endText();
            contentStream.close();
            
            //embedded files are stored in a named tree
            PDEmbeddedFilesNameTreeNode efTree = new PDEmbeddedFilesNameTreeNode();
            
            
            //first create the file specification, which holds the embedded file
            PDComplexFileSpecification fs = new PDComplexFileSpecification();
            fs.setFile( "Test.txt" );
            //create a dummy file stream, this would probably normally be a FileInputStream
            byte[] data = "This is the contents of the embedded file".getBytes();
            ByteArrayInputStream fakeFile = 
                new ByteArrayInputStream( data );
            PDEmbeddedFile ef = new PDEmbeddedFile(doc, fakeFile );
            //now lets some of the optional parameters
            ef.setSubtype( "test/plain" );
            ef.setSize( data.length );
            ef.setCreationDate( new GregorianCalendar() );
            fs.setEmbeddedFile( ef );
            
            //now add the entry to the embedded file tree and set in the document.
            Map efMap = new HashMap();
            efMap.put( "My first attachment", fs );
            efTree.setNames( efMap );
            PDDocumentNameDictionary names = new PDDocumentNameDictionary( doc.getDocumentCatalog() );
            names.setEmbeddedFiles( efTree );
            doc.getDocumentCatalog().setNames( names );
            
            
            doc.save( file );
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }

View Full Code Here

     *
     * @throws IOException If there is an error getting the document.
     */
    public PDDocument getPDDocument() throws IOException
    {
        return new PDDocument( getDocument() );
    }

View Full Code Here

    public void doIt(final String file, final String message,
            final String fontfile) throws IOException, COSVisitorException 
    {


        // the document
        PDDocument doc = null;
        try 
        {
            doc = new PDDocument();


            PDPage page = new PDPage();
            doc.addPage(page);
            PDFont font = new PDType1AfmPfbFont(doc,fontfile);


            PDPageContentStream contentStream = new PDPageContentStream(doc,
                    page);
            contentStream.beginText();
            contentStream.setFont(font, 12);
            contentStream.moveTextPositionByAmount(100, 700);
            contentStream.drawString(message);
            contentStream.endText();
            contentStream.close();
            doc.save(file);
            System.out.println(file + " created!");
        } 
        finally 
        {
            if (doc != null) 
            {
                doc.close();
            }
        }
    }

View Full Code Here

     *
     * @param doc The document to decrypt.
     */
    public DocumentEncryption( COSDocument doc )
    {
        pdDocument = new PDDocument( doc );
        document = doc;
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.pdfbox.pdmodel.PDDocument

br.com.caelum.stella.boleto.transformer.BoletoTransformerIntegrationTest

com.stimulus.archiva.extraction.PDFExtractor

eu.lsem.bakalarka.filetypeprocess.document.PdfDocumentParser

net.sf.jabref.util.XMPUtil

org.apache.jackrabbit.core.query.PdfTextFilter

org.apache.jackrabbit.extractor.PdfTextExtractor

org.apache.nutch.parse.pdf.PdfParser

org.exoplatform.services.document.impl.PDFDocumentReader

org.infoglue.cms.controllers.kernel.impl.simple.LuceneController

org.pdfbox.cos.COSArray

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact: coftware#gmail.com.