Package org.pdfbox.pdmodel

Examples of org.pdfbox.pdmodel.PDDocument


     *
     * @throws IOException If there is an error parsing the document.
     */
    private void addContent( Document document, InputStream is, String documentLocation ) throws IOException
    {
        PDDocument pdfDocument = null;
        try
        {
            pdfDocument = PDDocument.load( is );

            if( pdfDocument.isEncrypted() )
            {
                //Just try using the default password and move on
                pdfDocument.decrypt( "" );
            }

            //create a writer where to append the text content.
            StringWriter writer = new StringWriter();
            if( stripper == null )
            {
                stripper = new PDFTextStripper();
            }
            else
            {
                stripper.resetEngine();
            }
            stripper.writeText( pdfDocument, writer );

            // Note: the buffer to string operation is costless;
            // the char array value of the writer buffer and the content string
            // is shared as long as the buffer content is not modified, which will
            // not occur here.
            String contents = writer.getBuffer().toString();

            StringReader reader = new StringReader( contents );

            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                addTextField( document, "CreationDate", info.getCreationDate() );
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                addTextField( document, "ModificationDate", info.getModificationDate() );
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
            addUnindexedField( document, "summary", summary );
        }
        catch( CryptographyException e )
        {
            throw new IOException( "Error decrypting document(" + documentLocation + "): " + e );
        }
        catch( InvalidPasswordException e )
        {
            //they didn't suppply a password and the default of "" was wrong.
            throw new IOException(
                "Error: The document(" + documentLocation +
                ") is encrypted and will not be indexed." );
        }
        finally
        {
            if( pdfDocument != null )
            {
                pdfDocument.close();
            }
        }
    }
View Full Code Here


        {
            usage();
        }
        else
        {
            PDDocument document = null;
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    try
                    {
                        document.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: Document is encrypted with a password." );
                        System.exit( 1 );
                    }
                }
                PrintImageLocations printer = new PrintImageLocations();
                List allPages = document.getDocumentCatalog().getAllPages();
                for( int i=0; i<allPages.size(); i++ )
                {
                    PDPage page = (PDPage)allPages.get( i );
                    System.out.println( "Processing page: " + i );
                    printer.processStream( page, page.findResources(), page.getContents().getStream() );
                }
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }
View Full Code Here

     * @throws IOException If there is an error parsing the document.
     * @throws COSVisitorException If there is an error while copying the document.
     */
    public void doIt(String in, String out) throws IOException, COSVisitorException
    {
        PDDocument doc = null;
        try
        {
            doc = PDDocument.load( in );
            if( doc.isEncrypted() )
            {
                try
                {
                    doc.decrypt( "" );
                }
                catch( InvalidPasswordException e )
                {
                    System.err.println( "Error: The document is encrypted." );
                }
                catch( org.pdfbox.exceptions.CryptographyException e )
                {
                    e.printStackTrace();
                }
            }

            for (Iterator i = doc.getDocument().getObjects().iterator(); i.hasNext();)
            {
                COSBase base = ((COSObject) i.next()).getObject();
                if (base instanceof COSStream)
                {
                    // just kill the filters
                    COSStream cosStream = (COSStream)base;
                    cosStream.getUnfilteredStream();
                    cosStream.setFilters(null);
                }
            }
            doc.save( out );
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }
View Full Code Here

     * @throws IOException If there is an error importing the FDF document.
     * @throws CryptographyException If there is an error decrypting the document.
     */
    public static void main(String[] args) throws IOException, CryptographyException
    {
        PDDocument pdf = null;
        try
        {
            if( args.length != 1 )
            {
                usage();
            }
            else
            {
                pdf = PDDocument.load( args[0] );
                PrintFields exporter = new PrintFields();
                if( pdf.isEncrypted() )
                {
                    try
                    {
                        pdf.decrypt( "" );
                    }
                    catch( InvalidPasswordException e )
                    {
                        System.err.println( "Error: The document is encrypted." );
                        usage();
                    }
                }
                exporter.printFields( pdf );
            }
        }
        finally
        {
            if( pdf != null )
            {
                pdf.close();
            }
        }
    }
View Full Code Here

        importer.importXFDF( args );
    }

    private void importXFDF( String[] args ) throws Exception
    {
        PDDocument pdf = null;
        FDFDocument fdf = null;

        try
        {
            if( args.length != 3 )
            {
                usage();
            }
            else
            {
                ImportFDF importer = new ImportFDF();
                pdf = PDDocument.load( args[0] );
                fdf = FDFDocument.loadXFDF( args[1] );

                importer.importFDF( pdf, fdf );
                pdf.save( args[2] );
                fdf.save( "tmp/outputXFDFtoPDF.fdf");
            }
        }
        finally
        {
View Full Code Here

     *
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void write(COSDocument doc) throws COSVisitorException
    {
        PDDocument pdDoc = new PDDocument( doc );
        write( pdDoc );
    }
View Full Code Here

     * @return The document text.
     * @throws IOException If there is an error extracting the text.
     */
    public String getText( COSDocument doc ) throws IOException
    {
        return getText( new PDDocument( doc ) );
    }
View Full Code Here

     * @param outputStream The stream to write the text to.
     * @throws IOException If there is an error extracting the text.
     */
    public void writeText( COSDocument doc, Writer outputStream ) throws IOException
    {
        writeText( new PDDocument( doc ), outputStream );
    }
View Full Code Here

     * @throws COSVisitorException If there is an error writing the PDF.
     */
    public void createPDFFromImage( String file, String image) throws IOException, COSVisitorException
    {
        // the document
        PDDocument doc = null;
        try
        {
            doc = new PDDocument();
           
            PDPage page = new PDPage();
            doc.addPage( page );
           
            PDXObjectImage ximage = null;
            if( image.toLowerCase().endsWith( ".jpg" ) )
            {
                ximage = new PDJpeg(doc, new FileInputStream( image ) );
            }
            else if (image.toLowerCase().endsWith(".tif") || image.toLowerCase().endsWith(".tiff"))
            {
                ximage = new PDCcitt(doc, new RandomAccessFile(new File(image),"r"));
            }
            else
            {
                //BufferedImage awtImage = ImageIO.read( new File( image ) );
                //ximage = new PDPixelMap(doc, awtImage);
                throw new IOException( "Image type not supported:" + image );
            }
            PDPageContentStream contentStream = new PDPageContentStream(doc, page);
           
            contentStream.drawImage( ximage, 20, 20 );
         
            contentStream.close();
            doc.save( file );
        }
        finally
        {
            if( doc != null )
            {
                doc.close();
            }
        }
    }
View Full Code Here

        {
            usage();
        }
        else
        {
            PDDocument document = null;
            try
            {
                document = PDDocument.load( args[0] );
                if( document.isEncrypted() )
                {
                    throw new IOException( "Encrypted documents are not supported for this example" );
                }
                List allpages = new ArrayList();
                document.getDocumentCatalog().getPages().getAllKids(allpages);
               
                for (int i=0; i < allpages.size(); i++)
                {
                    PDPage apage = (PDPage) allpages.get(i);
                    List annotations = apage.getAnnotations();
                   
                    PDAnnotationRubberStamp rs = new PDAnnotationRubberStamp();
                    rs.setName(PDAnnotationRubberStamp.NAME_TOP_SECRET);
                    rs.setRectangle(new PDRectangle(100,100));
                    rs.setContents("A top secret note");
                   
                    annotations.add(rs);
                }
               
                document.save( args[1] );
            }
            finally
            {
                if( document != null )
                {
                    document.close();
                }
            }
        }
    }
View Full Code Here

TOP

Related Classes of org.pdfbox.pdmodel.PDDocument

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.