Package org.apache.pdfbox.pdmodel

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation


        }
        if (source.isEncrypted())
        {
            throw new IOException("Error: source PDF is encrypted, can't append encrypted PDF documents.");
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto(srcInfo.getDictionary());

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        // use the highest version number for the resulting pdf
View Full Code Here


            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                try
                {
                    addTextField( document, "CreationDate", info.getCreationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                try
                {
                    addTextField( document, "ModificationDate", info.getModificationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

        }
    }

    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, Metadata.TITLE, info.getTitle());
        addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
        addMetadata(metadata, Metadata.CREATOR, info.getCreator());
        addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }
View Full Code Here

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                try
                {
                    addTextField( document, "CreationDate", info.getCreationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                try
                {
                    addTextField( document, "ModificationDate", info.getModificationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

                    }
                }
                else
                {
                    // The pdf doesn't contain any metadata, try to use the document information instead
                    PDDocumentInformation information = document.getDocumentInformation();
                    if ( information != null)
                    {
                        showDocumentInformation(information);
                    }
                }
View Full Code Here

     *
     * @throws IOException If there is an error getting the page count.
     */
    public void printMetadata( PDDocument document ) throws IOException
    {
        PDDocumentInformation info = document.getDocumentInformation();
        PDDocumentCatalog cat = document.getDocumentCatalog();
        PDMetadata metadata = cat.getMetadata();
        System.out.println( "Page Count=" + document.getNumberOfPages() );
        System.out.println( "Title=" + info.getTitle() );
        System.out.println( "Author=" + info.getAuthor() );
        System.out.println( "Subject=" + info.getSubject() );
        System.out.println( "Keywords=" + info.getKeywords() );
        System.out.println( "Creator=" + info.getCreator() );
        System.out.println( "Producer=" + info.getProducer() );
        System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
        System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
        System.out.println( "Trapped=" + info.getTrapped() );
        if( metadata != null )
        {
            System.out.println( "Metadata=" + metadata.getInputStreamAsString() );
        }
    }
View Full Code Here

                {
                    System.err.println( "Error: Cannot add metadata to encrypted document." );
                    System.exit( 1 );
                }
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentInformation info = document.getDocumentInformation();

                XMPMetadata metadata = new XMPMetadata();

                XMPSchemaPDF pdfSchema = metadata.addPDFSchema();
                pdfSchema.setKeywords( info.getKeywords() );
                pdfSchema.setProducer( info.getProducer() );

                XMPSchemaBasic basicSchema = metadata.addBasicSchema();
                basicSchema.setModifyDate( info.getModificationDate() );
                basicSchema.setCreateDate( info.getCreationDate() );
                basicSchema.setCreatorTool( info.getCreator() );
                basicSchema.setMetadataDate( new GregorianCalendar() );

                XMPSchemaDublinCore dcSchema = metadata.addDublinCoreSchema();
                dcSchema.setTitle( info.getTitle() );
                dcSchema.addCreator( "PDFBox" );
                dcSchema.setDescription( info.getSubject() );

                PDMetadata metadataStream = new PDMetadata(document);
                metadataStream.importXMPMetadata( metadata );
                catalog.setMetadata( metadataStream );
View Full Code Here

     *
     * @return The thread information.
     */
    public PDDocumentInformation getThreadInfo()
    {
        PDDocumentInformation retval = null;
        COSDictionary info = (COSDictionary)thread.getDictionaryObject( "I" );
        if( info != null )
        {
            retval = new PDDocumentInformation( info );
        }

        return retval;
    }
View Full Code Here

TOP

Related Classes of org.apache.pdfbox.pdmodel.PDDocumentInformation

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.