Package org.apache.pdfbox.pdmodel

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation


        }
    }

    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, TikaCoreProperties.TITLE, info.getTitle());
        addMetadata(metadata, TikaCoreProperties.CREATOR, info.getAuthor());
        addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
        addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        // TODO: Move to description in Tika 2.0
        addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            // TODO Remove these in Tika 2.0
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
            addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }
View Full Code Here


        }
    }

    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, Metadata.TITLE, info.getTitle());
        addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
        addMetadata(metadata, Metadata.CREATOR, info.getCreator());
        addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }
View Full Code Here

        parse(stream, handler, metadata, new ParseContext());
    }

    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, Metadata.TITLE, info.getTitle());
        addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
        addMetadata(metadata, Metadata.CREATOR, info.getCreator());
        addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }
View Full Code Here

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

public class Info {
    public Info(Scrutinizer scrutinizer) {
        DocumentAdapter documentAdapter = scrutinizer.getDocumentAdapter();
        if (documentAdapter != null && documentAdapter.IsDocLoaded()) {
            PDDocument pd = documentAdapter.getDocument();
            PDDocumentInformation docinfo = pd.getDocumentInformation();
            if (docinfo != null) {
                Title = title = docinfo.getTitle();
                Author = author = docinfo.getAuthor();
                Subject = subject = docinfo.getSubject();
                Keywords = keywords = docinfo.getKeywords();
                Creator = creator = docinfo.getCreator();
                Producer = producer = docinfo.getProducer();
                try {
                    CreationDate = creationdate = docinfo.getCreationDate();
                    ModDate = moddate = docinfo.getModificationDate();
                } catch (IOException e) {
                    // should already be triggered at  de.pdf_scrutinizer.DocumentAdapter.getDocInfo
                }
                Trapped = trapped = docinfo.getTrapped();
            }
        }
    }
View Full Code Here

                ScriptableObject.putProperty(scope, "URL", Context.javaToJS(doc.URL, scope));
                ScriptableObject.putProperty(scope, "numPages", Context.javaToJS(doc.numPages, scope));

                // if document catalog contains custom metadata we have to put them into the context.
                if (scrutinizer.getDocumentAdapter() != null) {
                    PDDocumentInformation info = scrutinizer.getDocumentAdapter().getDocument().getDocumentInformation();
                    if (info != null) {
                        String[] docinfokeys = {"Author", "Creator", "CreationDate", "Subject", "Title", "Keywords", "ModDate", "Producer", "Trapped"};
                        Set<String> metadataKeys = info.getMetadataKeys();
                        metadataKeys.removeAll(Arrays.asList(docinfokeys)); // remove non-custom keys

                        for (String metadataKey : metadataKeys) {
                            log.debug(String.format("adding custom info metadata: %s value: %s", "info." + metadataKey, info.getCustomMetadataValue(metadataKey)));
                            ScriptableObject.putProperty((Scriptable) scope.get("info", scope), metadataKey, Context.javaToJS(info.getCustomMetadataValue(metadataKey), scope));
                        }
                    }
                }

                app.viewerVersion = Double.parseDouble(viewerVersion);
View Full Code Here

                dcSchema = xmp.getDublinCoreSchema();
            }
        } catch (IOException e) {
            //swallow
        }
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema);
        extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema);
        extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
        addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
        addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);

        // TODO: Move to description in Tika 2.0
        addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            // TODO Remove these in Tika 2.0
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
            addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate",
                "Keywords", "Producer", "Subject", "Title", "Trapped");
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }

        //try to get the various versions
        //Caveats:
View Full Code Here

                  if (props.isEmpty())
                  {
                     // The pdf doesn't contain any metadata, try to use the document
                     // information instead
                     PDDocumentInformation docInfo = pdDocument.getDocumentInformation();

                     if (docInfo != null)
                     {
                        try
                        {
                           if (docInfo.getAuthor() != null)
                              props.put(DCMetaData.CONTRIBUTOR, docInfo.getAuthor());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getAuthor failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getCreationDate() != null)
                              props.put(DCMetaData.DATE, docInfo.getCreationDate());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getCreationDate failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getCreator() != null)
                              props.put(DCMetaData.CREATOR, docInfo.getCreator());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getCreator failed: " + e.getMessage());
                        }
                        try
                        {

                           if (docInfo.getKeywords() != null)
                              props.put(DCMetaData.SUBJECT, docInfo.getKeywords());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getKeywords failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getModificationDate() != null)
                              props.put(DCMetaData.DATE, docInfo.getModificationDate());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getModificationDate failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getProducer() != null)
                              props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getProducer failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getSubject() != null)
                              props.put(DCMetaData.DESCRIPTION, docInfo.getSubject());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getSubject failed: " + e.getMessage());
                        }
                        try
                        {
                           if (docInfo.getTitle() != null)
                              props.put(DCMetaData.TITLE, docInfo.getTitle());
                        }
                        catch (Exception e)
                        {
                           LOG.warn("getTitle failed: " + e.getMessage());
                        }
View Full Code Here

TOP

Related Classes of org.apache.pdfbox.pdmodel.PDDocumentInformation

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.