Examples of PDDocumentInformation

org.apache.pdfbox.pdmodel.PDDocumentInformation
This is the document metadata. Each getXXX method will return the entry if it exists or null if it does not exist. If you pass in null for the setXXX method then it will clear the value. @author Ben Litchfield @author Gerardo Ortiz @version $Revision: 1.12 $
org.pdfbox.pdmodel.PDDocumentInformation
This is the document metadata. Each getXXX method will return the entry if it exists or null if it does not exist. If you pass in null for the setXXX method then it will clear the value. @author Ben Litchfield @version $Revision: 1.12 $

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
        if (source.isEncrypted())
        {
            throw new IOException("Error: source PDF is encrypted, can't append encrypted PDF documents.");
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto(srcInfo.getDictionary());


        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();


        // use the highest version number for the resulting pdf

View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation


            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );


            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                try
                {
                    addTextField( document, "CreationDate", info.getCreationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                try
                {
                    addTextField( document, "ModificationDate", info.getModificationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.

View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );


        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();


        if( destCatalog.getOpenAction() == null )

View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
    }


    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, Metadata.TITLE, info.getTitle());
        addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
        addMetadata(metadata, Metadata.CREATOR, info.getCreator());
        addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate(); 
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

  PDDocument pdDoc = null;
  try 
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }
   if ((title != null) && (!title.equals("")))       { doc.setTitle(title); }
   if ((keywords != null) && (!keywords.equals(""))) { doc.setMetadata(keywords); }
   if ((summary != null) && (!summary.equals("")))   { doc.setSummary(summary); }
  }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

   *            database is null the strings will not be resolved.
   */
  public static void writeDocumentInformation(PDDocument document,
      BibtexEntry entry, BibtexDatabase database) {


    PDDocumentInformation di = document.getDocumentInformation();


    if (database != null)
      entry = database.resolveForStrings(entry, false);


    // Set all the values including key and entryType
    Set<String> fields = entry.getAllFields();


    for (String field : fields){
      if (field.equals("author")) {
        di.setAuthor(entry.getField("author").toString());
      } else if (field.equals("title")) {
        di.setTitle(entry.getField("title").toString());
      } else if (field.equals("keywords")) {
        di.setKeywords(entry.getField("keywords").toString());
      } else if (field.equals("abstract")) {
        di.setSubject(entry.getField("abstract").toString());
      } else {
        di.setCustomMetadataValue("bibtex/" + field.toString(),
            entry.getField(field.toString()).toString());
      }
    }
    di
        .setCustomMetadataValue("bibtex/entrytype", entry.getType()
            .getName());
  }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation


            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );


            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null ) 
            {
                addTextField( document, "Author", info.getAuthor() );
                addTextField( document, "CreationDate", info.getCreationDate() );
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                addTextField( document, "ModificationDate", info.getModificationDate() );
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );
        
        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();


        if( destCatalog.getOpenAction() == null )

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

     * 
     * @return The thread information.
     */
    public PDDocumentInformation getThreadInfo()
    {
        PDDocumentInformation retval = null;
        COSDictionary info = (COSDictionary)thread.getDictionaryObject( "I" );
        if( info != null )
        {
            retval = new PDDocumentInformation( info );
        }
        
        return retval;
    }

View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

                {
                    System.err.println( "Error: Cannot add metadata to encrypted document." );
                    System.exit( 1 );
                }
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentInformation info = document.getDocumentInformation();
                
                //Right now, PDFBox does not have any XMP library, so we will
                //just construct the XML by hand.
                StringBuffer xmp= new StringBuffer();
                xmp.append(
                "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" + 
                "<?adobe-xap-filters esc=\"CRLF\"?>\n" + 
                "<x:xmpmeta>\n" + 
                "    <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" + 
                "        <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' " +
                                         "pdf:Keywords='" + fixNull( info.getKeywords() ) + "' " + 
                                         "pdf:Producer='" + fixNull( info.getProducer() ) + "'></rdf:Description>\n" + 
                "        <rdf:Description rdf:about='' xmlns:xap='http://ns.adobe.com/xap/1.0/' " + 
                                         "xap:ModifyDate='" + fixNull( info.getModificationDate() ) + "' " +
                                         "xap:CreateDate='" + fixNull( info.getCreationDate() ) + "' " + 
                                         "xap:CreatorTool='" + fixNull( info.getCreator() ) + "' " + 
                                         "xap:MetadataDate='" + fixNull( new GregorianCalendar() )+ "'>\n" + 
                "        </rdf:Description>\n" + 
                "        <rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/' " + 
                                         "dc:format='application/pdf'>\n" + 
                "            <dc:title>\n" + 
                "                <rdf:Alt>\n" + 
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getTitle() ) +"</rdf:li>\n" + 
                "                </rdf:Alt>\n" + 
                "            </dc:title>\n" + 
                "            <dc:creator>\n" + 
                "                <rdf:Seq>\n" + 
                "                    <rdf:li>PDFBox.org</rdf:li>\n" + 
                "                </rdf:Seq>\n" + 
                "            </dc:creator>\n" + 
                "            <dc:description>\n" + 
                "                <rdf:Alt>\n" + 
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getSubject() ) +"</rdf:li>\n" + 
                "                </rdf:Alt>\n" + 
                "            </dc:description>\n" + 
                "        </rdf:Description>\n" + 
                "    </rdf:RDF>\n" + 
                "</x:xmpmeta>\n" );

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.