Examples of PDDocumentInformation


Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
        if (source.isEncrypted())
        {
            throw new IOException("Error: source PDF is encrypted, can't append encrypted PDF documents.");
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto(srcInfo.getDictionary());

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        // use the highest version number for the resulting pdf
View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                try
                {
                    addTextField( document, "CreationDate", info.getCreationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                try
                {
                    addTextField( document, "ModificationDate", info.getModificationDate() );
                }
                catch( IOException io )
                {
                    //ignore, bad date but continue with indexing
                }
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );

        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

Examples of org.apache.pdfbox.pdmodel.PDDocumentInformation

        }
    }

    private void extractMetadata(PDDocument document, Metadata metadata)
            throws TikaException {
        PDDocumentInformation info = document.getDocumentInformation();
        metadata.set(PagedText.N_PAGES, document.getNumberOfPages());
        addMetadata(metadata, Metadata.TITLE, info.getTitle());
        addMetadata(metadata, Metadata.AUTHOR, info.getAuthor());
        addMetadata(metadata, Metadata.CREATOR, info.getCreator());
        addMetadata(metadata, Metadata.KEYWORDS, info.getKeywords());
        addMetadata(metadata, "producer", info.getProducer());
        addMetadata(metadata, Metadata.SUBJECT, info.getSubject());
        addMetadata(metadata, "trapped", info.getTrapped());
        try {
            addMetadata(metadata, "created", info.getCreationDate());
            addMetadata(metadata, Metadata.CREATION_DATE, info.getCreationDate());
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
        try {
            Calendar modified = info.getModificationDate();
            addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
        } catch (IOException e) {
            // Invalid date format, just ignore
        }
       
        // All remaining metadata is custom
        // Copy this over as-is
        List<String> handledMetadata = Arrays.asList(new String[] {
             "Author", "Creator", "CreationDate", "ModDate",
             "Keywords", "Producer", "Subject", "Title", "Trapped"
        });
        for(COSName key : info.getDictionary().keySet()) {
            String name = key.getName();
            if(! handledMetadata.contains(name)) {
          addMetadata(metadata, name, info.getDictionary().getDictionaryObject(key));
            }
        }
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

  PDDocument pdDoc = null;
  try
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }
   if ((title != null) && (!title.equals("")))       { doc.setTitle(title); }
   if ((keywords != null) && (!keywords.equals(""))) { doc.setMetadata(keywords); }
   if ((summary != null) && (!summary.equals("")))   { doc.setSummary(summary); }
  }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

   *            database is null the strings will not be resolved.
   */
  public static void writeDocumentInformation(PDDocument document,
      BibtexEntry entry, BibtexDatabase database) {

    PDDocumentInformation di = document.getDocumentInformation();

    if (database != null)
      entry = database.resolveForStrings(entry, false);

    // Set all the values including key and entryType
    Set<String> fields = entry.getAllFields();

    for (String field : fields){
      if (field.equals("author")) {
        di.setAuthor(entry.getField("author").toString());
      } else if (field.equals("title")) {
        di.setTitle(entry.getField("title").toString());
      } else if (field.equals("keywords")) {
        di.setKeywords(entry.getField("keywords").toString());
      } else if (field.equals("abstract")) {
        di.setSubject(entry.getField("abstract").toString());
      } else {
        di.setCustomMetadataValue("bibtex/" + field.toString(),
            entry.getField(field.toString()).toString());
      }
    }
    di
        .setCustomMetadataValue("bibtex/entrytype", entry.getType()
            .getName());
  }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                addTextField( document, "CreationDate", info.getCreationDate() );
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                addTextField( document, "ModificationDate", info.getModificationDate() );
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );
       
        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

     *
     * @return The thread information.
     */
    public PDDocumentInformation getThreadInfo()
    {
        PDDocumentInformation retval = null;
        COSDictionary info = (COSDictionary)thread.getDictionaryObject( "I" );
        if( info != null )
        {
            retval = new PDDocumentInformation( info );
        }
       
        return retval;
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

                {
                    System.err.println( "Error: Cannot add metadata to encrypted document." );
                    System.exit( 1 );
                }
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentInformation info = document.getDocumentInformation();
               
                //Right now, PDFBox does not have any XMP library, so we will
                //just construct the XML by hand.
                StringBuffer xmp= new StringBuffer();
                xmp.append(
                "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
                "<?adobe-xap-filters esc=\"CRLF\"?>\n" +
                "<x:xmpmeta>\n" +
                "    <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" +
                "        <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' " +
                                         "pdf:Keywords='" + fixNull( info.getKeywords() ) + "' " +
                                         "pdf:Producer='" + fixNull( info.getProducer() ) + "'></rdf:Description>\n" +
                "        <rdf:Description rdf:about='' xmlns:xap='http://ns.adobe.com/xap/1.0/' " +
                                         "xap:ModifyDate='" + fixNull( info.getModificationDate() ) + "' " +
                                         "xap:CreateDate='" + fixNull( info.getCreationDate() ) + "' " +
                                         "xap:CreatorTool='" + fixNull( info.getCreator() ) + "' " +
                                         "xap:MetadataDate='" + fixNull( new GregorianCalendar() )+ "'>\n" +
                "        </rdf:Description>\n" +
                "        <rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/' " +
                                         "dc:format='application/pdf'>\n" +
                "            <dc:title>\n" +
                "                <rdf:Alt>\n" +
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getTitle() ) +"</rdf:li>\n" +
                "                </rdf:Alt>\n" +
                "            </dc:title>\n" +
                "            <dc:creator>\n" +
                "                <rdf:Seq>\n" +
                "                    <rdf:li>PDFBox.org</rdf:li>\n" +
                "                </rdf:Seq>\n" +
                "            </dc:creator>\n" +
                "            <dc:description>\n" +
                "                <rdf:Alt>\n" +
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getSubject() ) +"</rdf:li>\n" +
                "                </rdf:Alt>\n" +
                "            </dc:description>\n" +
                "        </rdf:Description>\n" +
                "    </rdf:RDF>\n" +
                "</x:xmpmeta>\n" );
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.