Package org.pdfbox.pdmodel

Examples of org.pdfbox.pdmodel.PDDocumentInformation


  PDDocument pdDoc = null;
  try
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }
   if ((title != null) && (!title.equals("")))       { doc.setTitle(title); }
   if ((keywords != null) && (!keywords.equals(""))) { doc.setMetadata(keywords); }
   if ((summary != null) && (!summary.equals("")))   { doc.setSummary(summary); }
  }
View Full Code Here


   *            database is null the strings will not be resolved.
   */
  public static void writeDocumentInformation(PDDocument document,
      BibtexEntry entry, BibtexDatabase database) {

    PDDocumentInformation di = document.getDocumentInformation();

    if (database != null)
      entry = database.resolveForStrings(entry, false);

    // Set all the values including key and entryType
    Set<String> fields = entry.getAllFields();

    for (String field : fields){
      if (field.equals("author")) {
        di.setAuthor(entry.getField("author").toString());
      } else if (field.equals("title")) {
        di.setTitle(entry.getField("title").toString());
      } else if (field.equals("keywords")) {
        di.setKeywords(entry.getField("keywords").toString());
      } else if (field.equals("abstract")) {
        di.setSubject(entry.getField("abstract").toString());
      } else {
        di.setCustomMetadataValue("bibtex/" + field.toString(),
            entry.getField(field.toString()).toString());
      }
    }
    di
        .setCustomMetadataValue("bibtex/entrytype", entry.getType()
            .getName());
  }
View Full Code Here

            // Add the tag-stripped contents as a Reader-valued Text field so it will
            // get tokenized and indexed.
            addTextField( document, "contents", reader );

            PDDocumentInformation info = pdfDocument.getDocumentInformation();
            if( info != null )
            {
                addTextField( document, "Author", info.getAuthor() );
                addTextField( document, "CreationDate", info.getCreationDate() );
                addTextField( document, "Creator", info.getCreator() );
                addTextField( document, "Keywords", info.getKeywords() );
                addTextField( document, "ModificationDate", info.getModificationDate() );
                addTextField( document, "Producer", info.getProducer() );
                addTextField( document, "Subject", info.getSubject() );
                addTextField( document, "Title", info.getTitle() );
                addTextField( document, "Trapped", info.getTrapped() );
            }
            int summarySize = Math.min( contents.length(), 500 );
            String summary = contents.substring( 0, summarySize );
            // Add the summary as an UnIndexed field, so that it is stored and returned
            // with hit documents for display.
View Full Code Here

        }
        if( source.isEncrypted() )
        {
            throw new IOException( "Error: source PDF is encrypted, can't append encrypted PDF documents." );
        }
        PDDocumentInformation destInfo = destination.getDocumentInformation();
        PDDocumentInformation srcInfo = source.getDocumentInformation();
        destInfo.getDictionary().mergeInto( srcInfo.getDictionary() );
       
        PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
        PDDocumentCatalog srcCatalog = source.getDocumentCatalog();

        if( destCatalog.getOpenAction() == null )
View Full Code Here

     *
     * @return The thread information.
     */
    public PDDocumentInformation getThreadInfo()
    {
        PDDocumentInformation retval = null;
        COSDictionary info = (COSDictionary)thread.getDictionaryObject( "I" );
        if( info != null )
        {
            retval = new PDDocumentInformation( info );
        }
       
        return retval;
    }
View Full Code Here

                {
                    System.err.println( "Error: Cannot add metadata to encrypted document." );
                    System.exit( 1 );
                }
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentInformation info = document.getDocumentInformation();
               
                //Right now, PDFBox does not have any XMP library, so we will
                //just construct the XML by hand.
                StringBuffer xmp= new StringBuffer();
                xmp.append(
                "<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n" +
                "<?adobe-xap-filters esc=\"CRLF\"?>\n" +
                "<x:xmpmeta>\n" +
                "    <rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>\n" +
                "        <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/' " +
                                         "pdf:Keywords='" + fixNull( info.getKeywords() ) + "' " +
                                         "pdf:Producer='" + fixNull( info.getProducer() ) + "'></rdf:Description>\n" +
                "        <rdf:Description rdf:about='' xmlns:xap='http://ns.adobe.com/xap/1.0/' " +
                                         "xap:ModifyDate='" + fixNull( info.getModificationDate() ) + "' " +
                                         "xap:CreateDate='" + fixNull( info.getCreationDate() ) + "' " +
                                         "xap:CreatorTool='" + fixNull( info.getCreator() ) + "' " +
                                         "xap:MetadataDate='" + fixNull( new GregorianCalendar() )+ "'>\n" +
                "        </rdf:Description>\n" +
                "        <rdf:Description rdf:about='' xmlns:dc='http://purl.org/dc/elements/1.1/' " +
                                         "dc:format='application/pdf'>\n" +
                "            <dc:title>\n" +
                "                <rdf:Alt>\n" +
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getTitle() ) +"</rdf:li>\n" +
                "                </rdf:Alt>\n" +
                "            </dc:title>\n" +
                "            <dc:creator>\n" +
                "                <rdf:Seq>\n" +
                "                    <rdf:li>PDFBox.org</rdf:li>\n" +
                "                </rdf:Seq>\n" +
                "            </dc:creator>\n" +
                "            <dc:description>\n" +
                "                <rdf:Alt>\n" +
                "                    <rdf:li xml:lang='x-default'>" + fixNull( info.getSubject() ) +"</rdf:li>\n" +
                "                </rdf:Alt>\n" +
                "            </dc:description>\n" +
                "        </rdf:Description>\n" +
                "    </rdf:RDF>\n" +
                "</x:xmpmeta>\n" );
View Full Code Here

     *
     * @throws IOException If there is an error getting the page count.
     */
    public void printMetadata( PDDocument document ) throws IOException
    {
        PDDocumentInformation info = document.getDocumentInformation();
        PDDocumentCatalog cat = document.getDocumentCatalog();
        PDMetadata metadata = cat.getMetadata();
        System.out.println( "Page Count=" + document.getNumberOfPages() );
        System.out.println( "Title=" + info.getTitle() );
        System.out.println( "Author=" + info.getAuthor() );
        System.out.println( "Subject=" + info.getSubject() );
        System.out.println( "Keywords=" + info.getKeywords() );
        System.out.println( "Creator=" + info.getCreator() );
        System.out.println( "Producer=" + info.getProducer() );
        System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
        System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
        System.out.println( "Trapped=" + info.getTrapped() );
        if( metadata != null )
        {
            System.out.println( "Metadata=" + metadata.getInputStreamAsString() );
        }
    }
View Full Code Here

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getNumberOfPages()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

TOP

Related Classes of org.pdfbox.pdmodel.PDDocumentInformation

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.