Examples of PDDocumentInformation


Examples of org.pdfbox.pdmodel.PDDocumentInformation

     *
     * @throws IOException If there is an error getting the page count.
     */
    public void printMetadata( PDDocument document ) throws IOException
    {
        PDDocumentInformation info = document.getDocumentInformation();
        PDDocumentCatalog cat = document.getDocumentCatalog();
        PDMetadata metadata = cat.getMetadata();
        System.out.println( "Page Count=" + document.getNumberOfPages() );
        System.out.println( "Title=" + info.getTitle() );
        System.out.println( "Author=" + info.getAuthor() );
        System.out.println( "Subject=" + info.getSubject() );
        System.out.println( "Keywords=" + info.getKeywords() );
        System.out.println( "Creator=" + info.getCreator() );
        System.out.println( "Producer=" + info.getProducer() );
        System.out.println( "Creation Date=" + formatDate( info.getCreationDate() ) );
        System.out.println( "Modification Date=" + formatDate( info.getModificationDate() ) );
        System.out.println( "Trapped=" + info.getTrapped() );
        if( metadata != null )
        {
            System.out.println( "Metadata=" + metadata.getInputStreamAsString() );
        }
    }
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getNumberOfPages()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      metadata.add(Metadata.PAGE_COUNT, String.valueOf(pdf.getPageCount()));
      metadata.add(Metadata.AUTHOR, info.getAuthor());
      metadata.add(Metadata.SUBJECT, info.getSubject());
      metadata.add(Metadata.KEYWORDS, info.getKeywords());
      metadata.add(Metadata.CREATOR, info.getCreator());
      metadata.add(Metadata.PUBLISHER, info.getProducer());
     
      //TODO: Figure out why we get a java.io.IOException: Error converting date:1-Jan-3 18:15PM
      //error here
     
      //metadata.put(DATE, dcDateFormatter.format(info.getCreationDate().getTime()));
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

                String title = "";
                String summary = "";

                //get the additional data
                try {
                    PDDocumentInformation pdfinfo = document.getDocumentInformation();

                    if (!Util.isEmpty(pdfinfo.getAuthor())) {
                        author = pdfinfo.getAuthor();
                    }

                    if (!Util.isEmpty(pdfinfo.getTitle())) {
                        title = pdfinfo.getTitle();
                    }

                    if (!Util.isEmpty(pdfinfo.getSubject())) {
                        summary = pdfinfo.getSubject();
                    }
                } catch (Exception eR) {
                    String message = MessageUtil.getMessage("extractor.pdf.metadatamissing",
                            new Object[] { info.getUri() });
                    logger.info(message);
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

                log.error("parse() failed", ioe);
                throw new ConverterException("PDFConverter::parse() failed", ioe);
            }

            // get the meta data
            PDDocumentInformation info = document.getDocumentInformation();
            documentTitle = info.getTitle();
            documentAuthor = info.getAuthor();
            documentKeywords = info.getKeywords();
            if (document != null) {
                documentText = output.toString();
            }
        }
        catch (IOException ioe) {
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      // pdf.getPageCount();
      // info.getAuthor()
      // info.getSubject()
      // info.getKeywords()
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      // pdf.getPageCount();
      // info.getAuthor()
      // info.getSubject()
      // info.getKeywords()
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

      // collect text
      PDFTextStripper stripper = new PDFTextStripper();
      text = stripper.getText(pdf);

      // collect title
      PDDocumentInformation info = pdf.getDocumentInformation();
      title = info.getTitle();
      // more useful info, currently not used. please keep them for future use.
      // pdf.getPageCount();
      // info.getAuthor()
      // info.getSubject()
      // info.getKeywords()
View Full Code Here

Examples of org.pdfbox.pdmodel.PDDocumentInformation

             *   Subject -> description.abstract
             *   Keywords -> subject.other
             *    date is java.util.Calendar
             */
            PDDocument pd = new PDDocument(cos);
            PDDocumentInformation docinfo = pd.getDocumentInformation();
            String title = docinfo.getTitle();

            // sanity check: item must have a title.
            if (title == null)
                throw new MetadataValidationException("This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary.");
            log.debug("PDF Info dict title=\""+title+"\"");
            item.addDC("title", null, "en", title);
            String value;
            Calendar date;
            if ((value = docinfo.getAuthor()) != null)
            {
                item.addDC("contributor", "author", null, value);
                log.debug("PDF Info dict author=\""+value+"\"");
            }
            if ((value = docinfo.getCreator()) != null)
                item.addDC("description", "provenance", "en",
                              "Application that created the original document: "+value);
            if ((value = docinfo.getProducer()) != null)
                item.addDC("description", "provenance", "en",
                              "Original document converted to PDF by: "+value);
            if ((value = docinfo.getSubject()) != null)
                item.addDC("description", "abstract", null, value);
            if ((value = docinfo.getKeywords()) != null)
                item.addDC("subject", "other", null, value);

            // Take either CreationDate or ModDate as "date.created",
            // Too bad there's no place to put "last modified" in the DC.
            Calendar calValue;
            if ((calValue = docinfo.getCreationDate()) == null)
                calValue = docinfo.getModificationDate();
            if (calValue != null)
                item.addDC("date", "created", null,
                             (new DCDate(calValue.getTime())).toString());
            item.update();
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.