Package org.pdfbox.pdmodel

Examples of org.pdfbox.pdmodel.PDDocument


   * @param xmpString
   * @throws Exception
   */
  public void writeManually(File tempFile, String xmpString) throws Exception {

    PDDocument document = null;

    try {
      document = PDDocument.load(tempFile.getAbsoluteFile());
      if (document.isEncrypted()) {
        System.err
            .println("Error: Cannot add metadata to encrypted document.");
        System.exit(1);
      }
      PDDocumentCatalog catalog = document.getDocumentCatalog();

      // Convert to UTF8 and make available for metadata.
      ByteArrayOutputStream bs = new ByteArrayOutputStream();
      OutputStreamWriter os = new OutputStreamWriter(bs, "UTF8");
      os.write(xmpString);
      os.close();
      ByteArrayInputStream in = new ByteArrayInputStream(bs.toByteArray());

      PDMetadata metadataStream = new PDMetadata(document, in, false);
      catalog.setMetadata(metadataStream);

      document.save(tempFile.getAbsolutePath());

    } finally {
      if (document != null)
        document.close();
    }
  }
View Full Code Here


   */
  public void setUp() throws IOException, COSVisitorException {

    pdfFile = File.createTempFile("JabRef", ".pdf");

    PDDocument pdf = null;
    try {
      pdf = new PDDocument();
      pdf.addPage(new PDPage()); // Need page to open in Acrobat
      pdf.save(pdfFile.getAbsolutePath());
    } finally {
      if (pdf != null)
        pdf.close();
    }

    // Don't forget to initialize the preferences
    if (Globals.prefs == null) {
      Globals.prefs = JabRefPreferences.getInstance();
View Full Code Here

    assertEquals(BibtexEntryType.ARTICLE, e.getType());
  }

  public static String readManually(File tempFile) throws IOException {

    PDDocument document = null;

    try {
      document = PDDocument.load(tempFile.getAbsoluteFile());
      if (document.isEncrypted()) {
        System.err
            .println("Error: Cannot add metadata to encrypted document.");
        System.exit(1);
      }
      PDDocumentCatalog catalog = document.getDocumentCatalog();
      PDMetadata meta = catalog.getMetadata();

      if (meta == null) {
        return null;
      } else {
        // PDMetadata.getInputStreamAsString() does not work

        // Convert to UTF8 and make available for metadata.
        InputStreamReader is = new InputStreamReader(meta
            .createInputStream(), "UTF8");
        return slurp(is).trim(); // Trim to kill padding end-newline.
      }
    } finally {
      if (document != null)
        document.close();
    }
  }
View Full Code Here

      assertEquals(t1BibtexEntry(), e);

      // This is what we really want to test: Is the rest of the
      // descriptions still there?

      PDDocument document = null;
      try {
        document = PDDocument.load(pdfFile.getAbsoluteFile());
        if (document.isEncrypted()) {
          throw new IOException(
              "Error: Cannot read metadata from encrypted document.");
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        XMPMetadata meta;
        if (metaRaw != null) {
          meta = new XMPMetadata(XMLUtil.parse(metaRaw
              .createInputStream()));
        } else {
          meta = new XMPMetadata();
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
            XMPSchemaBibtex.class);

        List<XMPSchema> schemas = meta.getSchemas();

        assertEquals(4, schemas.size());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        assertEquals(1, schemas.size());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schemas.get(0);
        assertEquals("application/pdf", dc.getFormat());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaBasic.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaBasic bs = (XMPSchemaBasic) schemas.get(0);
        assertEquals("Acrobat PDFMaker 7.0.7", bs.getCreatorTool());

        Calendar c = Calendar.getInstance();
        c.clear();
        c.set(Calendar.YEAR, 2006);
        c.set(Calendar.MONTH, Calendar.AUGUST);
        c.set(Calendar.DATE, 7);
        c.set(Calendar.HOUR, 14);
        c.set(Calendar.MINUTE, 44);
        c.set(Calendar.SECOND, 24);
        c.setTimeZone(TimeZone.getTimeZone("GMT+2"));

        Calendar other = bs.getCreateDate();

        assertEquals(c.get(Calendar.YEAR), other.get(Calendar.YEAR));
        assertEquals(c.get(Calendar.MONTH), other.get(Calendar.MONTH));
        assertEquals(c.get(Calendar.DATE), other.get(Calendar.DATE));
        assertEquals(c.get(Calendar.HOUR), other.get(Calendar.HOUR));
        assertEquals(c.get(Calendar.MINUTE), other.get(Calendar.MINUTE));
        assertEquals(c.get(Calendar.SECOND), other.get(Calendar.SECOND));
        assertTrue(c.getTimeZone().hasSameRules(other.getTimeZone()));

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaMediaManagement.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaMediaManagement mm = (XMPSchemaMediaManagement) schemas
            .get(0);
        assertEquals("17", mm.getSequenceList("xapMM:VersionID").get(0));

      } finally {
        if (document != null) {
          document.close();
        }
      }
    }

    { // Now alter the Bibtex entry, write it and do all the checks again
      BibtexEntry toSet = t1BibtexEntry();
      toSet.setField("author", "Pokemon!");

      XMPUtil.writeXMP(pdfFile, toSet, null);

      List l = XMPUtil.readXMP(pdfFile.getAbsoluteFile());
      assertEquals(1, l.size());
      BibtexEntry e = (BibtexEntry) l.get(0);

      assertEquals(toSet, e);

      // This is what we really want to test: Is the rest of the
      // descriptions still there?

      PDDocument document = null;
      try {
        document = PDDocument.load(pdfFile.getAbsoluteFile());
        if (document.isEncrypted()) {
          throw new IOException(
              "Error: Cannot read metadata from encrypted document.");
        }
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        XMPMetadata meta;
        if (metaRaw != null) {
          meta = new XMPMetadata(XMLUtil.parse(metaRaw
              .createInputStream()));
        } else {
          meta = new XMPMetadata();
        }
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
            XMPSchemaBibtex.class);

        List schemas = meta.getSchemas();

        assertEquals(4, schemas.size());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
        assertEquals(1, schemas.size());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaDublinCore dc = (XMPSchemaDublinCore) schemas.get(0);
        assertEquals("application/pdf", dc.getFormat());

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaBasic.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaBasic bs = (XMPSchemaBasic) schemas.get(0);
        assertEquals("Acrobat PDFMaker 7.0.7", bs.getCreatorTool());

        Calendar c = Calendar.getInstance();
        c.clear();
        c.set(Calendar.YEAR, 2006);
        c.set(Calendar.MONTH, 7);
        c.set(Calendar.DATE, 7);
        c.set(Calendar.HOUR, 14);
        c.set(Calendar.MINUTE, 44);
        c.set(Calendar.SECOND, 24);
        c.setTimeZone(TimeZone.getTimeZone("GMT+2"));

        Calendar other = bs.getCreateDate();

        assertEquals(c.get(Calendar.YEAR), other.get(Calendar.YEAR));
        assertEquals(c.get(Calendar.MONTH), other.get(Calendar.MONTH));
        assertEquals(c.get(Calendar.DATE), other.get(Calendar.DATE));
        assertEquals(c.get(Calendar.HOUR), other.get(Calendar.HOUR));
        assertEquals(c.get(Calendar.MINUTE), other.get(Calendar.MINUTE));
        assertEquals(c.get(Calendar.SECOND), other.get(Calendar.SECOND));
        assertTrue(c.getTimeZone().hasSameRules(other.getTimeZone()));

        schemas = meta
            .getSchemasByNamespaceURI(XMPSchemaMediaManagement.NAMESPACE);
        assertEquals(1, schemas.size());
        XMPSchemaMediaManagement mm = (XMPSchemaMediaManagement) schemas
            .get(0);
        assertEquals("17", mm.getSequenceList("xapMM:VersionID").get(0));

      } finally {
        if (document != null) {
          document.close();
        }
      }
    }
  }
View Full Code Here

    List<BibtexEntry> l = new LinkedList<BibtexEntry>();
    l.add(t3BibtexEntry());

    XMPUtil.writeXMP(pdfFile, l, null, true);

    PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile());
    try {
      if (document.isEncrypted()) {
        System.err
            .println("Error: Cannot add metadata to encrypted document.");
        System.exit(1);
      }

      assertEquals("Kelly Clarkson and Ozzy Osbourne", document
          .getDocumentInformation().getAuthor());
      assertEquals("Hypersonic ultra-sound", document
          .getDocumentInformation().getTitle());
      assertEquals("Huey Duck and Dewey Duck and Louie Duck", document
          .getDocumentInformation().getCustomMetadataValue(
              "bibtex/editor"));
      assertEquals("Clarkson06", document.getDocumentInformation()
          .getCustomMetadataValue("bibtex/bibtexkey"));
      assertEquals("peanut,butter,jelly", document
          .getDocumentInformation().getKeywords());

      assertEquals(t3BibtexEntry(), XMPUtil
          .getBibtexEntryFromDocumentInformation(document
              .getDocumentInformation()));

      PDDocumentCatalog catalog = document.getDocumentCatalog();
      PDMetadata metaRaw = catalog.getMetadata();

      if (metaRaw == null) {
        fail();
        return;
      }

      XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw
          .createInputStream()));
      meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
          XMPSchemaBibtex.class);

      // Check Dublin Core
      List<XMPSchema> schemas = meta
          .getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");
      assertEquals(1, schemas.size());

      XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas
          .iterator().next();
      assertNotNull(dcSchema);

      assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
      assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));
      assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
      assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));
      assertEquals("Huey Duck", dcSchema.getContributors().get(0));
      assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
      assertEquals("Louie Duck", dcSchema.getContributors().get(2));
      assertEquals("Inproceedings", dcSchema.getTypes().get(0));
      assertEquals("bibtex/bibtexkey/Clarkson06", dcSchema
          .getRelationships().get(0));
      assertEquals("peanut", dcSchema.getSubjects().get(0));
      assertEquals("butter", dcSchema.getSubjects().get(1));
      assertEquals("jelly", dcSchema.getSubjects().get(2));

      /**
       * Bibtexkey, Journal, pdf, booktitle
       */
      assertEquals(4, dcSchema.getRelationships().size());

      assertEquals(t3BibtexEntry(), XMPUtil
          .getBibtexEntryFromDublinCore(dcSchema));

    } finally {
      document.close();
    }

  }
View Full Code Here

    List<BibtexEntry> l = new LinkedList<BibtexEntry>();
    l.add(t3BibtexEntry());

    XMPUtil.writeXMP(pdfFile, l, null, true);

    PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile());
    try {
      if (document.isEncrypted()) {
        System.err
            .println("Error: Cannot add metadata to encrypted document.");
        System.exit(1);
      }

      assertEquals("Kelly Clarkson and Ozzy Osbourne", document
          .getDocumentInformation().getAuthor());
      assertEquals("Hypersonic ultra-sound", document
          .getDocumentInformation().getTitle());
      assertEquals("Huey Duck and Dewey Duck and Louie Duck", document
          .getDocumentInformation().getCustomMetadataValue(
              "bibtex/editor"));
      assertEquals("Clarkson06", document.getDocumentInformation()
          .getCustomMetadataValue("bibtex/bibtexkey"));
      assertEquals("peanut,butter,jelly", document
          .getDocumentInformation().getKeywords());

      assertEquals(t3BibtexEntry(), XMPUtil
          .getBibtexEntryFromDocumentInformation(document
              .getDocumentInformation()));

      PDDocumentCatalog catalog = document.getDocumentCatalog();
      PDMetadata metaRaw = catalog.getMetadata();

      if (metaRaw == null) {
        fail();
      }

      XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw
          .createInputStream()));
      meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
          XMPSchemaBibtex.class);

      // Check Dublin Core
      List<XMPSchema> schemas = meta
          .getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");

      assertEquals(1, schemas.size());

      XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas
          .iterator().next();
      assertNotNull(dcSchema);

      assertEquals("Hypersonic ultra-sound", dcSchema.getTitle());
      assertEquals("1982-07", dcSchema.getSequenceList("dc:date").get(0));
      assertEquals("Kelly Clarkson", dcSchema.getCreators().get(0));
      assertEquals("Ozzy Osbourne", dcSchema.getCreators().get(1));
      assertEquals("Huey Duck", dcSchema.getContributors().get(0));
      assertEquals("Dewey Duck", dcSchema.getContributors().get(1));
      assertEquals("Louie Duck", dcSchema.getContributors().get(2));
      assertEquals("Inproceedings", dcSchema.getTypes().get(0));
      assertEquals("bibtex/bibtexkey/Clarkson06", dcSchema
          .getRelationships().get(0));
      assertEquals("peanut", dcSchema.getSubjects().get(0));
      assertEquals("butter", dcSchema.getSubjects().get(1));
      assertEquals("jelly", dcSchema.getSubjects().get(2));

      /**
       * Bibtexkey, Journal, pdf, booktitle
       */
      assertEquals(4, dcSchema.getRelationships().size());

      assertEquals(t3BibtexEntry(), XMPUtil
          .getBibtexEntryFromDublinCore(dcSchema));

    } finally {
      document.close();
    }

  }
View Full Code Here

      assertEquals(originalAuthors, AuthorList.getAuthorList(b.getField(
          "author").toString()));

      // Next check from Document Information
      PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile());
      try {

        assertEquals(originalAuthors, AuthorList.getAuthorList(document
            .getDocumentInformation().getAuthor()));

        b = XMPUtil.getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));

        // Now check from Dublin Core
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDMetadata metaRaw = catalog.getMetadata();

        if (metaRaw == null) {
          fail();
        }

        XMPMetadata meta = new XMPMetadata(XMLUtil.parse(metaRaw
            .createInputStream()));
        meta.addXMLNSMapping(XMPSchemaBibtex.NAMESPACE,
            XMPSchemaBibtex.class);

        List<XMPSchema> schemas = meta
            .getSchemasByNamespaceURI("http://purl.org/dc/elements/1.1/");

        assertEquals(1, schemas.size());

        XMPSchemaDublinCore dcSchema = (XMPSchemaDublinCore) schemas
            .iterator().next();
        assertNotNull(dcSchema);

        assertEquals("David Patterson", dcSchema.getCreators().get(0));
        assertEquals("Arvind", dcSchema.getCreators().get(1));
        assertEquals("Krste Asanov\\'\\i{}c", dcSchema.getCreators()
            .get(2));

        b = XMPUtil.getBibtexEntryFromDublinCore(dcSchema);
        assertEquals(originalAuthors, AuthorList.getAuthorList(b
            .getField("author").toString()));
      } finally {
        document.close();
      }

    } finally {
      pdfFile.delete();
    }
View Full Code Here

  //*-- extract PDF document's textual content
  String docText = null;
  try
  { PDFTextStripper stripper = new PDFTextStripper();
    docText = stripper.getText(new PDDocument(cosDoc));
  }
  catch (OutOfMemoryError exc)
  { closeCOSDocument(cosDoc);
    logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage());
  }
  catch (Exception e)
  { closeCOSDocument(cosDoc);
    logger.error("Cannot get text from PDF document " + ifile + " " + e.getMessage());
    return;
  }
  //*-- Extract the entire text and save in the contents
  if (docText != null)
  { docText = StringTools.filterChars(docText); doc.setContents(new StringBuffer(docText) ); }

  //*-- Extract PDF document's meta-data
  PDDocument pdDoc = null;
  try
  {
   logger.info("Extracting metadata from PDF file " + ifile);
   pdDoc = new PDDocument(cosDoc);
   PDDocumentInformation docInfo = pdDoc.getDocumentInformation();
   String author   = StringTools.filterChars(docInfo.getAuthor());
   String title    = StringTools.filterChars(docInfo.getTitle());
   String keywords = StringTools.filterChars(docInfo.getKeywords());
   String summary  = StringTools.filterChars(docInfo.getSubject());
   if ((author != null) && (!author.equals("")))     { doc.setAuthor(author); }
View Full Code Here

  public static List<BibtexEntry> readXMP(InputStream inputStream)
      throws IOException {

    List<BibtexEntry> result = new LinkedList<BibtexEntry>();

    PDDocument document = null;

    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }

      XMPMetadata meta = getXMPMetadata(document);

      // If we did not find any XMP metadata, search for non XMP metadata
      if (meta != null) {

              List<XMPSchema> schemas = meta
                  .getSchemasByNamespaceURI(XMPSchemaBibtex.NAMESPACE);
       
              Iterator<XMPSchema> it = schemas.iterator();
              while (it.hasNext()) {
                XMPSchemaBibtex bib = (XMPSchemaBibtex) it.next();
       
                result.add(bib.getBibtexEntry());
              }
       
              // If we did not find anything have a look if a Dublin Core exists
              if (result.size() == 0) {
                schemas = meta
                    .getSchemasByNamespaceURI(XMPSchemaDublinCore.NAMESPACE);
                it = schemas.iterator();
                while (it.hasNext()) {
                  XMPSchemaDublinCore dc = (XMPSchemaDublinCore) it.next();
       
                  BibtexEntry entry = getBibtexEntryFromDublinCore(dc);
       
                  if (entry != null)
                    result.add(entry);
                }
              }
      }
      if (result.size() == 0) {
        BibtexEntry entry = getBibtexEntryFromDocumentInformation(document
            .getDocumentInformation());

        if (entry != null)
          result.add(entry);
      }
    } finally {
      if (document != null)
        document.close();
    }
   
    // return null, if no metadata was found
    if (result.size()==0) return null;
    return result;
View Full Code Here

   *         found.
   * @throws IOException
   */
  public static XMPMetadata readRawXMP(InputStream inputStream)
      throws IOException {
    PDDocument document = null;

    try {
      document = PDDocument.load(inputStream);
      if (document.isEncrypted()) {
        throw new EncryptionNotSupportedException(
            "Error: Cannot read metadata from encrypted document.");
      }

      return getXMPMetadata(document);

    } finally {
      if (document != null)
        document.close();
    }
  }
View Full Code Here

TOP

Related Classes of org.pdfbox.pdmodel.PDDocument

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.