Package org.apache.pdfbox.pdfparser

Examples of org.apache.pdfbox.pdfparser.PDFParser


  @Override
  public String getText(final InputStream source) {
    try {
      Assert.notNull(source, "source");

      PDFParser parser = new PDFParser(source);
      parser.parse();

      PDDocument document = parser.getPDDocument();
      PDFTextStripper stripper = new PDFTextStripper();

      String text = stripper.getText(document);

      document.close();
View Full Code Here


    if (inputStream == null) {
      throw new DocumentException("Invalid PDF source type");
    }

    PDFParser reader = new PDFParser(inputStream);
    return reader;
  }
View Full Code Here

      if (end == null) {
        end = Integer.valueOf(0);
      }

      PDFParser parser = PDFBox.read(source);
      parser.parse();

      PDDocument document = parser.getPDDocument();

      Splitter splitter = new Splitter();
      splitter.setSplitAtPage(1);

      List<PDDocument> list = splitter.split(document);
View Full Code Here

      if (source == null) {
        throw new DocumentException("Parameter 'source' + must be set");
      }

      PDFParser parser = PDFBox.read(source);
      parser.parse();

      PDDocument document = parser.getPDDocument();
      PDFTextStripper stripper = new PDFTextStripper();

      String text = stripper.getText(document);

      document.close();
View Full Code Here

      if (size == null) {
        throw new DocumentException("Parameter 'size' must be set");
      }

      PDFParser parser = PDFBox.read(source);
      parser.parse();

      PDDocument document = parser.getPDDocument();
      Splitter splitter = new Splitter();
      splitter.setSplitAtPage(size.intValue());

      List<PDDocument> list = splitter.split(document);
      InputStream[] array = new InputStream[list.size()];
View Full Code Here

     logger.debug("extracting pdf file");
   File file = null;
     PDDocument document = null;
     Writer output = null;
     try {
       PDFParser parser = new PDFParser(is);
       parser.parse();
       document = parser.getPDDocument();
       if (document.isEncrypted()) {
           DocumentEncryption decryptor = new DocumentEncryption(document);
           if (logger.isDebugEnabled()) {
               logger.debug("pdf document appears to be encrypted (will attempt decryption)");
          
View Full Code Here

     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input, RandomAccess scratchFile ) throws IOException
    {
        PDFParser parser = new PDFParser( new BufferedInputStream( input ), scratchFile );
        parser.parse();
        return parser.getPDDocument();
    }
View Full Code Here

     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load(InputStream input, RandomAccess scratchFile, boolean force) throws IOException
    {
        PDFParser parser = new PDFParser( new BufferedInputStream( input ), scratchFile, force);
        parser.parse();
        return parser.getPDDocument();
    }
View Full Code Here

    * @param is Document content
    * @return
    * @throws IOException
    */
    public static COSDocument parseDocument(final InputStream is) throws IOException {
       PDFParser parser = null;
       parser = new PDFParser(is);
       parser.parse();
       return parser.getDocument();
   }
View Full Code Here

     *
     * @throws IOException If there is an error reading from the stream.
     */
    public static PDDocument load( InputStream input, RandomAccess scratchFile ) throws IOException
    {
        PDFParser parser = new PDFParser( new BufferedInputStream( input ) , scratchFile );
        parser.parse();
        return parser.getPDDocument();
    }
View Full Code Here

TOP

Related Classes of org.apache.pdfbox.pdfparser.PDFParser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.