Package org.pdfbox.util

Examples of org.pdfbox.util.PDFTextStripper


   
    return super.parseProperty( key, values, metadata );
  }

  public PdfDocumentFactory() throws IOException {
    this.textStripper= new PDFTextStripper();
    this.wordReader = new FastBufferedReader();
  }
View Full Code Here


    this.wordReader = new FastBufferedReader();
  }
 
  public PdfDocumentFactory( final Properties properties ) throws IOException, ConfigurationException {
    super( properties );
    this.textStripper= new PDFTextStripper();
    this.wordReader = new FastBufferedReader();
  }
View Full Code Here

    this.wordReader = new FastBufferedReader();
  }

  public PdfDocumentFactory( final Reference2ObjectMap<Enum<?>,Object> defaultMetadata ) throws IOException {
    super( defaultMetadata );
    this.textStripper= new PDFTextStripper();
    this.wordReader = new FastBufferedReader();
  }
View Full Code Here

    this.wordReader = new FastBufferedReader();
  }

  public PdfDocumentFactory( final String[] property ) throws IOException, ConfigurationException {
    super( property );
    this.textStripper= new PDFTextStripper();
    this.wordReader = new FastBufferedReader();
  }
View Full Code Here

    return FieldType.TEXT;
  }

  /**
   * Serialization hook: reads the default serialized fields from the stream and
   * then assigns a newly constructed {@link PDFTextStripper} to
   * {@code textStripper}.
   *
   * @param s the stream this object is being read from.
   * @throws IOException declared by {@code defaultReadObject()} and by this method's signature.
   * @throws ClassNotFoundException declared by {@code defaultReadObject()}.
   */
  private void readObject( final ObjectInputStream s ) throws IOException, ClassNotFoundException {
    s.defaultReadObject();
    // The stripper is rebuilt here rather than taken from the stream
    // (presumably the field is transient -- confirm against the field declaration).
    textStripper = new PDFTextStripper();
  }
View Full Code Here

  return; }

  //*-- extract PDF document's textual content
  String docText = null;
  try
  { PDFTextStripper stripper = new PDFTextStripper();
    docText = stripper.getText(new PDDocument(cosDoc));
  }
  catch (OutOfMemoryError exc)
  { closeCOSDocument(cosDoc);
    logger.error("Ran out of memory for " + ifile + " or could be corrupt file " + exc.getMessage());
  }
View Full Code Here

    {
        URL url = new URL("http://localhost:8080/xwiki/bin/export/Main/WebHome?format=pdf");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        InputStream is = connection.getInputStream();
        PDDocument pdd = PDDocument.load(is);
        PDFTextStripper stripper = new PDFTextStripper();
        String text = stripper.getText(pdd);
        pdd.close();
        is.close();

        assertTrue("Invalid content", text.contains("Welcome to your wiki"));
    }
View Full Code Here

      document = PDDocument.load(bis);
      if (document.isEncrypted()) {
        throw new DocumentAccessException("PDF is encrypted. Can not read content file=" + leaf.getName());
      }     
      if (log.isDebug()) log.debug("readContent PDDocument loaded");
      PDFTextStripper stripper = new PDFTextStripper();
      return stripper.getText(document);
    } finally {
      if (document != null) {
        document.close();
      }
      if (bis != null) {
View Full Code Here

            //create a writer where to append the text content.
            StringWriter writer = new StringWriter();
            if( stripper == null )
            {
                stripper = new PDFTextStripper();
            }
            else
            {
                stripper.resetEngine();
            }
View Full Code Here

     * @throws IOException If there is an error creating the test.
     */
    public TestTextStripper( String name ) throws IOException
    {
        super( name );
        // Create the stripper once, at construction time.
        stripper = new PDFTextStripper();
        // Set the stripper's line separator to "\n"
        // (presumably so extracted text compares identically across platforms -- confirm).
        stripper.setLineSeparator("\n");
    }
View Full Code Here

TOP

Related Classes of org.pdfbox.util.PDFTextStripper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.