Package org.pdfbox.util

Examples of org.pdfbox.util.PDFTextStripper


        if( !cosDoc.isEncrypted())
        {
            // extract PDF document's textual content
            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                docText = stripper.getText(new PDDocument(cosDoc));
            }
            finally
            {
                try
                {
View Full Code Here


                  ByteArrayOutputStream baos = new ByteArrayOutputStream();
                  if(!document.isEncrypted())
                  {
                    output = new OutputStreamWriter(baos, "UTF-8");
 
                      PDFTextStripper stripper = new PDFTextStripper();
 
                      //stripper.setSortByPosition( sort );
                      //stripper.setStartPage( startPage );
                      //stripper.setEndPage( endPage );
                      stripper.writeText( document, output );
                      text = baos.toString("UTF-8");
                      if(logger.isInfoEnabled())
                logger.info("PDF Document has " + text.length() + " chars\n\n" + text);
                  }
              }
View Full Code Here

public class PdfDocumentParser implements DocumentParser {
    public String getStringContent(File f) throws ParsingException {
        PDDocument doc = null;
        try {
            doc = PDDocument.load(f);
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(doc);
            StringBuffer buf = new StringBuffer(text);
            for (int i = 0; i < buf.length(); i++) {
                if (((int) buf.charAt(i)) == 0) {
                    buf.deleteCharAt(i);
                }
View Full Code Here

TOP

Related Classes of org.pdfbox.util.PDFTextStripper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.