Examples of org.pdfbox.util.PDFTextStripper

org.pdfbox.util.PDFTextStripper

This class takes a PDF document and strips out all of its text, ignoring the formatting. @author Ben Litchfield @version $Revision: 1.69 $

        // Only attempt text extraction when the underlying COS document is not encrypted.
        if( !cosDoc.isEncrypted())
        {
            // extract PDF document's textual content
            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                // Wrap the COS document in a PDDocument and obtain its text via getText.
                // NOTE(review): the PDDocument is created inline and not bound to a variable,
                // so this snippet keeps no reference through which to close it; the cleanup
                // code in the finally block is cut off in this excerpt — confirm cosDoc is
                // released there.
                docText = stripper.getText(new PDDocument(cosDoc));
            }
            finally
            {
                // The excerpt ends right after this nested try opens; its body is not shown.
                try
                {

                  // In-memory sink that receives the extracted text as encoded bytes.
                  ByteArrayOutputStream baos = new ByteArrayOutputStream();
                  // Encrypted documents are skipped: no extraction is attempted for them here.
                  if(!document.isEncrypted())
                  {
                    // Writer that encodes characters as UTF-8 into baos.
                    output = new OutputStreamWriter(baos, "UTF-8");
  
                      PDFTextStripper stripper = new PDFTextStripper();
  
                      // Optional stripper settings, left disabled in this example.
                      //stripper.setSortByPosition( sort );
                      //stripper.setStartPage( startPage );
                      //stripper.setEndPage( endPage );
                      // Write the document's text to the writer rather than returning a String.
                      stripper.writeText( document, output );
                      // Decode the captured bytes with the same charset the writer was given.
                      // NOTE(review): no flush or close of output is visible before baos is
                      // read — confirm writeText flushes the writer, otherwise text still
                      // buffered in the OutputStreamWriter may be missing from the result.
                      text = baos.toString("UTF-8");
                      // Guard so the log message is only concatenated when info logging is on.
                      if(logger.isInfoEnabled())
                logger.info("PDF Document has " + text.length() + " chars\n\n" + text);
                  }
              }

/**
 * {@code DocumentParser} implementation for PDF files that obtains a document's
 * text through PDFBox's {@code PDFTextStripper}.
 *
 * NOTE: this excerpt is truncated; the end of {@code getStringContent} and the
 * rest of the class are not visible.
 */
public class PdfDocumentParser implements DocumentParser {
    /**
     * Loads the given PDF file, extracts its text and removes NUL characters
     * (char code 0) from it. What is returned and how the document is cleaned up
     * are not visible in this excerpt.
     *
     * @param f the PDF file to load
     * @throws ParsingException declared by the signature; the conditions under
     *         which it is thrown are not visible in this excerpt
     */
    public String getStringContent(File f) throws ParsingException {
        // Declared before the try, presumably so cleanup code further down can reach it
        // (that code is not visible here).
        PDDocument doc = null;
        try {
            doc = PDDocument.load(f);
            PDFTextStripper stripper = new PDFTextStripper();
            String text = stripper.getText(doc);
            // Copy into a mutable buffer so characters can be removed in place.
            StringBuffer buf = new StringBuffer(text);
            // Drop characters whose code is 0 (NUL) from the extracted text.
            // NOTE(review): after deleteCharAt(i) the loop still advances i, so the
            // character that shifts into index i is never examined; with consecutive
            // NULs some would appear to survive. Confirm against the unseen remainder
            // of the loop body and consider not advancing i after a deletion.
            for (int i = 0; i < buf.length(); i++) {
                if (((int) buf.charAt(i)) == 0) {
                    buf.deleteCharAt(i);
                }

Related Classes of org.pdfbox.util.PDFTextStripper

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.