Package com.google.code.ftspc.lector.parsers.PDF

Source Code of com.google.code.ftspc.lector.parsers.PDF.PDFParserLocal

package com.google.code.ftspc.lector.parsers.PDF;

import com.google.code.ftspc.lector.indexers.AddDataToIndex;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/**
*
* @author Arthur Khusnutdinov
*/
public class PDFParserLocal extends Thread implements Parser {

    private String pathToFile;
    private String fileName;

    @Override
    public void run() {
        try {
            String fileContent = "";
            File filePDF = new File(pathToFile);
            PDDocument pdDoc = PDDocument.load(new FileInputStream(filePDF));
            PDFTextStripper PDFTextStripper = null;

            Integer numberOfPages = pdDoc.getNumberOfPages();

            for (int page = 0; page < numberOfPages; page++) {
                PDFTextStripper = new PDFTextStripper("UTF-8");
                PDFTextStripper.setStartPage(page);
                PDFTextStripper.setEndPage(page);
                String text = PDFTextStripper.getText(pdDoc);
                fileContent += " " + text.replaceAll("\\s+", " ").trim();
                text = null;
            }

            pdDoc.close();
            pdDoc = null;
            filePDF = null;
            PDFTextStripper = null;

            if (fileContent.length() < 1) {
                ////////////////////
            }
            AddDataToIndex AddDataToIndex = new AddDataToIndex(null);
            AddDataToIndex.doAddData(fileContent, pathToFile, fileName);

            fileContent = null;
            Vars.current_run_indexes--;

        } catch (Exception ex) {
            Vars.current_run_indexes--;
            Vars.logger.fatal("Error: ", ex);
        }
    }

    @Override
    public void start_th(String pathToFile, String fileName) {
        this.pathToFile = pathToFile;
        this.fileName = fileName;
        this.start();
    }
}
TOP

Related Classes of com.google.code.ftspc.lector.parsers.PDF.PDFParserLocal

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.