package com.google.code.ftspc.lector.parsers.POI;
import com.google.code.ftspc.lector.indexers.AddDataToIndex;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
* Class for the DOCX parser
* @author Arthur Khusnutdinov
*/
public class XDocParser extends Thread implements Parser {
private String pathToFile;
private String fileName;
@Override
public void run() {
try {
InputStream isr = new FileInputStream(pathToFile);
XWPFDocument document = new XWPFDocument(isr);
XWPFWordExtractor word = new XWPFWordExtractor(document);
String fileContent = word.getText();
AddDataToIndex.class.newInstance().doAddData(fileContent,
pathToFile, fileName);
Vars.current_run_indexes--;
} catch (Exception ex) {
Vars.current_run_indexes--;
Vars.logger.fatal("Error: ", ex);
}
}
@Override
public void start_th(String pathToFile, String fileName) {
this.pathToFile = pathToFile;
this.fileName = fileName;
this.start();
}
}