package com.google.code.ftspc.lector.parsers.RTF;
import com.google.code.ftspc.lector.indexers.AddDataToIndex;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import com.google.code.ftspc.lector.parsers.Parser;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.swing.text.Document;
import javax.swing.text.rtf.RTFEditorKit;
/**
* Class for the RTF parser
* @author Arthur Khusnutdinov
*/
public class RTFParser extends Thread implements Parser {

    /** Maximum number of leading characters handed to the language categorizer. */
    private static final int LANGUAGE_SAMPLE_LENGTH = 1000;

    /** Path of the RTF file to parse; set by {@link #start_th(String, String)}. */
    private String pathToFile;

    /** File name forwarded to the indexer; set by {@link #start_th(String, String)}. */
    private String fileName;

    /**
     * Parses the RTF file at {@code pathToFile}, categorizes the language of its text and
     * hands the text to {@link AddDataToIndex}.
     *
     * <p>Any exception raised while reading, categorizing or indexing is logged through
     * {@code Vars.logger} and not rethrown. {@code Vars.current_run_indexes} is decremented
     * exactly once per invocation, whether the run succeeds or fails.
     */
    @Override
    public void run() {
        InputStream input = null;
        try {
            input = new FileInputStream(pathToFile);

            // Extract the plain text of the RTF document.
            RTFEditorKit editorKit = new RTFEditorKit();
            Document document = editorKit.createDefaultDocument();
            editorKit.read(input, document, 0);
            String fileContent = document.getText(0, document.getLength()).trim();

            // Re-read the extracted characters as cp1251: each character is encoded as an
            // ISO-8859-1 byte and the resulting bytes are decoded as cp1251.
            String recodedContent =
                    new String(fileContent.getBytes("ISO-8859-1"), "cp1251");

            // Only the leading part of the text is used for language categorization.
            int sampleLength = Math.min(recodedContent.length(), LANGUAGE_SAMPLE_LENGTH);
            String lang = Vars.TextCategorizerLocal.categorize(
                    recodedContent.substring(0, sampleLength));

            if (lang.equals("russian")) {
                // The recoded text replaces the original one only for Russian documents.
                // NOTE(review): the bytes are encoded as UTF-8 but decoded with the platform
                // default charset, so the result depends on the JVM's file.encoding. Kept
                // as-is because downstream indexing may rely on it -- confirm and make the
                // charset explicit.
                fileContent = new String(recodedContent.getBytes("UTF-8"));
            }

            AddDataToIndex indexer = new AddDataToIndex(lang);
            indexer.doAddData(fileContent, pathToFile, fileName);
        } catch (Exception ex) {
            Vars.logger.fatal(ex.getMessage(), ex);
        } finally {
            // Decrement in finally so the counter is released on every exit path,
            // including Errors that the catch clause above does not handle.
            // NOTE(review): this decrement is not synchronized here; verify how
            // Vars.current_run_indexes is shared between parser threads.
            Vars.current_run_indexes--;

            // The stream is still null when opening the file failed.
            if (input != null) {
                try {
                    input.close();
                } catch (IOException ex) {
                    Vars.logger.fatal(ex.getMessage(), ex);
                }
            }
        }
    }

    /**
     * Stores the file location and starts this thread, which then executes {@link #run()}.
     *
     * @param pathToFile path of the RTF file to parse
     * @param fileName file name passed on to the indexer together with the content
     * @throws IllegalThreadStateException if this thread has already been started
     */
    @Override
    public void start_th(String pathToFile, String fileName) {
        this.pathToFile = pathToFile;
        this.fileName = fileName;
        this.start();
    }
}