/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.google.code.ftspc.lector.parsers.CHM;
import com.google.code.ftspc.lector.indexers.CommonFunctions;
import com.google.code.ftspc.lector.ini_and_vars.Vars;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import net.htmlparser.jericho.Source;
/**
 * Helper functions used by the CHM parser: recursive directory removal and
 * plain-text extraction from the HTML files found in a directory tree.
 * <p>
 * This class is currently for testing only.
 *
 * @author Arthur Khusnutdinov
 */
class CHMParserFunctions extends CommonFunctions {

    /** Size, in chars, of the buffer used while reading an HTML file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /**
     * Accepts regular files whose name ends with ".htm" or ".html".
     * Directories are rejected even when their name matches, because they
     * cannot be opened as files; they are still visited by the directory pass
     * of {@link #processUnpackedObjects(File)}.
     */
    private static final FileFilter HTML_FILE_FILTER = new FileFilter() {
        @Override
        public boolean accept(File file) {
            String name = file.getName();
            return file.isFile()
                    && (name.endsWith(".htm") || name.endsWith(".html"));
        }
    };

    /**
     * Recursively deletes the given file or directory.
     * Failures to delete individual children are not reported separately; they
     * surface only through the final result, since a non-empty directory
     * cannot be deleted.
     *
     * @param path file or directory to remove
     * @return the result of the final {@code path.delete()} call
     */
    public boolean deleteDirectory(File path) {
        if (path.exists()) {
            // listFiles() returns null for a plain file or on an I/O error.
            File[] children = path.listFiles();
            if (children != null) {
                for (File child : children) {
                    if (child.isDirectory()) {
                        deleteDirectory(child);
                    } else {
                        child.delete();
                    }
                }
            }
        }
        return path.delete();
    }

    /**
     * Collects the text of every ".htm"/".html" file under {@code path},
     * descending into sub-directories. The HTML files of a directory are
     * processed before its sub-directories, and every contribution is
     * prefixed with a single space.
     * Files for which {@link #getTextFromHTML(File)} yields {@code null}
     * (empty or unreadable files) are skipped.
     *
     * @param path directory to scan
     * @return the concatenated text; {@code " "} when {@code path} does not
     *         exist; an empty string when nothing was extracted
     */
    public String processUnpackedObjects(File path) {
        if (!path.exists()) {
            return " ";
        }
        StringBuilder content = new StringBuilder();

        // First pass: HTML files directly inside this directory.
        File[] htmlFiles = path.listFiles(HTML_FILE_FILTER);
        if (htmlFiles != null) {
            for (File htmlFile : htmlFiles) {
                String text = getTextFromHTML(htmlFile);
                // null means "nothing extracted"; do not append the word "null".
                if (text != null) {
                    content.append(' ').append(text);
                }
            }
        }

        // Second pass: recurse into sub-directories.
        File[] entries = path.listFiles();
        if (entries != null) {
            for (File entry : entries) {
                if (entry.isDirectory()) {
                    content.append(' ').append(processUnpackedObjects(entry));
                }
            }
        }
        return content.toString();
    }

    /**
     * Reads an HTML file and returns its text as produced by the Jericho
     * text extractor. The file is decoded with the charset name returned by
     * the inherited {@code detectEncoding} helper.
     *
     * @param fileInDir HTML file to read
     * @return the extracted text, or {@code null} when the file is empty or
     *         any error occurs (the error is logged through
     *         {@code Vars.logger})
     */
    String getTextFromHTML(File fileInDir) {
        try {
            String fileEnc = this.detectEncoding(fileInDir.getAbsolutePath());
            if (fileInDir.length() == 0) {
                return null;
            }

            String rawContent;
            FileInputStream stream = new FileInputStream(fileInDir);
            try {
                InputStreamReader reader = new InputStreamReader(stream, fileEnc);
                StringBuilder raw = new StringBuilder();
                char[] buffer = new char[READ_BUFFER_SIZE];
                int read;
                // A single read() may return fewer chars than requested, so
                // keep reading until end of stream.
                while ((read = reader.read(buffer)) != -1) {
                    raw.append(buffer, 0, read);
                }
                rawContent = raw.toString();
            } finally {
                // Closing the underlying stream releases the file handle even
                // when decoding or reading fails.
                stream.close();
            }

            Source source = new Source(rawContent);
            source.setLogger(null);
            String text = source.getTextExtractor().toString();

            if (!fileEnc.equals("UTF-8")) {
                // NOTE(review): this UTF-8 round trip looks like a no-op for
                // well-formed text; kept so existing output is unchanged.
                // Confirm before removing.
                return new String(text.getBytes("UTF-8"), "UTF-8");
            }
            return text;
        } catch (Exception ex) {
            Vars.logger.fatal("Error: ", ex);
            return null;
        }
    }
}