/*
* Copyright (C) 2004 Paul Browne, http://www.firstpartners.net,
* built with the help of Fast-Soft (fastsoftdev@yahoo.com)
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
* This product includes software developed by the
* Apache Software Foundation (http://www.apache.org)."
*
* This product includes software developed by the
* Spring Framework Project (http://www.springframework.org)."
*
*/
package net.fp.rp.search.back.extractor;
import net.fp.rp.common.exception.RpException;
import net.fp.rp.search.back.extractor.util.UtilExtract;
import net.fp.rp.search.common.AddInfo;
import net.fp.rp.search.mid.global.PluginManager;
import net.fp.rp.search.plugins.IDataExtractor;
import net.fp.rp.search.plugins.INewInformation;
import org.apache.log4j.Logger;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/**
* Extracts information from a File Tree (Directory) in a format that can be
* added to an Index.
*
* @author brownpa
* Copyright <a href="http://www.firstpartners.net/red">www.firstpartners.net/red</a>
*/
public class FileTreeDataExtractor implements IDataExtractor {
    /**
     * Score returned by {@link #canHandle(INewInformation)} when the URI uses
     * the file protocol and names an existing directory.
     */
    private static final int DIRECTORY_HANDLE_SCORE = 14;

    /** Logger for this class and subclasses */
    protected final Logger logger = Logger.getLogger(getClass());

    /** Extractor type */
    private String type;

    /**
     * Maximum size of a file that will be processed; compared against
     * {@link File#length()}, i.e. a size in bytes.
     */
    private int maxFileSize;

    /**
     * The original place where we got this data.
     *
     * @return always the empty string for this extractor
     */
    public String getOriginalUri() {
        return "";
    }

    /**
     * Carry out any initiation tasks. This extractor has none.
     */
    public void onLoad() {
    }

    /**
     * How well the plugin thinks it can handle a new piece of information.
     *
     * @param info information whose URI is inspected
     *
     * @return a positive score when the URI starts with
     *         {@link UtilExtract#FILE_BASE} and the remainder names an existing
     *         directory; 0 otherwise (including when the URI is null)
     */
    public int canHandle(INewInformation info) {
        String uri = info.getUri();
        logger.debug(
            "TreeExtractor - validate handling of the information from " + uri);

        // A missing URI or a non-file protocol can never be a local directory.
        if ((uri == null) || !uri.startsWith(UtilExtract.FILE_BASE)) {
            return 0;
        }

        File candidate = new File(uri.substring(UtilExtract.FILE_BASE.length()));

        return candidate.isDirectory() ? DIRECTORY_HANDLE_SCORE : 0;
    }

    /**
     * Convert the tree information into a list of documents by handing every
     * entry of the directory to the best available extractor.
     *
     * Nothing is done when the level of the information is not positive.
     * Failures reported by a child extractor are logged and do not stop the
     * processing of the remaining entries.
     *
     * @param info Information to be converted
     *
     * @throws RpException If the URI is malformed, does not use the file
     *         protocol, does not denote a directory, or the directory content
     *         cannot be listed
     */
    public void convert(INewInformation info) throws RpException {
        logger.info("TreeExtractor handling location :" + info.getUri() +
            " with level " + info.getLevel());

        // extract the information from the files only if the level is present (>0)
        if (info.getLevel() <= 0) {
            logger.debug(
                "Current level for the information is already 0-> no futher process for this location");

            return;
        }

        File folder;

        try {
            folder = new File(new URL(info.getUri()).getPath());
        } catch (MalformedURLException e) {
            logger.warn("Specified uri is not valid " + e.getMessage(), e);
            throw new RpException("extractor.tree.foldernotexists",
                new Object[] { info.getUri() });
        }

        // only a file-protocol URI that points to a directory is accepted
        if (!info.getUri().startsWith(UtilExtract.FILE_BASE) ||
                !folder.isDirectory()) {
            throw new RpException("treeextractor.folder.notexists",
                new Object[] { info.getUri() });
        }

        logger.debug("Process the files inside folder");

        File[] list = folder.listFiles();

        // listFiles() returns null when an I/O error occurs (e.g. the directory
        // is not readable); report it through the declared exception instead of
        // failing with a NullPointerException.
        if (list == null) {
            logger.warn("Unable to list the content of folder " +
                folder.getPath());
            throw new RpException("treeextractor.folder.notexists",
                new Object[] { info.getUri() });
        }

        // for each item delegate to the best matching extractor
        for (int i = 0; i < list.length; i++) {
            processChild(info, list[i]);
        }
    }

    /**
     * Hand a single directory entry to the best matching extractor, skipping
     * entries whose reported size exceeds the configured maximum.
     *
     * @param info Information describing the parent directory
     * @param child Entry of the directory to be processed
     *
     * @throws RpException Declared for the extractor lookup; errors raised by
     *         the child extractor itself are logged and not propagated
     */
    private void processChild(INewInformation info, File child)
        throws RpException {
        logger.debug("Process the file :" + child.getPath());

        // validate if the size is too big
        // NOTE: File.length() is unspecified for directories, so this check is
        // only meaningful for regular files.
        if (child.length() > maxFileSize) {
            logger.warn("File " + child.getPath() +
                " is too big and will not be process");

            return;
        }

        // the child keeps the category location and name of the parent, is
        // addressed by its plain path, and gets one level less than the parent
        INewInformation childInfo = new AddInfo(info.getCategoryLocation(),
                info.getCategoryName(), child.getPath(), info.getLevel() - 1);
        IDataExtractor extractor = PluginManager.getBestExtractor(childInfo);

        if (extractor == null) {
            logger.warn("No extractor is available for extract the data " +
                child.getPath());

            return;
        }

        logger.debug("Best extractor for location " + child.getPath() +
            " is :" + extractor.getClass().getName());

        try {
            extractor.convert(childInfo);
        } catch (RpException e) {
            // no exception to be thrown -> continue with the remaining entries
            logger.debug("Error in extracting the data " + e.getMessage(), e);
        }
    }

    /**
     * Return the list of supported extensions.
     *
     * @return an empty array: this extractor is not selected by extension
     */
    public String[] getListExtensions() {
        // no extension supported
        return new String[] { };
    }

    /**
     * Get the maximum file size which can be processed.
     *
     * @return Maximum file size
     */
    public int getMaxFileSize() {
        return maxFileSize;
    }

    /**
     * Set the maximum file size which can be processed.
     *
     * @param maxFileSize Maximum file size allowed.
     */
    public void setMaxFileSize(int maxFileSize) {
        this.maxFileSize = maxFileSize;
    }

    /**
     * Get the extractor type.
     *
     * @return Type of the extractor
     */
    public String getType() {
        return type;
    }

    /**
     * Set the extractor type.
     *
     * @param type Extractor type
     */
    public void setType(String type) {
        this.type = type;
    }
}