/*
* Copyright (C) 2004 Paul Browne, http://www.firstpartners.net,
* built with the help of Fast-Soft (fastsoftdev@yahoo.com)
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
*/
package net.fp.rp.search.back.datastore;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Selector;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.rdf.model.impl.SelectorImpl;
import net.fp.rp.common.exception.RpException;
import net.fp.rp.search.back.datastore.vocabulary.RP;
import net.fp.rp.search.back.struct.DocumSetStruct;
import net.fp.rp.search.back.struct.DocumStruct;
import net.fp.rp.search.back.struct.NodeStruct;
import net.fp.rp.search.back.struct.TupleStruct;
import net.fp.rp.search.common.util.Util;
import net.fp.rp.search.mid.global.PluginManager;
import net.fp.rp.search.plugins.ICategoryStore;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
/**
* Simple metadata store that persists each document's metadata as an RDF/XML model file.
*
* @author brownpa
* Copyright @link www.firstpartners.net/red
*/
public class BasicCategoryStore implements ICategoryStore {
/** File-name prefix of a stored model; the document id is appended to it. */
static final String MODEL_NAME = "node_";
/** File-name extension appended after the document id of a stored model. */
static final String MODEL_EXTENSION = ".rdf";
/** Separator used when a link is encoded as one string (categoryname/documentid) */
private static final String SEPARATOR = "/";
/** Logger for this class and subclasses (named after the runtime class) */
protected final Logger logger = Logger.getLogger(getClass());
/**
 * Get the original location of the category from the model.
 * The location is read from the first model file (file name starting with
 * {@link #MODEL_NAME}) returned by the listing of the category folder; the
 * order of that listing is whatever the file system reports.
 *
 * @param path Category folder path
 *
 * @return Original location of the category
 *
 * @throws RpException If the folder does not exist, holds no model, the
 *         model cannot be read, or the model defines no category location
 */
public String getOriginalLocation(final String path)
    throws RpException {
    File modelFolder = new File(path);

    if (!modelFolder.exists()) {
        logger.warn("The category path doesn't exist");
        throw new RpException("category.path.error", new Object[] { path });
    }

    File[] list = modelFolder.listFiles(new FileFilter() {
                public boolean accept(File candidate) {
                    //later extension for jar and zip
                    return candidate.getName().startsWith(MODEL_NAME);
                }
            });

    // listFiles() yields null when the path is not a directory or cannot
    // be listed; treat that like a folder without any model
    if ((list == null) || (list.length == 0)) {
        logger.warn("No model stored in the system path ");
        throw new RpException("document.model.missing",
            new Object[] { MODEL_NAME });
    }

    //get the model path of the first file from the folder
    String modelPath = list[0].getPath();
    logger.debug("Process the model from path:" + modelPath);

    // create an empty model
    Model model = ModelFactory.createDefaultModel();

    synchronized (this) {
        //read the RDF/XML file; the stream is handed to the parser directly
        //so the encoding declared in the file is used, and it is always closed
        try {
            FileInputStream in = new FileInputStream(modelPath);

            try {
                model.read(in, "");
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.warn("Error in reading the model stored in the path " +
                modelPath, e);
            throw new RpException("document.model.read.error",
                new Object[] { modelPath });
        }

        //predicate for filtering the category location
        Selector selector = new SelectorImpl(null, RP.categoryLocation,
                (RDFNode) null);
        StmtIterator iter = model.listStatements(selector);

        if (!iter.hasNext()) {
            logger.warn("Original location is not defined in the model ");
            throw new RpException("document.location.missing");
        }

        String location = iter.nextStatement().getObject().toString();
        logger.debug("Original location was " + location);

        return location;
    }
}
/**
 * Get the direct score for the specified document id in the category.
 * The value is taken from the first direct-score statement found in the
 * document model; a value that is not a valid number is reported in the log
 * and read as 0.0.
 *
 * @param categName The category name
 * @param documID The document id
 *
 * @return Direct score of the document
 *
 * @throws RpException If the model cannot be read or defines no direct score
 */
public double getDirectScore(final String categName, final String documID)
    throws RpException {
    //get the model path of the document/categoryname
    String modelPath = getModelPath(categName, documID);

    // create an empty model
    Model model = ModelFactory.createDefaultModel();

    synchronized (this) {
        //read the RDF/XML file; the stream is handed to the parser directly
        //so the encoding declared in the file is used, and it is always closed
        try {
            FileInputStream in = new FileInputStream(modelPath);

            try {
                model.read(in, "");
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.warn("Error in reading the model stored in the path " +
                modelPath, e);
            throw new RpException("document.model.read.error",
                new Object[] { modelPath });
        }

        //predicate for filtering the direct score
        Selector selector = new SelectorImpl(null, RP.directScore,
                (RDFNode) null);
        StmtIterator iter = model.listStatements(selector);

        if (!iter.hasNext()) {
            logger.warn("Direct Score is not defined in the model ");
            throw new RpException("document.directscore.missing");
        }

        String directscore = iter.nextStatement().getObject().toString();
        double score = 0.00;

        try {
            score = Double.valueOf(directscore).doubleValue();
        } catch (NumberFormatException e) {
            // best effort: keep the 0.0 default, but leave a trace of the bad value
            logger.warn("Direct score '" + directscore + "' of the model " +
                modelPath + " is not a number, using " + score);
        }

        logger.debug("Direct score is " + score);

        return score;
    }
}
/**
 * Get the implied score for the specified document id in the category.
 * The implied score is the direct score divided by the calculated score of
 * the node carrying the first direct-score statement; it is 0.0 when the
 * calculated score is 0.
 *
 * @param categName The category name
 * @param documID The document id
 *
 * @return Implied score of the document
 *
 * @throws RpException If the model cannot be read or defines no direct score
 */
public double getImpliedScore(final String categName, final String documID)
    throws RpException {
    //get the model path of the document/categoryname
    String modelPath = getModelPath(categName, documID);

    // create an empty model
    Model model = ModelFactory.createDefaultModel();

    synchronized (this) {
        //read the RDF/XML file; the stream is handed to the parser directly
        //so the encoding declared in the file is used, and it is always closed
        try {
            FileInputStream in = new FileInputStream(modelPath);

            try {
                model.read(in, "");
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.warn("Error in reading the model stored in the path " +
                modelPath, e);
            throw new RpException("document.model.read.error",
                new Object[] { modelPath });
        }

        //predicate for filtering the direct score
        Selector selector = new SelectorImpl(null, RP.directScore,
                (RDFNode) null);
        StmtIterator iter = model.listStatements(selector);

        if (!iter.hasNext()) {
            logger.warn(
                "Direct Score for calculate the implied score is not defined in the model ");
            throw new RpException("document.directscore.missing");
        }

        //the node that owns the direct score also holds the calculated score
        Resource node = iter.nextStatement().getSubject();

        double directscore = Double.valueOf(node.getProperty(RP.directScore)
                                                .getObject().toString())
                                   .doubleValue();
        double calcscore = Double.valueOf(node.getProperty(RP.calcScore)
                                              .getObject().toString())
                                 .doubleValue();

        //a calculated score of 0 gives an implied score of 0 (no division)
        double score = 0.00;

        if (calcscore != 0) {
            score = directscore / calcscore;
        }

        logger.debug("Implied score is " + score);

        return score;
    }
}
/**
 * Build the path of the model file that belongs to a document of a category:
 * category root / category name / MODEL_NAME + document id + MODEL_EXTENSION.
 *
 * @param categoryname The category name
 * @param documentid The document id
 *
 * @return The model path for document in the category
 */
private String getModelPath(final String categoryname,
    final String documentid) {
    //folder of the category below the category root
    final String categoryFolder = Util.addFolder(
            PluginManager.getCategoryManager().getRoot(), categoryname);

    //file name of the model inside that folder
    final String fileName = MODEL_NAME + documentid + MODEL_EXTENSION;

    final String result = Util.addFolder(categoryFolder, fileName);
    logger.debug("Generate the modelpath : " + result);

    return result;
}
/**
 * Update the document with the specified links list and user feedback
 * (+/-).
 * For every document of the list its model is read, links to the other
 * documents of the list that the model does not yet contain are added, the
 * calculated score is raised by the document average plus the summed implied
 * score of the already known links multiplied by the user score, the model
 * is written back and the document is re-indexed.
 * A model that cannot be read or written, or that defines no calculated
 * score, is reported in the log and skipped.
 *
 * @param listLinks Links list to update the document (DocumSetStruct
 *        elements); null or empty means nothing to do
 * @param userScore User feedback score (+/-)
 */
public void updateDocumentLinksAndScore(LinkedList listLinks, int userScore) {
    //if no document is linked to the current categoryname/currentid no update
    if ((listLinks == null) || listLinks.isEmpty()) {
        return;
    }

    //snapshot once: indexed access on a LinkedList walks the list on every call
    Object[] docs = listLinks.toArray();

    for (int i = 0; i < docs.length; i++) {
        DocumSetStruct currentDoc = (DocumSetStruct) docs[i];
        LinkedList pendingLinks = collectLinkKeys(docs, i);

        //get the model path of the current document
        String modelpath = getModelPath(currentDoc.getCategoryName(),
                currentDoc.getDocumentId());

        //read the model from the specified path and update the calculated_score
        logger.info("Read for update link/score from the model " +
            modelpath);

        Model model = ModelFactory.createDefaultModel();

        synchronized (this) {
            //read the RDF/XML file; the stream is always closed
            try {
                FileInputStream in = new FileInputStream(modelpath);

                try {
                    model.read(in, "");
                } finally {
                    in.close();
                }
            } catch (IOException e) {
                logger.warn("Error in reading the model stored in the path " +
                    modelpath, e);

                continue;
            }

            //Predicate for filtering the linkToId
            Selector selector = new SelectorImpl(null, RP.linkToId,
                    (RDFNode) null);
            StmtIterator iter = model.listStatements(selector);

            //sum for implied score
            double sumImpliedScore = 0.00;

            while (iter.hasNext()) {
                //node describing one link (linktoid and linktocategory)
                Resource node = iter.nextStatement().getSubject();
                String linktoid = node.getProperty(RP.linkToId).getObject()
                                      .toString();
                String linktocategory = node.getProperty(RP.linkToCategory)
                                            .getObject().toString();

                //construct the unique element (categname/documid)
                String compare = linktocategory + SEPARATOR + linktoid;

                //a link already present in the model is not added again,
                //its implied score contributes to the new calculated score
                if (pendingLinks.remove(compare)) {
                    try {
                        sumImpliedScore = sumImpliedScore +
                            getImpliedScore(linktocategory, linktoid);
                    } catch (RpException e) {
                        logger.info("Exception in calculate the implied weight",
                            e);
                    }
                }
            }

            //links still pending are new for this model and must be added
            for (Iterator it = pendingLinks.iterator(); it.hasNext();) {
                String elem = (String) it.next();
                int index = elem.lastIndexOf(SEPARATOR);

                model.createResource()
                     .addProperty(RP.linkToCategory, elem.substring(0, index))
                     .addProperty(RP.linkToId, elem.substring(index + 1));
            }

            //Predicate for filtering the calc_score
            Selector selScore = new SelectorImpl(null, RP.calcScore,
                    (RDFNode) null);
            StmtIterator iterScore = model.listStatements(selScore);

            if (!iterScore.hasNext()) {
                logger.warn("CalcScore is not defined in the model " +
                    modelpath);

                continue;
            }

            Statement stmtScore = iterScore.nextStatement();
            Resource node = stmtScore.getSubject();

            //get the calc_score and category score
            double calcscore = Double.valueOf(node.getProperty(RP.calcScore)
                                                  .getObject().toString())
                                     .doubleValue();
            double categscore = Double.valueOf(node.getProperty(
                        RP.categoryScore).getObject().toString())
                                      .doubleValue();

            //update the calc_score according to the average and implied score
            calcscore = calcscore + currentDoc.getAverage() +
                (sumImpliedScore * userScore);

            logger.debug("Update the calc_score for model:" + modelpath +
                " /Score:" + calcscore);
            stmtScore.changeObject(calcscore);

            //remove the old file and write the model; the stream is always closed
            File f = new File(modelpath);
            f.delete();

            try {
                FileOutputStream out = new FileOutputStream(modelpath);

                try {
                    model.write(out);
                } finally {
                    out.close();
                }
            } catch (IOException e) {
                logger.warn("Error in writing the model to the path " +
                    modelpath, e);

                continue;
            }

            model.close();

            //set the new index score;
            PluginManager.getIndexManager().reindex(currentDoc.getDocumentId(),
                calcscore * categscore);
        }
    }
}

/**
 * Collect the link keys (categoryname/documentid) of all documents of the set
 * that differ from the document at the given position, each key only once.
 *
 * @param docs Documents of the set (DocumSetStruct elements)
 * @param current Position of the document the links are collected for
 *
 * @return List of distinct link keys
 */
private LinkedList collectLinkKeys(Object[] docs, int current) {
    DocumSetStruct currentDoc = (DocumSetStruct) docs[current];
    LinkedList keys = new LinkedList();

    for (int j = 0; j < docs.length; j++) {
        if (j == current) {
            continue;
        }

        DocumSetStruct other = (DocumSetStruct) docs[j];
        boolean sameDocument = currentDoc.getCategoryName().equals(other.getCategoryName()) &&
            currentDoc.getDocumentId().equals(other.getDocumentId());

        if (sameDocument) {
            continue;
        }

        String key = other.getCategoryName() + SEPARATOR +
            other.getDocumentId();

        //a document listed more than once must produce a single link
        if (!keys.contains(key)) {
            keys.add(key);
        }
    }

    return keys;
}
/**
 * Update the direct score of the document using the categoryname,
 * document id and user feedback score.
 * Both the direct score and the calculated score stored in the model are
 * increased by the user score, the model is written back and the document is
 * re-indexed with calculated score * category score.
 * A model that cannot be read or written, or that defines no direct score,
 * is reported in the log and left without re-indexing.
 *
 * @param categoryname The category name
 * @param documentid The document id
 * @param userScore The user feedback score (+1/-1)
 */
public void updateDocumentDirectScore(String categoryname,
    String documentid, int userScore) {
    logger.debug("Update the document directscore from:" + categoryname +
        "/" + documentid);

    //get the model path for the document
    String modelpath = getModelPath(categoryname, documentid);
    Model model = ModelFactory.createDefaultModel();

    synchronized (this) {
        //read the RDF/XML file; the stream is always closed
        try {
            FileInputStream in = new FileInputStream(modelpath);

            try {
                model.read(in, "");
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.warn("Error in reading the model stored in the path " +
                modelpath, e);

            return;
        }

        //Predicate for filtering direct score
        Selector selector = new SelectorImpl(null, RP.directScore,
                (RDFNode) null);
        StmtIterator iter = model.listStatements(selector);

        if (!iter.hasNext()) {
            logger.warn("DirectScore is not defined in the model " +
                modelpath);

            return;
        }

        Statement stmt = iter.nextStatement();

        //get the node (for obtain the calc_score and categoryScore)
        Resource node = stmt.getSubject();
        double directscore = Double.valueOf(stmt.getObject().toString())
                                   .doubleValue();
        double calcscore = Double.valueOf(node.getProperty(RP.calcScore)
                                              .getObject().toString())
                                 .doubleValue();
        double categScore = Double.valueOf(node.getProperty(RP.categoryScore)
                                               .getObject().toString())
                                  .doubleValue();

        //increment the direct score for the current statement
        directscore = directscore + userScore;
        stmt.changeObject(directscore);

        //adjust the calc_score according to the user feedback
        calcscore = calcscore + userScore;

        Statement stmt2 = node.getProperty(RP.calcScore);
        stmt2.changeObject(calcscore);

        //remove the old file and write the model; the stream is always closed
        File f = new File(modelpath);
        f.delete();

        try {
            FileOutputStream out = new FileOutputStream(modelpath);

            try {
                model.write(out);
            } finally {
                out.close();
            }
        } catch (IOException e) {
            logger.warn("Error in writing the model to the path " +
                modelpath, e);

            return;
        }

        model.close();

        //modified because the user is interested in the selected document
        //then the score of the document will be increased (direct_score and calculated
        //score also in the quick process) , then the calc_score will be adjusted
        //using the implied score of the document related to the current document
        PluginManager.getIndexManager().reindex(documentid,
            calcscore * categScore);
    }
}
/**
 * Update the category score of the document using the model path and user
 * feedback score.
 * The category score stored in the model is increased by the user score, the
 * model is written back and the document is re-indexed with
 * category score * calculated score.
 * A model that cannot be read or written, or that defines no category score,
 * is reported in the log and left without re-indexing.
 *
 * @param modelpath The model path
 * @param userScore The user feedback score (+1/-1)
 */
private void updateModelCategoryScore(String modelpath, int userScore) {
    logger.debug("Update the category score of the model " + modelpath);

    Model model = ModelFactory.createDefaultModel();

    synchronized (this) {
        //read the RDF/XML file; the stream is always closed
        try {
            FileInputStream in = new FileInputStream(modelpath);

            try {
                model.read(in, "");
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.warn("Error in reading the model stored in the path " +
                modelpath, e);

            return;
        }

        //Predicate for filtering the category score
        Selector selector = new SelectorImpl(null, RP.categoryScore,
                (RDFNode) null);
        StmtIterator iter = model.listStatements(selector);

        if (!iter.hasNext()) {
            logger.warn("CategoryScore is not defined in the model " +
                modelpath);

            return;
        }

        Statement stmt = iter.nextStatement();

        //new category score of the node
        double newCategScore = Double.valueOf(stmt.getObject().toString())
                                     .doubleValue() + userScore;

        //get the node (for obtain the calc_score and document id)
        Resource node = stmt.getSubject();
        double calcscore = Double.valueOf(node.getProperty(RP.calcScore)
                                              .getObject().toString())
                                 .doubleValue();
        String documentid = node.getProperty(RP.documentID).getObject()
                                .toString();

        //change the value of the current statement
        stmt.changeObject(newCategScore);

        //remove the old file and write the model; the stream is always closed
        File f = new File(modelpath);
        f.delete();

        try {
            FileOutputStream out = new FileOutputStream(modelpath);

            try {
                model.write(out);
            } finally {
                out.close();
            }
        } catch (IOException e) {
            logger.warn("Error in writing the model to the path " +
                modelpath, e);

            return;
        }

        model.close();

        //update the document in the index to the new score
        PluginManager.getIndexManager().reindex(documentid,
            newCategScore * calcscore);
    }
}
/**
 * Update the category score using the categoryname and user feedback score.
 * Every model file of the category folder (regular file named
 * MODEL_NAME + id + MODEL_EXTENSION) gets its category score updated; other
 * folder entries are ignored. Nothing happens when the category folder does
 * not exist or cannot be listed.
 *
 * @param categoryname The category name
 * @param userScore The user feedback score (+1/-1)
 */
public void updateCategoryScore(String categoryname, int userScore) {
    logger.debug("Update the category score from:" + categoryname);

    //get the path of the category
    String path = Util.addFolder(PluginManager.getCategoryManager().getRoot(),
            categoryname);
    File categFolder = new File(path);
    logger.debug("Process the folder " + categFolder.getPath());

    //isDirectory() is false for a missing path as well
    if (!categFolder.isDirectory()) {
        return;
    }

    //only the files written by this store are models
    File[] list = categFolder.listFiles(new FileFilter() {
                public boolean accept(File candidate) {
                    String name = candidate.getName();

                    return candidate.isFile() && name.startsWith(MODEL_NAME) &&
                    name.endsWith(MODEL_EXTENSION);
                }
            });

    //listFiles() yields null when the folder cannot be listed (I/O error)
    if (list == null) {
        logger.warn("Unable to list the category folder " + path);

        return;
    }

    //process all the document models from the category folder
    for (int i = 0; i < list.length; i++) {
        logger.debug("Process the model from " + list[i].getName() + "/" +
            list[i].getPath());
        updateModelCategoryScore(list[i].getPath(), userScore);
    }
}
/**
 * Store the document data as rdf-xml.
 * The category folder is created when missing. If a model file for the
 * document already exists it is replaced by the new model.
 *
 * @param doc Document to be stored
 *
 * @throws RpException If the category folder cannot be created or any error
 *         occurs while building or writing the model
 */
public void store(DocumStruct doc) throws RpException {
    //category folder below the category root
    String categoryRoot = PluginManager.getCategoryManager().getRoot();
    String path = Util.addFolder(categoryRoot, doc.getCategoryName());
    File categoryFolder = new File(path);

    //create the category name folder if not exists; the second exists()
    //covers a folder created concurrently between the check and mkdirs()
    if (!categoryFolder.exists() && !categoryFolder.mkdirs() &&
            !categoryFolder.exists()) {
        logger.warn("Unable to create the category folder " + path);
        throw new RpException("document.store.model.error",
            new Object[] { path });
    }

    synchronized (this) {
        try {
            //create a new model
            Model model = ModelFactory.createDefaultModel();
            model.setNsPrefix(RP.getPrefix(), RP.getURI());

            //create the document resource
            //Use the document path as resource name
            Resource node = model.createResource(doc.getPath());

            //set the node attributes :title,path,lastupdate,calcscore,origscore,id
            node.addProperty(RP.title, doc.getTitle());
            node.addProperty(RP.path, doc.getPath());
            node.addProperty(RP.summary, doc.getDescription());
            node.addProperty(RP.lastUpdate, doc.getLastUpdate());
            node.addProperty(RP.directScore, doc.getDirectScore());
            node.addProperty(RP.calcScore, doc.getCalcScore());
            node.addProperty(RP.documentID, doc.getId());

            //set the category attributes
            node.addProperty(RP.categoryScore, doc.getCategoryScore());
            node.addProperty(RP.categoryName, doc.getCategoryName());
            node.addProperty(RP.categoryLocation, doc.getCategoryLocation());

            //no-links for the node
            //add the node content to the document
            node.addProperty(RP.content,
                generateResource(doc.getContent(), model));

            //get the model path
            String modelpath = getModelPath(doc.getCategoryName(),
                    doc.getId());
            File modelFile = new File(modelpath);

            //rewrite the model
            if (modelFile.exists()) {
                modelFile.delete();
            }

            //write the new model; closing the stream releases the file
            //handle, and a failing close is reported like any other
            //storing error by the catch below
            FileOutputStream out = new FileOutputStream(modelFile);

            try {
                model.write(out);
            } finally {
                out.close();
            }

            model.close();
        } catch (Throwable e) {
            //any failure while building or writing is reported as RpException
            logger.warn("Error in storing the model in the system path " +
                path, e);
            throw new RpException("document.store.model.error",
                new Object[] { path });
        }
    }
}
/**
 * Build the RDF resource for a content node and, recursively, for all of
 * its children.
 * A tuple whose keyword is KEYWORD_NAME contributes its value as keyword, a
 * tuple whose keyword is KEYWORD_GENERIC contributes its value as value; any
 * other tuple contributes its non-empty keyword and its non-empty value.
 *
 * @param root Node struct content
 * @param model RDF-Model used for generation of the resources
 *
 * @return RDF-Resources for the specified node
 */
private Resource generateResource(NodeStruct root, Model model) {
    final Resource current = model.createResource();

    //tuples of the node itself
    for (int idx = 0; idx < root.getTuples().size(); idx++) {
        final TupleStruct entry = (TupleStruct) root.getTuples().get(idx);

        if (TupleStruct.KEYWORD_NAME.equals(entry.getKeyword())) {
            current.addProperty(RP.keyword, entry.getValue());

            continue;
        }

        if (TupleStruct.KEYWORD_GENERIC.equals(entry.getKeyword())) {
            current.addProperty(RP.value, entry.getValue());

            continue;
        }

        if (!Util.isEmpty(entry.getKeyword())) {
            current.addProperty(RP.keyword, entry.getKeyword());
        }

        if (!Util.isEmpty(entry.getValue())) {
            current.addProperty(RP.value, entry.getValue());
        }
    }

    //one child resource per child node
    for (Enumeration children = root.children();
            children.hasMoreElements();) {
        final NodeStruct childNode = (NodeStruct) children.nextElement();
        current.addProperty(RP.child, generateResource(childNode, model));
    }

    return current;
}
}