/*
* JBoss, Home of Professional Open Source
*
* Distributable under LGPL license.
* See terms of license at gnu.org.
*/
package org.jboss.seam.wiki.core.search.metamodel;
import org.jboss.seam.annotations.Observer;
import org.jboss.seam.annotations.Scope;
import org.jboss.seam.ScopeType;
import org.jboss.seam.wiki.util.WikiUtil;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.util.Set;
import java.io.StringReader;
/**
 * Superclass for search support, extend it to add search options to the wiki.
 * <p>
 * Extend this class and return <tt>SearchableEntityHandler</tt> instances for each
 * entity you want to be able to search in the user interface. The handlers need to
 * be able to extract a <tt>SearchHit</tt> from a given query and the original entity
 * instance. This <tt>SearchHit</tt> is then displayed. If you have a string-based
 * property and you want to simply show the "best" fragments of a hit, use the
 * <tt>escapeBestFragments()</tt> convenience method.
 * <p>
 * Note that you also need to annotate any entity class and its properties
 * with <tt>@Searchable</tt>.
 *
 * @see org.jboss.seam.wiki.core.search.annotations.Searchable
 * @see SearchableEntityHandler
 * @see org.jboss.seam.wiki.core.search.SearchHit
 *
 * @author Christian Bauer
 */
@Scope(ScopeType.APPLICATION)
public abstract class SearchSupport {

    // Placeholders emitted by the highlighter; they are swapped for the real hit tags
    // only after the fragment text has been HTML-escaped, so the tags survive escaping.
    private static final String INTERNAL_BEGIN_HIT = "!!!BEGIN_HIT!!!";
    private static final String INTERNAL_END_HIT = "!!!END_HIT!!!";

    /**
     * Observes the <tt>Search.addSearchSupport</tt> event and adds this instance to the
     * given collection.
     *
     * @param searchSupportComponents the set this component adds itself to
     */
    @Observer("Search.addSearchSupport")
    public void add(Set<SearchSupport> searchSupportComponents) {
        searchSupportComponents.add(this);
    }

    /**
     * Returns the hits of the given query as fragments, highlighted, concatenated, and separated.
     * <p>
     * Pass in a <tt>NullFragmenter</tt> if you don't want any fragmentation by terms but
     * simply the hits highlighted. Otherwise, you will most likely use <tt>SimpleFragmenter</tt>.
     * The text you supply must be the same that was indexed, it will go through the same
     * analysis procedure to find the hits. Do not pass a different String than the one indexed
     * by Hibernate Search! If you use transparent string bridge with Hibernate Search, run the
     * bridge before passing the string into this method.
     * <p>
     * This method escapes any dangerous HTML characters in the indexed text and fragments by
     * replacing it with HTML entities. You can use the returned string directly to build a
     * <tt>SearchHit</tt>.
     * <p>
     * The begin/end hit tags and the fragment separator are taken from
     * <tt>getBeginHitTag()</tt>, <tt>getEndHitTag()</tt> and <tt>getFragmentSeparator()</tt>.
     * The tags are inserted literally, so they may contain any characters.
     *
     * @param query the query that produced hits
     * @param fragmenter a fragmenter that can split the indexed text
     * @param indexedText the original text that was analyzed and indexed by Hibernate Search (after any bridges!)
     * @param numOfFragments the number of fragments to include in the returned result
     * @param alternativeLength if there are no hits to highlight, how many characters of the original text
     *                          to return; zero or a negative value returns the complete text
     * @return the fragmented, highlighted, and then concatenated substring of the indexed text
     * @throws RuntimeException wrapping any exception raised during analysis, highlighting or escaping
     */
    protected String escapeBestFragments(Query query, Fragmenter fragmenter,
                                         String indexedText, int numOfFragments, int alternativeLength) {

        // The HTML escaping forces us to first fragment with internal placeholders...
        Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(INTERNAL_BEGIN_HIT, INTERNAL_END_HIT),
            new QueryScorer(query)
        );
        highlighter.setTextFragmenter(fragmenter);

        try {
            // Use the same analyzer as the indexer!
            TokenStream tokenStream =
                new StandardAnalyzer().tokenStream(null, new StringReader(indexedText));

            String unescapedFragments =
                highlighter.getBestFragments(tokenStream, indexedText, numOfFragments, getFragmentSeparator());

            // Strip out macros, then escape the remaining text
            String escapedFragments =
                WikiUtil.escapeHtml(WikiUtil.removeMacros(unescapedFragments), false, false);

            // ... and then replace the internal placeholders with real tags after HTML has been
            // escaped. Literal replace() is used on purpose: replaceAll() would interpret '$' and
            // '\' in a subclass-supplied tag as regex replacement syntax.
            escapedFragments = escapedFragments
                .replace(INTERNAL_BEGIN_HIT, getBeginHitTag())
                .replace(INTERNAL_END_HIT, getEndHitTag());

            // If no fragments were produced (no hits), return the original text as an alternative
            if (escapedFragments.length() == 0) {
                return escapeAlternativeText(indexedText, alternativeLength);
            }
            return escapedFragments;

        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Builds the fallback text shown when highlighting produced no fragments: the leading
     * part of the indexed text, with macros removed and HTML escaped.
     *
     * @param indexedText the original indexed text
     * @param alternativeLength maximum number of leading characters to keep before macro removal;
     *                          zero or negative keeps the complete text
     * @return the truncated, macro-free, escaped text
     */
    private String escapeAlternativeText(String indexedText, int alternativeLength) {
        String alternative = indexedText;
        if (alternativeLength > 0 && indexedText.length() > alternativeLength) {
            alternative = indexedText.substring(0, alternativeLength);
        }
        return WikiUtil.escapeHtml(WikiUtil.removeMacros(alternative), false, false);
    }

    /**
     * String used to mark the beginning of a highlighted hit.
     * <p>
     * Defaults to <tt>&lt;b&gt;</tt>, can be overridden by subclass.
     *
     * @return String used to mark the beginning of a highlighted hit.
     */
    protected String getBeginHitTag() {
        return "<b>";
    }

    /**
     * String used to mark the end of a highlighted hit.
     * <p>
     * Defaults to <tt>&lt;/b&gt;</tt>, can be overridden by subclass.
     *
     * @return String used to mark the end of a highlighted hit.
     */
    protected String getEndHitTag() {
        return "</b>";
    }

    /**
     * Separator string between two fragments.
     * <p>
     * Defaults to <tt>... ...</tt> (just dots with a space).
     *
     * @return Separator string between two fragments.
     */
    protected String getFragmentSeparator() {
        return "... ...";
    }

    /**
     * Create and return any <tt>SearchableEntityHandler</tt> you require search functionality for.
     * <p>
     * This is called on startup only by the internal registry, to assemble all handlers.
     *
     * @return SearchableEntityHandler typed for a particular indexed/searchable entity class
     */
    public abstract Set<SearchableEntityHandler> getSearchableEntityHandlers();
}