/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.search.node;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.sindice.siren.search.node.TwigQuery.TwigWeight;
/**
* A {@link NodeQuery} that matches a boolean combination of primitive queries, e.g.,
* {@link NodeTermQuery}s, {@link NodePhraseQuery}s, {@link NodeBooleanQuery}s,
* ...
*
* <p>
*
* Code taken from {@link BooleanQuery} and adapted for the Siren use case.
*/
public class NodeBooleanQuery extends NodeQuery {
/**
* Class-wide cap on the number of clauses a query may hold. It is read by
* {@link #add(NodeBooleanClause)} and changed through
* {@link #setMaxClauseCount(int)}.
*/
private static int maxClauseCount = 1024;
/**
 * Reports the current upper bound on the number of clauses a query may
 * contain. The bound starts at 1024; {@link #add(NodeBooleanClause)} throws
 * {@link TooManyClauses} once a query already holds that many clauses.
 *
 * @return the maximum number of clauses permitted
 * @see #setMaxClauseCount(int)
 */
public static int getMaxClauseCount() {
  return NodeBooleanQuery.maxClauseCount;
}
/**
 * Changes the maximum number of clauses permitted per
 * {@link NodeBooleanQuery}. The default value is 1024.
 *
 * @param maxClauseCount the new limit; must be at least 1
 * @throws IllegalArgumentException if {@code maxClauseCount} is smaller
 *         than 1
 * @see #getMaxClauseCount()
 */
public static void setMaxClauseCount(final int maxClauseCount) {
  if (maxClauseCount <= 0) {
    throw new IllegalArgumentException("maxClauseCount must be >= 1");
  }
  NodeBooleanQuery.maxClauseCount = maxClauseCount;
}
/** Clauses of this query; {@link #add(NodeBooleanClause)} appends to the end of this list. */
protected ArrayList<NodeBooleanClause> clauses = new ArrayList<NodeBooleanClause>();
/** Constructs an empty boolean query, i.e., a query that holds no clause yet. */
public NodeBooleanQuery() {}
/**
 * Sets the level constraint on this query and forwards the same value to
 * the query of every clause currently held.
 *
 * @param levelConstraint the level constraint to apply
 */
@Override
public void setLevelConstraint(final int levelConstraint) {
  super.setLevelConstraint(levelConstraint);
  // propagate, so that the clauses never disagree with their parent
  for (final NodeBooleanClause entry : clauses) {
    final NodeQuery child = entry.getQuery();
    child.setLevelConstraint(levelConstraint);
  }
}
/**
 * Sets the node constraint bounds on this query and forwards the same
 * bounds to the query of every clause currently held.
 *
 * @param lowerBound the lower bound of the node constraint
 * @param upperBound the upper bound of the node constraint
 */
@Override
public void setNodeConstraint(final int lowerBound, final int upperBound) {
  super.setNodeConstraint(lowerBound, upperBound);
  // propagate, so that the clauses never disagree with their parent
  for (final NodeBooleanClause entry : clauses) {
    final NodeQuery child = entry.getQuery();
    child.setNodeConstraint(lowerBound, upperBound);
  }
}
/**
 * Sets the ancestor pointer on this query and hands the very same ancestor
 * to the query of every clause currently held.
 *
 * @param ancestor the ancestor query to point to
 */
@Override
protected void setAncestorPointer(final NodeQuery ancestor) {
  super.setAncestorPointer(ancestor);
  // propagate, so that the clauses never disagree with their parent
  for (final NodeBooleanClause entry : clauses) {
    final NodeQuery child = entry.getQuery();
    child.setAncestorPointer(ancestor);
  }
}
/**
 * Wraps the given query and occurrence flag into a
 * {@link NodeBooleanClause} and adds it to this boolean query.
 *
 * @param query the query of the new clause
 * @param occur how the new clause must occur
 * @throws TooManyClauses
 *           if the new number of clauses exceeds the maximum clause number
 * @see #getMaxClauseCount()
 */
public void add(final NodeQuery query, final NodeBooleanClause.Occur occur) {
  final NodeBooleanClause clause = new NodeBooleanClause(query, occur);
  this.add(clause);
}
/**
 * Adds a clause to this boolean query.
 * <p>
 * The level constraint, the node constraint and the ancestor pointer of
 * this query are copied onto the query of the clause, so that the clause
 * stays synchronised with its parent.
 * <p>
 * The clause list is only modified once the clause has been synchronised
 * successfully: a {@code null} clause, or a clause without a query, fails
 * with a {@link NullPointerException} and leaves this query untouched.
 *
 * @param clause the clause to add
 * @throws TooManyClauses
 *           if the new number of clauses exceeds the maximum clause number
 * @throws NullPointerException
 *           if the clause or its query is {@code null}
 * @see #getMaxClauseCount()
 */
public void add(final NodeBooleanClause clause) {
  if (clauses.size() >= maxClauseCount) {
    throw new TooManyClauses();
  }
  // Dereference and synchronise the child BEFORE touching the list, so that
  // a failure cannot leave a half-added (or null) entry behind.
  final NodeQuery query = clause.getQuery();
  query.setLevelConstraint(levelConstraint);
  query.setNodeConstraint(lowerBound, upperBound);
  query.setAncestorPointer(ancestor);
  clauses.add(clause);
}
/**
 * Returns the clauses of this query as a newly allocated array, in list
 * order.
 */
public NodeBooleanClause[] getClauses() {
  final NodeBooleanClause[] snapshot = new NodeBooleanClause[clauses.size()];
  return clauses.toArray(snapshot);
}
/**
 * Returns the list of clauses in this query. The returned object is the
 * internal list itself, not a copy.
 */
public List<NodeBooleanClause> clauses() {
  return this.clauses;
}
/**
 * Returns an iterator over the clauses of this query, obtained from the
 * list returned by {@link #clauses()}.
 * <p>
 * The signature matches {@link Iterable#iterator()}, the intent being to
 * allow:
 * <pre>for (NodeBooleanClause clause : booleanQuery) {}</pre>
 * NOTE(review): the class declaration visible in this file does not list
 * {@link Iterable}; confirm that it is inherited before relying on the
 * for-each form.
 */
public final Iterator<NodeBooleanClause> iterator() {
  final List<NodeBooleanClause> all = this.clauses();
  return all.iterator();
}
/**
 * Expert: base {@link Weight} for queries made of node boolean clauses.
 * <p>
 * Sharing this base class allows different query implementations to be
 * scored with the {@link NodeBooleanScorer}.
 * <p>
 * It is subclassed by {@link NodeBooleanWeight}, {@link TwigWeight}.
 */
abstract static class AbstractNodeBooleanWeight extends Weight {

  /** The Similarity implementation, read from the searcher at construction. */
  protected Similarity similarity;

  /** Weights of the sub-queries; to be filled by {@link #initWeights(IndexSearcher)}. */
  protected ArrayList<Weight> weights;

  /**
   * Stores the similarity of the searcher, then lets the subclass build its
   * sub-weights through {@link #initWeights(IndexSearcher)}.
   *
   * @param searcher the searcher providing the similarity
   * @throws IOException if {@link #initWeights(IndexSearcher)} fails
   */
  public AbstractNodeBooleanWeight(final IndexSearcher searcher)
      throws IOException {
    final Similarity fromSearcher = searcher.getSimilarity();
    this.similarity = fromSearcher;
    // runs the subclass implementation; the similarity is already assigned
    this.initWeights(searcher);
  }

  /**
   * Creates the weights of the sub-queries. Called from the constructor.
   *
   * @param searcher the searcher given to the constructor
   * @throws IOException if a sub-weight cannot be created
   */
  protected abstract void initWeights(final IndexSearcher searcher) throws IOException;

  /**
   * Computes the coordination factor by delegating to
   * {@link Similarity#coord(int, int)}.
   *
   * @param overlap the overlap value handed to the similarity
   * @param maxOverlap the maximum overlap value handed to the similarity
   * @return the value returned by the similarity
   */
  public float coord(final int overlap, final int maxOverlap) {
    final float factor = similarity.coord(overlap, maxOverlap);
    return factor;
  }

}
/**
* Expert: the Weight for {@link NodeBooleanQuery}, used to
* normalize, score and explain these queries.
*/
public class NodeBooleanWeight extends AbstractNodeBooleanWeight {
/**
* Creates the weight of the enclosing {@link NodeBooleanQuery}. All the work
* is delegated to the parent constructor.
*
* @param searcher the searcher handed over to the parent constructor
* @throws IOException if the parent constructor fails
*/
public NodeBooleanWeight(final IndexSearcher searcher)
throws IOException {
super(searcher);
}
/**
 * Creates one weight per clause of the enclosing query, in clause order.
 * Before its weight is created, each clause query receives the node
 * constraint, the level constraint and the ancestor pointer of the
 * enclosing query.
 *
 * @param searcher the searcher used to create the sub-weights
 * @throws IOException if a sub-weight cannot be created
 */
@Override
protected void initWeights(final IndexSearcher searcher) throws IOException {
  weights = new ArrayList<Weight>(clauses.size());
  for (final NodeBooleanClause clause : clauses) {
    final NodeQuery child = clause.getQuery();
    // hand the node constraints down to the child query
    child.setNodeConstraint(lowerBound, upperBound);
    child.setLevelConstraint(levelConstraint);
    // hand the ancestor pointer down to the child query
    child.setAncestorPointer(ancestor);
    final Weight childWeight = child.createWeight(searcher);
    weights.add(childWeight);
  }
}
/** Returns the textual form of the enclosing query wrapped in {@code weight(...)}. */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("weight(");
  text.append(NodeBooleanQuery.this);
  text.append(")");
  return text.toString();
}
/** Returns the enclosing {@link NodeBooleanQuery} instance of this weight. */
@Override
public Query getQuery() {
return NodeBooleanQuery.this;
}
/**
 * Sums the normalisation values of the sub-weights whose clause is not
 * prohibited, then multiplies the sum by the squared boost of the
 * enclosing query.
 *
 * @return the boosted sum of the sub-weight normalisation values
 * @throws IOException if a sub-weight fails to compute its value
 */
@Override
public float getValueForNormalization()
    throws IOException {
  float total = 0.0f;
  final int count = weights.size();
  for (int pos = 0; pos < count; pos++) {
    // every sub-weight is asked for its value, in case of side effects
    final float contribution = weights.get(pos).getValueForNormalization();
    final boolean prohibited = clauses.get(pos).isProhibited();
    if (prohibited) {
      // prohibited clauses do not contribute to the sum
      continue;
    }
    total += contribution;
  }
  // apply the boost of the enclosing query, squared
  total *= NodeBooleanQuery.this.getBoost() * NodeBooleanQuery.this.getBoost();
  return total;
}
/**
 * Normalises every sub-weight, prohibited ones included. The boost of the
 * enclosing query is folded into the top-level boost before it is passed
 * down.
 *
 * @param norm the normalisation factor, passed down unchanged
 * @param topLevelBoost the boost accumulated so far
 */
@Override
public void normalize(final float norm, float topLevelBoost) {
  // fold in the boost of the enclosing query
  final float combinedBoost = topLevelBoost * NodeBooleanQuery.this.getBoost();
  // all clauses are normalised (even prohibited ones, in case of side effects)
  for (final Weight sub : weights) {
    sub.normalize(norm, combinedBoost);
  }
}
/**
 * Explains the score of a document as the sum of the explanations of the
 * matching, non-prohibited clauses.
 * <p>
 * The result is a non-match with value 0 when a required clause does not
 * match (or has no scorer) or when a prohibited clause matches. Otherwise
 * it is a match only if at least one non-prohibited clause matched.
 *
 * @param context the reader context the document belongs to
 * @param doc the document to explain
 * @return the explanation for the document
 * @throws IOException if a sub-weight fails to create a scorer or an explanation
 */
@Override
public Explanation explain(final AtomicReaderContext context, final int doc)
    throws IOException {
  final ComplexExplanation result = new ComplexExplanation();
  result.setDescription("sum of:");
  int matching = 0;
  float total = 0.0f;
  boolean failed = false;

  final Iterator<NodeBooleanClause> clauseIt = clauses.iterator();
  for (final Weight weight : weights) {
    final NodeBooleanClause clause = clauseIt.next();

    // a sub-weight without scorer cannot match anything in this segment
    final Scorer probe = weight.scorer(context, true, true, context.reader().getLiveDocs());
    if (probe == null) {
      if (clause.isRequired()) {
        failed = true;
        final Explanation missing = new Explanation(0.0f,
            "no match on required clause (" + clause.getQuery().toString() + ")");
        result.addDetail(missing);
      }
      continue;
    }

    final Explanation detail = weight.explain(context, doc);
    if (!detail.isMatch()) {
      if (clause.isRequired()) {
        final Explanation missing = new Explanation(0.0f,
            "no match on required clause (" + clause.getQuery().toString() + ")");
        missing.addDetail(detail);
        result.addDetail(missing);
        failed = true;
      }
      continue;
    }

    if (clause.isProhibited()) {
      final Explanation forbidden = new Explanation(0.0f,
          "match on prohibited clause (" + clause.getQuery().toString() + ")");
      forbidden.addDetail(detail);
      result.addDetail(forbidden);
      failed = true;
    }
    else {
      result.addDetail(detail);
      total += detail.getValue();
      matching++;
    }
  }

  if (failed) {
    result.setMatch(Boolean.FALSE);
    result.setValue(0.0f);
    result.setDescription
      ("Failure to meet condition(s) of required/prohibited clause(s)");
    return result;
  }

  result.setMatch(matching > 0 ? Boolean.TRUE : Boolean.FALSE);
  result.setValue(total);
  return result;
}
/**
 * Builds a {@link NodeBooleanScorer} out of the scorers of the sub-weights,
 * grouped by the occurrence of their clause.
 * <p>
 * The {@code scoreDocsInOrder} and {@code topScorer} arguments are not
 * consulted: the sub-scorers are always requested with {@code true} and
 * {@code false} respectively.
 *
 * @param context the reader context to score
 * @param scoreDocsInOrder not used by this implementation
 * @param topScorer not used by this implementation
 * @param acceptDocs the accepted documents, passed on to the sub-weights
 * @return the scorer, or {@code null} if a required clause has no scorer
 *         or if no required and no optional scorer is available
 * @throws IOException if a sub-weight fails to create its scorer
 */
@Override
public Scorer scorer(final AtomicReaderContext context,
                     final boolean scoreDocsInOrder,
                     final boolean topScorer, final Bits acceptDocs)
    throws IOException {
  final List<NodeScorer> required = new ArrayList<NodeScorer>();
  final List<NodeScorer> prohibited = new ArrayList<NodeScorer>();
  final List<NodeScorer> optional = new ArrayList<NodeScorer>();

  final Iterator<NodeBooleanClause> clauseIt = clauses.iterator();
  for (final Weight weight : weights) {
    final NodeBooleanClause clause = clauseIt.next();
    final NodeScorer sub = (NodeScorer) weight.scorer(context, true, false, acceptDocs);

    if (sub == null) {
      if (clause.isRequired()) {
        // a required clause that cannot match rules out every document
        return null;
      }
      continue;
    }

    if (clause.isRequired()) {
      required.add(sub);
    }
    else if (clause.isProhibited()) {
      prohibited.add(sub);
    }
    else {
      optional.add(sub);
    }
  }

  if (required.isEmpty() && optional.isEmpty()) {
    // nothing but (at most) prohibited clauses: nothing can match
    return null;
  }

  return new NodeBooleanScorer(this, required, prohibited, optional);
}
}
/**
* Builds the {@link NodeBooleanWeight} of this query.
*
* @param searcher the searcher passed on to the weight constructor
* @return a new {@link NodeBooleanWeight} attached to this query instance
* @throws IOException if the weight constructor fails
*/
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
return new NodeBooleanWeight(searcher);
}
/**
 * Rewrites this query.
 * <p>
 * A query holding exactly one non-prohibited clause is replaced by the
 * rewritten query of that clause, which receives the boost (multiplied
 * with its own), the constraints and the ancestor pointer of this query.
 * <p>
 * Otherwise every clause is rewritten in turn. If at least one clause
 * query changed, a clone of this query carrying the rewritten clauses is
 * returned; if none changed, this very instance is returned.
 *
 * @param reader the reader to rewrite against
 * @return the rewritten query
 * @throws IOException if rewriting a clause query fails
 */
@Override
public Query rewrite(final IndexReader reader) throws IOException {
  // shortcut for queries holding a single clause
  if (clauses.size() == 1) {
    final NodeBooleanClause only = clauses.get(0);
    if (!only.isProhibited()) {
      // the clause query is rewritten first
      NodeQuery replacement = (NodeQuery) only.getQuery().rewrite(reader);

      if (this.getBoost() != 1.0f) {
        if (replacement == only.getQuery()) {
          // rewrite was a no-op: work on a clone, not on the clause query itself
          replacement = (NodeQuery) replacement.clone();
        }
        replacement.setBoost(this.getBoost() * replacement.getBoost());
      }

      // the replacement takes over the constraints and the ancestor pointer
      replacement.setNodeConstraint(lowerBound, upperBound);
      replacement.setLevelConstraint(levelConstraint);
      replacement.setAncestorPointer(ancestor);
      return replacement;
    }
  }

  // general case: rewrite each clause, cloning lazily on the first change
  NodeBooleanQuery copy = null;
  for (int pos = 0; pos < clauses.size(); pos++) {
    final NodeBooleanClause clause = clauses.get(pos);
    final NodeQuery rewritten = (NodeQuery) clause.getQuery().rewrite(reader);
    if (rewritten == clause.getQuery()) {
      continue; // this clause is unaffected
    }

    if (copy == null) {
      copy = (NodeBooleanQuery) this.clone();
    }
    // the rewritten query takes over the constraints and the ancestor pointer
    rewritten.setNodeConstraint(lowerBound, upperBound);
    rewritten.setLevelConstraint(levelConstraint);
    rewritten.setAncestorPointer(ancestor);
    copy.clauses.set(pos, new NodeBooleanClause(rewritten, clause.getOccur()));
  }

  return copy == null ? this : copy;
}
/**
 * Collects the terms of this query by asking the query of every clause to
 * add its own terms to the given set.
 *
 * @param terms the set receiving the terms
 */
@Override
public void extractTerms(final Set<Term> terms) {
  for (final NodeBooleanClause entry : clauses) {
    final NodeQuery child = entry.getQuery();
    child.extractTerms(terms);
  }
}
/**
 * Returns a copy of this query that owns its own clause list.
 * <p>
 * The list is copied, the clauses it contains are not: adding a clause to,
 * or replacing a clause in, the copy does not affect this query, while the
 * clause objects themselves are shared between the two.
 *
 * @return the cloned query
 */
@Override
public Query clone() {
  final NodeBooleanQuery copy = (NodeBooleanQuery) super.clone();
  // copy constructor rather than ArrayList.clone(): same shallow copy, but
  // without the unchecked cast
  copy.clauses = new ArrayList<NodeBooleanClause>(this.clauses);
  return copy;
}
/**
 * Prints a user-readable version of this query.
 * <p>
 * Clauses are separated by a single space. A prohibited clause is prefixed
 * with {@code -} and a required one with {@code +}. Nested
 * {@link NodeBooleanQuery}s are wrapped in parentheses. When the boost
 * differs from 1, the whole query is wrapped in parentheses and followed by
 * the boost as formatted by {@link ToStringUtils#boost(float)}.
 *
 * @param field the field passed on to the {@code toString} of the clause queries
 * @return the textual form of this query
 */
@Override
public String toString(final String field) {
  // method-local buffer: no need for the synchronised StringBuffer
  final StringBuilder buffer = new StringBuilder();
  final boolean hasBoost = (this.getBoost() != 1.0f);
  if (hasBoost) {
    buffer.append("(");
  }

  final int last = clauses.size() - 1;
  for (int i = 0; i <= last; i++) {
    final NodeBooleanClause c = clauses.get(i);
    if (c.isProhibited()) {
      buffer.append("-");
    }
    else if (c.isRequired()) {
      buffer.append("+");
    }

    final Query subQuery = c.getQuery();
    if (subQuery != null) {
      if (subQuery instanceof NodeBooleanQuery) { // wrap sub-bools in parens
        buffer.append("(");
        buffer.append(subQuery.toString(field));
        buffer.append(")");
      }
      else {
        buffer.append(subQuery.toString(field));
      }
    }

    if (i != last) {
      buffer.append(" ");
    }
  }

  if (hasBoost) {
    buffer.append(")").append(ToStringUtils.boost(this.getBoost()));
  }
  return buffer.toString();
}
/**
 * Returns true iff <code>o</code> is equal to this.
 * <p>
 * Two queries are equal when they have the same boost, the same level
 * constraint, the same node constraint bounds and equal clause lists.
 * <p>
 * The boost is compared through {@link Float#floatToIntBits(float)}, which
 * is what {@link #hashCode()} hashes. A plain {@code ==} would make
 * {@code 0.0f} and {@code -0.0f} equal despite different hash codes, and
 * would make a query with a {@code NaN} boost unequal to itself.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is an equal {@link NodeBooleanQuery}
 */
@Override
public boolean equals(final Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof NodeBooleanQuery)) {
    return false;
  }
  final NodeBooleanQuery other = (NodeBooleanQuery) o;
  return Float.floatToIntBits(this.getBoost()) == Float.floatToIntBits(other.getBoost()) &&
         this.clauses.equals(other.clauses) &&
         this.levelConstraint == other.levelConstraint &&
         this.lowerBound == other.lowerBound &&
         this.upperBound == other.upperBound;
}
/**
 * Returns a hash code value for this object: the XOR of the boost bits,
 * the hash code of the clause list, the level constraint and both node
 * constraint bounds.
 */
@Override
public int hashCode() {
  int hash = Float.floatToIntBits(this.getBoost());
  hash ^= clauses.hashCode();
  hash ^= levelConstraint;
  hash ^= upperBound;
  hash ^= lowerBound;
  return hash;
}
/**
 * Thrown when an attempt is made to add more than
 * {@link NodeBooleanQuery#getMaxClauseCount()} clauses. This typically
 * happens if a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery
 * is expanded to many terms during search.
 */
public static class TooManyClauses extends RuntimeException {

  /** Creates the exception with a message stating the current clause limit. */
  public TooManyClauses() {
    super("maxClauseCount is set to " + NodeBooleanQuery.maxClauseCount);
  }

}
}