/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.Version;
/**
* <code>JackrabbitQueryParser</code> extends the standard lucene query parser
* and adds JCR specific customizations.
*/
public class JackrabbitQueryParser extends QueryParser {
/**
* The Jackrabbit synonym provider or <code>null</code> if there is none.
*/
private final SynonymProvider synonymProvider;
private final PerQueryCache cache;
/**
* Creates a new query parser instance.
*
* @param fieldName the field name.
* @param analyzer the analyzer.
* @param synonymProvider the synonym provider or <code>null</code> if none
* is available.
*/
public JackrabbitQueryParser(String fieldName,
Analyzer analyzer,
SynonymProvider synonymProvider,
PerQueryCache cache) {
super(Version.LUCENE_24, fieldName, analyzer);
this.synonymProvider = synonymProvider;
this.cache = cache;
setAllowLeadingWildcard(true);
setDefaultOperator(Operator.AND);
}
/**
* {@inheritDoc}
*/
public Query parse(String textsearch) throws ParseException {
// replace escaped ' with just '
StringBuffer rewritten = new StringBuffer();
// the default lucene query parser recognizes 'AND' and 'NOT' as
// keywords.
textsearch = textsearch.replaceAll("AND", "and");
textsearch = textsearch.replaceAll("NOT", "not");
boolean escaped = false;
for (int i = 0; i < textsearch.length(); i++) {
if (textsearch.charAt(i) == '\\') {
if (escaped) {
rewritten.append("\\\\");
escaped = false;
} else {
escaped = true;
}
} else if (textsearch.charAt(i) == '\'') {
if (escaped) {
escaped = false;
}
rewritten.append(textsearch.charAt(i));
} else if (textsearch.charAt(i) == '~') {
if (i == 0 || Character.isWhitespace(textsearch.charAt(i - 1))) {
// escape tilde so we can use it for similarity query
rewritten.append("\\");
}
rewritten.append('~');
} else if (textsearch.charAt(i) == ':') {
// fields as known in lucene are not supported
rewritten.append("\\:");
} else {
if (escaped) {
rewritten.append('\\');
escaped = false;
}
rewritten.append(textsearch.charAt(i));
}
}
return super.parse(rewritten.toString());
}
/**
* Factory method for generating a synonym query.
* Called when parser parses an input term token that has the synonym
* prefix (~term) prepended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getSynonymQuery(String field, String termStr)
throws ParseException {
List<BooleanClause> synonyms = new ArrayList<BooleanClause>();
synonyms.add(new BooleanClause(getFieldQuery(field, termStr),
BooleanClause.Occur.SHOULD));
if (synonymProvider != null) {
for (String term : synonymProvider.getSynonyms(termStr)) {
synonyms.add(new BooleanClause(getFieldQuery(field, term), BooleanClause.Occur.SHOULD));
}
}
if (synonyms.size() == 1) {
return synonyms.get(0).getQuery();
} else {
return getBooleanQuery(synonyms);
}
}
/**
* {@inheritDoc}
*/
protected Query getFieldQuery(String field, String queryText)
throws ParseException {
if (queryText.startsWith("~")) {
// synonym query
return getSynonymQuery(field, queryText.substring(1));
} else {
return super.getFieldQuery(field, queryText);
}
}
/**
* {@inheritDoc}
*/
protected Query getPrefixQuery(String field, String termStr)
throws ParseException {
// only create a prefix query when the term is a single word / token
Analyzer a = getAnalyzer();
TokenStream ts = a.tokenStream(field, new StringReader(termStr));
int count = 0;
boolean isCJ = false;
try {
TypeAttribute t = ts.addAttribute(TypeAttribute.class);
ts.reset();
while (ts.incrementToken()) {
count++;
isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
}
ts.end();
} catch (IOException e) {
throw new ParseException(e.getMessage());
} finally {
try {
ts.close();
} catch (IOException e) {
// ignore
}
}
if (count > 1 && isCJ) {
return getFieldQuery(field, termStr);
} else {
return getWildcardQuery(field, termStr + "*");
}
}
/**
* {@inheritDoc}
*/
protected Query getWildcardQuery(String field, String termStr)
throws ParseException {
if (getLowercaseExpandedTerms()) {
termStr = termStr.toLowerCase();
}
return new WildcardQuery(field, null, translateWildcards(termStr), cache);
}
/**
* Translates unescaped wildcards '*' and '?' into '%' and '_'.
*
* @param input the input String.
* @return the translated String.
*/
private String translateWildcards(String input) {
StringBuffer translated = new StringBuffer(input.length());
boolean escaped = false;
for (int i = 0; i < input.length(); i++) {
if (input.charAt(i) == '\\') {
if (escaped) {
translated.append("\\\\");
escaped = false;
} else {
escaped = true;
}
} else if (input.charAt(i) == '*') {
if (escaped) {
translated.append('*');
escaped = false;
} else {
translated.append('%');
}
} else if (input.charAt(i) == '?') {
if (escaped) {
translated.append('?');
escaped = false;
} else {
translated.append('_');
}
} else if (input.charAt(i) == '%' || input.charAt(i) == '_') {
// escape every occurrence of '%' and '_'
escaped = false;
translated.append('\\').append(input.charAt(i));
} else {
if (escaped) {
translated.append('\\');
escaped = false;
}
translated.append(input.charAt(i));
}
}
return translated.toString();
}
}