Package org.apache.solr.search

Source Code of org.apache.solr.search.ExtendedAnalyzer

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* This parser was originally derived from DismaxQParser from Solr.
* All changes are Copyright 2008, Lucid Imagination, Inc.
*/

package org.apache.solr.search;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.DefaultSolrParams;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.function.BoostedQuery;
import org.apache.solr.search.function.FunctionQuery;
import org.apache.solr.search.function.ProductFloatFunction;
import org.apache.solr.search.function.QueryValueSource;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.analysis.*;

import java.util.*;
import java.io.Reader;
import java.io.IOException;

/**
* An advanced multi-field query parser.
* @lucene.experimental
*/
public class ExtendedDismaxQParserPlugin extends QParserPlugin {
  public static final String NAME = "edismax";

  public void init(NamedList args) {
  }

  @Override
  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new ExtendedDismaxQParser(qstr, localParams, params, req);
  }
}


class ExtendedDismaxQParser extends QParser {

  /**
   * A field we can't ever find in any schema, so we can safely tell
   * DisjunctionMaxQueryParser to use it as our defaultField, and
   * map aliases from it to any field in our schema.
   */
  private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";

  /** shorten the class references for utilities */
  private static class U extends SolrPluginUtils {
    /* :NOOP */
  }

  /** shorten the class references for utilities */
  private static interface DMP extends DisMaxParams {
    /* :NOOP */
  }


  public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    super(qstr, localParams, params, req);
  }

  // State populated by parse() and read back by the highlighting and
  // debug-info methods of this parser.

  /** Field =&gt; boost mapping parsed from the "qf" param (or the schema default search field). */
  Map<String,Float> queryFields;
  /** The parsed main user query; stays null when the alternate query was used instead. */
  Query parsedUserQuery;


  /** Raw boost query strings (DisMaxParams.BQ) as supplied in the params. */
  private String[] boostParams;
  /** Raw multiplicative boost strings (the "boost" param). */
  private String[] multBoosts;
  /** Parsed boost queries; null when no boost query params were supplied. */
  private List<Query> boostQueries;
  /** Query parsed from the alternate query param, used when no query string was given. */
  private Query altUserQuery;
  /** Sub-parser that produced altUserQuery. */
  private QParser altQParser;


  @Override
  public Query parse() throws ParseException {
    SolrParams localParams = getLocalParams();
    SolrParams params = getParams();
   
    SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params);

    queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF));
    if (0 == queryFields.size()) {
      queryFields.put(req.getSchema().getDefaultSearchFieldName(), 1.0f);
    }
   
    // Boosted phrase of the full query string
    Map<String,Float> phraseFields =
      U.parseFieldBoosts(solrParams.getParams(DMP.PF));
    // Boosted Bi-Term Shingles from the query string
    Map<String,Float> phraseFields2 =
      U.parseFieldBoosts(solrParams.getParams("pf2"));
    // Boosted Tri-Term Shingles from the query string
    Map<String,Float> phraseFields3 =
      U.parseFieldBoosts(solrParams.getParams("pf3"));

    float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f);

    int pslop = solrParams.getInt(DMP.PS, 0);
    int qslop = solrParams.getInt(DMP.QS, 0);

    // remove stopwords from mandatory "matching" component?
    boolean stopwords = solrParams.getBool("stopwords", true);

    /* the main query we will execute.  we disable the coord because
     * this query is an artificial construct
     */
    BooleanQuery query = new BooleanQuery(true);

    /* * * Main User Query * * */
    parsedUserQuery = null;
    String userQuery = getString();
    altUserQuery = null;
    if( userQuery == null || userQuery.length() < 1 ) {
      // If no query is specified, we may have an alternate
      String altQ = solrParams.get( DMP.ALTQ );
      if (altQ != null) {
        altQParser = subQuery(altQ, null);
        altUserQuery = altQParser.getQuery();
        query.add( altUserQuery , BooleanClause.Occur.MUST );
      } else {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "missing query string" );
      }
    }
    else {    
      // There is a valid query string
      // userQuery = partialEscape(U.stripUnbalancedQuotes(userQuery)).toString();

      boolean lowercaseOperators = solrParams.getBool("lowercaseOperators", true);
      String mainUserQuery = userQuery;

      ExtendedSolrQueryParser up =
        new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
      up.addAlias(IMPOSSIBLE_FIELD_NAME,
                tiebreaker, queryFields);
      up.setPhraseSlop(qslop);     // slop for explicit user phrase queries
      up.setAllowLeadingWildcard(true);

      // defer escaping and only do if lucene parsing fails, or we need phrases
      // parsing fails.  Need to sloppy phrase queries anyway though.
      List<Clause> clauses = null;
      boolean specialSyntax = false;
      int numPluses = 0;
      int numMinuses = 0;
      int numOptional = 0;
      int numAND = 0;
      int numOR = 0;
      int numNOT = 0;
      boolean sawLowerAnd=false;
      boolean sawLowerOr=false;

      clauses = splitIntoClauses(userQuery, false);
      for (Clause clause : clauses) {
        if (!clause.isPhrase && clause.hasSpecialSyntax) {
          specialSyntax = true;
        }
        if (clause.must == '+') numPluses++;
        if (clause.must == '-') numMinuses++;
        if (clause.isBareWord()) {
          String s = clause.val;
          if ("AND".equals(s)) {
            numAND++;
          } else if ("OR".equals(s)) {
            numOR++;
          } else if ("NOT".equals(s)) {
            numNOT++;
          } else if (lowercaseOperators) {
            if ("and".equals(s)) {
              numAND++;
              sawLowerAnd=true;
            } else if ("or".equals(s)) {
              numOR++;
              sawLowerOr=true;
            }
          }
        }
      }
      numOptional = clauses.size() - (numPluses + numMinuses);

      // convert lower or mixed case operators to uppercase if we saw them.
      // only do this for the lucene query part and not for phrase query boosting
      // since some fields might not be case insensitive.
      // We don't use a regex for this because it might change and AND or OR in
      // a phrase query in a case sensitive field.
      if (sawLowerAnd || sawLowerOr) {
        StringBuilder sb = new StringBuilder();
        for (int i=0; i<clauses.size(); i++) {
          Clause clause = clauses.get(i);
          String s = clause.raw;
          // and and or won't be operators at the start or end
          if (i>0 && i+1<clauses.size()) {
            if ("AND".equalsIgnoreCase(s)) {
              s="AND";
            } else if ("OR".equalsIgnoreCase(s)) {
              s="OR";
            }
          }
          sb.append(s);
          sb.append(' ');
        }

        mainUserQuery = sb.toString();
      }

      // For correct lucene queries, turn off mm processing if there
      // were explicit operators (except for AND).
      boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;

      try {
        up.setRemoveStopFilter(!stopwords);
        up.exceptions = true;
        parsedUserQuery = up.parse(mainUserQuery);

        if (stopwords && isEmpty(parsedUserQuery)) {
         // if the query was all stop words, remove none of them
          up.setRemoveStopFilter(true);
          parsedUserQuery = up.parse(mainUserQuery);         
        }
      } catch (Exception e) {
        // ignore failure and reparse later after escaping reserved chars
        up.exceptions = false;
      }

      if (parsedUserQuery != null && doMinMatched) {
        String minShouldMatch = solrParams.get(DMP.MM, "100%");
        if (parsedUserQuery instanceof BooleanQuery) {
          U.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch);
        }
      }


      if (parsedUserQuery == null) {
        StringBuilder sb = new StringBuilder();
        for (Clause clause : clauses) {

          boolean doQuote = clause.isPhrase;

          String s=clause.val;
          if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) {
            doQuote=true;
          }

          if (clause.must != 0) {
            sb.append(clause.must);
          }
          if (clause.field != null) {
            sb.append(clause.field);
            sb.append(':');
          }
          if (doQuote) {
            sb.append('"');
          }
          sb.append(clause.val);
          if (doQuote) {
            sb.append('"');
          }
          sb.append(' ');
        }
        String escapedUserQuery = sb.toString();
        parsedUserQuery = up.parse(escapedUserQuery);

        // Only do minimum-match logic
        String minShouldMatch = solrParams.get(DMP.MM, "100%");

        if (parsedUserQuery instanceof BooleanQuery) {
          BooleanQuery t = new BooleanQuery();
          U.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery);
          U.setMinShouldMatch(t, minShouldMatch);
          parsedUserQuery = t;
        }
      }

      query.add(parsedUserQuery, BooleanClause.Occur.MUST);

      // sloppy phrase queries for proximity
      if (phraseFields.size() > 0 ||
          phraseFields2.size() > 0 ||
          phraseFields3.size() > 0) {
       
        // find non-field clauses
        List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
        for (Clause clause : clauses) {
          if (clause.field != null || clause.isPhrase) continue;
          // check for keywords "AND,OR,TO"
          if (clause.isBareWord()) {
            String s = clause.val.toString();
            // avoid putting explict operators in the phrase query
            if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue;
          }
          normalClauses.add(clause);
        }

        // full phrase...
        addShingledPhraseQueries(query, normalClauses, phraseFields, 0,
                                 tiebreaker, pslop);
        // shingles...
        addShingledPhraseQueries(query, normalClauses, phraseFields2, 2
                                 tiebreaker, pslop);
        addShingledPhraseQueries(query, normalClauses, phraseFields3, 3,
                                 tiebreaker, pslop);
       
      }
    }



    /* * * Boosting Query * * */
    boostParams = solrParams.getParams(DMP.BQ);
    //List<Query> boostQueries = U.parseQueryStrings(req, boostParams);
    boostQueries=null;
    if (boostParams!=null && boostParams.length>0) {
      boostQueries = new ArrayList<Query>();
      for (String qs : boostParams) {
        if (qs.trim().length()==0) continue;
        Query q = subQuery(qs, null).getQuery();
        boostQueries.add(q);
      }
    }
    if (null != boostQueries) {
      for(Query f : boostQueries) {
        query.add(f, BooleanClause.Occur.SHOULD);
      }
    }

    /* * * Boosting Functions * * */

    String[] boostFuncs = solrParams.getParams(DMP.BF);
    if (null != boostFuncs && 0 != boostFuncs.length) {
      for (String boostFunc : boostFuncs) {
        if(null == boostFunc || "".equals(boostFunc)) continue;
        Map<String,Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
        for (String f : ff.keySet()) {
          Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery();
          Float b = ff.get(f);
          if (null != b) {
            fq.setBoost(b);
          }
          query.add(fq, BooleanClause.Occur.SHOULD);
        }
      }
    }


    //
    // create a boosted query (scores multiplied by boosts)
    //
    Query topQuery = query;
    multBoosts = solrParams.getParams("boost");
    if (multBoosts!=null && multBoosts.length>0) {

      List<ValueSource> boosts = new ArrayList<ValueSource>();
      for (String boostStr : multBoosts) {
        if (boostStr==null || boostStr.length()==0) continue;
        Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery();
        ValueSource vs;
        if (boost instanceof FunctionQuery) {
          vs = ((FunctionQuery)boost).getValueSource();
        } else {
          vs = new QueryValueSource(boost, 1.0f);
        }
        boosts.add(vs);
      }

      if (boosts.size()>1) {
        ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
        topQuery = new BoostedQuery(query, prod);
      } else if (boosts.size() == 1) {
        topQuery = new BoostedQuery(query, boosts.get(0));
      }
    }

    return topQuery;
  }

  /**
   * Modifies the main query by adding a new optional Query consisting
   * of shingled phrase queries across the specified clauses using the
   * specified field =&gt; boost mappings.
   *
   * @param mainQuery Where the phrase boosting queries will be added
   * @param clauses Clauses that will be used to construct the phrases
   * @param fields Field =&gt; boost mappings for the phrase queries
   * @param shingleSize how big the phrases should be, 0 means a single phrase
   * @param tiebreaker tie breker value for the DisjunctionMaxQueries
   * @param slop slop value for the constructed phrases
   */
  private void addShingledPhraseQueries(final BooleanQuery mainQuery,
                                        final List<Clause> clauses,
                                        final Map<String,Float> fields,
                                        int shingleSize,
                                        final float tiebreaker,
                                        final int slop)
    throws ParseException {
   
    if (null == fields || fields.isEmpty() ||
        null == clauses || clauses.size() <= shingleSize )
      return;
   
    if (0 == shingleSize) shingleSize = clauses.size();

    final int goat = shingleSize-1; // :TODO: better name for var?

    StringBuilder userPhraseQuery = new StringBuilder();
      for (int i=0; i < clauses.size() - goat; i++) {
        userPhraseQuery.append('"');
        for (int j=0; j <= goat; j++) {
          userPhraseQuery.append(clauses.get(i + j).val);
          userPhraseQuery.append(' ');
        }
        userPhraseQuery.append('"');
        userPhraseQuery.append(' ');
      }

      /* for parsing sloppy phrases using DisjunctionMaxQueries */
      ExtendedSolrQueryParser pp =
        new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);

      pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, fields);
      pp.setPhraseSlop(slop);
      pp.setRemoveStopFilter(true)// remove stop filter and keep stopwords

      /* :TODO: reevaluate using makeDismax=true vs false...
       *
       * The DismaxQueryParser always used DisjunctionMaxQueries for the
       * pf boost, for the same reasons it used them for the qf fields.
       * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added
       * the "makeDismax=false" property to use BooleanQueries instead, but
       * when asked why his response was "I honestly don't recall" ...
       *
       * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813
       *
       * so for now, we continue to use dismax style queries becuse it
       * seems the most logical and is back compatible, but we should
       * try to figure out what Yonik was thinking at the time (because he
       * rarely does things for no reason)
       */
      pp.makeDismax = true;


      // minClauseSize is independent of the shingleSize because of stop words
      // (if they are removed from the middle, so be it, but we need at least
      // two or there shouldn't be a boost)
      pp.minClauseSize = 2
     
      // TODO: perhaps we shouldn't use synonyms either...

      Query phrase = pp.parse(userPhraseQuery.toString());
      if (phrase != null) {
        mainQuery.add(phrase, BooleanClause.Occur.SHOULD);
      }
  }


  @Override
  public String[] getDefaultHighlightFields() {
    String[] highFields = queryFields.keySet().toArray(new String[0]);
    return highFields;
  }

  @Override
  public Query getHighlightQuery() throws ParseException {
    return parsedUserQuery;
  }

  @Override
  public void addDebugInfo(NamedList<Object> debugInfo) {
    super.addDebugInfo(debugInfo);
    debugInfo.add("altquerystring", altUserQuery);
    if (null != boostQueries) {
      debugInfo.add("boost_queries", boostParams);
      debugInfo.add("parsed_boost_queries",
                QueryParsing.toString(boostQueries, getReq().getSchema()));
    }
    debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF));
  }


 
  public static CharSequence partialEscape(CharSequence s) {
    StringBuilder sb = new StringBuilder();

    int len = s.length();
    for (int i = 0; i < len; i++) {
      char c = s.charAt(i);
      if (c == ':') {
        // look forward to make sure it's something that won't
        // cause a parse exception (something that won't be escaped... like
        // +,-,:, whitespace
        if (i+1<len && i>0) {
          char ch = s.charAt(i+1);
          if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
            // OK, at this point the chars after the ':' will be fine.
            // now look back and try to determine if this is a fieldname
            // [+,-]? [letter,_] [letter digit,_,-,.]*
            // This won't cover *all* possible lucene fieldnames, but we should
            // only pick nice names to begin with
            int start, pos;
            for (start=i-1; start>=0; start--) {
              ch = s.charAt(start);
              if (Character.isWhitespace(ch)) break;
            }

            // skip whitespace
            pos = start+1;

            // skip leading + or -
            ch = s.charAt(pos);
            if (ch=='+' || ch=='-') {
              pos++;
            }

            // we don't need to explicitly check for end of string
            // since ':' will act as our sentinal

              // first char can't be '-' or '.'
              ch = s.charAt(pos++);
              if (Character.isJavaIdentifierPart(ch)) {

                for(;;) {
                  ch = s.charAt(pos++);
                  if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
                    break;
                  }
                }

                if (pos<=i) {
                  // OK, we got to the ':' and everything looked like a valid fieldname, so
                  // don't escape the ':'
                  sb.append(':');
                  continue// jump back to start of outer-most loop
                }

              }


          }
        }

        // we fell through to here, so we should escape this like other reserved chars.
        sb.append('\\');
      }
      else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
          c == '^' || c == '[' || c == ']' ||
          c == '{'  || c == '}' || c == '~' || c == '*' || c == '?'
          )
      {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb;
  }


  static class Clause {

    boolean isBareWord() {
      return must==0 && !isPhrase;
    }

    String field;
    boolean isPhrase;
    boolean hasWhitespace;
    boolean hasSpecialSyntax;
    boolean syntaxError;
    char must;   // + or -
    String val;  // the field value (minus the field name, +/-, quotes)
    String raw;  // the raw clause w/o leading/trailing whitespace
  }

 
  /**
   * Splits a raw user query string into clauses.
   *
   * <p>Each clause records an optional leading '+' / '-', an optional field
   * name (as recognized by getFieldName), whether it was a quoted phrase,
   * and its value. Backslash escapes in the input are kept; outside of a
   * phrase, query-syntax characters are backslash-escaped in the value and
   * flag the clause as having special syntax.
   *
   * <p>If a phrase is never closed, the whole string is re-split with
   * quotes treated like any other character.
   *
   * @param s the query string to split
   * @param ignoreQuote when true, a double quote does not start a phrase
   * @return the clauses in input order
   */
  public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
    ArrayList<Clause> lst = new ArrayList<Clause>(4);
    Clause clause = new Clause();

    int pos=0;
    int end=s.length();
    char ch=0;
    int start;
    outer: while (pos < end) {
      ch = s.charAt(pos);

      // skip leading whitespace
      while (Character.isWhitespace(ch)) {
        if (++pos >= end) break;
        ch = s.charAt(pos);
      }

      // remember where the raw clause begins
      start = pos;     

      if (ch=='+' || ch=='-') {
        clause.must = ch;
        pos++;
      }

      clause.field = getFieldName(s, pos, end);
      if (clause.field != null) {
        pos += clause.field.length(); // skip the field name
        pos++;  // skip the ':'
      }

      // nothing left after the whitespace / modifier / field name
      if (pos>=end) break;


      // non-zero while reading inside a quoted phrase
      char inString=0;

      ch = s.charAt(pos);
      if (!ignoreQuote && ch=='"') {
        clause.isPhrase = true;
        inString = '"';
        pos++;
      }

      // accumulate the clause value
      StringBuilder sb = new StringBuilder();
      while (pos < end) {
        ch = s.charAt(pos++);
        if (ch=='\\') {    // skip escaped chars, but leave escaped
          sb.append(ch);
          if (pos >= end) {
            sb.append(ch); // double backslash if we are at the end of the string
            break;
          }
          ch = s.charAt(pos++);
          sb.append(ch);
          continue;
        } else if (inString != 0 && ch == inString) {
          // closing quote of the phrase
          inString=0;
          break;
        } else if (Character.isWhitespace(ch)) {
          clause.hasWhitespace=true;
          if (inString == 0) {
            // end of the token if we aren't in a string, backing
            // up the position.
            pos--;
            break;
          }
        }

        if (inString == 0) {
          // outside a phrase: escape query-syntax chars and flag the clause
          switch (ch) {
            case '!':
            case '(':
            case ')':
            case ':':
            case '^':
            case '[':
            case ']':
            case '{':
            case '}':
            case '~':
            case '*':
            case '?':
            case '"':
            case '+':
            case '-':
              clause.hasSpecialSyntax = true;
              sb.append('\\');
          }
        } else if (ch=='"') {
          // only char we need to escape in a string is double quote
          sb.append('\\');
        }
        sb.append(ch);
      }
      clause.val = sb.toString();
      if (clause.isPhrase) {
        if (inString != 0) {
          // detected bad quote balancing... retry
          // parsing with quotes like any other char
          return splitIntoClauses(s, true);
        }

        // special syntax in a string isn't special
        clause.hasSpecialSyntax = false;       
      } else {
        // an empty clause... must be just a + or - on its own
        if (clause.val.length() == 0) {
          clause.syntaxError = true;
          if (clause.must != 0) {
            // turn the lone modifier into an escaped literal value
            clause.val="\\"+clause.must;
            clause.must = 0;
            clause.hasSpecialSyntax = true;
          } else {
            // uh.. this shouldn't happen.
            clause=null;
          }
        }
      }

      if (clause != null) {
        clause.raw = s.substring(start, pos);
        lst.add(clause);
      }
      // fresh clause for the next iteration
      clause = new Clause();
    }

    return lst;
  }

  public String getFieldName(String s, int pos, int end) {
    if (pos >= end) return null;
    int p=pos;
    int colon = s.indexOf(':',pos);
    // make sure there is space after the colon, but not whitespace
    if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null;
    char ch = s.charAt(p++);
    if (!Character.isJavaIdentifierPart(ch)) return null;
    while (p<colon) {
      ch = s.charAt(p++);
      if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null;
    }
    String fname = s.substring(pos, p);
    return getReq().getSchema().getFieldTypeNoEx(fname) == null ? null : fname;
  }


  public static List<String> split(String s, boolean ignoreQuote) {
    ArrayList<String> lst = new ArrayList<String>(4);
    int pos=0, start=0, end=s.length();
    char inString=0;
    char ch=0;
    while (pos < end) {
      char prevChar=ch;
      ch = s.charAt(pos++);
      if (ch=='\\') {    // skip escaped chars
        pos++;
      } else if (inString != 0 && ch==inString) {
        inString=0;
      } else if (!ignoreQuote && ch=='"') {
        // If char is directly preceeded by a number or letter
        // then don't treat it as the start of a string.
        if (!Character.isLetterOrDigit(prevChar)) {
          inString=ch;
        }
      } else if (Character.isWhitespace(ch) && inString==0) {
        lst.add(s.substring(start,pos-1));
        start=pos;
      }
    }
    if (start < end) {
      lst.add(s.substring(start,end));
    }

    if (inString != 0) {
      // unbalanced quote... ignore them
      return split(s, true);
    }

    return lst;
  }




    enum QType {
      FIELD,
      PHRASE,
      PREFIX,
      WILDCARD,
      FUZZY,
      RANGE
    }


  /**
   * Shared, pre-allocated exception instance. getAliasedQuery throws it when
   * exceptions are enabled and a fielded query names a field the schema does
   * not know.
   */
  static final RuntimeException unknownField = new RuntimeException("UnknownField");
  static {
    // record the stack trace once, at class initialization
    unknownField.fillInStackTrace();
  }

  /**
   * A subclass of SolrQueryParser that supports aliasing fields for
   * constructing DisjunctionMaxQueries.
   */
  class ExtendedSolrQueryParser extends SolrQueryParser {


    /** A simple container for storing alias info
     */
    protected class Alias {
      public float tie;
      public Map<String,Float> fields;
    }

    // when true, getAliasedQuery combines the per-field queries into a
    // DisjunctionMaxQuery; otherwise into a BooleanQuery of SHOULD clauses
    boolean makeDismax=true;
    // coord setting for the BooleanQuery built when makeDismax is false
    boolean disableCoord=true;
    // NOTE(review): not read in the visible part of this file -- confirm usage
    boolean allowWildcard=true;
    int minClauseSize = 0;    // minimum number of clauses per phrase query...
                              // used when constructing boosting part of query via sloppy phrases
    boolean exceptions;  //  allow exceptions to be thrown (for example on a missing field)

    // the analyzer handed to the superclass, kept so its stop-filter flag can be toggled
    ExtendedAnalyzer analyzer;

    /**
     * Where we store a map from field name we expect to see in our query
     * string, to Alias object containing the fields to use in our
     * DisjunctionMaxQuery and the tiebreaker to use.
     */
    protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);

    public ExtendedSolrQueryParser(QParser parser, String defaultField) {
      super(parser, defaultField, new ExtendedAnalyzer(parser));
      analyzer = (ExtendedAnalyzer)getAnalyzer();     
      // don't trust that our parent class won't ever change it's default
      setDefaultOperator(QueryParser.Operator.OR);
    }

    public void setRemoveStopFilter(boolean remove) {
      analyzer.removeStopFilter = remove;
    }

    @Override
    protected Query getBooleanQuery(List clauses, boolean disableCoord) throws ParseException {
      Query q = super.getBooleanQuery(clauses, disableCoord);
      if (q != null) {
        q = QueryUtils.makeQueryable(q);
      }
      return q;
    }


    ////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////
    ////////////////////////////////////////////////////////////////////////////

    @Override
    protected void addClause(List clauses, int conj, int mods, Query q) {
//System.out.println("addClause:clauses="+clauses+" conj="+conj+" mods="+mods+" q="+q);
      super.addClause(clauses, conj, mods, q);
    }

    /**
     * Add an alias to this query parser.
     *
     * @param field the field name that should trigger alias mapping
     * @param fieldBoosts the mapping from fieldname to boost value that
     *                    should be used to build up the clauses of the
     *                    DisjunctionMaxQuery.
     * @param tiebreaker to the tiebreaker to be used in the
     *                   DisjunctionMaxQuery
     * @see SolrPluginUtils#parseFieldBoosts
     */
    public void addAlias(String field, float tiebreaker,
                         Map<String,Float> fieldBoosts) {

      Alias a = new Alias();
      a.tie = tiebreaker;
      a.fields = fieldBoosts;
      aliases.put(field, a);
    }


    // Scratch state describing the query currently being built. The get*Query
    // overrides below fill these in and then call getAliasedQuery().
    // NOTE(review): presumably read back by getQuery()/getQueries(), which are
    // outside the visible part of the file -- confirm.
    QType type;     // kind of query requested
    String field;   // field the query was requested on
    String val;     // query text (lower bound for range queries)
    String val2;    // upper bound for range queries
    boolean bool;   // "inclusive" flag for range queries
    float flt;      // minimum similarity for fuzzy queries
    int slop;       // phrase slop for field/phrase queries

    @Override
    protected Query getFieldQuery(String field, String val, boolean quoted) throws ParseException {
//System.out.println("getFieldQuery: val="+val);

      this.type = QType.FIELD;
      this.field = field;
      this.val = val;
      this.slop = getPhraseSlop(); // unspecified
      return getAliasedQuery();
    }

    @Override
    protected Query getFieldQuery(String field, String val, int slop) throws ParseException {
//System.out.println("getFieldQuery: val="+val+" slop="+slop);

      this.type = QType.PHRASE;
      this.field = field;
      this.val = val;
      this.slop = slop;
      return getAliasedQuery();
    }

    @Override
    protected Query getPrefixQuery(String field, String val) throws ParseException {
//System.out.println("getPrefixQuery: val="+val);
      if (val.equals("") && field.equals("*")) {
        return new MatchAllDocsQuery();
      }
      this.type = QType.PREFIX;
      this.field = field;
      this.val = val;
      return getAliasedQuery();
    }

    @Override
    protected Query getRangeQuery(String field, String a, String b, boolean inclusive) throws ParseException {
//System.out.println("getRangeQuery:");

      this.type = QType.RANGE;
      this.field = field;
      this.val = a;
      this.val2 = b;
      this.bool = inclusive;
      return getAliasedQuery();
    }

    @Override
    protected Query getWildcardQuery(String field, String val) throws ParseException {
//System.out.println("getWildcardQuery: val="+val);

      if (val.equals("*")) {
        if (field.equals("*")) {
          return new MatchAllDocsQuery();
        } else{
          return getPrefixQuery(field,"");
        }
      }
      this.type = QType.WILDCARD;
      this.field = field;
      this.val = val;
      return getAliasedQuery();
    }

    @Override
    protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws ParseException {
//System.out.println("getFuzzyQuery: val="+val);

      this.type = QType.FUZZY;
      this.field = field;
      this.val = val;
      this.flt = minSimilarity;
      return getAliasedQuery();
    }

    /**
     * Delegates to the super class unless the field has been specified
     * as an alias -- in which case we recurse on each of
     * the aliased fields, and the results are composed into a
     * DisjunctionMaxQuery.  (so yes: aliases which point at other
     * aliases should work)
     */
    protected Query getAliasedQuery()
      throws ParseException {
      Alias a = aliases.get(field);
      if (a != null) {
        List<Query> lst = getQueries(a);
        if (lst == null || lst.size()==0)
            return getQuery();
        // make a DisjunctionMaxQuery in this case too... it will stop
        // the "mm" processing from making everything required in the case
        // that the query expanded to multiple clauses.
        // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
        // if (lst.size()==1) return lst.get(0);

        if (makeDismax) {
          DisjunctionMaxQuery q = new DisjunctionMaxQuery(lst, a.tie);
          return q;
        } else {
          // should we disable coord?
          BooleanQuery q = new BooleanQuery(disableCoord);
          for (Query sub : lst) {
            q.add(sub, BooleanClause.Occur.SHOULD);
          }
          return q;
        }
      } else {

        // verify that a fielded query is actually on a field that exists... if not,
        // then throw an exception to get us out of here, and we'll treat it like a
        // literal when we try the escape+re-parse.
        if (exceptions) {
          FieldType ft = schema.getFieldTypeNoEx(field);
          if (ft == null) throw unknownField;
        }

        return getQuery();
      }
    }


     protected List<Query> getQueries(Alias a) throws ParseException {
       if (a == null) return null;
       if (a.fields.size()==0) return null;
       List<Query> lst= new ArrayList<Query>(4);

       for (String f : a.fields.keySet()) {
         this.field = f;
         Query sub = getQuery();
         if (sub != null) {
           Float boost = a.fields.get(f);
           if (boost != null) {
              sub.setBoost(boost);
           }
           lst.add(sub);
         }
       }
       return lst;
     }

    private Query getQuery() throws ParseException {
      try {

        switch (type) {
          case FIELD:  // fallthrough
          case PHRASE:
            Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
            if (query instanceof PhraseQuery) {
              PhraseQuery pq = (PhraseQuery)query;
              if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
              ((PhraseQuery)query).setSlop(slop);
            } else if (query instanceof MultiPhraseQuery) {
              MultiPhraseQuery pq = (MultiPhraseQuery)query;
              if (minClauseSize > 1 && pq.getTermArrays().size() < minClauseSize) return null;
              ((MultiPhraseQuery)query).setSlop(slop);
            } else if (minClauseSize > 1) {
              // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
              return null;
            }
            return query;
          case PREFIX: return super.getPrefixQuery(field, val);
          case WILDCARD: return super.getWildcardQuery(field, val);
          case FUZZY: return super.getFuzzyQuery(field, val, flt);
          case RANGE: return super.getRangeQuery(field, val, val2, bool);
        }
        return null;

      } catch (Exception e) {
        // an exception here is due to the field query not being compatible with the input text
        // for example, passing a string to a numeric field.
        return null;
      }
    }
  }


  static boolean isEmpty(Query q) {
    if (q==null) return true;
    if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true;
    return false;
  }
}


final class ExtendedAnalyzer extends Analyzer {
  final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
  final QParser parser;
  final Analyzer queryAnalyzer;
  public boolean removeStopFilter = false;

  public static TokenizerChain getQueryTokenizerChain(QParser parser, String fieldName) {
    FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
    Analyzer qa = ft.getQueryAnalyzer();
    return qa instanceof TokenizerChain ? (TokenizerChain)qa : null;
  }

  public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) {
    TokenizerChain tcq = getQueryTokenizerChain(parser, fieldName);
    if (tcq == null) return null;
    TokenFilterFactory[] facs = tcq.getTokenFilterFactories();

    for (int i=0; i<facs.length; i++) {
      TokenFilterFactory tf = facs[i];
      if (tf instanceof StopFilterFactory) {
        return (StopFilterFactory)tf;
      }
    }
    return null;
  }

  public ExtendedAnalyzer(QParser parser) {
    this.parser = parser;
    this.queryAnalyzer = parser.getReq().getSchema().getQueryAnalyzer();
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    if (!removeStopFilter) {
      return queryAnalyzer.tokenStream(fieldName, reader);
    }
   
    Analyzer a = map.get(fieldName);
    if (a != null) {
      return a.tokenStream(fieldName, reader);
    }

    FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
    Analyzer qa = ft.getQueryAnalyzer();
    if (!(qa instanceof TokenizerChain)) {
      map.put(fieldName, qa);
      return qa.tokenStream(fieldName, reader);
    }
    TokenizerChain tcq = (TokenizerChain)qa;
    Analyzer ia = ft.getAnalyzer();
    if (ia == qa || !(ia instanceof TokenizerChain)) {
      map.put(fieldName, qa);
      return qa.tokenStream(fieldName, reader);
    }
    TokenizerChain tci = (TokenizerChain)ia;

    // make sure that there isn't a stop filter in the indexer
    for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
      if (tf instanceof StopFilterFactory) {
        map.put(fieldName, qa);
        return qa.tokenStream(fieldName, reader);
      }
    }

    // now if there is a stop filter in the query analyzer, remove it
    int stopIdx = -1;
    TokenFilterFactory[] facs = tcq.getTokenFilterFactories();

    for (int i=0; i<facs.length; i++) {
      TokenFilterFactory tf = facs[i];
      if (tf instanceof StopFilterFactory) {
        stopIdx = i;
        break;
      }
    }

    if (stopIdx == -1) {
      // no stop filter exists
      map.put(fieldName, qa);
      return qa.tokenStream(fieldName, reader);
    }

    TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length-1];
    for (int i=0,j=0; i<facs.length; i++) {
      if (i==stopIdx) continue;
      newtf[j++] = facs[i];
    }

    TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
    newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));

    map.put(fieldName, newa);
    return newa.tokenStream(fieldName, reader);       
  }

  @Override
  public int getPositionIncrementGap(String fieldName) {
    return queryAnalyzer.getPositionIncrementGap(fieldName);
  }

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    if (!removeStopFilter) {
      return queryAnalyzer.reusableTokenStream(fieldName, reader);
    }
    // TODO: done to fix stop word removal bug - could be done while still using resusable?
    return tokenStream(fieldName, reader);
  }
}
TOP

Related Classes of org.apache.solr.search.ExtendedAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.