Source Code of org.apache.solr.search.SolrIndexSearcher

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.search;

import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrInfoRegistry;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.OpenBitSet;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
* SolrIndexSearcher adds schema awareness and caching functionality
* over the Lucene IndexSearcher.
*
* @author yonik
* @version $Id: SolrIndexSearcher.java 479793 2006-11-27 22:40:21Z klaas $
* @since solr 0.9
*/

// Since the internal reader in IndexSearcher is
// package protected, I can't get to it by inheritance.
// For now, I am using delegation and creating the
// IndexReader to pass to the searcher myself.
// NOTE: as of Lucene 1.9, this has changed!
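
/*
 * Rough usage sketch (not part of the original source).  The index path,
 * field names, and query below are illustrative only, and an already-loaded
 * IndexSchema is assumed:
 *
 *   SolrIndexSearcher searcher =
 *       new SolrIndexSearcher(schema, "main", "/path/to/index", true);
 *   try {
 *     Query q = new TermQuery(new Term("cat", "electronics"));
 *     DocList results = searcher.getDocList(q, (Query)null, (Sort)null, 0, 10);
 *     DocIterator it = results.iterator();
 *     while (it.hasNext()) {
 *       Document d = searcher.doc(it.nextDoc());
 *     }
 *   } finally {
 *     searcher.close();
 *   }
 */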

public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
  private static Logger log = Logger.getLogger(SolrIndexSearcher.class.getName());

  private final IndexSchema schema;

  private final String name;
  private long openTime = System.currentTimeMillis();
  private long registerTime = 0;
  private final IndexSearcher searcher;
  private final IndexReader reader;
  private final boolean closeReader;

  private final boolean cachingEnabled;
  private final SolrCache filterCache;
  private final SolrCache queryResultCache;
  private final SolrCache documentCache;

  // map of generic caches - not synchronized since it's read-only after the constructor.
  private final HashMap<String, SolrCache> cacheMap;
  private static final HashMap<String, SolrCache> noGenericCaches=new HashMap<String,SolrCache>(0);

  // list of all caches associated with this searcher.
  private final SolrCache[] cacheList;
  private static final SolrCache[] noCaches = new SolrCache[0];

  /** Creates a searcher searching the index in the named directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, String path, boolean enableCache) throws IOException {
    this(schema,name,IndexReader.open(path), true, enableCache);
  }

  /** Creates a searcher searching the index in the provided directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, Directory directory, boolean enableCache) throws IOException {
    this(schema,name,IndexReader.open(directory), true, enableCache);
  }

  /** Creates a searcher searching the provided index. */
  public SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean enableCache) {
    this(schema,name,r, false, enableCache);
  }

  private SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean closeReader, boolean enableCache) {
    this.schema = schema;
    this.name = "Searcher@" + Integer.toHexString(hashCode()) + (name!=null ? " "+name : "");

    log.info("Opening " + this.name);

    reader = r;
    searcher = new IndexSearcher(r);
    this.closeReader = closeReader;
    searcher.setSimilarity(schema.getSimilarity());

    cachingEnabled=enableCache;
    if (cachingEnabled) {
      ArrayList<SolrCache> clist = new ArrayList<SolrCache>();
      filterCache= filterCacheConfig==null ? null : filterCacheConfig.newInstance();
      if (filterCache!=null) clist.add(filterCache);
      queryResultCache = queryResultCacheConfig==null ? null : queryResultCacheConfig.newInstance();
      if (queryResultCache!=null) clist.add(queryResultCache);
      documentCache = documentCacheConfig==null ? null : documentCacheConfig.newInstance();
      if (documentCache!=null) clist.add(documentCache);

      if (userCacheConfigs == null) {
        cacheMap = noGenericCaches;
      } else {
        cacheMap = new HashMap<String,SolrCache>(userCacheConfigs.length);
        for (CacheConfig userCacheConfig : userCacheConfigs) {
          SolrCache cache = null;
          if (userCacheConfig != null) cache = userCacheConfig.newInstance();
          if (cache != null) {
            cacheMap.put(cache.name(), cache);
            clist.add(cache);
          }
        }
      }

      cacheList = clist.toArray(new SolrCache[clist.size()]);
    } else {
      filterCache=null;
      queryResultCache=null;
      documentCache=null;
      cacheMap = noGenericCaches;
      cacheList= noCaches;
    }

    // register self
    SolrInfoRegistry.getRegistry().put(this.name, this);
  }


  public String toString() {
    return name;
  }


  /** Register sub-objects such as caches
   */
  public void register() {
    for (SolrCache cache : cacheList) {
      cache.setState(SolrCache.State.LIVE);
      SolrInfoRegistry.getRegistry().put(cache.name(), cache);
    }
    registerTime=System.currentTimeMillis();
  }

  /**
   * Frees resources associated with this searcher.
   *
   * In particular, the underlying reader and any caches in use are closed.
   */
  public void close() throws IOException {
    // unregister first, so no management actions are tried on a closing searcher.
    SolrInfoRegistry.getRegistry().remove(name);

    if (cachingEnabled) {
      StringBuilder sb = new StringBuilder();
      sb.append("Closing ").append(name);
      for (SolrCache cache : cacheList) {
        sb.append("\n\t");
        sb.append(cache);
      }
      log.info(sb.toString());
    } else {
      log.fine("Closing " + name);
    }
    try {
      searcher.close();
    }
    finally {
      if(closeReader) reader.close();
      for (SolrCache cache : cacheList) {
        cache.close();
      }
    }
  }

  /** Direct access to the IndexReader used by this searcher */
  public IndexReader getReader() { return reader; }
  /** Direct access to the IndexSchema for use with this searcher */
  public IndexSchema getSchema() { return schema; }


  // params for the "nutch" query optimizer
  private static boolean filtOptEnabled=SolrConfig.config.getBool("query/boolTofilterOptimizer/@enabled",false);
  private static int filtOptCacheSize=SolrConfig.config.getInt("query/boolTofilterOptimizer/@cacheSize",32);
  private static float filtOptThreshold= SolrConfig.config.getFloat("query/boolTofilterOptimizer/@threshold",.05f);
  private LuceneQueryOptimizer optimizer = filtOptEnabled ? new LuceneQueryOptimizer(filtOptCacheSize,filtOptThreshold) : null;


  private static final CacheConfig filterCacheConfig = CacheConfig.getConfig("query/filterCache");
  private static final CacheConfig queryResultCacheConfig = CacheConfig.getConfig("query/queryResultCache");
  private static final CacheConfig documentCacheConfig = CacheConfig.getConfig("query/documentCache");
  private static final CacheConfig[] userCacheConfigs = CacheConfig.getMultipleConfigs("query/cache");


  //
  // Set default regenerators on filter and query caches if they don't have any
  //
  static {
    if (filterCacheConfig != null && filterCacheConfig.getRegenerator() == null) {
      filterCacheConfig.setRegenerator(
              new CacheRegenerator() {
                public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
                  newSearcher.cacheDocSet((Query)oldKey, null, false);
                  return true;
                }
              }
      );
    }

    if (queryResultCacheConfig != null && queryResultCacheConfig.getRegenerator() == null) {
      queryResultCacheConfig.setRegenerator(
              new CacheRegenerator() {
                public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
                  QueryResultKey key = (QueryResultKey)oldKey;
                  int nDocs=1;
                  // request 1 doc and let caching round up to the next window size...
                  // unless the window size is <=1, in which case we will pick
                  // the minimum of the number of documents requested last time and
                  // a reasonable number such as 40.
                  // TODO: make more configurable later...

                  if (queryResultWindowSize<=1) {
                    DocList oldList = (DocList)oldVal;
                    int oldnDocs = oldList.offset() + oldList.size();
                    // 40 has factors of 2,4,5,10,20
                    nDocs = Math.min(oldnDocs,40);
                  }

                  DocListAndSet ret = new DocListAndSet();
                  int flags=NO_CHECK_QCACHE | key.nc_flags;

                  newSearcher.getDocListC(ret, key.query, key.filters, null, key.sort, 0, nDocs, flags);
                  return true;
                }
              }
      );
    }
  }


  private static boolean useFilterForSortedQuery=SolrConfig.config.getBool("query/useFilterForSortedQuery", false);
  private static int queryResultWindowSize=SolrConfig.config.getInt("query/queryResultWindowSize", 1);


  public Hits search(Query query, Filter filter, Sort sort) throws IOException {
    // todo - when Solr starts accepting filters, need to
    // change this conditional check (filter!=null) and create a new filter
    // that ANDs them together if it already exists.

    if (optimizer==null || filter!=null || !(query instanceof BooleanQuery)
    ) {
      return searcher.search(query,filter,sort);
    } else {
      Query[] newQuery = new Query[1];
      Filter[] newFilter = new Filter[1];
      optimizer.optimize((BooleanQuery)query, searcher, 0, newQuery, newFilter);

      return searcher.search(newQuery[0], newFilter[0], sort);
    }
  }

  public Hits search(Query query, Filter filter) throws IOException {
    return searcher.search(query, filter);
  }

  public Hits search(Query query, Sort sort) throws IOException {
    return searcher.search(query, sort);
  }

  public void search(Query query, HitCollector results) throws IOException {
    searcher.search(query, results);
  }

  public void setSimilarity(Similarity similarity) {
    searcher.setSimilarity(similarity);
  }

  public Similarity getSimilarity() {
    return searcher.getSimilarity();
  }

  public int docFreq(Term term) throws IOException {
    return searcher.docFreq(term);
  }

  /* ********************** Document retrieval *************************/
  
  /* Future optimizations (yonik)
   *
   * If no cache is present:
   *   - use NO_LOAD instead of LAZY_LOAD
   *   - use LOAD_AND_BREAK if a single field is being retrieved
   */

  /**
   * FieldSelector which loads the specified fields and loads all other
   * fields lazily.
   */
  class SetNonLazyFieldSelector implements FieldSelector {
    private Set<String> fieldsToLoad;
    SetNonLazyFieldSelector(Set<String> toLoad) {
      fieldsToLoad = toLoad;
    }
    public FieldSelectorResult accept(String fieldName) {
      if(fieldsToLoad.contains(fieldName))
        return FieldSelectorResult.LOAD;
      else
        return FieldSelectorResult.LAZY_LOAD;
    }
  }

  /* solrconfig lazyfields setting */
  public static final boolean enableLazyFieldLoading = SolrConfig.config.getBool("query/enableLazyFieldLoading", false);

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   */
  public Document doc(int i) throws IOException {
    return doc(i, null);
  }
  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   *
   * Note: The document will have all fields accessible, but if a field
   * filter is provided, only the provided fields will be loaded (the
   * remainder will be available lazily).
   */
  public Document doc(int i, Set<String> fields) throws IOException {
   
    Document d;
    if (documentCache != null) {
      d = (Document)documentCache.get(i);
      if (d!=null) return d;
    }

    if(!enableLazyFieldLoading || fields == null) {
      d = searcher.getIndexReader().document(i);
    } else {
      d = searcher.getIndexReader().document(i,
             new SetNonLazyFieldSelector(fields));
    }

    if (documentCache != null) {
      documentCache.put(i, d);
    }

    return d;
  }
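
  /*
   * Illustrative sketch (not from the original source): load only a couple of
   * stored fields eagerly and let the rest load lazily (when
   * enableLazyFieldLoading is on).  The field names are hypothetical.
   *
   *   Set<String> wanted = new HashSet<String>();
   *   wanted.add("id");
   *   wanted.add("name");
   *   Document d = searcher.doc(docNum, wanted);
   */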

  /**
   * Takes a list of docs (the doc ids actually), and reads them into an array
   * of Documents.
   */
  public void readDocs(Document[] docs, DocList ids) throws IOException {
    readDocs(docs, ids, null);
  }
  /**
   * Takes a list of docs (the doc ids actually) and a set of fields to load,
   * and reads them into an array of Documents.
   */
  public void readDocs(Document[] docs, DocList ids, Set<String> fields) throws IOException {
    DocIterator iter = ids.iterator();
    for (int i=0; i<docs.length; i++) {
      docs[i] = doc(iter.nextDoc(), fields);
    }
  }

  /* ********************** end document retrieval *************************/

  public int maxDoc() throws IOException {
    return searcher.maxDoc();
  }

  public TopDocs search(Weight weight, Filter filter, int i) throws IOException {
    return searcher.search(weight, filter, i);
  }

  public void search(Weight weight, Filter filter, HitCollector hitCollector) throws IOException {
    searcher.search(weight, filter, hitCollector);
  }

  public Query rewrite(Query original) throws IOException {
    return searcher.rewrite(original);
  }

  public Explanation explain(Weight weight, int i) throws IOException {
    return searcher.explain(weight, i);
  }

  public TopFieldDocs search(Weight weight, Filter filter, int i, Sort sort) throws IOException {
    return searcher.search(weight, filter, i, sort);
  }

  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////

  /**
   * Returns the first document number containing the term <code>t</code>.
   * Returns -1 if no document was found.
   * This method is primarily intended for clients that want to fetch
   * documents using a unique identifier.
   * @param t the term to match
   * @return the first document number containing the term
   */
  public int getFirstMatch(Term t) throws IOException {
    TermDocs tdocs = null;
    try {
      tdocs = reader.termDocs(t);
      if (!tdocs.next()) return -1;
      return tdocs.doc();
    } finally {
      if (tdocs!=null) tdocs.close();
    }
  }
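
  /*
   * Illustrative sketch (not from the original source): look a document up by
   * its unique key field.  The field name "id" and the value are hypothetical.
   *
   *   int docNum = searcher.getFirstMatch(new Term("id", "SP2514N"));
   *   Document d = (docNum == -1) ? null : searcher.doc(docNum);
   */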


  /**
   * Compute and cache the DocSet that matches a query.
   * The normal usage is expected to be cacheDocSet(myQuery, null,false)
   * meaning that Solr will determine if the Query warrants caching, and
   * if so, will compute the DocSet that matches the Query and cache it.
   * If the answer to the query is already cached, nothing further will be done.
   * <p>
   * If the optionalAnswer DocSet is provided, it should *not* be modified
   * after this call.
   *
   * @param query           the lucene query that will act as the key
   * @param optionalAnswer   the DocSet to be cached - if null, it will be computed.
   * @param mustCache        if true, a best effort will be made to cache this entry.
   *                         if false, heuristics may be used to determine if it should be cached.
   */
  public void cacheDocSet(Query query, DocSet optionalAnswer, boolean mustCache) throws IOException {
    // Even if the cache is null, still compute the DocSet as it may serve to warm the Lucene
    // or OS disk cache.
    if (optionalAnswer != null) {
      if (filterCache!=null) {
        filterCache.put(query,optionalAnswer);
      }
      return;
    }

    // Throw away the result, relying on the fact that getDocSet
    // will currently always cache what it found.  If getDocSet() starts
    // using heuristics about what to cache, and mustCache==true, (or if we
    // want this method to start using heuristics too) then
    // this needs to change.
    getDocSet(query);
  }
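
  /*
   * Illustrative sketch (not from the original source): pre-populate the filter
   * cache with a commonly used filter query, letting Solr compute the DocSet.
   * The field name is hypothetical.
   *
   *   searcher.cacheDocSet(new TermQuery(new Term("inStock", "true")), null, false);
   */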

  /**
   * Returns the set of document ids matching a query.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * <p>
   * The DocSet returned should <b>not</b> be modified.
   */
  public DocSet getDocSet(Query query) throws IOException {
    DocSet answer;
    if (filterCache != null) {
      answer = (DocSet)filterCache.get(query);
      if (answer!=null) return answer;
    }

    answer = getDocSetNC(query, null);

    if (filterCache != null) {
      filterCache.put(query, answer);
    }

    return answer;
  }
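
  /*
   * Illustrative sketch (not from the original source): fetch (and implicitly
   * cache) the DocSet for a filter query, then reuse it.  Assumes DocSet
   * exposes size(); the field name is hypothetical.
   *
   *   DocSet inStock = searcher.getDocSet(new TermQuery(new Term("inStock", "true")));
   *   int numInStock = inStock.size();
   */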


  // TODO: do a more efficient version that starts with the
  // smallest DocSet and drives the intersection off that
  // or implement an intersection() function that takes multiple
  // DocSets (prob the better way)
  protected DocSet getDocSet(List<Query> queries) throws IOException {
    DocSet answer=null;
    if (queries==null) return null;
    for (Query q : queries) {
      if (answer==null) {
        answer = getDocSet(q);
      } else {
        answer = answer.intersection(getDocSet(q));
      }
    }
    return answer;
  }


  protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
    if (filter==null) {
      DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      if (query instanceof TermQuery) {
        Term t = ((TermQuery)query).getTerm();
        TermDocs tdocs = null;
        try {
          tdocs = reader.termDocs(t);
          while (tdocs.next()) hc.collect(tdocs.doc(),0.0f);
        } finally {
          if (tdocs!=null) tdocs.close();
        }
      } else {
        searcher.search(query,null,hc);
      }
      return hc.getDocSet();

    } else {
      // FUTURE: if the filter is sorted by docid, could use skipTo (SkipQueryFilter)
      final DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      final DocSet filt = filter;
      searcher.search(query, null, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt.exists(doc)) hc.collect(doc,score);
        }
      }
      );
      return hc.getDocSet();
    }
  }


  /**
   * Returns the set of document ids matching both the query and the filter.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * <p>
   *
   * @param query
   * @param filter may be null
   * @return DocSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   */
  public DocSet getDocSet(Query query, DocSet filter) throws IOException {
    if (filter==null) return getDocSet(query);

    DocSet first;
    if (filterCache != null) {
      first = (DocSet)filterCache.get(query);
      if (first==null) {
        first = getDocSetNC(query,null);
        filterCache.put(query,first);
      }
      return first.intersection(filter);
    }


    // If there isn't a cache, then do a single filtered query.
    return getDocSetNC(query,filter);


    /******* OLD VERSION that did a filtered query instead of
     * an intersection if the query docset wasn't found in the cache.
     * It made misses != inserts (even if no evictions)
    DocSet first=null;
    if (filterCache != null) {
      first = (DocSet)filterCache.get(query);
      if (first != null) {
        return first.intersection(filter);
      }
    }

    DocSet answer = getDocSetNC(query, filter);
    // nothing is inserted into the cache, because we don't cache materialized filters.
    // Hmmm, we *could* make a hitcollector that made a DocSet out of the query at the
    // same time it was running the filter though...

    // Q: we could call getDocSet(query) and then take the intersection instead of running
    // the query as a filter.  Then it could be cached.
    return answer;
    ****************/
  }


  /**
  * Converts a filter into a DocSet.
  * This method is not cache-aware and no caches are checked.
  */
  public DocSet convertFilter(Filter lfilter) throws IOException {
    BitSet bs = lfilter.bits(this.reader);
    OpenBitSet obs = new OpenBitSet(bs.size());
    for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)) {
      obs.fastSet(i);
    }
    return new BitDocSet(obs);
  }
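
  /*
   * Illustrative sketch (not from the original source): adapt an arbitrary
   * Lucene Filter into a Solr DocSet, using the same anonymous-Filter pattern
   * that appears elsewhere in this file.
   *
   *   Filter evenDocs = new Filter() {
   *     public BitSet bits(IndexReader reader) throws IOException {
   *       BitSet bs = new BitSet(reader.maxDoc());
   *       for (int i = 0; i < reader.maxDoc(); i += 2) bs.set(i);
   *       return bs;
   *     }
   *   };
   *   DocSet evenSet = searcher.convertFilter(evenDocs);
   */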

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(1);
      filterList.add(filter);
    }
    return getDocList(query, filterList, lsort, offset, len, 0);
  }


  /**
   * Returns documents matching both <code>query</code> and the
   * intersection of the <code>filterList</code>, sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filterList may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer,query,filterList,null,lsort,offset,len,flags);
    return answer.docList;
  }


  private static final int NO_CHECK_QCACHE       = 0x80000000;
  private static final int GET_DOCSET            = 0x40000000;
  private static final int NO_CHECK_FILTERCACHE  = 0x20000000;

  public static final int GET_SCORES             =       0x01;
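
  /*
   * Illustrative sketch (not from the original source): GET_SCORES is the only
   * public flag; passing it to a flag-taking variant asks for per-document
   * scores to be recorded in the returned DocList.
   *
   *   DocList hits = searcher.getDocList(q, filterList, sort, 0, 10,
   *                                      SolrIndexSearcher.GET_SCORES);
   *   boolean withScores = hits.hasScores();
   */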

  private void getDocListC(DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    QueryResultKey key=null;
    int maxDoc = offset + len;
    int supersetMaxDoc=maxDoc;
    DocList superset;


    // we can try and look up the complete query in the cache.
    // we can't do that if filter!=null though (we don't want to
    // do hashCode() and equals() for a big DocSet).
    if (queryResultCache != null && filter==null) {
        // all of the current flags can be reused during warming,
        // so set all of them on the cache key.
        key = new QueryResultKey(query, filterList, lsort, flags);
        if ((flags & NO_CHECK_QCACHE)==0) {
          superset = (DocList)queryResultCache.get(key);

          if (superset != null) {
            // check that the cache entry has scores recorded if we need them
            if ((flags & GET_SCORES)==0 || superset.hasScores()) {
              out.docList = superset.subset(offset,len);
            }
          }
          if (out.docList != null) {
            // found the docList in the cache... now check if we need the docset too.
            // OPT: possible future optimization - if the doclist contains all the matches,
            // use it to make the docset instead of rerunning the query.
            if (out.docSet==null && ((flags & GET_DOCSET)!=0) ) {
              if (filterList==null) {
                out.docSet = getDocSet(query);
              } else {
                List<Query> newList = new ArrayList<Query>(filterList.size()+1);
                newList.add(query);
                newList.addAll(filterList);
                out.docSet = getDocSet(newList);
              }
            }
            return;
          }
        }

        // If we are going to generate the result, bump up to the
        // next resultWindowSize for better caching.

        // handle 0 special case as well as avoid idiv in the common case.
        if (maxDoc < queryResultWindowSize) {
          supersetMaxDoc=queryResultWindowSize;
        } else {
          supersetMaxDoc = ((maxDoc-1)/queryResultWindowSize + 1)*queryResultWindowSize;
        }
    }


    // OK, so now we need to generate an answer.
    // One way to do that would be to check if we have an unordered list
    // of results for the base query.  If so, we can apply the filters and then
    // sort by the resulting set.  This can only be used if:
    // - the sort doesn't contain score
    // - we don't want score returned.

    // check if we should try and use the filter cache
    boolean useFilterCache=false;
    if ((flags & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && lsort != null && filterCache != null) {
      useFilterCache=true;
      SortField[] sfields = lsort.getSort();
      for (SortField sf : sfields) {
        if (sf.getType() == SortField.SCORE) {
          useFilterCache=false;
          break;
        }
      }
    }

    if (useFilterCache) {
      // now actually use the filter cache.
      // for large filters that match few documents, this may be
      // slower than simply re-executing the query.
      if (out.docSet == null) {
        out.docSet = getDocSet(query,filter);
        DocSet bigFilt = getDocSet(filterList);
        if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
      }
      // todo: there could be a sortDocSet that could take a list of
      // the filters instead of anding them first...
      // perhaps there should be a multi-docset-iterator
      superset = sortDocSet(out.docSet,lsort,supersetMaxDoc);
      out.docList = superset.subset(offset,len);
    } else {
      // do it the normal way...
      DocSet theFilt = filter!=null ? filter : getDocSet(filterList);

      if ((flags & GET_DOCSET)!=0) {
        DocSet qDocSet = getDocListAndSetNC(out,query,theFilt,lsort,0,supersetMaxDoc,flags);
        // cache the docSet matching the query w/o filtering
        if (filterCache!=null) filterCache.put(query,qDocSet);
      } else {
        out.docList = getDocListNC(query,theFilt,lsort,0,supersetMaxDoc,flags);
      }
      superset = out.docList;
      out.docList = superset.subset(offset,len);
    }

    // lastly, put the superset in the cache
    if (key != null) {
      queryResultCache.put(key, superset);
    }
  }



  private DocList getDocListNC(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset+len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;


    // handle zero case...
    if (lastDocRequested<=0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0]=score;
        }
      }
      );

      nDocsReturned=0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits>0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset+len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      }
      );

      totalHits = numHits[0];
      maxScore = totalHits>0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc)hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore=Float.NEGATIVE_INFINITY;  // minimum score in the priority queue
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // TODO: if docs are always delivered in order, we could use "score>minScore"
            // instead of "score>=minScore" and avoid tiebreaking scores
            // in the priority queue.
            // but BooleanScorer14 might still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc)hq.top()).score;
          }
        }
      }
      );

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc =null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        sdoc = (ScoreDoc)hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc ==null ? 0.0f : sdoc.score;
    }


    int sliceLen = Math.min(lastDocRequested,nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen=0;
    return new DocSlice(offset,sliceLen,ids,scores,totalHits,maxScore);



    /**************** older implementation using TopDocs *******************


      Filter lfilter=null;
      if (filter != null) {
        final BitSet bits = filter.getBits();   // avoid if possible
        lfilter = new Filter() {
          public BitSet bits(IndexReader reader)  {
            return bits;
          }
        };
      }

      int lastDocRequested=offset+len;

      // lucene doesn't allow 0 to be passed for nDocs
      if (lastDocRequested==0) lastDocRequested=1;

      // TopFieldDocs sortedDocs;  // use TopDocs so both versions can use it
      TopDocs sortedDocs;
      if (lsort!=null) {
         sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
      } else {
         sortedDocs = searcher.search(query, lfilter, lastDocRequested);
      }

      int nDocsReturned = sortedDocs.scoreDocs.length;
      int[] docs = new int[nDocsReturned];
      for (int i=0; i<nDocsReturned; i++) {
        docs[i] = sortedDocs.scoreDocs[i].doc;
      }
      float[] scores=null;
      float maxScore=0.0f;
      if ((flags & GET_SCORES) != 0) {
        scores = new float[nDocsReturned];
        for (int i=0; i<nDocsReturned; i++) {
          scores[i] = sortedDocs.scoreDocs[i].score;
        }
        if (nDocsReturned>0) {
          maxScore=sortedDocs.scoreDocs[0].score;
        }
      }
      int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
      if (sliceLen < 0) sliceLen=0;
      return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);

    **********************************************************************************/

  }


  // the DocSet returned is for the query only, without any filtering... that way it may
  // be cached if desired.
  private DocSet getDocListAndSetNC(DocListAndSet out, Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset+len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    final DocSetHitCollector setHC = new DocSetHitCollector(maxDoc());
    // TODO: perhaps unify getDocListAndSetNC and getDocListNC without imposing a significant performance hit

    // Comment: gathering the set before the filter is applied allows one to cache
    // the resulting DocSet under the query.  The drawback is that it requires an
    // extra intersection with the filter at the end.  This will be a net win
    // for expensive queries.

    // Q: what if the final intersection results in a small set from two large
    // sets... it won't be a HashDocSet or other small set.  One way around
    // this would be to collect the resulting set as we go (the filter is
    // checked anyway).

    // handle zero case...
    if (lastDocRequested<=0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc,score);
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0]=score;
        }
      }
      );

      nDocsReturned=0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits>0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset+len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc,score);
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      }
      );

      totalHits = numHits[0];
      maxScore = totalHits>0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc)hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore=Float.NEGATIVE_INFINITY;  // minimum score in the priority queue
        public void collect(int doc, float score) {
          setHC.collect(doc,score);
          if (filt!=null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // if docs are always delivered in order, we could use "score>minScore"
            // but BooleanScorer14 might still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc)hq.top()).score;
          }
        }
      }
      );

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc =null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        sdoc = (ScoreDoc)hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc ==null ? 0.0f : sdoc.score;
    }


    int sliceLen = Math.min(lastDocRequested,nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen=0;
    out.docList = new DocSlice(offset,sliceLen,ids,scores,totalHits,maxScore);
    DocSet qDocSet = setHC.getDocSet();
    out.docSet = filter==null ? qDocSet : qDocSet.intersection(filter);
    return qDocSet;
  }



  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer,query,null,filter,lsort,offset,len,0);
    return answer.docList;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.  Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len);

  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.  Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @param flags    user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len, int flags) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len, flags);
  }
 
  /**
   * A simple utility method to build a filter list from a query.
   * @param filter
   */
  private List<Query> buildQueryList(Query filter) {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(2);
      filterList.add(filter);
    }
    return filterList;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret,query,filterList,null,lsort,offset,len,GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @param flags    user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret,query,filterList,null,lsort,offset,len, flags | GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret,query,null,filter,lsort,offset,len,GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.  Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may make an insertion into the cache
   * as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter   may be null
   * @param lsort    criteria by which to sort (if null, query relevance is used)
   * @param offset   offset into the list of documents to return
   * @param len      maximum number of documents to return
   * @param flags    user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret,query,null,filter,lsort,offset,len, flags | GET_DOCSET);
    return ret;
  }

  protected DocList sortDocSet(DocSet set, Sort sort, int nDocs) throws IOException {
    final FieldSortedHitQueue hq =
            new FieldSortedHitQueue(reader, sort.getSort(), nDocs);
    DocIterator iter = set.iterator();
    int hits=0;
    while(iter.hasNext()) {
      int doc = iter.nextDoc();
      hits++;   // could just use set.size(), but that would be slower for a bitset
      hq.insert(new FieldDoc(doc,1.0f));
    }

    int numCollected = hq.size();
    int[] ids = new int[numCollected];
    for (int i = numCollected-1; i >= 0; i--) {
      FieldDoc fieldDoc = (FieldDoc)hq.pop();
      // hq.fillFields(fieldDoc)  // optional, if we need that info
      ids[i] = fieldDoc.doc;
    }

    return new DocSlice(0,numCollected,ids,null,hits,0.0f);
  }



  /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, DocSet b) throws IOException {
    // reverse: do the query on filter and filter using docs...
    // or if filter is a term query, can get the freq and
    // drive things off that.
    //  this higher level API leaves open more optimization possibilities.
    // prob only worth it if cacheHitRatio is bad...

    return b.intersectionSize(getDocSet(a));
  }

   /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, Query b) throws IOException {
    return getDocSet(b).intersectionSize(getDocSet(a));
  }
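
  /*
   * Illustrative sketch (not from the original source): intersection counts for
   * simple facet-style breakdowns.  Field names are hypothetical.
   *
   *   DocSet base = searcher.getDocSet(baseQuery);
   *   int electronics = searcher.numDocs(new TermQuery(new Term("cat", "electronics")), base);
   *   int memory      = searcher.numDocs(new TermQuery(new Term("cat", "memory")), base);
   */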


  /**
   * Takes a list of docs (the doc ids actually), and returns an array
   * of Documents containing all of the stored fields.
   */
  public Document[] readDocs(DocList ids) throws IOException {
     Document[] docs = new Document[ids.size()];
     readDocs(docs,ids);
     return docs;
  }



  /**
   * Warm this searcher based on an old one (primarily for auto-cache warming).
   */
  public void warm(SolrIndexSearcher old) throws IOException {
    // Make sure this is first!  filters can help queryResults execute!
    boolean logme = log.isLoggable(Level.INFO);

    // warm the caches in order...
    for (int i=0; i<cacheList.length; i++) {
      if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
      this.cacheList[i].warm(this, old.cacheList[i]);
      if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
    }
  }
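
  /*
   * Illustrative sketch (not from the original source): a newly opened searcher
   * is typically autowarmed from the searcher it replaces and then registered.
   *
   *   SolrIndexSearcher newSearcher = new SolrIndexSearcher(schema, "main", dir, true);
   *   newSearcher.warm(currentSearcher);
   *   newSearcher.register();
   */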


  /**
   * return the named generic cache
   */
  public SolrCache getCache(String cacheName) {
    return cacheMap.get(cacheName);
  }

  /**
   * lookup an entry in a generic cache
   */
  public Object cacheLookup(String cacheName, Object key) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache==null ? null : cache.get(key);
  }

  /**
   * insert an entry in a generic cache
   */
  public Object cacheInsert(String cacheName, Object key, Object val) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache==null ? null : cache.put(key,val);
  }
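
  /*
   * Illustrative sketch (not from the original source): look-aside use of a
   * user-defined cache declared in solrconfig.xml.  The cache name and the
   * computeValue() helper are hypothetical.
   *
   *   Object val = searcher.cacheLookup("myUserCache", key);
   *   if (val == null) {
   *     val = computeValue(key);
   *     searcher.cacheInsert("myUserCache", key, val);
   *   }
   */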


  /////////////////////////////////////////////////////////////////////
  // SolrInfoMBean stuff: Statistics and Module Info
  /////////////////////////////////////////////////////////////////////

  public String getName() {
    return SolrIndexSearcher.class.getName();
  }

  public String getVersion() {
    return SolrCore.version;
  }

  public String getDescription() {
    return "index searcher";
  }

  public Category getCategory() {
    return Category.CORE;
  }

  public String getSourceId() {
    return "$Id: SolrIndexSearcher.java 479793 2006-11-27 22:40:21Z klaas $";
  }

  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/incubator/solr/branches/branch-1.1/src/java/org/apache/solr/search/SolrIndexSearcher.java $";
  }

  public URL[] getDocs() {
    return null;
  }

  public NamedList getStatistics() {
    NamedList lst = new NamedList();
    lst.add("caching", cachingEnabled);
    lst.add("numDocs", reader.numDocs());
    lst.add("maxDoc", reader.maxDoc());
    lst.add("readerImpl", reader.getClass().getSimpleName());
    lst.add("readerDir", reader.directory());
    lst.add("indexVersion", reader.getVersion());
    lst.add("openedAt", new Date(openTime));
    if (registerTime!=0) lst.add("registeredAt", new Date(registerTime));
    return lst;
  }
}



// Lucene's HitQueue isn't public, so here is our own.
final class ScorePriorityQueue extends PriorityQueue {
  ScorePriorityQueue(int size) {
    initialize(size);
  }

  protected final boolean lessThan(Object o1, Object o2) {
    ScoreDoc sd1 = (ScoreDoc)o1;
    ScoreDoc sd2 = (ScoreDoc)o2;
    // use index order as a tiebreaker to make sorts stable
    return sd1.score < sd2.score || (sd1.score==sd2.score && sd1.doc > sd2.doc);
  }
}
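
/*
 * Illustrative sketch (not from the original source): with equal scores the
 * tiebreak prefers lower document ids, so the higher id ends up at the top of
 * the queue and is the first candidate for eviction.
 *
 *   ScorePriorityQueue pq = new ScorePriorityQueue(2);
 *   pq.insert(new ScoreDoc(5, 1.0f));
 *   pq.insert(new ScoreDoc(3, 1.0f));
 *   ScoreDoc least = (ScoreDoc)pq.top();   // doc 5
 */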


