Source Code of org.apache.lucene.benchmark.byTask.tasks.ReadTask

package org.apache.lucene.benchmark.byTask.tasks;


/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;




/**
 * Read index (abstract) task.
 * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
 * methods to configure the actual action.
 * <p/>
 * <p>Note: All ReadTasks reuse the reader if it is already open.
 * Otherwise a reader is opened at start and closed at the end.
 * <p/>
 * <p>Other side effects: none.
 */
public abstract class ReadTask extends PerfTask {


  public ReadTask(PerfRunData runData) {
    super(runData);
  }


  public int doLogic() throws Exception {
    int res = 0;
    boolean closeReader = false;


    // open reader or use existing one
    IndexReader ir = getRunData().getIndexReader();
    if (ir == null) {
      Directory dir = getRunData().getDirectory();
      ir = IndexReader.open(dir);
      closeReader = true;
      //res++; //this is confusing, comment it out
    }


    // optionally warm and add num docs traversed to count
    if (withWarm()) {
      Document doc = null;
      for (int m = 0; m < ir.maxDoc(); m++) {
        if (!ir.isDeleted(m)) {
          doc = ir.document(m);
          res += (doc == null ? 0 : 1);
        }
      }
    }


    if (withSearch()) {
      res++;
      IndexSearcher searcher = new IndexSearcher(ir);
      QueryMaker queryMaker = getQueryMaker();
      Query q = queryMaker.makeQuery();
      Sort sort = getSort();
      Hits hits;
      if(sort != null) {
        hits = searcher.search(q, sort);
      } else {
        hits = searcher.search(q);
      }
      //System.out.println("searched: "+q);


      if (withTraverse() && hits != null) {
        int traversalSize = Math.min(hits.length(), traversalSize());
        if (traversalSize > 0) {
          boolean retrieve = withRetrieve();
          int numHighlight = Math.min(numToHighlight(), hits.length());
          Analyzer analyzer = getRunData().getAnalyzer();
          Highlighter highlighter = null;
          int maxFrags = 1;
          if (numHighlight > 0) {
            highlighter = getHighlighter(q);
            maxFrags = maxNumFragments();
          }
          boolean merge = isMergeContiguousFragments();
          for (int m = 0; m < traversalSize; m++) {
            int id = hits.id(m);
            res++;
            if (retrieve) {
              Document document = retrieveDoc(ir, id);
              res += document != null ? 1 : 0;
              if (numHighlight > 0 && m < numHighlight) {
                Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
                for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
                  String field = (String) iterator.next();
                  String text = document.get(field);
                  TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
                  res += doHighlight(ts, text, highlighter, merge, maxFrags);
                }
              }
            }
          }
        }
      }


      searcher.close();
    }


    if (closeReader) {
      ir.close();
    }
    return res;
  }






  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
    return ir.document(id);
  }


  /**
   * Return query maker used for this task.
   */
  public abstract QueryMaker getQueryMaker();


  /**
   * Return true if search should be performed.
   */
  public abstract boolean withSearch();
  


  /**
   * Return true if warming should be performed.
   */
  public abstract boolean withWarm();


  /**
   * Return true if, with search, results should be traversed.
   */
  public abstract boolean withTraverse();


  /**
   * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
   * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
   * <p/>
   * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
   *
   * @return Integer.MAX_VALUE
   */
  public int traversalSize() {
    return Integer.MAX_VALUE;
  }


  /**
   * Return true if, with search & results traversing, docs should be retrieved.
   */
  public abstract boolean withRetrieve();


  /**
   * Set to the number of documents to highlight.
   *
   * @return The number of the results to highlight.  O means no docs will be highlighted.
   */
  public int numToHighlight() {
    return 0;
  }


  protected Highlighter getHighlighter(Query q){
    return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
  }


  /**
   *
   * @return the maxiumum number of highlighter fragments
   */
  public int maxNumFragments(){
    return 10;
  }


  /**
   *
   * @return true if the highlighter should merge contiguous fragments
   */
  public boolean isMergeContiguousFragments(){
    return false;
  }


  protected int doHighlight(TokenStream ts, String text,  Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException {
    TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFragments);
    return frag != null ? frag.length : 0;
  }
  
  protected Sort getSort() {
    return null;
  }


  /**
   * Define the fields to highlight.  Base implementation returns all fields
   * @param document The Document
   * @return A Collection of Field names (Strings)
   */
  protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
    List/*<Fieldable>*/ fieldables = document.getFields();
    Set/*<String>*/ result = new HashSet(fieldables.size());
    for (Iterator iterator = fieldables.iterator(); iterator.hasNext();) {
      Fieldable fieldable = (Fieldable) iterator.next();
      result.add(fieldable.name());
    }
    return result;
  }


}
Source Code of org.apache.lucene.benchmark.byTask.tasks.ReadTask

Related Classes of org.apache.lucene.benchmark.byTask.tasks.ReadTask