Package org.apache.lucene.search.suggest

Source Code of org.apache.lucene.search.suggest.DocumentDictionary$DocumentInputIterator

package org.apache.lucene.search.suggest;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

/**
* Dictionary with terms, weights and optionally payload information
* taken from stored fields in a Lucene index.
* <p>
* <b>NOTE: </b>
* <ul>
*    <li>
*      The term, weight and (optionally) payload fields supplied
*      are required for ALL documents and have to be stored
*    </li>
* </ul>
*/
public class DocumentDictionary implements Dictionary {
 
  /** {@link IndexReader} to load documents from */
  protected final IndexReader reader;

  /** Field to read payload from */
  protected final String payloadField;
  private final String field;
  private final String weightField;
 
  /**
   * Creates a new dictionary with the contents of the fields named <code>field</code>
   * for the terms and <code>weightField</code> for the weights that will be used for
   * the corresponding terms.
   */
  public DocumentDictionary(IndexReader reader, String field, String weightField) {
    this.reader = reader;
    this.field = field;
    this.weightField = weightField;
    this.payloadField = null;
  }
 
  /**
   * Creates a new dictionary with the contents of the fields named <code>field</code>
   * for the terms, <code>weightField</code> for the weights that will be used for the
   * the corresponding terms and <code>payloadField</code> for the corresponding payloads
   * for the entry.
   */
  public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) {
    this.reader = reader;
    this.field = field;
    this.weightField = weightField;
    this.payloadField = payloadField;
  }
 
  @Override
  public BytesRefIterator getWordsIterator() throws IOException {
    return new DocumentInputIterator(payloadField!=null);
  }

  /** Implements {@link InputIterator} from stored fields. */
  protected class DocumentInputIterator implements InputIterator {
    private final int docCount;
    private final Set<String> relevantFields;
    private final boolean hasPayloads;
    private final Bits liveDocs;
    private int currentDocId = -1;
    private long currentWeight;
    private BytesRef currentPayload;
    private Document doc;
   
    /**
     * Creates an iterator over term, weight and payload fields from the lucene
     * index. setting <code>withPayload</code> to false, implies an iterator
     * over only term and weight.
     */
    public DocumentInputIterator(boolean hasPayloads) throws IOException {
      docCount = reader.maxDoc() - 1;
      this.hasPayloads = hasPayloads;
      currentPayload = null;
      liveDocs = MultiFields.getLiveDocs(reader);
      this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
    }

    @Override
    public long weight() {
      return currentWeight;
    }

    @Override
    public Comparator<BytesRef> getComparator() {
      return null;
    }

    @Override
    public BytesRef next() throws IOException {
      while (currentDocId < docCount) {
        currentDocId++;
        if (liveDocs != null && !liveDocs.get(currentDocId)) {
          continue;
        }

        doc = reader.document(currentDocId, relevantFields);
       
        if (hasPayloads) {
          IndexableField payload = doc.getField(payloadField);
          if (payload == null) {
            throw new IllegalArgumentException(payloadField + " does not exist");
          } else if (payload.binaryValue() == null) {
            throw new IllegalArgumentException(payloadField + " does not have binary value");
          }
          currentPayload = payload.binaryValue();
        }
       
        currentWeight = getWeight(currentDocId);
       
        IndexableField fieldVal = doc.getField(field);
        if (fieldVal == null) {
          throw new IllegalArgumentException(field + " does not exist");
        } else if(fieldVal.stringValue() == null) {
          throw new IllegalArgumentException(field + " does not have string value");
        }
       
        return new BytesRef(fieldVal.stringValue());
      }
      return null;
    }

    @Override
    public BytesRef payload() {
      return currentPayload;
    }

    @Override
    public boolean hasPayloads() {
      return hasPayloads;
    }

    /** Return the suggestion weight for this document */
    protected long getWeight(int docId) {
      IndexableField weight = doc.getField(weightField);
      if (weight == null) {
        throw new IllegalArgumentException(weightField + " does not exist");
      } else if (weight.numericValue() == null) {
        throw new IllegalArgumentException(weightField + " does not have numeric value");
      }
      return weight.numericValue().longValue();
    }
   
    private Set<String> getRelevantFields(String... fields) {
      Set<String> relevantFields = new HashSet<String>();
      for (String relevantField : fields) {
        if (relevantField != null) {
          relevantFields.add(relevantField);
        }
      }
      return relevantFields;
    }
  }
}
TOP

Related Classes of org.apache.lucene.search.suggest.DocumentDictionary$DocumentInputIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.