Package org.apache.lucene.facet.index

Source Code of org.apache.lucene.facet.index.CategoryDocumentBuilder

package org.apache.lucene.facet.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.facet.index.attributes.CategoryAttribute;
import org.apache.lucene.facet.index.attributes.CategoryAttributesIterable;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.index.streaming.CategoryAttributesStream;
import org.apache.lucene.facet.index.streaming.CategoryListTokenizer;
import org.apache.lucene.facet.index.streaming.CategoryParentsStream;
import org.apache.lucene.facet.index.streaming.CategoryTokenizer;
import org.apache.lucene.facet.index.streaming.CountingListTokenizer;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* A utility class which allows attachment of {@link CategoryPath}s or
* {@link CategoryAttribute}s to a given document using a taxonomy.<br>
* Construction could be done with either a given {@link FacetIndexingParams} or
* the default implementation {@link DefaultFacetIndexingParams}.<br>
* A CategoryDocumentBuilder can be reused by repeatedly setting the categories
* and building the document. Categories are provided either as
* {@link CategoryAttribute} elements through {@link #setCategories(Iterable)},
* or as {@link CategoryPath} elements through
* {@link #setCategoryPaths(Iterable)}.
* <p>
* Note that both {@link #setCategories(Iterable)} and
* {@link #setCategoryPaths(Iterable)} return this
* {@link CategoryDocumentBuilder}, allowing the following pattern: {@code new
* CategoryDocumentBuilder(taxonomy,
* params).setCategories(categories).build(doc)}.
*
* @lucene.experimental
*/
public class CategoryDocumentBuilder {

  /**
   * A {@link TaxonomyWriter} for adding categories and retrieving their
   * ordinals.
   */
  protected final TaxonomyWriter taxonomyWriter;

  /**
   * Parameters to be used when indexing categories.
   */
  protected final FacetIndexingParams indexingParams;

  /**
   * A list of fields which is filled at ancestors' construction and used
   * during {@link CategoryDocumentBuilder#build(Document)}.
   */
  protected final ArrayList<Field> fieldList = new ArrayList<Field>();

  protected Map<String, List<CategoryAttribute>> categoriesMap;

  /**
   * Creating a facets document builder with default facet indexing
   * parameters.<br>
   * See:
   * {@link #CategoryDocumentBuilder(TaxonomyWriter, FacetIndexingParams)}
   *
   * @param taxonomyWriter
   *            to which new categories will be added, as well as translating
   *            known categories to ordinals
   * @throws IOException
   *
   */
  public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter)
      throws IOException {
    this(taxonomyWriter, new DefaultFacetIndexingParams());
  }

  /**
   * Creating a facets document builder with a given facet indexing parameters
   * object.<br>
   *
   * @param taxonomyWriter
   *            to which new categories will be added, as well as translating
   *            known categories to ordinals
   * @param params
   *            holds all parameters the indexing process should use such as
   *            category-list parameters
   * @throws IOException
   */
  public CategoryDocumentBuilder(TaxonomyWriter taxonomyWriter,
      FacetIndexingParams params) throws IOException {
    this.taxonomyWriter = taxonomyWriter;
    this.indexingParams = params;
    this.categoriesMap = new HashMap<String, List<CategoryAttribute>>();
  }

  /**
   * Set the categories of the document builder from an {@link Iterable} of
   * {@link CategoryPath} objects.
   *
   * @param categoryPaths
   *            An iterable of CategoryPath objects which holds the categories
   *            (facets) which will be added to the document at
   *            {@link #build(Document)}
   * @return This CategoryDocumentBuilder, to enable this one line call:
   *         {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
   *         {@link #setCategoryPaths(Iterable)}.{@link #build(Document)}.
   * @throws IOException
   */
  public CategoryDocumentBuilder setCategoryPaths(
      Iterable<CategoryPath> categoryPaths) throws IOException {
    if (categoryPaths == null) {
      fieldList.clear();
      return this;
    }
    return setCategories(new CategoryAttributesIterable(categoryPaths));
  }

  /**
   * Set the categories of the document builder from an {@link Iterable} of
   * {@link CategoryAttribute} objects.
   *
   * @param categories
   *            An iterable of {@link CategoryAttribute} objects which holds
   *            the categories (facets) which will be added to the document at
   *            {@link #build(Document)}
   * @return This CategoryDocumentBuilder, to enable this one line call:
   *         {@code new} {@link #CategoryDocumentBuilder(TaxonomyWriter)}.
   *         {@link #setCategories(Iterable)}.{@link #build(Document)}.
   * @throws IOException
   */
  public CategoryDocumentBuilder setCategories(
      Iterable<CategoryAttribute> categories) throws IOException {
    fieldList.clear();
    if (categories == null) {
      return this;
    }

    // get field-name to a list of facets mapping as different facets could
    // be added to different category-lists on different fields
    fillCategoriesMap(categories);

    // creates a different stream for each different field
    for (Entry<String, List<CategoryAttribute>> e : categoriesMap
        .entrySet()) {
      // create a category attributes stream for the array of facets
      CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(
          e.getValue());

      // Set a suitable {@link TokenStream} using
      // CategoryParentsStream, followed by CategoryListTokenizer and
      // CategoryTokenizer composition (the ordering of the last two is
      // not mandatory).
      CategoryParentsStream parentsStream = (CategoryParentsStream) getParentsStream(categoryAttributesStream);
      CategoryListTokenizer categoryListTokenizer = getCategoryListTokenizer(parentsStream);
      CategoryTokenizer stream = getCategoryTokenizer(categoryListTokenizer);

      // Finally creating a suitable field with stream and adding it to a
      // master field-list, used during the build process (see
      // super.build())
      fieldList.add(new Field(e.getKey(), stream));
    }

    return this;
  }

  /**
   * Get a stream of categories which includes the parents, according to
   * policies defined in indexing parameters.
   *
   * @param categoryAttributesStream
   *            The input stream
   * @return The parents stream.
   * @see OrdinalPolicy OrdinalPolicy (for policy of adding category tokens for parents)
   * @see PathPolicy PathPolicy (for policy of adding category <b>list</b> tokens for parents)
   */
  protected TokenStream getParentsStream(
      CategoryAttributesStream categoryAttributesStream) {
    return new CategoryParentsStream(categoryAttributesStream,
        taxonomyWriter, indexingParams);
  }

  /**
   * Fills the categories mapping between a field name and a list of
   * categories that belongs to it according to this builder's
   * {@link FacetIndexingParams} object
   *
   * @param categories
   *            Iterable over the category attributes
   */
  protected void fillCategoriesMap(Iterable<CategoryAttribute> categories)
      throws IOException {
    categoriesMap.clear();

    // for-each category
    for (CategoryAttribute category : categories) {
      // extracting the field-name to which this category belongs
      String fieldName = indexingParams.getCategoryListParams(
          category.getCategoryPath()).getTerm().field();

      // getting the list of categories which belongs to that field
      List<CategoryAttribute> list = categoriesMap.get(fieldName);

      // if no such list exists
      if (list == null) {
        // adding a new one to the map
        list = new ArrayList<CategoryAttribute>();
        categoriesMap.put(fieldName, list);
      }

      // adding the new category to the list
      list.add(category.clone());
    }
  }

  /**
   * Get a category list tokenizer (or a series of such tokenizers) to create
   * the <b>category list tokens</b>.
   *
   * @param categoryStream
   *            A stream containing {@link CategoryAttribute} with the
   *            relevant data.
   * @return The category list tokenizer (or series of tokenizers) to be used
   *         in creating category list tokens.
   */
  protected CategoryListTokenizer getCategoryListTokenizer(
      TokenStream categoryStream) {
    return getCountingListTokenizer(categoryStream);
  }

  /**
   * Get a {@link CountingListTokenizer} for creating counting list token.
   *
   * @param categoryStream
   *            A stream containing {@link CategoryAttribute}s with the
   *            relevant data.
   * @return A counting list tokenizer to be used in creating counting list
   *         token.
   */
  protected CountingListTokenizer getCountingListTokenizer(
      TokenStream categoryStream) {
    return new CountingListTokenizer(categoryStream, indexingParams);
  }

  /**
   * Get a {@link CategoryTokenizer} to create the <b>category tokens</b>.
   * This method can be overridden for adding more attributes to the category
   * tokens.
   *
   * @param categoryStream
   *            A stream containing {@link CategoryAttribute} with the
   *            relevant data.
   * @return The {@link CategoryTokenizer} to be used in creating category
   *         tokens.
   * @throws IOException
   */
  protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
      throws IOException {
    return new CategoryTokenizer(categoryStream, indexingParams);
  }

  /** Adds the fields created in one of the "set" methods to the document */
  public Document build(Document doc) {
    for (Field f : fieldList) {
      f.setOmitNorms(true);
      doc.add(f);
    }
    return doc;
  }

}
TOP

Related Classes of org.apache.lucene.facet.index.CategoryDocumentBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.