Package org.apache.lucene.facet.sampling

Source Code of org.apache.lucene.facet.sampling.SamplingAccumulator

package org.apache.lucene.facet.sampling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* Facets accumulation with sampling.<br>
* <p>
* Note two major differences between this class and {@link SamplingWrapper}:
* <ol>
* <li>Latter can wrap any other {@link FacetsAccumulator} while this class
* directly extends {@link StandardFacetsAccumulator}.</li>
* <li>This class can effectively apply sampling on the complement set of
* matching documents, thereby working efficiently with the complement
* optimization - see {@link StandardFacetsAccumulator#getComplementThreshold()}
* .</li>
* </ol>
* <p>
* Note: Sampling accumulation (Accumulation over a sampled-set of the results),
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()}.
*
* @see Sampler
* @lucene.experimental
*/
public class SamplingAccumulator extends StandardFacetsAccumulator {
 
  private double samplingRatio = -1d;
  private final Sampler sampler;
 
  public SamplingAccumulator(Sampler sampler, FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxonomyReader,
      FacetArrays facetArrays) {
    super(searchParams, indexReader, taxonomyReader, facetArrays);
    this.sampler = sampler;
  }

  /**
   * Constructor...
   */
  public SamplingAccumulator(
      Sampler sampler,
      FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxonomyReader) {
    super(searchParams, indexReader, taxonomyReader);
    this.sampler = sampler;
  }

  @Override
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
    // Replacing the original searchParams with the over-sampled
    FacetSearchParams original = searchParams;
    SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
    final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
    if (shouldOversample) {
      searchParams = sampler.overSampledSearchParams(original);
    }
   
    List<FacetResult> sampleRes = super.accumulate(docids);
   
    List<FacetResult> results = new ArrayList<FacetResult>();
    for (FacetResult fres : sampleRes) {
      // for sure fres is not null because this is guaranteed by the delegee.
      PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
      if (samplerFixer != null) {
        // fix the result of current request
        samplerFixer.fixResult(docids, fres, samplingRatio);
       
        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to

        if (shouldOversample) {
          // Using the sampler to trim the extra (over-sampled) results
          fres = sampler.trimResult(fres);
        }
      }
     
      // final labeling if allowed (because labeling is a costly operation)
      if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
        // category does not exist, add an empty result
        results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
      } else {
        frh.labelResult(fres);
        results.add(fres);
      }
    }
   
    searchParams = original; // Back to original params
   
    return results;
  }

  @Override
  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
    SampleResult sampleRes = sampler.getSampleSet(docids);
    samplingRatio = sampleRes.actualSampleRatio;
    return sampleRes.docids;
  }
 
  @Override
  protected double getTotalCountsFactor() {
    if (samplingRatio<0) {
      throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
    }
    return samplingRatio;
  }
}
TOP

Related Classes of org.apache.lucene.facet.sampling.SamplingAccumulator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.