Package org.apache.lucene.facet.sampling

Source Code of org.apache.lucene.facet.sampling.BaseSampleTestTopK

package org.apache.lucene.facet.sampling;

import java.util.List;
import java.util.Random;

import org.apache.lucene.facet.old.OldFacetsAccumulator;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.BaseTestTopK;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

public abstract class BaseSampleTestTopK extends BaseTestTopK {
 
  /** Number of top results */
  protected static final int K = 2;
 
  /** since there is a chance that this test would fail even if the code is correct, retry the sampling */
  protected static final int RETRIES = 10;
 
  @Override
  protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
    FacetSearchParams res = super.searchParamsWithRequests(numResults, fip);
    for (FacetRequest req : res.facetRequests) {
      // randomize the way we aggregate results
      if (random().nextBoolean()) {
        req.setResultMode(ResultMode.GLOBAL_FLAT);
      } else {
        req.setResultMode(ResultMode.PER_NODE_IN_TREE);
      }
    }
    return res;
  }
 
  protected abstract OldFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
      IndexReader indexReader, FacetSearchParams searchParams);
 
  /**
   * Try out faceted search with sampling enabled and complements either disabled or enforced
   * Lots of randomly generated data is being indexed, and later on a "90% docs" faceted search
   * is performed. The results are compared to non-sampled ones.
   */
  public void testCountUsingSampling() throws Exception {
    boolean useRandomSampler = random().nextBoolean();
    for (int partitionSize : partitionSizes) {
      try {
        // complements return counts for all ordinals, so force ALL_PARENTS indexing
        // so that it's easier to compare
        FacetIndexingParams fip = getFacetIndexingParams(partitionSize, true);
        initIndex(fip);
        // Get all of the documents and run the query, then do different
        // facet counts and compare to control
        Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
       
        FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip);
        FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
       
        searcher.search(q, fc);
       
        List<FacetResult> expectedResults = fc.getFacetResults();
       
        FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, fip);
       
        // try several times in case of failure, because the test has a chance to fail
        // if the top K facets are not sufficiently common with the sample set
        for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
          try {
            // complement with sampling!
            final Sampler sampler = createSampler(nTrial, useRandomSampler, samplingSearchParams);
           
            assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
            assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
           
            break; // succeeded
          } catch (AssertionError e) {
            if (nTrial >= RETRIES - 1) {
              throw e; // no more retries allowed, must fail
            }
          }
        }
      } finally {
        closeAll();
      }
    }
  }
 
  private void assertSampling(List<FacetResult> expected, Query q, Sampler sampler, FacetSearchParams params, boolean complement) throws Exception {
    FacetsCollector samplingFC = samplingCollector(complement, sampler, params);
   
    searcher.search(q, samplingFC);
    List<FacetResult> sampledResults = samplingFC.getFacetResults();
   
    assertSameResults(expected, sampledResults);
  }
 
  private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
      FacetSearchParams samplingSearchParams) {
    OldFacetsAccumulator sfa = getSamplingAccumulator(sampler, taxoReader, indexReader, samplingSearchParams);
    sfa.setComplementThreshold(complement ? OldFacetsAccumulator.FORCE_COMPLEMENT : OldFacetsAccumulator.DISABLE_COMPLEMENT);
    return FacetsCollector.create(sfa);
  }
 
  private Sampler createSampler(int nTrial, boolean useRandomSampler, FacetSearchParams sParams) {
    SamplingParams samplingParams = new SamplingParams();
   
    /*
     * Set sampling to Exact fixing with TakmiSampleFixer as it is not easy to
     * validate results with amortized results.
     */
    samplingParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, sParams));
       
    final double retryFactor = Math.pow(1.01, nTrial);
    samplingParams.setOversampleFactor(5.0 * retryFactor); // Oversampling
    samplingParams.setSampleRatio(0.8 * retryFactor);
    samplingParams.setMinSampleSize((int) (100 * retryFactor));
    samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
    samplingParams.setSamplingThreshold(11000); //force sampling

    Sampler sampler = useRandomSampler ?
        new RandomSampler(samplingParams, new Random(random().nextLong())) :
          new RepeatableSampler(samplingParams);
    return sampler;
  }
 
}
TOP

Related Classes of org.apache.lucene.facet.sampling.BaseSampleTestTopK

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.