Package org.apache.lucene.index.sorter

Source Code of org.apache.lucene.index.sorter.TestSortingMergePolicy

package org.apache.lucene.index.sorter;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

@SuppressCodecs("Lucene3x")
public class TestSortingMergePolicy extends LuceneTestCase {

  private List<String> terms;
  private Directory dir1, dir2;
  private Sorter sorter;
  private IndexReader reader;
  private IndexReader sortedReader;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    sorter = new NumericDocValuesSorter("ndv");
    createRandomIndexes();
  }

  private Document randomDocument() {
    final Document doc = new Document();
    doc.add(new NumericDocValuesField("ndv", random().nextLong()));
    doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES));
    return doc;
  }

  static MergePolicy newSortingMergePolicy(Sorter sorter) {
    // create a MP with a low merge factor so that many merges happen
    MergePolicy mp;
    if (random().nextBoolean()) {
      TieredMergePolicy tmp = newTieredMergePolicy(random());
      final int numSegs = _TestUtil.nextInt(random(), 3, 5);
      tmp.setSegmentsPerTier(numSegs);
      tmp.setMaxMergeAtOnce(_TestUtil.nextInt(random(), 2, numSegs));
      mp = tmp;
    } else {
      LogMergePolicy lmp = newLogMergePolicy(random());
      lmp.setMergeFactor(_TestUtil.nextInt(random(), 3, 5));
      mp = lmp;
    }
    // wrap it with a sorting mp
    return new SortingMergePolicy(mp, sorter);
  }

  private void createRandomIndexes() throws IOException {
    dir1 = newDirectory();
    dir2 = newDirectory();
    final int numDocs = atLeast(150);
    final int numTerms = _TestUtil.nextInt(random(), 1, numDocs / 5);
    Set<String> randomTerms = new HashSet<String>();
    while (randomTerms.size() < numTerms) {
      randomTerms.add(_TestUtil.randomSimpleString(random()));
    }
    terms = new ArrayList<String>(randomTerms);
    final long seed = random().nextLong();
    final IndexWriterConfig iwc1 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
    final IndexWriterConfig iwc2 = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
    iwc2.setMergePolicy(newSortingMergePolicy(sorter));
    final RandomIndexWriter iw1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
    final RandomIndexWriter iw2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
    for (int i = 0; i < numDocs; ++i) {
      if (random().nextInt(5) == 0 && i != numDocs - 1) {
        final String term = RandomPicks.randomFrom(random(), terms);
        iw1.deleteDocuments(new Term("s", term));
        iw2.deleteDocuments(new Term("s", term));
      }
      final Document doc = randomDocument();
      iw1.addDocument(doc);
      iw2.addDocument(doc);
      if (random().nextInt(8) == 0) {
        iw1.commit();
        iw2.commit();
      }
    }
    // Make sure we have something to merge
    iw1.commit();
    iw2.commit();
    final Document doc = randomDocument();
    // NOTE: don't use RIW.addDocument directly, since it sometimes commits
    // which may trigger a merge, at which case forceMerge may not do anything.
    // With field updates this is a problem, since the updates can go into the
    // single segment in the index, and threefore the index won't be sorted.
    // This hurts the assumption of the test later on, that the index is sorted
    // by SortingMP.
    iw1.w.addDocument(doc);
    iw2.w.addDocument(doc);

    if (defaultCodecSupportsFieldUpdates()) {
      // update NDV of docs belonging to one term (covers many documents)
      final long value = random().nextLong();
      final String term = RandomPicks.randomFrom(random(), terms);
      iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value);
      iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value);
    }
   
    iw1.forceMerge(1);
    iw2.forceMerge(1);
    iw1.close();
    iw2.close();
    reader = DirectoryReader.open(dir1);
    sortedReader = DirectoryReader.open(dir2);
  }

  @Override
  public void tearDown() throws Exception {
    reader.close();
    sortedReader.close();
    dir1.close();
    dir2.close();
    super.tearDown();
  }

  private static void assertSorted(AtomicReader reader) throws IOException {
    final NumericDocValues ndv = reader.getNumericDocValues("ndv");
    for (int i = 1; i < reader.maxDoc(); ++i) {
      assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i), ndv.get(i-1) <= ndv.get(i));
    }
  }

  public void testSortingMP() throws IOException {
    final AtomicReader sortedReader1 = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sorter);
    final AtomicReader sortedReader2 = SlowCompositeReaderWrapper.wrap(sortedReader);

    assertSorted(sortedReader1);
    assertSorted(sortedReader2);
   
    assertReaderEquals("", sortedReader1, sortedReader2);
  }

}
TOP

Related Classes of org.apache.lucene.index.sorter.TestSortingMergePolicy

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.