Examples of RandomIndexWriter
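All excerpts on this page come from Lucene's test suite, where RandomIndexWriter wraps IndexWriter with a randomized configuration to broaden test coverage. They share the basic pattern sketched below. This is a minimal illustration only, assuming the LuceneTestCase helpers (newDirectory(), random(), newTextField(), newSearcher()) that the excerpts also use; the class name, field name and query term here are illustrative, not taken from any of the examples.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class RandomIndexWriterUsageTest extends LuceneTestCase {  // hypothetical test class
  public void testMinimalUsage() throws Exception {
    Directory dir = newDirectory();                               // randomized Directory impl
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);   // randomized IndexWriterConfig
    Document doc = new Document();
    doc.add(newTextField("field", "some text", Field.Store.NO));  // LuceneTestCase field helper
    w.addDocument(doc);
    IndexReader r = w.getReader();                                // near-real-time reader
    IndexSearcher s = newSearcher(r);                             // possibly wrapped searcher
    TopDocs hits = s.search(new TermQuery(new Term("field", "text")), 10);
    assertEquals(1, hits.totalHits);
    r.close();                                                    // release reader, writer, directory
    w.close();
    dir.close();
  }
}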


Examples of org.apache.lucene.index.RandomIndexWriter
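Indexes a single document in group "foo" and verifies that GroupingSearch with setAllGroups(true) reports a total hit count of 1 for a matching TermQuery.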


  public void testSetAllGroups() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    doc.add(newField("group", "foo", StringField.TYPE_NOT_STORED));
    w.addDocument(doc);

    IndexSearcher indexSearcher = newSearcher(w.getReader());
    w.close();

    GroupingSearch gs = new GroupingSearch("group");
    gs.setAllGroups(true);
    TopGroups<?> groups = gs.search(indexSearcher, null, new TermQuery(new Term("group", "foo")), 0, 10);
    assertEquals(1, groups.totalHitCount);

Examples of org.apache.lucene.index.RandomIndexWriter
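Indexes 2^bits documents, each carrying a single term produced by mapInt() over a code point table, then replays query parameters (a term index and a prefix length) read line by line from the test data; scoring is pinned to DefaultSimilarity so results stay comparable.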

   
    int bits = Integer.parseInt(reader.readLine());
    int terms = (int) Math.pow(2, bits);
   
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()));

    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
   
    for (int i = 0; i < terms; i++) {
      field.setStringValue(mapInt(codePointTable, i));
      writer.addDocument(doc);
    }  
   
    IndexReader r = writer.getReader();
    IndexSearcher searcher = newSearcher(r);
    if (VERBOSE) {
      System.out.println("TEST: searcher=" + searcher);
    }
    // even though this uses a boost-only rewrite, this test relies upon queryNorm being the default implementation,
    // otherwise scores are different!
    searcher.setSimilarity(new DefaultSimilarity());
   
    writer.close();
    String line;
    while ((line = reader.readLine()) != null) {
      String[] params = line.split(",");
      String query = mapInt(codePointTable, Integer.parseInt(params[0]));
      int prefix = Integer.parseInt(params[1]);

Examples of org.apache.lucene.index.RandomIndexWriter
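CommonTermsQueryTest.testBasics indexes four short sentences and checks hit counts and ordering for CommonTermsQuery with SHOULD/SHOULD occurs, with high-frequency terms only, and with a mandatory (MUST) low-frequency clause.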

public class CommonTermsQueryTest extends LuceneTestCase {
 
  public void testBasics() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    String[] docs = new String[] {"this is the end of the world right",
        "is this it or maybe not",
        "this is the end of the universe as we know it",
        "there is the famous restaurant at the end of the universe",};
    for (int i = 0; i < docs.length; i++) {
      Document doc = new Document();
      doc.add(newStringField("id", "" + i, Field.Store.YES));
      doc.add(newTextField("field", docs[i], Field.Store.NO));
      w.addDocument(doc);
    }
   
    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r);
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 3);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
      assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
    }
   
    { // only high freq
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 2);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
    }
   
    { // low freq is mandatory
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
     
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 1);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    }
   
    { // low freq is mandatory
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.MUST,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "restaurant"));
      query.add(new Term("field", "universe"));
     
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 1);
      assertEquals("3", r.document(search.scoreDocs[0].doc).get("id"));
     
    }
    r.close();
    w.close();
    dir.close();
  }

Examples of org.apache.lucene.index.RandomIndexWriter
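testMinShouldMatch exercises setLowFreqMinimumNumberShouldMatch and setHighFreqMinimumNumberShouldMatch on CommonTermsQuery, checking how each setting changes which of the four documents match and how they score.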

 
  public void testMinShouldMatch() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    String[] docs = new String[] {"this is the end of the world right",
        "is this it or maybe not",
        "this is the end of the universe as we know it",
        "there is the famous restaurant at the end of the universe",};
    for (int i = 0; i < docs.length; i++) {
      Document doc = new Document();
      doc.add(newStringField("id", "" + i, Field.Store.YES));
      doc.add(newTextField("field", docs[i], Field.Store.NO));
      w.addDocument(doc);
    }
   
    IndexReader r = w.getReader();
    IndexSearcher s = newSearcher(r);
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      query.setLowFreqMinimumNumberShouldMatch(0.5f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 1);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    }
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      query.setLowFreqMinimumNumberShouldMatch(2.0f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 1);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
    }
   
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      query.setLowFreqMinimumNumberShouldMatch(0.49f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 3);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
      assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
    }
   
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      query.setLowFreqMinimumNumberShouldMatch(1.0f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 3);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
      assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
      assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
      assertTrue(search.scoreDocs[1].score > search.scoreDocs[2].score);
    }
   
    {
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "end"));
      query.add(new Term("field", "world"));
      query.add(new Term("field", "universe"));
      query.add(new Term("field", "right"));
      query.setLowFreqMinimumNumberShouldMatch(1.0f);
      query.setHighFreqMinimumNumberShouldMatch(4.0f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 3);
      assertEquals(search.scoreDocs[1].score, search.scoreDocs[2].score, 0.0f);
      assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
      // doc 2 and 3 only get a score from low freq terms
      assertEquals(
          new HashSet<String>(Arrays.asList("2", "3")),
          new HashSet<String>(Arrays.asList(
              r.document(search.scoreDocs[1].doc).get("id"),
              r.document(search.scoreDocs[2].doc).get("id"))));
    }
   
    {
      // only high freq terms around - check that min should match is applied
      CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "the"));
      query.setLowFreqMinimumNumberShouldMatch(1.0f);
      query.setHighFreqMinimumNumberShouldMatch(2.0f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 4);
    }
   
    {
      // only high freq terms around - check that min should match is applied
      CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD,
          random().nextBoolean() ? 2.0f : 0.5f);
      query.add(new Term("field", "is"));
      query.add(new Term("field", "this"));
      query.add(new Term("field", "the"));
      query.setLowFreqMinimumNumberShouldMatch(1.0f);
      query.setHighFreqMinimumNumberShouldMatch(2.0f);
      TopDocs search = s.search(query, 10);
      assertEquals(search.totalHits, 2);
      assertEquals(
          new HashSet<String>(Arrays.asList("0", "2")),
          new HashSet<String>(Arrays.asList(
              r.document(search.scoreDocs[0].doc).get("id"),
              r.document(search.scoreDocs[1].doc).get("id"))));
    }
    r.close();
    w.close();
    dir.close();
  }

Examples of org.apache.lucene.index.RandomIndexWriter
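testRandomIndex builds a random index, collects the five least and five most frequent terms of the "body" field, and verifies that a CommonTermsQuery over all of them matches exactly the same documents as a BooleanQuery built from only the low-frequency terms.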

 
  public void testRandomIndex() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    createRandomIndex(atLeast(50), w, random().nextLong());
    DirectoryReader reader = w.getReader();
    AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
    String field = "body";
    Terms terms = wrapper.terms(field);
    PriorityQueue<TermAndFreq> lowFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(
        5) {
     
      @Override
      protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
        return a.freq > b.freq;
      }
     
    };
    PriorityQueue<TermAndFreq> highFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(
        5) {
     
      @Override
      protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
        return a.freq < b.freq;
      }
     
    };
    try {
      TermsEnum iterator = terms.iterator(null);
      while (iterator.next() != null) {
        if (highFreqQueue.size() < 5) {
          highFreqQueue.add(new TermAndFreq(
              BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
          lowFreqQueue.add(new TermAndFreq(
              BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
        } else {
          if (highFreqQueue.top().freq < iterator.docFreq()) {
            highFreqQueue.top().freq = iterator.docFreq();
            highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            highFreqQueue.updateTop();
          }
         
          if (lowFreqQueue.top().freq > iterator.docFreq()) {
            lowFreqQueue.top().freq = iterator.docFreq();
            lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
            lowFreqQueue.updateTop();
          }
        }
      }
      int lowFreq = lowFreqQueue.top().freq;
      int highFreq = highFreqQueue.top().freq;
      assumeTrue("unlucky index", highFreq - 1 > lowFreq);
      List<TermAndFreq> highTerms = queueToList(highFreqQueue);
      List<TermAndFreq> lowTerms = queueToList(lowFreqQueue);
     
      IndexSearcher searcher = newSearcher(reader);
      Occur lowFreqOccur = randomOccur(random());
      BooleanQuery verifyQuery = new BooleanQuery();
      CommonTermsQuery cq = new CommonTermsQuery(randomOccur(random()),
          lowFreqOccur, highFreq - 1, random().nextBoolean());
      for (TermAndFreq termAndFreq : lowTerms) {
        cq.add(new Term(field, termAndFreq.term));
        verifyQuery.add(new BooleanClause(new TermQuery(new Term(field,
            termAndFreq.term)), lowFreqOccur));
      }
      for (TermAndFreq termAndFreq : highTerms) {
        cq.add(new Term(field, termAndFreq.term));
      }
     
      TopDocs cqSearch = searcher.search(cq, reader.maxDoc());
     
      TopDocs verifySearch = searcher.search(verifyQuery, reader.maxDoc());
      assertEquals(verifySearch.totalHits, cqSearch.totalHits);
      Set<Integer> hits = new HashSet<Integer>();
      for (ScoreDoc doc : verifySearch.scoreDocs) {
        hits.add(doc.doc);
      }
     
      for (ScoreDoc doc : cqSearch.scoreDocs) {
        assertTrue(hits.remove(doc.doc));
      }
     
      assertTrue(hits.isEmpty());
     
      /*
       *  need to force merge here since QueryUtils adds checks based
       *  on leaf readers, which have different statistics than the top
       *  level reader if we have more than one segment. This could
       *  result in a different query / different results.
       */
      w.forceMerge(1);
      DirectoryReader reader2 = w.getReader();
      QueryUtils.check(random(), cq, newSearcher(reader2));
      reader2.close();
    } finally {
      reader.close();
      wrapper.close();
      w.close();
      dir.close();
    }
   
  }

Examples of org.apache.lucene.index.RandomIndexWriter
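Indexes seven documents across the groups author1..author3, the last one with no author field; when the codec supports it (non-Lucene3x), the group value may also be indexed as doc values and searched via the "_dv" field variant.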

    FieldType customType = new FieldType();
    customType.setStored(true);

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
                               random(),
                               dir,
                               newIndexWriterConfig(TEST_VERSION_CURRENT,
                                                    new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "random text", Field.Store.YES));
    doc.add(new Field("id", "1", customType));
    w.addDocument(doc);

    // 1
    doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "some more random text", Field.Store.YES));
    doc.add(new Field("id", "2", customType));
    w.addDocument(doc);

    // 2
    doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc.add(new Field("id", "3", customType));
    w.addDocument(doc);

    // 3
    doc = new Document();
    addGroupField(doc, groupField, "author2", canUseIDV);
    doc.add(new TextField("content", "some random text", Field.Store.YES));
    doc.add(new Field("id", "4", customType));
    w.addDocument(doc);

    // 4
    doc = new Document();
    addGroupField(doc, groupField, "author3", canUseIDV);
    doc.add(new TextField("content", "some more random text", Field.Store.YES));
    doc.add(new Field("id", "5", customType));
    w.addDocument(doc);

    // 5
    doc = new Document();
    addGroupField(doc, groupField, "author3", canUseIDV);
    doc.add(new TextField("content", "random", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);

    // 6 -- no author field
    doc = new Document();
    doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);

    IndexSearcher indexSearcher = newSearcher(w.getReader());
    w.close();

    final Sort groupSort = Sort.RELEVANCE;

    if (canUseIDV && random().nextBoolean()) {
      groupField += "_dv";

Examples of org.apache.lucene.index.RandomIndexWriter
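An analyzer stress test: the same seed drives the token stream checks, and rarely the output is also indexed through a RandomIndexWriter on an FSDirectory, skipping the slow Memory and SimpleText postings formats for large inputs.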

  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
    checkResetException(a, "best effort");
    long seed = random.nextLong();
    boolean useCharFilter = random.nextBoolean();
    Directory dir = null;
    RandomIndexWriter iw = null;
    final String postingsFormat =  _TestUtil.getPostingsFormat("dummy");
    boolean codecOk = iterations * maxWordLength < 100000 ||
        !(postingsFormat.equals("Memory") ||
            postingsFormat.equals("SimpleText"));
    if (rarely(random) && codecOk) {
      dir = newFSDirectory(_TestUtil.getTempDir("bttc"));
      iw = new RandomIndexWriter(new Random(seed), dir, a);
    }
    boolean success = false;
    try {
      checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
      // now test with multiple threads: note we do the EXACT same thing we did before in each thread,

Examples of org.apache.lucene.index.RandomIndexWriter
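Indexes seven documents over three authors plus one without the author field, commits part-way through to force a second segment, and expects the all-groups collector to report 4 distinct groups for documents matching content:random.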

    final String groupField = "author";
    FieldType customType = new FieldType();
    customType.setStored(true);

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        random(),
        dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());

    // 0
    Document doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "random text", Field.Store.YES));
    doc.add(new Field("id", "1", customType));
    w.addDocument(doc);

    // 1
    doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "some more random text blob", Field.Store.YES));
    doc.add(new Field("id", "2", customType));
    w.addDocument(doc);

    // 2
    doc = new Document();
    addGroupField(doc, groupField, "author1", canUseIDV);
    doc.add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc.add(new Field("id", "3", customType));
    w.addDocument(doc);
    w.commit(); // To ensure a second segment

    // 3
    doc = new Document();
    addGroupField(doc, groupField, "author2", canUseIDV);
    doc.add(new TextField("content", "some random text", Field.Store.YES));
    doc.add(new Field("id", "4", customType));
    w.addDocument(doc);

    // 4
    doc = new Document();
    addGroupField(doc, groupField, "author3", canUseIDV);
    doc.add(new TextField("content", "some more random text", Field.Store.YES));
    doc.add(new Field("id", "5", customType));
    w.addDocument(doc);

    // 5
    doc = new Document();
    addGroupField(doc, groupField, "author3", canUseIDV);
    doc.add(new TextField("content", "random blob", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);

    // 6 -- no author field
    doc = new Document();
    doc.add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    doc.add(new Field("id", "6", customType));
    w.addDocument(doc);

    IndexSearcher indexSearcher = newSearcher(w.getReader());
    w.close();

    AbstractAllGroupsCollector<?> allGroupsCollector = createRandomCollector(groupField, canUseIDV);
    indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupsCollector);
    assertEquals(4, allGroupsCollector.getGroupCount());

Examples of org.apache.lucene.index.RandomIndexWriter
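Builds per-group document blocks: each group is written as one contiguous block with addDocuments(), the last document of a block carries a "groupend" marker field, and some blocks are later rewritten in place with updateDocuments().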

        groupMap.put(groupDoc.group, new ArrayList<GroupDoc>());
      }
      groupMap.get(groupDoc.group).add(groupDoc);
    }

    RandomIndexWriter w = new RandomIndexWriter(
                                                random(),
                                                dir,
                                                newIndexWriterConfig(TEST_VERSION_CURRENT,
                                                                     new MockAnalyzer(random())));

    final List<List<Document>> updateDocs = new ArrayList<List<Document>>();

    FieldType groupEndType = new FieldType(StringField.TYPE_NOT_STORED);
    groupEndType.setIndexOptions(IndexOptions.DOCS_ONLY);
    groupEndType.setOmitNorms(true);

    //System.out.println("TEST: index groups");
    for(BytesRef group : groupValues) {
      final List<Document> docs = new ArrayList<Document>();
      //System.out.println("TEST:   group=" + (group == null ? "null" : group.utf8ToString()));
      for(GroupDoc groupValue : groupMap.get(group)) {
        Document doc = new Document();
        docs.add(doc);
        if (groupValue.group != null) {
          doc.add(newStringField("group", groupValue.group.utf8ToString(), Field.Store.NO));
        }
        doc.add(newStringField("sort1", groupValue.sort1.utf8ToString(), Field.Store.NO));
        doc.add(newStringField("sort2", groupValue.sort2.utf8ToString(), Field.Store.NO));
        doc.add(new IntField("id", groupValue.id, Field.Store.NO));
        doc.add(newTextField("content", groupValue.content, Field.Store.NO));
        //System.out.println("TEST:     doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);
      }
      // So we can pull filter marking last doc in block:
      final Field groupEnd = newField("groupend", "x", groupEndType);
      docs.get(docs.size()-1).add(groupEnd);
      // Add as a doc block:
      w.addDocuments(docs);
      if (group != null && random().nextInt(7) == 4) {
        updateDocs.add(docs);
      }
    }

    for(List<Document> docs : updateDocs) {
      // Just replaces docs w/ same docs:
      w.updateDocuments(new Term("group", docs.get(0).get("group")), docs);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    return r;
  }

Examples of org.apache.lucene.index.RandomIndexWriter
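Generates random grouped documents by reusing two shared Document instances (one with and one without the group field) and mutating their Field values per GroupDoc; roughly 1 in 24 documents deliberately omits the group field, and doc values are used for the group when the codec allows it.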

          System.out.println("  content=" + sb.toString());
        }
      }

      Directory dir = newDirectory();
      RandomIndexWriter w = new RandomIndexWriter(
                                                  random(),
                                                  dir,
                                                  newIndexWriterConfig(TEST_VERSION_CURRENT,
                                                                       new MockAnalyzer(random())));
      final boolean preFlex = "Lucene3x".equals(w.w.getConfig().getCodec().getName());
      boolean canUseIDV = !preFlex;

      Document doc = new Document();
      Document docNoGroup = new Document();
      Field idvGroupField = new SortedDocValuesField("group_dv", new BytesRef());
      if (canUseIDV) {
        doc.add(idvGroupField);
        docNoGroup.add(idvGroupField);
      }

      Field group = newStringField("group", "", Field.Store.NO);
      doc.add(group);
      Field sort1 = newStringField("sort1", "", Field.Store.NO);
      doc.add(sort1);
      docNoGroup.add(sort1);
      Field sort2 = newStringField("sort2", "", Field.Store.NO);
      doc.add(sort2);
      docNoGroup.add(sort2);
      Field content = newTextField("content", "", Field.Store.NO);
      doc.add(content);
      docNoGroup.add(content);
      IntField id = new IntField("id", 0, Field.Store.NO);
      doc.add(id);
      docNoGroup.add(id);
      final GroupDoc[] groupDocs = new GroupDoc[numDocs];
      for(int i=0;i<numDocs;i++) {
        final BytesRef groupValue;
        if (random().nextInt(24) == 17) {
          // So we test the "doc doesn't have the group'd
          // field" case:
          groupValue = null;
        } else {
          groupValue = groups.get(random().nextInt(groups.size()));
        }
        final GroupDoc groupDoc = new GroupDoc(i,
                                               groupValue,
                                               groups.get(random().nextInt(groups.size())),
                                               groups.get(random().nextInt(groups.size())),
                                               contentStrings[random().nextInt(contentStrings.length)]);
        if (VERBOSE) {
          System.out.println("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
        }

        groupDocs[i] = groupDoc;
        if (groupDoc.group != null) {
          group.setStringValue(groupDoc.group.utf8ToString());
          if (canUseIDV) {
            idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
          }
        } else if (canUseIDV) {
          // Must explicitly set empty string, else eg if
          // the segment has all docs missing the field then
          // we get null back instead of empty BytesRef:
          idvGroupField.setBytesValue(new BytesRef());
        }
        sort1.setStringValue(groupDoc.sort1.utf8ToString());
        sort2.setStringValue(groupDoc.sort2.utf8ToString());
        content.setStringValue(groupDoc.content);
        id.setIntValue(groupDoc.id);
        if (groupDoc.group == null) {
          w.addDocument(docNoGroup);
        } else {
          w.addDocument(doc);
        }
      }

      final GroupDoc[] groupDocsByID = new GroupDoc[groupDocs.length];
      System.arraycopy(groupDocs, 0, groupDocsByID, 0, groupDocs.length);

      final DirectoryReader r = w.getReader();
      w.close();

      // NOTE: intentional but temporary field cache insanity!
      final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(SlowCompositeReaderWrapper.wrap(r), "id", false);
      DirectoryReader rBlocks = null;
      Directory dirBlocks = null;