Examples of org.apache.lucene.facet.taxonomy.TaxonomyReader

Package org.apache.lucene.facet.taxonomy

Examples of org.apache.lucene.facet.taxonomy.TaxonomyReader

org.apache.lucene.facet.taxonomy.TaxonomyReader
TaxonomyReader is the read-only interface with which the faceted-search library uses the taxonomy during search time.
A TaxonomyReader holds a list of categories. Each category has a serial number which we call an "ordinal", and a hierarchical "path" name:
- The ordinal is an integer that starts at 0 for the first category (which is always the root category), and grows contiguously as more categories are added. Note that once a category is added, it can never be deleted.
- The path is a CategoryPath object specifying the category's position in the hierarchy.
Notes about concurrent access to the taxonomy:
An implementation must allow multiple readers to be active concurrently with a single writer. Readers follow so-called "point in time" semantics, i.e., a TaxonomyReader object will only see taxonomy entries which were available at the time it was created. What the writer writes is only available to (new) readers after the writer's commit() is called.
In faceted search, two separate indices are used: the main Lucene index, and the taxonomy. Because the main index refers to the categories listed in the taxonomy, it is important to open the taxonomy *after* opening the main index, and it is also necessary to reopen() the taxonomy after reopen()ing the main index.
This order is important, otherwise it would be possible for the main index to refer to a category which is not yet visible in the old snapshot of the taxonomy. Note that it is indeed fine for the taxonomy to be opened after the main index - even a long time after. The reason is that once a category is added to the taxonomy, it can never be changed or deleted, so there is no danger of a "too new" taxonomy being inconsistent with an older index. @lucene.experimental

    }
    IOUtils.close(indexWriter, taxoWriter);
    
    // test the multi iterator
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    CategoryListIterator[] iterators = new CategoryListIterator[numDimensions];
    for (int i = 0; i < iterators.length; i++) {
      CategoryListParams clp = indexingParams.getCategoryListParams(new CategoryPath(dimensions[i]));
      IntDecoder decoder = clp.createEncoder().createMatchingDecoder();
      iterators[i] = new DocValuesCategoryListIterator(clp.field, decoder);
    }
    MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);
    for (AtomicReaderContext context : indexReader.leaves()) {
      assertTrue("failed to init multi-iterator", cli.setNextReader(context));
      IntsRef ordinals = new IntsRef();
      final int maxDoc = context.reader().maxDoc();
      for (int i = 0; i < maxDoc; i++) {
        cli.getOrdinals(i, ordinals);
        assertTrue("document " + i + " does not have categories", ordinals.length > 0);
        for (int j = 0; j < ordinals.length; j++) {
          CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
          assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
          if (cp.length == 2) {
            int globalDoc = i + context.docBase;
            assertEquals("invalid category for document " + globalDoc, globalDoc, Integer.parseInt(cp.components[1]));
          }

View Full Code Here

    if (args.length != (printTree ? 2 : 1)) {
      System.out.println("\nUsage: java -classpath ... org.apache.lucene.facet.util.PrintTaxonomyStats [-printTree] /path/to/taxononmy/index\n");
      System.exit(1);
    }
    Directory dir = FSDirectory.open(new File(path));
    TaxonomyReader r = new DirectoryTaxonomyReader(dir);
    printStats(r, System.out, printTree);
    r.close();
    dir.close();
  }

View Full Code Here

  public void testMergeHierarchies() throws Exception {
    Directory indexDir = new RAMDirectory(), taxoDir = new RAMDirectory();
    initIndex(indexDir, taxoDir);
    
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    
    String[] exp = new String[] { "Date (0)\n  2010 (4)\n  2011 (3)\n" };
    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date" } }, null);

View Full Code Here

  
  @Test
  public void testDifferentNumResults() throws Exception {
    // test the collector w/ FacetRequests and different numResults
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), 
        new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));

View Full Code Here

  }
  
  @Test
  public void testAllCounts() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), 
        new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));

View Full Code Here

  }
  
  @Test
  public void testBigNumResults() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, Integer.MAX_VALUE), 
        new CountFacetRequest(CP_B, Integer.MAX_VALUE));
    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));

View Full Code Here

  }
  
  @Test
  public void testNoParents() throws Exception {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = newSearcher(indexReader);
    FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_C, NUM_CHILDREN_CP_C), 
        new CountFacetRequest(CP_D, NUM_CHILDREN_CP_D));
    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));
    searcher.search(new MatchAllDocsQuery(), fc);

View Full Code Here

    indexTwoDocs(indexWriter, facetFields, true);  // 6th segment, with content, with categories
    indexTwoDocs(indexWriter, null, true);         // 7th segment, with content, no categories
    IOUtils.close(indexWriter, taxoWriter);


    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher indexSearcher = newSearcher(indexReader);
    
    // search for "f:a", only segments 1 and 3 should match results
    Query q = new TermQuery(new Term("f", "a"));
    FacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10);

View Full Code Here

    Directory dir = newDirectory();
    
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    touchTaxo(taxoWriter, new CategoryPath("a"));
    
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);


    touchTaxo(taxoWriter, new CategoryPath("b"));
    
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
    taxoReader.close();
    taxoReader = newtr;
    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));


    // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.

View Full Code Here

    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();


    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();


    // Count both "Publish Date" and "Author" dimensions:
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
        new CountFacetRequest(new CategoryPath("Author"), 10));


    // Aggregate the facet counts:
    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);


    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), c);


    // Retrieve & verify results:
    List<FacetResult> results = c.getFacetResults();
    assertEquals(2, results.size());
    assertEquals("Publish Date (0)\n  2012 (2)\n  2010 (2)\n  1999 (1)\n",
        FacetTestUtils.toSimpleString(results.get(0)));
    assertEquals("Author (0)\n  Lisa (2)\n  Frank (1)\n  Susan (1)\n  Bob (1)\n",
        FacetTestUtils.toSimpleString(results.get(1)));


    
    // Now user drills down on Publish Date/2010:
    fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
    DrillDownQuery q2 = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    q2.add(new CategoryPath("Publish Date/2010", '/'));
    c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(q2, c);
    results = c.getFacetResults();
    assertEquals(1, results.size());
    assertEquals("Author (0)\n  Lisa (1)\n  Bob (1)\n",
        FacetTestUtils.toSimpleString(results.get(0)));


    // Smoke test PrintTaxonomyStats:
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    PrintTaxonomyStats.printStats(taxoReader, new PrintStream(bos, false, "UTF-8"), true);
    String result = bos.toString("UTF-8");
    assertTrue(result.indexOf("/Author: 4 immediate children; 5 total categories") != -1);
    assertTrue(result.indexOf("/Publish Date: 3 immediate children; 12 total categories") != -1);
    // Make sure at least a few nodes of the tree came out:
    assertTrue(result.indexOf("  /1999") != -1);
    assertTrue(result.indexOf("  /2012") != -1);
    assertTrue(result.indexOf("      /20") != -1);


    taxoReader.close();
    searcher.getIndexReader().close();
    dir.close();
    taxoDir.close();
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.facet.taxonomy.TaxonomyReader

com.gentics.cr.lucene.search.CRSearcher

org.apache.lucene.benchmark.byTask.tasks.CloseTaxonomyReaderTask

org.apache.lucene.benchmark.byTask.TestPerfTasksLogic

org.apache.lucene.demo.facet.AssociationsFacetsExample

org.apache.lucene.demo.facet.ExpressionAggregationFacetsExample

org.apache.lucene.demo.facet.MultiCategoryListsFacetsExample

org.apache.lucene.demo.facet.SimpleFacetsExample

org.apache.lucene.facet.complements.TestTotalFacetCounts

org.apache.lucene.facet.complements.TestTotalFacetCountsCache

org.apache.lucene.facet.example.adaptive.AdaptiveSearcher

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.