Examples of org.apache.lucene.facet.taxonomy.TaxonomyWriter

Package org.apache.lucene.facet.taxonomy

Examples of org.apache.lucene.facet.taxonomy.TaxonomyWriter

org.apache.lucene.facet.taxonomy.TaxonomyWriter
TaxonomyWriter is the interface which the faceted-search library uses to dynamically build the taxonomy at indexing time.
Notes about concurrent access to the taxonomy:
An implementation must allow multiple readers and a single writer to be active concurrently. Readers follow so-called "point in time" semantics, i.e., a reader object will only see taxonomy entries which were available at the time it was created. What the writer writes is only available to (new) readers after the writer's commit() is called.
Faceted search keeps two indices: Lucene's main index and this taxonomy index. When one or more readers are active concurrently with the writer, care must be taken to avoid an inconsistency between the state of these two indices: when writing to the indices, the taxonomy must always be committed to disk *before* the main index, because the main index refers to categories listed in the taxonomy. Such control is best achieved by turning off the main index's "autocommit" feature and explicitly calling commit() on both indices (first the taxonomy, then the main index). In old versions of Lucene (2.2 or earlier), where autocommit could not be turned off, a more complicated solution is needed, e.g., some sort of (possibly inter-process) locking to ensure that a reader is opened only right after both indices have been flushed (and before anything else is written to them). This API is tagged @lucene.experimental.

          assertEquals("field", name);
          return sim;
        }
      });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetFields facetFields = new FacetFields(taxoWriter);      


    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
    writer.addDocument(doc);
    writer.close();
    taxoWriter.close();
    dir.close();
    taxoDir.close();
  }

View Full Code Here

    Random random = random();
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.setMaxBufferedDocs(2); // force few segments
    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // avoid merges so that we're left with few segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    
    FacetFields facetFields = new PayloadFacetFields(taxoWriter, fip);
    
    HashMap<String,Integer> expectedCounts = new HashMap<String,Integer>(DIMENSIONS.length);
    int numDocs = atLeast(10);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numCategories = random.nextInt(3) + 1;
      ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>(numCategories);
      HashSet<String> docDimensions = new HashSet<String>();
      while (numCategories-- > 0) {
        String dim = DIMENSIONS[random.nextInt(DIMENSIONS.length)];
        // we should only increment the expected count by 1 per document
        docDimensions.add(dim);
        categories.add(new CategoryPath(dim, Integer.toString(i), Integer.toString(numCategories)));
      }
      facetFields.addFields(doc, categories);
      doc.add(new StringField("docid", Integer.toString(i), Store.YES));
      doc.add(new TextField("foo", "content" + i, Store.YES));
      indexWriter.addDocument(doc);


      // update expected count per dimension
      for (String dim : docDimensions) {
        Integer val = expectedCounts.get(dim);
        if (val == null) {
          expectedCounts.put(dim, Integer.valueOf(1));
        } else {
          expectedCounts.put(dim, Integer.valueOf(val.intValue() + 1));
        }
      }
      
      if (random.nextDouble() < 0.2) { // add some documents that will be deleted
        doc = new Document();
        doc.add(new StringField("del", "key", Store.NO));
        facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dummy")));
        indexWriter.addDocument(doc);
      }
    }
    
    indexWriter.commit();
    taxoWriter.commit();


    // delete the docs that were marked for deletion. note that the 'dummy'
    // category is not removed from the taxonomy, so must account for it when we
    // verify the migrated index.
    indexWriter.deleteDocuments(new Term("del", "key"));

View Full Code Here

    doTestReadRecreatedTaxonomy(random(), false);
  }
  
  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;
    
    // prepare a few categories
    int  n = 10;
    CategoryPath[] cp = new CategoryPath[n];
    for (int i=0; i<n; i++) {
      cp[i] = new CategoryPath("a", Integer.toString(i));
    }
    
    try {
      dir = newDirectory();
      
      tw = new DirectoryTaxonomyWriter(dir);
      tw.addCategory(new CategoryPath("a"));
      tw.close();
      
      tr = new DirectoryTaxonomyReader(dir);
      int baseNumCategories = tr.getSize();
      
      for (int i=0; i<n; i++) {
        int k = random.nextInt(n);
        tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
        for (int j = 0; j <= k; j++) {
          tw.addCategory(cp[j]);
        }
        tw.close();
        if (closeReader) {
          tr.close();
          tr = new DirectoryTaxonomyReader(dir);
        } else {
          TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);

View Full Code Here


    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));


    // create and open a taxonomy writer
    TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);


    // loop over  sample documents 
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum=0; docNum<SimpleUtils.docTexts.length; docNum++) {


      // obtain the sample facets for current document
      List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(SimpleUtils.categories[docNum]);


      // we do not alter indexing parameters!  
      // a category document builder will add the categories to a document once build() is called
      CategoryDocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(taxo).setCategoryPaths(facetList);


      // create a plain Lucene document and add some regular Lucene fields to it 
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], Store.NO, Index.ANALYZED));


      // invoke the category document builder for adding categories to the document and,
      // as required, to the taxonomy index 
      categoryDocBuilder.build(doc);


      // finally add the document to the index
      iw.addDocument(doc);


      nDocsAdded ++;
      nFacetsAdded += facetList.size(); 
    }


    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to the search index.
    // this is important, so that all facets referred to by documents in the search index 
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();


    // close the taxonomy index and the index - all modifications are 
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();


    ExampleUtils.log("Indexed "+nDocsAdded+" documents with overall "+nFacetsAdded+" facets.");
  }

View Full Code Here

    // 4. Segment w/ categories, but only some results
    
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);


    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();
    
    // segment w/ no categories

View Full Code Here

    Random r = random();
    RandomIndexWriter writer = new RandomIndexWriter(r, dir, 
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));
    
    taxoDir = newDirectory();
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();


    // Randomize the per-dim config:
    config.setHierarchical("a", random().nextBoolean());
    config.setMultiValued("a", random().nextBoolean());
    if (random().nextBoolean()) {
      config.setIndexFieldName("a", "$a");
    }
    config.setRequireDimCount("a", true);


    config.setHierarchical("b", random().nextBoolean());
    config.setMultiValued("b", random().nextBoolean());
    if (random().nextBoolean()) {
      config.setIndexFieldName("b", "$b");
    }
    config.setRequireDimCount("b", true);


    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      if (i % 2 == 0) { // 50
        doc.add(new TextField("content", "foo", Field.Store.NO));
      }
      if (i % 3 == 0) { // 33
        doc.add(new TextField("content", "bar", Field.Store.NO));
      }
      if (i % 4 == 0) { // 25
        if (r.nextBoolean()) {
          doc.add(new FacetField("a", "1"));
        } else {
          doc.add(new FacetField("a", "2"));
        }
      }
      if (i % 5 == 0) { // 20
        doc.add(new FacetField("b", "1"));
      }
      writer.addDocument(config.build(taxoWriter, doc));
    }
    
    taxoWriter.close();
    reader = writer.getReader();
    writer.close();
    
    taxo = new DirectoryTaxonomyReader(taxoDir);
  }

View Full Code Here


    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));


    // create and open a taxonomy writer
    TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);


    // loop over sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
      ExampleUtils.log(" ++++ DOC ID: " + docNum);
      // obtain the sample categories for current document
      CategoryContainer categoryContainer = new CategoryContainer();
      for (CategoryPath path : SimpleUtils.categories[docNum]) {
        categoryContainer.addCategory(path);
        ExampleUtils.log("\t ++++ PATH: " + path);
      }
      // and also those with associations
      CategoryPath[] associationsPaths = AssociationUtils.categories[docNum];
      AssociationProperty[] associationProps = AssociationUtils.associations[docNum];
      for (int i = 0; i < associationsPaths.length; i++) {
        categoryContainer.addCategory(associationsPaths[i], associationProps[i]);
        ExampleUtils.log("\t $$$$ Association: ("
            + associationsPaths[i] + "," + associationProps[i]
            + ")");
      }


      // we do not alter indexing parameters!
      // a category document builder will add the categories to a document
      // once build() is called
      CategoryDocumentBuilder categoryDocBuilder = new EnhancementsDocumentBuilder(
          taxo, AssociationUtils.assocIndexingParams);
      categoryDocBuilder.setCategories(categoryContainer);


      // create a plain Lucene document and add some regular Lucene fields
      // to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum],
          Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum],
          Store.NO, Index.ANALYZED));


      // invoke the category document builder for adding categories to the
      // document and,
      // as required, to the taxonomy index
      categoryDocBuilder.build(doc);


      // finally add the document to the index
      iw.addDocument(doc);


      nDocsAdded++;
      nFacetsAdded += categoryContainer.size();
    }


    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to
    // the search index.
    // this is important, so that all facets referred to by documents in the
    // search index
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();


    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();


    ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
        + nFacetsAdded + " facets.");
  }

View Full Code Here

    doTestReadRecreatedTaxonomy(random(), false);
  }
  
  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;
    
    // prepare a few categories
    int  n = 10;
    CategoryPath[] cp = new CategoryPath[n];
    for (int i=0; i<n; i++) {
      cp[i] = new CategoryPath("a", Integer.toString(i));
    }
    
    try {
      dir = newDirectory();
      
      tw = new DirectoryTaxonomyWriter(dir);
      tw.addCategory(new CategoryPath("a"));
      tw.close();
      
      tr = new DirectoryTaxonomyReader(dir);
      int baseNumCategories = tr.getSize();
      
      for (int i=0; i<n; i++) {
        int k = random.nextInt(n);
        tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
        for (int j = 0; j <= k; j++) {
          tw.addCategory(cp[j]);
        }
        tw.close();
        if (closeReader) {
          tr.close();
          tr = new DirectoryTaxonomyReader(dir);
        } else {
          TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);

View Full Code Here

          assertEquals("field", name);
          return sim;
        }
      });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetFields facetFields = new FacetFields(taxoWriter);      


    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
    writer.addDocument(doc);
    writer.close();
    taxoWriter.close();
    dir.close();
    taxoDir.close();
  }

View Full Code Here

  @Test
  public void testSumScoreAggregator() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();


    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));


    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for(int i = atLeast(30); i > 0; --i) {
      Document doc = new Document();
      if (random().nextBoolean()) { // don't match all documents
        doc.add(new StringField("f", "v", Store.NO));
      }
      facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a")));
      iw.addDocument(doc);
    }
    
    taxonomyWriter.close();
    iw.close();
    
    DirectoryReader r = DirectoryReader.open(indexDir);
    DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.facet.taxonomy.TaxonomyWriter

com.gentics.cr.lucene.indexer.index.CRLuceneIndexJob

org.apache.lucene.benchmark.byTask.tasks.CommitTaxonomyIndexTask

org.apache.lucene.facet.associations.AssociationsFacetRequestTest

org.apache.lucene.facet.complements.TestTotalFacetCounts

org.apache.lucene.facet.complements.TestTotalFacetCountsCache

org.apache.lucene.facet.enhancements.TwoEnhancementsTest

org.apache.lucene.facet.example.association.AssociationIndexer

org.apache.lucene.facet.example.simple.SimpleIndexer

org.apache.lucene.facet.FacetTestBase

org.apache.lucene.facet.index.categorypolicy.OrdinalPolicyTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.