Package org.apache.lucene.facet.taxonomy

Examples of org.apache.lucene.facet.taxonomy.TaxonomyWriter


          assertEquals("field", name);
          return sim;
        }
      });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetFields facetFields = new FacetFields(taxoWriter);     

    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
    writer.addDocument(doc);
    writer.close();
    taxoWriter.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here


    Random random = random();
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.setMaxBufferedDocs(2); // force few segments
    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // avoid merges so that we're left with few segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
   
    FacetFields facetFields = new PayloadFacetFields(taxoWriter, fip);
   
    HashMap<String,Integer> expectedCounts = new HashMap<String,Integer>(DIMENSIONS.length);
    int numDocs = atLeast(10);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numCategories = random.nextInt(3) + 1;
      ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>(numCategories);
      HashSet<String> docDimensions = new HashSet<String>();
      while (numCategories-- > 0) {
        String dim = DIMENSIONS[random.nextInt(DIMENSIONS.length)];
        // we should only increment the expected count by 1 per document
        docDimensions.add(dim);
        categories.add(new CategoryPath(dim, Integer.toString(i), Integer.toString(numCategories)));
      }
      facetFields.addFields(doc, categories);
      doc.add(new StringField("docid", Integer.toString(i), Store.YES));
      doc.add(new TextField("foo", "content" + i, Store.YES));
      indexWriter.addDocument(doc);

      // update expected count per dimension
      for (String dim : docDimensions) {
        Integer val = expectedCounts.get(dim);
        if (val == null) {
          expectedCounts.put(dim, Integer.valueOf(1));
        } else {
          expectedCounts.put(dim, Integer.valueOf(val.intValue() + 1));
        }
      }
     
      if (random.nextDouble() < 0.2) { // add some documents that will be deleted
        doc = new Document();
        doc.add(new StringField("del", "key", Store.NO));
        facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dummy")));
        indexWriter.addDocument(doc);
      }
    }
   
    indexWriter.commit();
    taxoWriter.commit();

    // delete the docs that were marked for deletion. note that the 'dummy'
    // category is not removed from the taxonomy, so must account for it when we
    // verify the migrated index.
    indexWriter.deleteDocuments(new Term("del", "key"));
View Full Code Here

    doTestReadRecreatedTaxonomy(random(), false);
  }
 
  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;
   
    // prepare a few categories
    int  n = 10;
    CategoryPath[] cp = new CategoryPath[n];
    for (int i=0; i<n; i++) {
      cp[i] = new CategoryPath("a", Integer.toString(i));
    }
   
    try {
      dir = newDirectory();
     
      tw = new DirectoryTaxonomyWriter(dir);
      tw.addCategory(new CategoryPath("a"));
      tw.close();
     
      tr = new DirectoryTaxonomyReader(dir);
      int baseNumCategories = tr.getSize();
     
      for (int i=0; i<n; i++) {
        int k = random.nextInt(n);
        tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
        for (int j = 0; j <= k; j++) {
          tw.addCategory(cp[j]);
        }
        tw.close();
        if (closeReader) {
          tr.close();
          tr = new DirectoryTaxonomyReader(dir);
        } else {
          TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
View Full Code Here

    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));

    // create and open a taxonomy writer
    TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    // loop over  sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum=0; docNum<SimpleUtils.docTexts.length; docNum++) {

      // obtain the sample facets for current document
      List<CategoryPath> facetList = SimpleUtils.categoryPathArrayToList(SimpleUtils.categories[docNum]);

      // we do not alter indexing parameters! 
      // a category document builder will add the categories to a document once build() is called
      CategoryDocumentBuilder categoryDocBuilder = new CategoryDocumentBuilder(taxo).setCategoryPaths(facetList);

      // create a plain Lucene document and add some regular Lucene fields to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], Store.NO, Index.ANALYZED));

      // invoke the category document builder for adding categories to the document and,
      // as required, to the taxonomy index
      categoryDocBuilder.build(doc);

      // finally add the document to the index
      iw.addDocument(doc);

      nDocsAdded ++;
      nFacetsAdded += facetList.size();
    }

    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to the search index.
    // this is important, so that all facets referred to by documents in the search index
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();

    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();

    ExampleUtils.log("Indexed "+nDocsAdded+" documents with overall "+nFacetsAdded+" facets.");
  }
View Full Code Here

    // 4. Segment w/ categories, but only some results
   
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges, so we can control the index segments
    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    allExpectedCounts = newCounts();
    termExpectedCounts = newCounts();
   
    // segment w/ no categories
View Full Code Here

    Random r = random();
    RandomIndexWriter writer = new RandomIndexWriter(r, dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(r, MockTokenizer.KEYWORD, false)));
   
    taxoDir = newDirectory();
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    config = new FacetsConfig();

    // Randomize the per-dim config:
    config.setHierarchical("a", random().nextBoolean());
    config.setMultiValued("a", random().nextBoolean());
    if (random().nextBoolean()) {
      config.setIndexFieldName("a", "$a");
    }
    config.setRequireDimCount("a", true);

    config.setHierarchical("b", random().nextBoolean());
    config.setMultiValued("b", random().nextBoolean());
    if (random().nextBoolean()) {
      config.setIndexFieldName("b", "$b");
    }
    config.setRequireDimCount("b", true);

    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      if (i % 2 == 0) { // 50
        doc.add(new TextField("content", "foo", Field.Store.NO));
      }
      if (i % 3 == 0) { // 33
        doc.add(new TextField("content", "bar", Field.Store.NO));
      }
      if (i % 4 == 0) { // 25
        if (r.nextBoolean()) {
          doc.add(new FacetField("a", "1"));
        } else {
          doc.add(new FacetField("a", "2"));
        }
      }
      if (i % 5 == 0) { // 20
        doc.add(new FacetField("b", "1"));
      }
      writer.addDocument(config.build(taxoWriter, doc));
    }
   
    taxoWriter.close();
    reader = writer.getReader();
    writer.close();
   
    taxo = new DirectoryTaxonomyReader(taxoDir);
  }
View Full Code Here

    // create and open an index writer
    IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, SimpleUtils.analyzer));

    // create and open a taxonomy writer
    TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

    // loop over sample documents
    int nDocsAdded = 0;
    int nFacetsAdded = 0;
    for (int docNum = 0; docNum < SimpleUtils.docTexts.length; docNum++) {
      ExampleUtils.log(" ++++ DOC ID: " + docNum);
      // obtain the sample categories for current document
      CategoryContainer categoryContainer = new CategoryContainer();
      for (CategoryPath path : SimpleUtils.categories[docNum]) {
        categoryContainer.addCategory(path);
        ExampleUtils.log("\t ++++ PATH: " + path);
      }
      // and also those with associations
      CategoryPath[] associationsPaths = AssociationUtils.categories[docNum];
      AssociationProperty[] associationProps = AssociationUtils.associations[docNum];
      for (int i = 0; i < associationsPaths.length; i++) {
        categoryContainer.addCategory(associationsPaths[i], associationProps[i]);
        ExampleUtils.log("\t $$$$ Association: ("
            + associationsPaths[i] + "," + associationProps[i]
            + ")");
      }

      // we do not alter indexing parameters!
      // a category document builder will add the categories to a document
      // once build() is called
      CategoryDocumentBuilder categoryDocBuilder = new EnhancementsDocumentBuilder(
          taxo, AssociationUtils.assocIndexingParams);
      categoryDocBuilder.setCategories(categoryContainer);

      // create a plain Lucene document and add some regular Lucene fields
      // to it
      Document doc = new Document();
      doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum],
          Store.YES, Index.ANALYZED));
      doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum],
          Store.NO, Index.ANALYZED));

      // invoke the category document builder for adding categories to the
      // document and,
      // as required, to the taxonomy index
      categoryDocBuilder.build(doc);

      // finally add the document to the index
      iw.addDocument(doc);

      nDocsAdded++;
      nFacetsAdded += categoryContainer.size();
    }

    // commit changes.
    // we commit changes to the taxonomy index prior to committing them to
    // the search index.
    // this is important, so that all facets referred to by documents in the
    // search index
    // will indeed exist in the taxonomy index.
    taxo.commit();
    iw.commit();

    // close the taxonomy index and the index - all modifications are
    // now safely in the provided directories: indexDir and taxoDir.
    taxo.close();
    iw.close();

    ExampleUtils.log("Indexed " + nDocsAdded + " documents with overall "
        + nFacetsAdded + " facets.");
  }
View Full Code Here

    doTestReadRecreatedTaxonomy(random(), false);
  }
 
  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
    Directory dir = null;
    TaxonomyWriter tw = null;
    TaxonomyReader tr = null;
   
    // prepare a few categories
    int  n = 10;
    CategoryPath[] cp = new CategoryPath[n];
    for (int i=0; i<n; i++) {
      cp[i] = new CategoryPath("a", Integer.toString(i));
    }
   
    try {
      dir = newDirectory();
     
      tw = new DirectoryTaxonomyWriter(dir);
      tw.addCategory(new CategoryPath("a"));
      tw.close();
     
      tr = new DirectoryTaxonomyReader(dir);
      int baseNumCategories = tr.getSize();
     
      for (int i=0; i<n; i++) {
        int k = random.nextInt(n);
        tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
        for (int j = 0; j <= k; j++) {
          tw.addCategory(cp[j]);
        }
        tw.close();
        if (closeReader) {
          tr.close();
          tr = new DirectoryTaxonomyReader(dir);
        } else {
          TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
View Full Code Here

          assertEquals("field", name);
          return sim;
        }
      });
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    FacetFields facetFields = new FacetFields(taxoWriter);     

    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a/path", '/')));
    writer.addDocument(doc);
    writer.close();
    taxoWriter.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here

  @Test
  public void testSumScoreAggregator() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir);
    IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for(int i = atLeast(30); i > 0; --i) {
      Document doc = new Document();
      if (random().nextBoolean()) { // don't match all documents
        doc.add(new StringField("f", "v", Store.NO));
      }
      facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a")));
      iw.addDocument(doc);
    }
   
    taxonomyWriter.close();
    iw.close();
   
    DirectoryReader r = DirectoryReader.open(indexDir);
    DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
   
View Full Code Here

TOP

Related Classes of org.apache.lucene.facet.taxonomy.TaxonomyWriter

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.