Package org.apache.lucene.facet.taxonomy

Examples of org.apache.lucene.facet.taxonomy.TaxonomyReader


    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();
   
    FacetSearchParams fsp = new FacetSearchParams(fip,
                                                  new CountFacetRequest(new CategoryPath("a", '/'), 10));

    // Aggregate the facet counts:
    FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), c);
    List<FacetResult> results = c.getFacetResults();
    assertEquals(1, results.size());
    assertEquals(1, (int) results.get(0).getFacetResultNode().value);

    // LUCENE-4913:
    for(FacetResultNode childNode : results.get(0).getFacetResultNode().subResults) {
      assertTrue(childNode.ordinal != 0);
    }

    searcher.getIndexReader().close();
    taxoReader.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here


    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();
   
    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();
   
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE));
   
    // Aggregate the facet counts:
View Full Code Here

    facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/')));
    indexWriter.addDocument(doc);
    IOUtils.close(indexWriter, taxoWriter);
   
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    // ask to count a non-existing category to test labeling
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5));
   
    final SamplingParams sampleParams = new SamplingParams();
View Full Code Here

    writer.close();

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();

    // Count both "Publish Date" and "Author" dimensions, in
    // drill-down:
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
        new CountFacetRequest(new CategoryPath("Author"), 10));

    DrillSideways ds = new DrillSideways(searcher, taxoReader);

    // Simple case: drill-down on a single field; in this
    // case the drill-sideways + drill-down counts ==
    // drill-down of just the query:
    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"));
    DrillSidewaysResult r = ds.search(null, ddq, 10, fsp);

    assertEquals(2, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));

    // Same simple case, but no baseQuery (pure browse):
    // drill-down on a single field; in this case the
    // drill-sideways + drill-down counts == drill-down of
    // just the query:
    ddq = new DrillDownQuery(fsp.indexingParams);
    ddq.add(new CategoryPath("Author", "Lisa"));
    r = ds.search(null, ddq, 10, fsp);

    assertEquals(2, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
    assertEquals(2, r.facetResults.get(0).getNumValidDescendants());

    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
    assertEquals(4, r.facetResults.get(1).getNumValidDescendants());

    // Another simple case: drill-down on on single fields
    // but OR of two values
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"), new CategoryPath("Author", "Bob"));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(3, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down: Lisa and Bob
    // (drill-down) published twice in 2010 and once in 2012:
    assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));

    // More interesting case: drill-down on two fields
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"));
    ddq.add(new CategoryPath("Publish Date", "2010"));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(1, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is drill-sideways + drill-down: Lisa
    // (drill-down) published once in 2010 and once in 2012:
    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));

    // Even more interesting case: drill down on two fields,
    // but one of them is OR
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());

    // Drill down on Lisa or Bob:
    ddq.add(new CategoryPath("Author", "Lisa"),
            new CategoryPath("Author", "Bob"));
    ddq.add(new CategoryPath("Publish Date", "2010"));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(2, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is both drill-sideways + drill-down:
    // Lisa or Bob published twice in 2010 and once in 2012:
    assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down:
    // only Lisa & Bob published (once each) in 2010:
    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));

    // Test drilling down on invalid field:
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Foobar", "Baz"));
    fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
        new CountFacetRequest(new CategoryPath("Foobar"), 10));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(0, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    assertEquals("Publish Date:", toString(r.facetResults.get(0)));
    assertEquals("Foobar:", toString(r.facetResults.get(1)));

    // Test drilling down on valid term or'd with invalid term:
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"),
            new CategoryPath("Author", "Tom"));
    fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
        new CountFacetRequest(new CategoryPath("Author"), 10));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(2, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));

    // LUCENE-4915: test drilling down on a dimension but
    // NOT facet counting it:
    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"),
            new CategoryPath("Author", "Tom"));
    fsp = new FacetSearchParams(
              new CountFacetRequest(new CategoryPath("Publish Date"), 10));
    r = ds.search(null, ddq, 10, fsp);
    assertEquals(2, r.hits.totalHits);
    assertEquals(1, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));

    // Test main query gets null scorer:
    fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
        new CountFacetRequest(new CategoryPath("Author"), 10));
    ddq = new DrillDownQuery(fsp.indexingParams, new TermQuery(new Term("foobar", "baz")));
    ddq.add(new CategoryPath("Author", "Lisa"));
    r = ds.search(null, ddq, 10, fsp);

    assertEquals(0, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    assertEquals("Publish Date:", toString(r.facetResults.get(0)));
    assertEquals("Author:", toString(r.facetResults.get(1)));

    searcher.getIndexReader().close();
    taxoReader.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here

    writer.close();

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();

    // Count both "Publish Date" and "Author" dimensions, in
    // drill-down:
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
        new CountFacetRequest(new CategoryPath("Author"), 10));

    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("Author", "Lisa"));
    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);

    assertEquals(1, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("Publish Date: 2010=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published once, and Bob
    // published once:
    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));

    searcher.getIndexReader().close();
    taxoReader.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here

    writer.close();

    //System.out.println("searcher=" + searcher);

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();

    // Two requests against the same dim:
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("dim"), 10),
        new CountFacetRequest(new CategoryPath("dim", "a"), 10));

    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
    ddq.add(new CategoryPath("dim", "a"));
    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);

    assertEquals(3, r.hits.totalHits);
    assertEquals(2, r.facetResults.size());
    // Publish Date is only drill-down, and Lisa published
    // one in 2012 and one in 2010:
    assertEquals("dim: a=3 d=1 c=1 b=1", toString(r.facetResults.get(0)));
    // Author is drill-sideways + drill-down: Lisa
    // (drill-down) published twice, and Frank/Susan/Bob
    // published once:
    assertEquals("a (3)\n  z (1)\n  y (1)\n  x (1)\n", FacetTestUtils.toSimpleString(r.facetResults.get(1)));

    searcher.getIndexReader().close();
    taxoReader.close();
    dir.close();
    taxoDir.close();
  }
View Full Code Here

    if (VERBOSE) {
      System.out.println("r.numDocs() = " + r.numDocs());
    }

    // NRT open
    TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
    tw.close();

    int numIters = atLeast(10);

    for(int iter=0;iter<numIters;iter++) {

      String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
      int numDrillDown = _TestUtil.nextInt(random(), 1, Math.min(4, numDims));
      if (VERBOSE) {
        System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown + " useSortedSetDV=" + doUseDV);
      }

      List<FacetRequest> requests = new ArrayList<FacetRequest>();
      while(true) {
        for(int i=0;i<numDims;i++) {
          // LUCENE-4915: sometimes don't request facet
          // counts on the dim(s) we drill down on
          if (random().nextDouble() <= 0.9) {
            if (VERBOSE) {
              System.out.println("  do facet request on dim=" + i);
            }
            requests.add(new CountFacetRequest(new CategoryPath("dim" + i), dimValues[numDims-1].length));
          } else {
            if (VERBOSE) {
              System.out.println("  skip facet request on dim=" + i);
            }
          }
        }
        if (!requests.isEmpty()) {
          break;
        }
      }
      FacetSearchParams fsp = new FacetSearchParams(requests);
      String[][] drillDowns = new String[numDims][];

      int count = 0;
      boolean anyMultiValuedDrillDowns = false;
      while (count < numDrillDown) {
        int dim = random().nextInt(numDims);
        if (drillDowns[dim] == null) {
          if (random().nextBoolean()) {
            // Drill down on one value:
            drillDowns[dim] = new String[] {dimValues[dim][random().nextInt(dimValues[dim].length)]};
          } else {
            int orCount = _TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
            drillDowns[dim] = new String[orCount];
            anyMultiValuedDrillDowns |= orCount > 1;
            for(int i=0;i<orCount;i++) {
              while (true) {
                String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
                for(int j=0;j<i;j++) {
                  if (value.equals(drillDowns[dim][j])) {
                    value = null;
                    break;
                  }
                }
                if (value != null) {
                  drillDowns[dim][i] = value;
                  break;
                }
              }
            }
          }
          if (VERBOSE) {
            BytesRef[] values = new BytesRef[drillDowns[dim].length];
            for(int i=0;i<values.length;i++) {
              values[i] = new BytesRef(drillDowns[dim][i]);
            }
            System.out.println("  dim" + dim + "=" + Arrays.toString(values));
          }
          count++;
        }
      }

      Query baseQuery;
      if (contentToken == null) {
        baseQuery = new MatchAllDocsQuery();
      } else {
        baseQuery = new TermQuery(new Term("content", contentToken));
      }

      DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, baseQuery);

      for(int dim=0;dim<numDims;dim++) {
        if (drillDowns[dim] != null) {
          CategoryPath[] paths = new CategoryPath[drillDowns[dim].length];
          int upto = 0;
          for(String value : drillDowns[dim]) {
            paths[upto++] = new CategoryPath("dim" + dim, value);
          }
          ddq.add(paths);
        }
      }

      Filter filter;
      if (random().nextInt(7) == 6) {
        if (VERBOSE) {
          System.out.println("  only-even filter");
        }
        filter = new Filter() {
            @Override
            public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
              int maxDoc = context.reader().maxDoc();
              final FixedBitSet bits = new FixedBitSet(maxDoc);
              for(int docID=0;docID < maxDoc;docID++) {
                // Keeps only the even ids:
                if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) {
                  bits.set(docID);
                }
              }
              return bits;
            }
          };
      } else {
        filter = null;
      }

      // Verify docs are always collected in order.  If we
      // had an AssertingScorer it could catch it when
      // Weight.scoresDocsOutOfOrder lies!:
      new DrillSideways(s, tr).search(ddq,
                           new Collector() {
                             int lastDocID;

                             @Override
                             public void setScorer(Scorer s) {
                             }

                             @Override
                             public void collect(int doc) {
                               assert doc > lastDocID;
                               lastDocID = doc;
                             }

                             @Override
                             public void setNextReader(AtomicReaderContext context) {
                               lastDocID = -1;
                             }

                             @Override
                             public boolean acceptsDocsOutOfOrder() {
                               return false;
                             }
                           }, fsp);

      // Also separately verify that DS respects the
      // scoreSubDocsAtOnce method, to ensure that all
      // subScorers are on the same docID:
      if (!anyMultiValuedDrillDowns) {
        // Can only do this test when there are no OR'd
        // drill-down values, beacuse in that case it's
        // easily possible for one of the DD terms to be on
        // a future docID:
        new DrillSideways(s, tr) {
          @Override
          protected boolean scoreSubDocsAtOnce() {
            return true;
          }
        }.search(ddq, new AssertingSubDocsAtOnceCollector(), fsp);
      }

      SimpleFacetResult expected = slowDrillSidewaysSearch(s, requests, docs, contentToken, drillDowns, dimValues, filter);

      Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
      DrillSideways ds;
      if (doUseDV) {
        ds = new DrillSideways(s, sortedSetDVState);
      } else {
        ds = new DrillSideways(s, tr);
      }

      // Retrieve all facets:
      DrillSidewaysResult actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);

      TopDocs hits = s.search(baseQuery, numDocs);
      Map<String,Float> scores = new HashMap<String,Float>();
      for(ScoreDoc sd : hits.scoreDocs) {
        scores.put(s.doc(sd.doc).get("id"), sd.score);
      }
      if (VERBOSE) {
        System.out.println("  verify all facets");
      }
      verifyEquals(requests, dimValues, s, expected, actual, scores, -1, doUseDV);

      // Retrieve topN facets:
      int topN = _TestUtil.nextInt(random(), 1, 20);

      List<FacetRequest> newRequests = new ArrayList<FacetRequest>();
      for(FacetRequest oldRequest : requests) {
        newRequests.add(new CountFacetRequest(oldRequest.categoryPath, topN));
      }
      fsp = new FacetSearchParams(newRequests);
      actual = ds.search(ddq, filter, null, numDocs, sort, true, true, fsp);
      if (VERBOSE) {
        System.out.println("  verify topN=" + topN);
      }
      verifyEquals(newRequests, dimValues, s, expected, actual, scores, topN, doUseDV);

      // Make sure drill down doesn't change score:
      TopDocs ddqHits = s.search(ddq, filter, numDocs);
      assertEquals(expected.hits.size(), ddqHits.totalHits);
      for(int i=0;i<expected.hits.size();i++) {
        // Score should be IDENTICAL:
        assertEquals(scores.get(expected.hits.get(i).id), ddqHits.scoreDocs[i].score, 0.0f);
      }
    }

    tr.close();
    r.close();
    td.close();
    d.close();
  }
View Full Code Here

    Directory taxoDir = newDirectory();
    writer = new RandomIndexWriter(random(), dir);
    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
    IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
    taxoWriter.close();

    // Count "Author"
    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
View Full Code Here

    IndexReader ir = iw.getReader();
    tw.commit();

    // prepare index reader and taxonomy.
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

    // prepare searcher to search against
    IndexSearcher searcher = newSearcher(ir);

    FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher);
View Full Code Here

    IndexReader ir = iw.getReader();
    tw.commit();

    // prepare index reader and taxonomy.
    TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

    // prepare searcher to search against
    IndexSearcher searcher = newSearcher(ir);

    FacetsCollector facetsCollector = performSearch(iParams, tr, ir, searcher);
View Full Code Here

TOP

Related Classes of org.apache.lucene.facet.taxonomy.TaxonomyReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.