Examples of PruningReader


Examples of org.apache.lucene.index.PruningReader

    } else if (impl.equals("ridf")) {
      tpp = new RIDFTermPruningPolicy(in, delFields, null, thr);     
    } else {
      throw new Exception("Unknown algorithm: '" + impl + "'");
    }
    pruning = new PruningReader(in, stp, tpp);
    IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_31,
            new WhitespaceAnalyzer(Version.LUCENE_31));
    IndexWriter iw = new IndexWriter(out, cfg);
    iw.addIndexes(new IndexReader[]{pruning});
    iw.close();
View Full Code Here

Examples of org.apache.lucene.index.PruningReader

  public void testRIDFPruning() throws Exception {
    RAMDirectory targetDir = new RAMDirectory();
    IndexReader in = IndexReader.open(sourceDir, true);
    // remove only very popular terms
    RIDFTermPruningPolicy ridf = new RIDFTermPruningPolicy(in, null, null, -0.12);
    PruningReader tfr = new PruningReader(in, null, ridf);
    assertTDCount(tfr, new Term("body", "one"), 0);
    assertTD(tfr, new Term("body", "two"), new int[]{0, 1, 2, 4});
    assertTD(tfr, new Term("body", "three"), new int[]{0, 1, 3});
    assertTD(tfr, new Term("test", "one"), new int[]{4});
    assertTD(tfr, new Term("body", "four"), new int[]{0});
View Full Code Here

Examples of org.apache.lucene.index.PruningReader

  public void testTfPruning() throws Exception {
    RAMDirectory targetDir = new RAMDirectory();
    IndexReader in = IndexReader.open(sourceDir, true);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, null, null, 2);
    PruningReader tfr = new PruningReader(in, null, tfp);
    // verify
    assertTD(tfr, new Term("body", "one"), new int[]{1, 2, 3, 4});
    assertTD(tfr, new Term("body", "two"), new int[]{1, 2});
    assertTD(tfr, new Term("body", "three"), new int[]{1, 3});
    assertTD(tfr, new Term("test", "one"), new int[]{4});
    assertTDCount(tfr, new Term("body", "four"), 0);
    assertTDCount(tfr, new Term("test", "four"), 0);
    // verify new reader
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    IndexWriter iw = new IndexWriter(targetDir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    iw.addIndexes(new IndexReader[]{tfr});
    iw.close();
    IndexReader ir = IndexReader.open(targetDir, true);
    assertTD(ir, new Term("body", "one"), new int[]{1, 2, 3, 4});
    assertTD(ir, new Term("body", "two"), new int[]{1, 2});
    assertTD(ir, new Term("body", "three"), new int[]{1, 3});
    assertTD(ir, new Term("test", "one"), new int[]{4});
    tfr.close();
    ir.close();
  }
View Full Code Here

Examples of org.apache.lucene.index.PruningReader

    IndexReader in = IndexReader.open(sourceDir, true);
    // validate full scores - without pruning, just to make sure we test the right thing
    validateDocScores(fullScores, in, false, false); // validate both docs and scores
    // prune reader
    CarmelTopKTermPruningPolicy tfp = new CarmelTopKTermPruningPolicy(in, null, K, EPSILON, R, null);
    PruningReader tfr = new PruningReader(in, null, tfp);
   
    // create the pruned index
    RAMDirectory targetDir = new RAMDirectory();
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    IndexWriter iw = new IndexWriter(targetDir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
View Full Code Here

Examples of org.apache.lucene.index.PruningReader

  public void testThresholds() throws Exception {
    Map<String, Integer> thresholds = new HashMap<String, Integer>();
    thresholds.put("test", 3);
    IndexReader in = IndexReader.open(sourceDir, true);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, null, thresholds, 2);
    PruningReader tfr = new PruningReader(in, null, tfp);
    assertTDCount(tfr, new Term("test", "one"), 0);
    assertTDCount(tfr, new Term("test", "two"), 0);
    assertTD(tfr, new Term("test", "three"), new int[]{4});
    assertTDCount(tfr, new Term("test", "four"), 0);
  }
View Full Code Here

Examples of org.apache.lucene.index.PruningReader

    Map<String, Integer> removeFields = new HashMap<String, Integer>();
    removeFields.put("test", PruningPolicy.DEL_POSTINGS | PruningPolicy.DEL_STORED);
    IndexReader in = IndexReader.open(sourceDir, true);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, removeFields, null, 2);
    StorePruningPolicy stp = new StorePruningPolicy(in, removeFields);
    PruningReader tfr = new PruningReader(in, stp, tfp);
    Document doc = tfr.document(4);
    // removed stored values?
    assertNull(doc.get("test"));
    // removed postings ?
    TermEnum te = tfr.terms();
    while (te.next()) {
      assertFalse("test".equals(te.term().field()));
    }
    // but vectors should be present !
    TermFreqVector tv = tfr.getTermFreqVector(4, "test");
    assertNotNull(tv);
    assertEquals(4, tv.getTerms().length); // term "four" not deleted yet from TermEnum
    // verify new reader
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
    IndexWriter iw = new IndexWriter(targetDir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
View Full Code Here

Examples of org.apache.lucene.pruning.PruningReader

  public void testTfPruning() throws Exception {
    RAMDirectory targetDir = new RAMDirectory();
    //IndexReader in = IndexReader.open(sourceDir, true);
    IndexReader in = IndexReader.open(sourceDir);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, null, null, 2);
    PruningReader tfr = new PruningReader(in, null, tfp);
    // verify
    assertTD(tfr, new Term("body", "one"), new int[]{1, 2, 3, 4});
    assertTD(tfr, new Term("body", "two"), new int[]{1, 2});
    assertTD(tfr, new Term("body", "three"), new int[]{1, 3});
    assertTD(tfr, new Term("test", "one"), new int[]{4});
    assertTDCount(tfr, new Term("body", "four"), 0);
    assertTDCount(tfr, new Term("test", "four"), 0);
    // verify new reader
    IndexWriter iw = new IndexWriter(targetDir, new WhitespaceAnalyzer(), /*MaxFieldLength.LIMITED*/ true);
    iw.addIndexes(new IndexReader[]{tfr});
    iw.close();
    //IndexReader ir = IndexReader.open(targetDir, true);
    IndexReader ir = IndexReader.open(targetDir);
    assertTD(ir, new Term("body", "one"), new int[]{1, 2, 3, 4});
    assertTD(ir, new Term("body", "two"), new int[]{1, 2});
    assertTD(ir, new Term("body", "three"), new int[]{1, 3});
    assertTD(ir, new Term("test", "one"), new int[]{4});
    tfr.close();
    ir.close();
  }
View Full Code Here

Examples of org.apache.lucene.pruning.PruningReader

    Map<String, Integer> thresholds = new HashMap<String, Integer>();
    thresholds.put("test", 3);
    //IndexReader in = IndexReader.open(sourceDir, true);
    IndexReader in = IndexReader.open(sourceDir);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, null, thresholds, 2);
    PruningReader tfr = new PruningReader(in, null, tfp);
    assertTDCount(tfr, new Term("test", "one"), 0);
    assertTDCount(tfr, new Term("test", "two"), 0);
    assertTD(tfr, new Term("test", "three"), new int[]{4});
    assertTDCount(tfr, new Term("test", "four"), 0);
  }
View Full Code Here

Examples of org.apache.lucene.pruning.PruningReader

    removeFields.put("test", PruningPolicy.DEL_POSTINGS | PruningPolicy.DEL_STORED);
    //IndexReader in = IndexReader.open(sourceDir, true);
    IndexReader in = IndexReader.open(sourceDir);
    TFTermPruningPolicy tfp = new TFTermPruningPolicy(in, removeFields, null, 2);
    StorePruningPolicy stp = new StorePruningPolicy(in, removeFields);
    PruningReader tfr = new PruningReader(in, stp, tfp);
    Document doc = tfr.document(4);
    // removed stored values?
    assertNull(doc.get("test"));
    // removed postings ?
    TermEnum te = tfr.terms();
    while (te.next()) {
      assertFalse("test".equals(te.term().field()));
    }
    // but vectors should be present !
    TermFreqVector tv = tfr.getTermFreqVector(4, "test");
    assertNotNull(tv);
    assertEquals(4, tv.getTerms().length); // term "four" not deleted yet from TermEnum
    // verify new reader
    IndexWriter iw = new IndexWriter(targetDir, new WhitespaceAnalyzer(), /*MaxFieldLength.LIMITED*/ true);
    iw.addIndexes(new IndexReader[]{tfr});
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.