Examples of TFIDFSimilarity


Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

    Object encNorm = find(dialog, "encNorm");
    Object doc = find(dialog, "docNum");
    Object fld = find(dialog, "fld");
    Object srchOpts = find("srchOptTabs");
    Similarity sim = createSimilarity(srchOpts);
    TFIDFSimilarity s = null;
    if (sim != null && (sim instanceof TFIDFSimilarity)) {
      s = (TFIDFSimilarity)sim;
    } else {
      s = defaultSimilarity;
    }
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

 
  public void displayNewNorm(Object dialog) {
    Object newNorm = find(dialog, "newNorm");
    Object encNorm = find(dialog, "encNorm");
    Field f = (Field)getProperty(dialog, "field");
    TFIDFSimilarity s = (TFIDFSimilarity)getProperty(dialog, "similarity");
    Object sim = find(dialog, "sim");
    String simClassString = getString(sim, "text");
    if (simClassString != null && simClassString.trim().length() > 0
            && !simClassString.equals(defaultSimilarity.getClass().getName())) {
      try {
        Class cls = Class.forName(simClassString.trim());
        if (TFIDFSimilarity.class.isAssignableFrom(cls)) {
          s = (TFIDFSimilarity)cls.newInstance();
          putProperty(dialog, "similarity", s);
        }
      } catch (Throwable t) {
        t.printStackTrace();
        showStatus("Invalid similarity class " + simClassString + ", using DefaultSimilarity.");
      }
    }
    if (s == null) {
      s = defaultSimilarity;
    }
    setString(sim, "text", s.getClass().getName());
    try {
      float newFVal = Float.parseFloat(getString(newNorm, "text"));
      long newLVal = Util.encodeNormValue(newFVal, f.name(), s);
      float encFVal = Util.decodeNormValue(newLVal, f.name(), s);
      setString(encNorm, "text", String.valueOf(encFVal) +
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  }

  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field);
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }
    final NumericDocValues norms = readerContext.reader().getNormValues(field);

    if (norms == null) {
      return new ConstDoubleDocValues(0.0, this);
    }
   
    return new FloatDocValues(this) {
      @Override
      public float floatVal(int doc) {
        return similarity.decodeNormValue((byte)norms.get(doc));
      }
    };
  }
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), indexedField);
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    return new FloatDocValues(this) {
      DocsEnum docs ;
      int atDoc;
      int lastDocRequested = -1;

      { reset(); }

      public void reset() throws IOException {
        // no one should call us for deleted docs?
       
        if (terms != null) {
          final TermsEnum termsEnum = terms.iterator(null);
          if (termsEnum.seekExact(indexedBytes, false)) {
            docs = termsEnum.docs(null, null);
          } else {
            docs = null;
          }
        } else {
          docs = null;
        }

        if (docs == null) {
          docs = new DocsEnum() {
            @Override
            public int freq() {
              return 0;
            }

            @Override
            public int docID() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int nextDoc() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              return DocIdSetIterator.NO_MORE_DOCS;
            }
          };
        }
        atDoc = -1;
      }

      @Override
      public float floatVal(int doc) {
        try {
          if (doc < lastDocRequested) {
            // out-of-order access.... reset
            reset();
          }
          lastDocRequested = doc;

          if (atDoc < doc) {
            atDoc = docs.advance(doc);
          }

          if (atDoc > doc) {
            // term doesn't match this document... either because we hit the
            // end, or because the next doc is after this doc.
            return similarity.tf(0);
          }

          // a match!
          return similarity.tf(docs.freq());
        } catch (IOException e) {
          throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
        }
      }
    };
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), indexedField);
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    return new FloatDocValues(this) {
      DocsEnum docs ;
      int atDoc;
      int lastDocRequested = -1;

      { reset(); }

      public void reset() throws IOException {
        // no one should call us for deleted docs?
       
        if (terms != null) {
          final TermsEnum termsEnum = terms.iterator(null);
          if (termsEnum.seekExact(indexedBytes, false)) {
            docs = termsEnum.docs(null, null);
          } else {
            docs = null;
          }
        } else {
          docs = null;
        }

        if (docs == null) {
          docs = new DocsEnum() {
            @Override
            public int freq() {
              return 0;
            }

            @Override
            public int docID() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int nextDoc() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return 0;
            }
          };
        }
        atDoc = -1;
      }

      @Override
      public float floatVal(int doc) {
        try {
          if (doc < lastDocRequested) {
            // out-of-order access.... reset
            reset();
          }
          lastDocRequested = doc;

          if (atDoc < doc) {
            atDoc = docs.advance(doc);
          }

          if (atDoc > doc) {
            // term doesn't match this document... either because we hit the
            // end, or because the next doc is after this doc.
            return similarity.tf(0);
          }

          // a match!
          return similarity.tf(docs.freq());
        } catch (IOException e) {
          throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
        }
      }
    };
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  }

  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field);
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }
    final NumericDocValues norms = readerContext.reader().getNormValues(field);

    if (norms == null) {
      return new ConstDoubleDocValues(0.0, this);
    }
   
    return new FloatDocValues(this) {
      @Override
      public float floatVal(int doc) {
        return similarity.decodeNormValue((byte)norms.get(doc));
      }
    };
  }
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  @Override
  public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);
    IndexSearcher searcher = (IndexSearcher)context.get("searcher");
    final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), indexedField);
    if (similarity == null) {
      throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
    }

    return new FloatDocValues(this) {
      DocsEnum docs ;
      int atDoc;
      int lastDocRequested = -1;

      { reset(); }

      public void reset() throws IOException {
        // no one should call us for deleted docs?
       
        if (terms != null) {
          final TermsEnum termsEnum = terms.iterator(null);
          if (termsEnum.seekExact(indexedBytes, false)) {
            docs = termsEnum.docs(null, null);
          } else {
            docs = null;
          }
        } else {
          docs = null;
        }

        if (docs == null) {
          docs = new DocsEnum() {
            @Override
            public int freq() {
              return 0;
            }

            @Override
            public int docID() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int nextDoc() {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              return DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return 0;
            }
          };
        }
        atDoc = -1;
      }

      @Override
      public float floatVal(int doc) {
        try {
          if (doc < lastDocRequested) {
            // out-of-order access.... reset
            reset();
          }
          lastDocRequested = doc;

          if (atDoc < doc) {
            atDoc = docs.advance(doc);
          }

          if (atDoc > doc) {
            // term doesn't match this document... either because we hit the
            // end, or because the next doc is after this doc.
            return similarity.tf(0);
          }

          // a match!
          return similarity.tf(docs.freq());
        } catch (IOException e) {
          throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
        }
      }
    };
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  public void testSweetSpotTf() {
 
    SweetSpotSimilarity ss = new SweetSpotSimilarity();

    TFIDFSimilarity d = new DefaultSimilarity();
    TFIDFSimilarity s = ss;
   
    // tf equal

    ss.setBaselineTfFactors(0.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertEquals("tf: i="+i,
                   d.tf(i), s.tf(i), 0.0f);
    }

    // tf higher
 
    ss.setBaselineTfFactors(1.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertTrue("tf: i="+i+" : d="+d.tf(i)+
                 " < s="+s.tf(i),
                 d.tf(i) < s.tf(i));
    }

    // tf flat
 
    ss.setBaselineTfFactors(1.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f);
    }
    ss.setBaselineTfFactors(2.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f);
    }
    for (int i = 6; i <=1000; i++) {
      assertTrue("tf: i="+i+" : s="+s.tf(i)+
                 " < d="+d.tf(i),
                 s.tf(i) < d.tf(i));
    }

    // stupidity
    assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
  }
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

          return hyperbolicTf(freq);
        }
      };
    ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f);
   
    TFIDFSimilarity s = ss;

    for (int i = 1; i <=1000; i++) {
      assertTrue("MIN tf: i="+i+" : s="+s.tf(i),
                 3.3f <= s.tf(i));
      assertTrue("MAX tf: i="+i+" : s="+s.tf(i),
                 s.tf(i) <= 7.7f);
    }
    assertEquals("MID tf", 3.3f+(7.7f - 3.3f)/2.0f, s.tf(5), 0.00001f);
   
    // stupidity
    assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
   
  }
View Full Code Here

Examples of org.apache.lucene.search.similarities.TFIDFSimilarity

  public void testSweetSpotTf() {
 
    SweetSpotSimilarity ss = new SweetSpotSimilarity();

    TFIDFSimilarity d = new DefaultSimilarity();
    TFIDFSimilarity s = ss;
   
    // tf equal

    ss.setBaselineTfFactors(0.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertEquals("tf: i="+i,
                   d.tf(i), s.tf(i), 0.0f);
    }

    // tf higher
 
    ss.setBaselineTfFactors(1.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertTrue("tf: i="+i+" : d="+d.tf(i)+
                 " < s="+s.tf(i),
                 d.tf(i) < s.tf(i));
    }

    // tf flat
 
    ss.setBaselineTfFactors(1.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f);
    }
    ss.setBaselineTfFactors(2.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f);
    }
    for (int i = 6; i <=1000; i++) {
      assertTrue("tf: i="+i+" : s="+s.tf(i)+
                 " < d="+d.tf(i),
                 s.tf(i) < d.tf(i));
    }

    // stupidity
    assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.