Package org.apache.lucene.search

Examples of org.apache.lucene.search.Similarity


    private Similarity cachedSimilarity;
   
    @Override
    public byte[] norms(String fieldName) {
      byte[] norms = cachedNorms;
      Similarity sim = getSimilarity();
      if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
        Info info = getInfo(fieldName);
        int numTokens = info != null ? info.numTokens : 0;
        int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
        float boost = info != null ? info.getBoost() : 1.0f;
        FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
        float n = sim.computeNorm(fieldName, invertState);
        byte norm = Similarity.encodeNorm(n);
        norms = new byte[] {norm};
       
        // cache it for future reuse
        cachedNorms = norms;
View Full Code Here


  public void testSpanScorerZeroSloppyFreq() throws Exception {
    boolean ordered = true;
    int slop = 1;

    final Similarity sim = new DefaultSimilarity() {
      @Override
      public float sloppyFreq(int distance) {
        return 0.0f;
      }
    };
View Full Code Here

  public void testSweetSpotLengthNorm() {
 
    SweetSpotSimilarity ss = new SweetSpotSimilarity();
    ss.setLengthNormFactors(1,1,0.5f);

    Similarity d = new DefaultSimilarity();
    Similarity s = ss;


    // base case, should degrade
 
    for (int i = 1; i < 1000; i++) {
      assertEquals("base case: i="+i,
                   d.lengthNorm("foo",i), s.lengthNorm("foo",i),
                   0.0f);
    }

    // make a sweet spot
 
    ss.setLengthNormFactors(3,10,0.5f);
 
    for (int i = 3; i <=10; i++) {
      assertEquals("3,10: spot i="+i,
                   1.0f, s.lengthNorm("foo",i),
                   0.0f);
    }
 
    for (int i = 10; i < 1000; i++) {
      assertEquals("3,10: 10<x : i="+i,
                   d.lengthNorm("foo",i-9), s.lengthNorm("foo",i),
                   0.0f);
    }


    // seperate sweet spot for certain fields

    ss.setLengthNormFactors("bar",8,13, 0.5f, false);
    ss.setLengthNormFactors("yak",6,9, 0.5f, false);

 
    for (int i = 3; i <=10; i++) {
      assertEquals("f: 3,10: spot i="+i,
                   1.0f, s.lengthNorm("foo",i),
                   0.0f);
    }
    for (int i = 10; i < 1000; i++) {
      assertEquals("f: 3,10: 10<x : i="+i,
                   d.lengthNorm("foo",i-9), s.lengthNorm("foo",i),
                   0.0f);
    }
    for (int i = 8; i <=13; i++) {
      assertEquals("f: 8,13: spot i="+i,
                   1.0f, s.lengthNorm("bar",i),
                   0.0f);
    }
    for (int i = 6; i <=9; i++) {
      assertEquals("f: 6,9: spot i="+i,
                   1.0f, s.lengthNorm("yak",i),
                   0.0f);
    }
    for (int i = 13; i < 1000; i++) {
      assertEquals("f: 8,13: 13<x : i="+i,
                   d.lengthNorm("foo",i-12), s.lengthNorm("bar",i),
                   0.0f);
    }
    for (int i = 9; i < 1000; i++) {
      assertEquals("f: 6,9: 9<x : i="+i,
                   d.lengthNorm("foo",i-8), s.lengthNorm("yak",i),
                   0.0f);
    }


    // steepness

    ss.setLengthNormFactors("a",5,8,0.5f, false);
    ss.setLengthNormFactors("b",5,8,0.1f, false);

    for (int i = 9; i < 1000; i++) {
      assertTrue("s: i="+i+" : a="+ss.lengthNorm("a",i)+
                 " < b="+ss.lengthNorm("b",i),
                 ss.lengthNorm("a",i) < s.lengthNorm("b",i));
    }

  }
View Full Code Here

  public void testSweetSpotTf() {
 
    SweetSpotSimilarity ss = new SweetSpotSimilarity();

    Similarity d = new DefaultSimilarity();
    Similarity s = ss;
   
    // tf equal

    ss.setBaselineTfFactors(0.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertEquals("tf: i="+i,
                   d.tf(i), s.tf(i), 0.0f);
    }

    // tf higher
 
    ss.setBaselineTfFactors(1.0f, 0.0f);
 
    for (int i = 1; i < 1000; i++) {
      assertTrue("tf: i="+i+" : d="+d.tf(i)+
                 " < s="+s.tf(i),
                 d.tf(i) < s.tf(i));
    }

    // tf flat
 
    ss.setBaselineTfFactors(1.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat1: i="+i, 1.0f, s.tf(i), 0.0f);
    }
    ss.setBaselineTfFactors(2.0f, 6.0f);
    for (int i = 1; i <=6; i++) {
      assertEquals("tf flat2: i="+i, 2.0f, s.tf(i), 0.0f);
    }
    for (int i = 6; i <=1000; i++) {
      assertTrue("tf: i="+i+" : s="+s.tf(i)+
                 " < d="+d.tf(i),
                 s.tf(i) < d.tf(i));
    }

    // stupidity
    assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
  }
View Full Code Here

          return hyperbolicTf(freq);
        }
      };
    ss.setHyperbolicTfFactors(3.3f, 7.7f, Math.E, 5.0f);
   
    Similarity s = ss;

    for (int i = 1; i <=1000; i++) {
      assertTrue("MIN tf: i="+i+" : s="+s.tf(i),
                 3.3f <= s.tf(i));
      assertTrue("MAX tf: i="+i+" : s="+s.tf(i),
                 s.tf(i) <= 7.7f);
    }
    assertEquals("MID tf", 3.3f+(7.7f - 3.3f)/2.0f, s.tf(5), 0.00001f);
   
    // stupidity
    assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
   
  }
View Full Code Here

      //System.out.println(msg);
      lastScore = scores[i];
  }

  // override the norms to be inverted
  Similarity s = new DefaultSimilarity() {
    @Override
    public float lengthNorm(String fieldName, int numTokens) {
        return numTokens;
    }
      };
View Full Code Here

        //System.err.println("total hits: " + results.totalHits);

        //set similarity to use only the frequencies
        //score is based on frequency of phrase only
        searcher.setSimilarity(
                new Similarity() {
                   public static final long serialVersionUID = 1L;
                   public float coord(int overlap, int maxOverlap) {
                      return 1;
                   }
                   public float queryNorm(float sumOfSquaredWeights) {
View Full Code Here

  private IndexSearcher buildSearcher(SearchFactoryImplementor searchFactoryImplementor) {
    Map<Class<?>, DocumentBuilder<?>> builders = searchFactoryImplementor.getDocumentBuilders();
    List<DirectoryProvider> directories = new ArrayList<DirectoryProvider>();
    Set<String> idFieldNames = new HashSet<String>();

    Similarity searcherSimilarity = null;
    //TODO check if caching this work for the last n list of classes makes a perf boost
    if ( classes == null || classes.length == 0 ) {
      // empty classes array means search over all indexed enities,
      // but we have to make sure there is at least one
      if ( builders.isEmpty() ) {
View Full Code Here

  }

  public void performWork(LuceneWork work, IndexWriter writer) {
    DocumentBuilder documentBuilder = workspace.getDocumentBuilder( work.getEntityClass() );
    Analyzer analyzer = documentBuilder.getAnalyzer();
    Similarity similarity = documentBuilder.getSimilarity();
    if ( log.isTraceEnabled() ) {
      log.trace(
          "add to Lucene index: {}#{}:{}",
          new Object[] { work.getEntityClass(), work.getId(), work.getDocument() }
      );
View Full Code Here

    private Similarity cachedSimilarity;
   
    @Override
    public byte[] norms(String fieldName) {
      byte[] norms = cachedNorms;
      Similarity sim = getSimilarity();
      if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached?
        Info info = getInfo(fieldName);
        int numTokens = info != null ? info.numTokens : 0;
        int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
        float boost = info != null ? info.getBoost() : 1.0f;
        FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
        float n = sim.computeNorm(fieldName, invertState);
        byte norm = sim.encodeNormValue(n);
        norms = new byte[] {norm};
       
        // cache it for future reuse
        cachedNorms = norms;
        cachedFieldName = fieldName;
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.Similarity

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.