Package org.apache.lucene.search.similar

Examples of org.apache.lucene.search.similar.MoreLikeThis


    if (nGramSize > 1) { //<co id="mlt.ngramsetup"/>
      analyzer = new ShingleAnalyzerWrapper(analyzer, nGramSize,
              nGramSize);
    }
   
    MoreLikeThis moreLikeThis  = new MoreLikeThis(indexReader); //<co id="mlt.configure"/>
    moreLikeThis.setAnalyzer(analyzer);
    moreLikeThis.setFieldNames(new String[] {
      "content"
    });
   
/*<calloutlist>
<callout arearefs="mlt.indexsetup">Open Index</callout>
<callout arearefs="mlt.analyzersetup">Setup Analyzer</callout>
<callout arearefs="mlt.ngramsetup">Setup NGrams</callout>
<callout arearefs="mlt.configure">Create <classname>MoreLikeThis</classname></callout>
</calloutlist>*/
    //<end id="lucene.examples.mlt.setup"/>
   
    // for testing against the same corpus
    moreLikeThis.setMinTermFreq(1);
    moreLikeThis.setMinDocFreq(1);
   
    //<start id="lucene.examples.mlt.query"/>
    Reader reader = new FileReader(inputPath); //<co id="mlt.query"/>
    Query query = moreLikeThis.like(reader);

    TopDocs results
      = indexSearcher.search(query, maxResults); //<co id="mlt.search"/>
   
    HashMap<String, CategoryHits> categoryHash
View Full Code Here


      if( fields.length < 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "MoreLikeThis requires at least one similarity field: "+MoreLikeThisParams.SIMILARITY_FIELDS );
      }
     
      this.mlt = new MoreLikeThis( reader ); // TODO -- after LUCENE-896, we can use , searcher.getSimilarity() );
      mlt.setFieldNames(fields);
      mlt.setAnalyzer( searcher.getSchema().getAnalyzer() );
     
      // configurable params
      mlt.setMinTermFreq(       params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,         MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
View Full Code Here

  int maxResults = 10;
 
  public MoreLikeThisCategorizer(IndexReader indexReader, String categoryFieldName) throws IOException {
    this.indexReader   = indexReader;
    this.indexSearcher = new IndexSearcher(indexReader);
    this.moreLikeThis  = new MoreLikeThis(indexReader);
    this.categoryFieldName = categoryFieldName;
    loadCategoriesFromIndex();
  }
View Full Code Here

      if( fields.length < 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "MoreLikeThis requires at least one similarity field: "+MoreLikeThisParams.SIMILARITY_FIELDS );
      }
     
      this.mlt = new MoreLikeThis( reader ); // TODO -- after LUCENE-896, we can use , searcher.getSimilarity() );
      mlt.setFieldNames(fields);
      mlt.setAnalyzer( searcher.getSchema().getAnalyzer() );
     
      // configurable params
      mlt.setMinTermFreq(       params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,         MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
View Full Code Here

    this.fieldName = fieldName;
  }
 
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    MoreLikeThis mlt = new MoreLikeThis(reader);
   
    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);
    mlt.setMinTermFreq(minTermFrequency);
    if (minDocFreq >= 0) {
      mlt.setMinDocFreq(minDocFreq);
    }
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setStopWords(stopWords);
    BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName);
    BooleanClause[] clauses = bq.getClauses();
    // make at least half the terms match
    bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
    return bq;
  }
View Full Code Here

      if( fields.length < 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "MoreLikeThis requires at least one similarity field: "+MoreLikeThisParams.SIMILARITY_FIELDS );
      }
     
      this.mlt = new MoreLikeThis( reader ); // TODO -- after LUCENE-896, we can use , searcher.getSimilarity() );
      mlt.setFieldNames(fields);
      mlt.setAnalyzer( searcher.getSchema().getAnalyzer() );
     
      // configurable params
      mlt.setMinTermFreq(       params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,         MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
View Full Code Here

        this.moreLikeFields = moreLikeFields;
        this.analyzer = analyzer;
    }

    @Override public Query rewrite(IndexReader reader) throws IOException {
        MoreLikeThis mlt = new MoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);

        mlt.setFieldNames(moreLikeFields);
        mlt.setAnalyzer(analyzer);
        mlt.setMinTermFreq(minTermFrequency);
        mlt.setMinDocFreq(minDocFreq);
        mlt.setMaxDocFreq(maxDocFreq);
        mlt.setMaxQueryTerms(maxQueryTerms);
        mlt.setMinWordLen(minWordLen);
        mlt.setMaxWordLen(maxWordLen);
        mlt.setStopWords(stopWords);
        mlt.setBoost(boostTerms);
        mlt.setBoostFactor(boostTermsFactor);
        BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText));
        BooleanClause[] clauses = bq.getClauses();

        bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
        return bq;
    }
View Full Code Here

      if( fields.length < 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "MoreLikeThis requires at least one similarity field: "+MoreLikeThisParams.SIMILARITY_FIELDS );
      }
     
      this.mlt = new MoreLikeThis( reader ); // TODO -- after LUCENE-896, we can use , searcher.getSimilarity() );
      mlt.setFieldNames(fields);
      mlt.setAnalyzer( searcher.getSchema().getAnalyzer() );
     
      // configurable params
      mlt.setMinTermFreq(       params.getInt(MoreLikeThisParams.MIN_TERM_FREQ,         MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
View Full Code Here

        this.analyzer=analyzer;
    }
   
    public Query rewrite(IndexReader reader) throws IOException
    {
        MoreLikeThis mlt=new MoreLikeThis(reader);
       
        mlt.setFieldNames(moreLikeFields);
        mlt.setAnalyzer(analyzer);
        mlt.setMinTermFreq(minTermFrequency);
        mlt.setMaxQueryTerms(maxQueryTerms);
        mlt.setStopWords(stopWords);
        BooleanQuery bq= (BooleanQuery) mlt.like(new ByteArrayInputStream(likeText.getBytes()));       
        BooleanClause[] clauses = bq.getClauses();
        //make at least half the terms match
        bq.setMinimumNumberShouldMatch((int)(clauses.length*percentTermsToMatch));
        return bq;
    }
View Full Code Here

        this.analyzer=analyzer;
    }
   
    public Query rewrite(IndexReader reader) throws IOException
    {
        MoreLikeThis mlt=new MoreLikeThis(reader);
       
        mlt.setFieldNames(moreLikeFields);
        mlt.setAnalyzer(analyzer);
        mlt.setMinTermFreq(minTermFrequency);
        if(minDocFreq>=0)
        {
          mlt.setMinDocFreq(minDocFreq);
        }       
        mlt.setMaxQueryTerms(maxQueryTerms);
        mlt.setStopWords(stopWords);
        BooleanQuery bq= (BooleanQuery) mlt.like(new ByteArrayInputStream(likeText.getBytes()));       
        BooleanClause[] clauses = bq.getClauses();
        //make at least half the terms match
        bq.setMinimumNumberShouldMatch((int)(clauses.length*percentTermsToMatch));
        return bq;
    }
View Full Code Here

TOP

Related Classes of org.apache.lucene.search.similar.MoreLikeThis

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.