Package com.sohospace.lucene.analysis.xanalyzer

Source Code of com.sohospace.lucene.analysis.xanalyzer.BoldFormatter

package com.sohospace.lucene.analysis.xanalyzer;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.TokenGroup;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TestHighLight {
  private static String FIELD_NAME = "content";
  private static String CONTENT = "Since development first began on " +
      "Spring in 2003, there's been a constant buzz about it in " +
      "Java development publications and corporate IT departments." +
      " The reason is clear: Spring is a lightweight Java framework " +
      "in a world of complex heavyweight architectures that take " +
      "forever to implement. Spring is like a breath of fresh air " +
      "to overworked developers. In Spring, you can make an object " +
      "secure, remote, or transactional, with a couple of lines of " +
      "configuration instead of embedded code. The resulting application" +
      " is simple and clean. In Spring, you can work less and go home " +
      "early, because you can strip away a whole lot of the redundant " +
      "code that you tend to see in most J2EE applications. You won't" +
      " be nearly as burdened with meaningless detail. In Spring, you" +
      " can change your mind without the consequences bleeding through " +
      "your entire application. You'll adapt much more quickly than you " +
      "ever could before. Spring: A Developer's Notebook offers a quick " +
      "dive into the new Spring framework, designed to let you get " +
      "hands-on as quickly as you like. If you don't want to bother with" +
      " a lot of theory, this book is definitely for you. You'll work " +
      "through one example after another. Along the way, you'll discover " +
      "the energy and promise of the Spring framework. This practical guide " +
      "features ten code-intensive labs that'll rapidly get you up to speed." +
      " You'll learn how to do the following, and more: install the Spring" +
      " Framework set up the development environment use Spring with other " +
      "open source Java tools such as Tomcat, Struts, and Hibernate master " +
      "AOP and transactions utilize ORM solutions As with all titles in the" +
      " Developer's Notebook series, this no-nonsense book skips all the " +
      "boring prose and cuts right to the chase. It's an approach that " +
      "forces you to get your hands dirty by working through one instructional" +
      " example after another-examples that speak to you instead of at you. ";
  private static String QUERY = "tomcat";
  /**
   * @param args
   */
  public static void main(String[] args) {

    Directory ramDir = new RAMDirectory();
    try {
      IndexWriter writer = new IndexWriter(ramDir, /*new StandardAnalyzer()/*/XFactory.getWriterAnalyzer());
      Document doc = new Document();
      Field fd = new Field(FIELD_NAME, CONTENT, Field.Store.YES, Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS);
      doc.add(fd);
      writer.addDocument(doc);
      writer.optimize();
      writer.close();
     
      IndexReader reader = IndexReader.open(ramDir);
      String queryString = QUERY;
      QueryParser parser=new QueryParser(FIELD_NAME, /*new StandardAnalyzer()/*/XFactory.getWriterAnalyzer());
      Query query = parser.parse(queryString);
      System.out.println(query);
      Searcher searcher = new IndexSearcher(ramDir);
      query = query.rewrite(reader);
      System.out.println(query);
      System.out.println("Searching for: " + query.toString(FIELD_NAME));
      Hits hits = searcher.search(query);
     
      BoldFormatter formatter = new BoldFormatter();
      Highlighter highlighter =new Highlighter(formatter,new QueryScorer(query));
      highlighter.setTextFragmenter(new SimpleFragmenter(50));
      for (int i = 0; i < hits.length(); i++)
      {
        String text = hits.doc(i).get(FIELD_NAME);
        int maxNumFragmentsRequired = 5;
        String fragmentSeparator = "...";
        TermPositionVector tpv = (TermPositionVector)reader.getTermFreqVector(hits.id(i),FIELD_NAME);
        TokenStream tokenStream=TokenSources.getTokenStream(tpv);
        /*
        TokenStream tokenStream2=
          (new StandardAnalyzer())
          //XFactory.getWriterAnalyzer()
            .tokenStream(FIELD_NAME,new StringReader(text));
       
        do {
          Token t = tokenStream2.next();
          if(t==null)break;
          System.out.println("\t" + t.startOffset() + "," + t.endOffset() + "\t" + t.termText());
        }while(true);
        */
        String result =
          highlighter.getBestFragments(
            tokenStream, text, maxNumFragmentsRequired, fragmentSeparator);
        System.out.println("\n" + result);
      }
      reader.close();
    }catch(Exception e) {
      e.printStackTrace();
    }
  }
}

class BoldFormatter implements Formatter {
  public String highlightTerm(String originalText , TokenGroup group)
  {
    if(group.getTotalScore()<=0)
    {
      return originalText;
    }
    return "<b>" + originalText + "</b>";
  }
}

TOP

Related Classes of com.sohospace.lucene.analysis.xanalyzer.BoldFormatter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.