Package org.apache.lucene.analysis.standard

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer
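All of the fragments below come from Lucene 2.9/3.0-era tests and demos, so they use constructors such as StandardAnalyzer(Version) that were deprecated or removed in later releases. As a self-contained starting point, here is a minimal sketch (the class name and input text are illustrative, not taken from the fragments) of what StandardAnalyzer does: tokenize with the standard grammar, lower-case each token, and drop English stop words.

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class StandardAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts = analyzer.tokenStream("contents",
        new StringReader("The Quick Brown Fox!"));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(termAtt.term()); // prints: quick, brown, fox ("the" is a stop word)
    }
    ts.close();
  }
}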


    // tail of a helper method; pr is presumably a ParallelReader built from
    // the directories returned by getDir1() and getDir2() below
    return new IndexSearcher(pr);
  }

  private Directory getDir1() throws IOException {
    Directory dir1 = new MockRAMDirectory();
    IndexWriter w1 = new IndexWriter(dir1, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d1 = new Document();
    d1.add(new Field("f1", "v1", Field.Store.YES, Field.Index.ANALYZED));
    d1.add(new Field("f2", "v1", Field.Store.YES, Field.Index.ANALYZED));
    w1.addDocument(d1);
    Document d2 = new Document();
    // (the listing is truncated here; a plausible completion, field values assumed:)
    d2.add(new Field("f1", "v2", Field.Store.YES, Field.Index.ANALYZED));
    d2.add(new Field("f2", "v2", Field.Store.YES, Field.Index.ANALYZED));
    w1.addDocument(d2);
    w1.close();
    return dir1;
  }

  private Directory getDir2() throws IOException {
    Directory dir2 = new RAMDirectory();
    IndexWriter w2 = new IndexWriter(dir2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
    Document d3 = new Document();
    d3.add(new Field("f3", "v1", Field.Store.YES, Field.Index.ANALYZED));
    d3.add(new Field("f4", "v1", Field.Store.YES, Field.Index.ANALYZED));
    w2.addDocument(d3);
    Document d4 = new Document();
    // (the listing is truncated here; a plausible completion, field values assumed:)
    d4.add(new Field("f3", "v2", Field.Store.YES, Field.Index.ANALYZED));
    d4.add(new Field("f4", "v2", Field.Store.YES, Field.Index.ANALYZED));
    w2.addDocument(d4);
    w2.close();
    return dir2;
  }

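getDir1() and getDir2() build two parallel half-indexes, and the pr in the first fragment is presumably a ParallelReader that stitches them together field-wise. A hedged sketch of that combination:

    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(getDir1(), false));
    pr.add(IndexReader.open(getDir2(), false));
    IndexSearcher searcher = new IndexSearcher(pr);

Note that ParallelReader requires each sub-index to hold the same documents in the same order; each sub-index contributes its own fields.
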
          sb.append(" ");
          // only one stopword
          sb.append(stopWords.iterator().next());
        }
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(TEST_VERSION,
            stopWords).tokenStream("data", new StringReader(sb.toString())), fm);
        // equivalent to: new Highlighter(fm, new QueryTermScorer(query));
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(100);
        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length",
            match.length() < hg.getMaxDocCharsToAnalyze());

        // Add another tokenized word to the overall length, but place it well
        // beyond the length of text under consideration (after a large slug
        // of stop words + whitespace).
        sb.append(" ");
        sb.append(goodWord);
        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "data", sb.toString());
        assertTrue("Matched text should be no more than 100 chars in length",
            match.length() < hg.getMaxDocCharsToAnalyze());
      }
    };

        stopWords.add("it");
        TermQuery query = new TermQuery(new Term("text", "searchterm"));

        String text = "this is a text with searchterm in it";
        SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
        Highlighter hg = getHighlighter(query, "text", new StandardAnalyzer(TEST_VERSION,
            stopWords).tokenStream("text", new StringReader(text)), fm);
        hg.setTextFragmenter(new NullFragmenter());
        hg.setMaxDocCharsToAnalyze(36);
        String match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION, stopWords), "text", text);
        assertTrue("Matched text should contain remainder of text after highlighted query",
            match.endsWith("in it"));
      }
    };

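Both fragments above use the StandardAnalyzer(Version, Set) constructor, which replaces the default English stop set with the caller's own. A minimal sketch of that pattern (the stop word is the one used in the second fragment):

        Set<String> stopWords = new HashSet<String>();
        stopWords.add("it");
        // "it" is now dropped during analysis; all other words pass through
        Analyzer analyzer = new StandardAnalyzer(TEST_VERSION, stopWords);
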
      @Override
      public void run() throws Exception {
        numHighlights = 0;
        // test to show how rewritten query can still be used
        searcher = new IndexSearcher(ramDir, true);
        Analyzer analyzer = new StandardAnalyzer(TEST_VERSION);

        QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, analyzer);
        Query query = parser.parse("JF? or Kenned*");
        System.out.println("Searching with primitive query");
        // if you forget to rewrite the query first, wildcard terms won't match:
        // query = query.rewrite(reader);
        TopDocs hits = searcher.search(query, null, 1000);

        // create an instance of the highlighter with the tags used to
        // surround highlighted text; equivalent to:
        // QueryHighlightExtractor highlighter = new QueryHighlightExtractor(
        //     this, query, new StandardAnalyzer(TEST_VERSION));

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "..."); // "..." joins the fragments

  }

  public void testMultiSearcher() throws Exception {
    // setup index 1
    RAMDirectory ramDir1 = new RAMDirectory();
    IndexWriter writer1 = new IndexWriter(ramDir1, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document d = new Document();
    Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer1.addDocument(d);
    writer1.optimize();
    writer1.close();
    IndexReader reader1 = IndexReader.open(ramDir1, true);

    // setup index 2
    RAMDirectory ramDir2 = new RAMDirectory();
    IndexWriter writer2 = new IndexWriter(ramDir2, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    d = new Document();
    f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
    d.add(f);
    writer2.addDocument(d);
    writer2.optimize();
    writer2.close();
    IndexReader reader2 = IndexReader.open(ramDir2, true);

    IndexSearcher[] searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(ramDir1, true);
    searchers[1] = new IndexSearcher(ramDir2, true);
    MultiSearcher multiSearcher = new MultiSearcher(searchers);
    QueryParser parser = new QueryParser(TEST_VERSION, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
    parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    query = parser.parse("multi*");
    System.out.println("Searching for: " + query.toString(FIELD_NAME));
    // at this point the multisearcher calls combine(query[])
    hits = multiSearcher.search(query, null, 1000);

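The fragment stops just after the search. In this kind of test the wildcard query is typically rewritten against each reader and recombined before highlighting, so the highlighter sees the expanded concrete terms rather than the raw MultiTermQuery; a hedged sketch of that continuation:

    // rewrite "multi*" against each index, then combine, so the highlighter
    // can match the concrete terms (multiOne, multiTwo)
    Query[] expandedQueries = new Query[2];
    expandedQueries[0] = query.rewrite(reader1);
    expandedQueries[1] = query.rewrite(reader2);
    query = query.combine(expandedQueries);
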
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, new StandardAnalyzer(TEST_VERSION), true, IndexWriter.MaxFieldLength.UNLIMITED);
    for (int i = 0; i < texts.length; i++) {
      addDoc(writer, texts[i]);
    }
    Document doc = new Document();
    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);

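The setUp fragment is cut off right after the NumericField is created; a plausible continuation (the field value is assumed) gives it a value, indexes the document, and closes the writer:

    nfield.setIntValue(1);
    doc.add(nfield);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
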
public class TestDemo extends LuceneTestCase {

  public void testDemo() throws IOException, ParseException {

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open(new File("/tmp/testindex"));

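The demo is truncated here. A hedged sketch of how such a demo typically continues, indexing one document and searching it (the field name and text are assumptions, not the original test body):

    IndexWriter iwriter = new IndexWriter(directory, analyzer, true,
        new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    doc.add(new Field("fieldname", "This is the text to be indexed.",
        Field.Store.YES, Field.Index.ANALYZED));
    iwriter.addDocument(doc);
    iwriter.close();

    IndexSearcher isearcher = new IndexSearcher(directory, true);
    QueryParser qparser = new QueryParser(Version.LUCENE_CURRENT, "fieldname", analyzer);
    TopDocs topDocs = isearcher.search(qparser.parse("text"), null, 10);
    System.out.println("hits: " + topDocs.totalHits); // expect 1
    isearcher.close();
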
    IndexSearcher searcher = new IndexSearcher(directory, true);

    String query = args[1];
    String field = "contents";

    Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f);
    System.out.println( "Query: " + q.toString( field));



    searcher.close();

    // signature truncated in the listing; judging from the call site above it is
    // presumably: Query expand(String query, Searcher searcher, Analyzer a, String f, float boost)
    throws IOException
  {
    final Set<String> already = new HashSet<String>(); // avoid dups
    List<String> top = new LinkedList<String>(); // needs to be separately listed..
    final String field = ( f == null) ? "contents" : f;
    if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);

    // [1] Parse query into separate words so that when we expand we can avoid dups
    TokenStream ts = a.tokenStream( field, new StringReader( query));
    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
   

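The fragment ends just after the TermAttribute is obtained. The usual continuation (a hedged sketch relying only on names visible in the fragment) walks the stream and records each distinct query term:

    // [1] continued: collect each distinct word of the original query
    while (ts.incrementToken()) {
      String word = termAtt.term();
      if (already.add(word)) { // HashSet.add returns false for duplicates
        top.add(word);
      }
    }
    ts.close();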