Package org.apache.lucene.analysis.standard

Examples of org.apache.lucene.analysis.standard.StandardAnalyzer.tokenStream()
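Before the excerpted usages below, here is a minimal, self-contained sketch of calling StandardAnalyzer.tokenStream() directly. It assumes a Lucene 3.x-era API (Version-based constructor, attribute-based token iteration); the class name, field name, and sample text are illustrative only.

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class TokenStreamExample {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    // tokenStream(field, reader) builds the analysis chain for a single field value
    TokenStream stream = analyzer.tokenStream("body", new StringReader("The Quick Brown Fox"));
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // StandardAnalyzer lower-cases and drops stop words, so this prints: quick, brown, fox
      System.out.println(term.toString());
    }
    stream.end();
    stream.close();
  }
}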


        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.totalHits; i++) {
          String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream, HighlighterTest.this, false);

          highlighter.setTextFragmenter(new SimpleFragmenter(40));

          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "...");


   
    String country = getCountry(categories);
   
    if(!country.equals("Unknown")){
      document = StringEscapeUtils.unescapeHtml(document.replaceFirst("<text xml:space=\"preserve\">", "").replaceAll("</text>", ""));
      TokenStream stream = analyzer.tokenStream(country, new StringReader(document));
      while(true){
        Token token = stream.next();
        if(token==null) break;
        contents.append(token.termBuffer(), 0, token.termLength()).append(' ');
      }
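
The loop above uses the old Token-based iteration (stream.next(), termBuffer()/termLength()), which later Lucene releases dropped in favour of the attribute API. A hedged equivalent of the same loop for Lucene 3.1+, assuming the same analyzer, country, document, and StringBuilder contents variables as in the excerpt:

      TokenStream stream = analyzer.tokenStream(country, new StringReader(document));
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        // same effect as the termBuffer()/termLength() append above
        contents.append(term.buffer(), 0, term.length()).append(' ');
      }
      stream.end();
      stream.close();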

            throw new IllegalArgumentException
              ("field must have either String or Reader value");

          int position = 0;
          // Tokenize field and add to postingTable
          TokenStream stream = analyzer.tokenStream(fieldName, reader);
          try {
            for (Token t = stream.next(); t != null; t = stream.next()) {
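              // the two lines below net to: position += t.getPositionIncrement()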
              position += (t.getPositionIncrement() - 1);
              position++;
              String name = t.termText();

    int maxNumFragmentsRequired = 3;

    for (int i = 0; i < hits.length(); i++)
    {
      String text = hits.doc(i).get(FIELD_NAME);
      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));

      String highlightedText = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
      System.out.println(highlightedText);
    }
    //We expect to have zero highlights if the query is multi-terms and is not rewritten!

    public TokenStream tokenStream(String fieldName, Reader reader) {
      StandardAnalyzer analyzer = new StandardAnalyzer(VERSION);
//      Set<String> stopWords = new HashSet<String>();
//      stopWords.add("gt");
//      stopWords.add("lt");
      LengthFilter lengthFilter = new LengthFilter(true, analyzer.tokenStream(fieldName, reader), 3, 1000);
      return lengthFilter;
    }

    @Override
    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
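
For context, a hedged sketch of what a complete wrapping Analyzer of this kind might look like, assuming the same Lucene 3.x-era API as the excerpt (the LengthFilter(boolean, TokenStream, int, int) constructor and an overridable reusableTokenStream()); the class name and Version constant are illustrative:

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LengthFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

public class LengthLimitedAnalyzer extends Analyzer {
  private static final Version VERSION = Version.LUCENE_36;
  private final StandardAnalyzer delegate = new StandardAnalyzer(VERSION);

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    // keep only terms of 3 to 1000 characters, preserving position increments
    return new LengthFilter(true, delegate.tokenStream(fieldName, reader), 3, 1000);
  }

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // simplest valid behaviour: build a fresh stream on each call
    return tokenStream(fieldName, reader);
  }
}

Holding a single delegate StandardAnalyzer, rather than constructing a new one on every tokenStream() call as in the excerpt, avoids rebuilding the stop-word set for each field.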

        int maxNumFragmentsRequired = 3;

        for (int i = 0; i < hits.length(); i++) {
          String text = hits.doc(i).get(FIELD_NAME);
          TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
          Highlighter highlighter = getHighlighter(query, FIELD_NAME, tokenStream,
              HighlighterTest.this);
          highlighter.setTextFragmenter(new SimpleFragmenter(40));
          String highlightedText = highlighter.getBestFragments(tokenStream, text,
              maxNumFragmentsRequired, "...");
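
Putting the highlighter excerpts together, a hedged, self-contained sketch of the same pattern outside the test harness. It assumes the contrib Highlighter API of the same era; the field name, sample text, and query are stand-ins for the searcher/hits plumbing used in the excerpts:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.util.Version;

public class HighlightExample {
  public static void main(String[] args) throws Exception {
    String fieldName = "contents";
    String text = "The quick brown fox jumps over the lazy dog";
    Query query = new TermQuery(new Term(fieldName, "fox"));

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));

    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(40));

    // up to 3 fragments of roughly 40 characters each, joined with "..."
    String highlighted = highlighter.getBestFragments(tokenStream, text, 3, "...");
    System.out.println(highlighted);
  }
}

With the default SimpleHTMLFormatter, matched terms come back wrapped in <B>...</B> tags.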
