Examples of TokenStream


Examples of org.apache.lucene.analysis.TokenStream

   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {

    /* Split at delim Char */
    TokenStream result = new CharTokenizer(reader) {

      @Override
      protected boolean isTokenChar(char c) {
        return c != fDelim;
      }
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

     * @see org.apache.lucene.analysis.KeywordAnalyzer#tokenStream(java.lang.String,
     * java.io.Reader)
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = super.tokenStream(fieldName, reader);
      result = new LowerCaseFilter(result);

      return result;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

  private BooleanClause createAllNewsFieldsClause(ISearchCondition condition, boolean matchAllConditions) throws IOException {
    BooleanQuery allFieldsQuery = new BooleanQuery();
    String value = String.valueOf(condition.getValue());

    LowercaseWhitespaceAnalyzer analyzer = new LowercaseWhitespaceAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
    Token token = null;
    while ((token = tokenStream.next()) != null) {
      String termText = token.termText();

      /* Contained in Title */
      WildcardQuery titleQuery = new WildcardQuery(new Term(String.valueOf(INews.TITLE), termText));
      allFieldsQuery.add(new BooleanClause(titleQuery, Occur.SHOULD));
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

        /* Fuzzy Query */
      case SIMILIAR_TO: {
        BooleanQuery similarityQuery = new BooleanQuery();

        LowercaseWhitespaceAnalyzer analyzer = new LowercaseWhitespaceAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
        Token token = null;
        while ((token = tokenStream.next()) != null) {
          Term term = new Term(fieldname, token.termText());
          similarityQuery.add(new BooleanClause(new FuzzyQuery(term), Occur.MUST));
        }

        return similarityQuery;
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

        /* Fuzzy Query */
      case SIMILIAR_TO: {
        BooleanQuery similarityQuery = new BooleanQuery();

        LowercaseWhitespaceAnalyzer similarAnalyzer = new LowercaseWhitespaceAnalyzer();
        TokenStream tokenStream = similarAnalyzer.tokenStream(String.valueOf(IEntity.ALL_FIELDS), new StringReader(value));
        Token token = null;
        while ((token = tokenStream.next()) != null) {
          String termText = new String(token.termBuffer(), 0, token.termLength());
          Term term = new Term(fieldname, termText);
          similarityQuery.add(new BooleanClause(new FuzzyQuery(term), Occur.MUST));
        }

View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

     * org.apache.lucene.analysis.KeywordAnalyzer#tokenStream(java.lang.String,
     * java.io.Reader)
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = super.tokenStream(fieldName, reader);
      result = new LowerCaseFilter(result);

      return result;
    }
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

    PatternAnalyzer analyzer = new PatternAnalyzer(pattern, toLowerCase, stopWords);
    return analyzer.tokenStream("", text);
  }
 
  private TokenStream luceneTokenStream(String text, boolean letters, boolean toLowerCase, Set stopWords) {
    TokenStream stream;
    if (letters)
      stream = new LetterTokenizer(new StringReader(text));
    else
      stream = new WhitespaceTokenizer(new StringReader(text));
    if (toLowerCase) stream = new LowerCaseFilter(stream);
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

        // The HTML escaping forces us to first fragment with internal placeholders...
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(INTERNAL_BEGIN_HIT, INTERNAL_END_HIT), new QueryScorer(query));
        highlighter.setTextFragmenter(fragmenter);
        try {
            // Use the same analyzer as the indexer!
            TokenStream tokenStream = new StandardAnalyzer().tokenStream(null, new StringReader(indexedText));

            String unescapedFragements =
                    highlighter.getBestFragments(tokenStream, indexedText, numOfFragments, getFragmentSeparator());

            String escapedFragments = WikiUtil.escapeHtml(WikiUtil.removeMacros(unescapedFragements), false, false);
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

    private Query buildPhraseQuery(String fieldName, String terms) {
        try {
            PhraseQuery query = new PhraseQuery();
            query.setSlop(0);

            TokenStream includeStream =
                    new StandardAnalyzer().tokenStream(null, new StringReader(escape(terms).toLowerCase()));

            while (true) {
                Token t = includeStream.next();
                if (t == null) break;
                query.add( new Term(fieldName, t.termText()) );
            }

            return query.getTerms().length > 0 ? query : null;
View Full Code Here

Examples of org.apache.lucene.analysis.TokenStream

  }
 
  protected Set<String> getHighlightWords(String searchString) {
    try {
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
      TokenStream stream = analyzer.tokenStream("content", new StringReader(searchString));
      TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
      for (boolean next = stream.incrementToken(); next; next = stream.incrementToken()) {
        String term = termAtt.term();
        if(log.isDebug()) log.debug(term);
      }
    } catch (IOException e) {
      log.error("", e);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.