Examples of CachingTokenFilter

org.apache.lucene.analysis.CachingTokenFilter
This class can be used if the token attributes of a TokenStream are intended to be consumed more than once. It caches all token attribute states locally in a List.
CachingTokenFilter implements the optional method TokenStream.reset(), which repositions the stream to the first Token.

Examples of org.apache.lucene.analysis.CachingTokenFilter

      String text = fieldNode.getTextAsString();
      String field = fieldNode.getFieldAsString();


      TokenStream source = this.analyzer.tokenStream(field, new StringReader(
          text));
      CachingTokenFilter buffer = new CachingTokenFilter(source);


      PositionIncrementAttribute posIncrAtt = null;
      int numTokens = 0;
      int positionCount = 0;
      boolean severalTokensAtSamePosition = false;


      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
      }


      try {


        while (buffer.incrementToken()) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt
              .getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;


          } else {
            severalTokensAtSamePosition = true;
          }


        }


      } catch (IOException e) {
        // ignore
      }


      try {
        // rewind the buffer stream
        buffer.reset();


        // close original stream - all tokens buffered
        source.close();
      } catch (IOException e) {
        // ignore
      }


      if (!buffer.hasAttribute(TermAttribute.class)) {
        return new NoTokenFoundQueryNode();
      }


      TermAttribute termAtt = buffer.getAttribute(TermAttribute.class);


      if (numTokens == 0) {
        return new NoTokenFoundQueryNode();


      } else if (numTokens == 1) {
        String term = null;
        try {
          boolean hasNext;
          hasNext = buffer.incrementToken();
          assert hasNext == true;
          term = termAtt.term();


        } catch (IOException e) {
          // safe to ignore, because we know the number of tokens
        }


        fieldNode.setText(term);


        return fieldNode;


      } else if (severalTokensAtSamePosition) {
        if (positionCount == 1) {
          // no phrase query:
          LinkedList<QueryNode> children = new LinkedList<QueryNode>();


          for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();


            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            children.add(new FieldQueryNode(field, term, -1, -1));


          }


          return new GroupQueryNode(
              new StandardBooleanQueryNode(children, true));


        } else {
          // phrase query:
          MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();


          List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
          int position = -1;
          int i = 0;
          int termGroupCount = 0;
          for (; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }


            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            if (positionIncrement > 0 && multiTerms.size() > 0) {


              for (FieldQueryNode termNode : multiTerms) {


                if (this.positionIncrementsEnabled) {
                  termNode.setPositionIncrement(position);
                } else {
                  termNode.setPositionIncrement(termGroupCount);
                }


                mpq.add(termNode);


              }


              // Only increment once for each "group" of
              // terms that were in the same position:
              termGroupCount++;


              multiTerms.clear();


            }


            position += positionIncrement;
            multiTerms.add(new FieldQueryNode(field, term, -1, -1));


          }


          for (FieldQueryNode termNode : multiTerms) {


            if (this.positionIncrementsEnabled) {
              termNode.setPositionIncrement(position);


            } else {
              termNode.setPositionIncrement(termGroupCount);
            }


            mpq.add(termNode);


          }


          return mpq;


        }


      } else {


        TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();


        int position = -1;


        for (int i = 0; i < numTokens; i++) {
          String term = null;
          int positionIncrement = 1;


          try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();


            if (posIncrAtt != null) {
              positionIncrement = posIncrAtt.getPositionIncrement();

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

      source = analyzer.reusableTokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;


    boolean success = false;
    try {
      buffer.reset();
      success = true;
    } catch (IOException e) {
      // success==false if we hit an exception
    }
    if (success) {
      if (buffer.hasAttribute(TermAttribute.class)) {
        termAtt = buffer.getAttribute(TermAttribute.class);
      }
      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
      }
    }


    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;


    boolean hasMoreTokens = false;
    if (termAtt != null) {
      try {
        hasMoreTokens = buffer.incrementToken();
        while (hasMoreTokens) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
          hasMoreTokens = buffer.incrementToken();
        }
      } catch (IOException e) {
        // ignore
      }
    }
    try {
      // rewind the buffer stream
      buffer.reset();


      // close original stream - all tokens buffered
      source.close();
    }
    catch (IOException e) {
      // ignore
    }


    if (numTokens == 0)
      return null;
    else if (numTokens == 1) {
      String term = null;
      try {
        boolean hasNext = buffer.incrementToken();
        assert hasNext == true;
        term = termAtt.term();
      } catch (IOException e) {
        // safe to ignore, because we know the number of tokens
      }
      return newTermQuery(new Term(field, term));
    } else {
      if (severalTokensAtSamePosition) {
        if (positionCount == 1) {
          // no phrase query:
          BooleanQuery q = newBooleanQuery(true);
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            Query currentQuery = newTermQuery(
                new Term(field, term));
            q.add(currentQuery, BooleanClause.Occur.SHOULD);
          }
          return q;
        }
        else {
          // phrase query:
          MultiPhraseQuery mpq = newMultiPhraseQuery();
          mpq.setSlop(phraseSlop);
          List<Term> multiTerms = new ArrayList<Term>();
          int position = -1;
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.term();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            if (positionIncrement > 0 && multiTerms.size() > 0) {
              if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[0]),position);
              } else {
                mpq.add(multiTerms.toArray(new Term[0]));
              }
              multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(new Term(field, term));
          }
          if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[0]),position);
          } else {
            mpq.add(multiTerms.toArray(new Term[0]));
          }
          return mpq;
        }
      }
      else {
        PhraseQuery pq = newPhraseQuery();
        pq.setSlop(phraseSlop);
        int position = -1;




        for (int i = 0; i < numTokens; i++) {
          String term = null;
          int positionIncrement = 1;


          try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();
            if (posIncrAtt != null) {
              positionIncrement = posIncrAtt.getPositionIncrement();
            }

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

    return rv;
  }


  private IndexReader getReaderForField(String field) throws IOException {
    if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
      tokenStream = new CachingTokenFilter(tokenStream);
      cachedTokenStream = true;
    }
    IndexReader reader = readers.get(field);
    if (reader == null) {
      MemoryIndex indexer = new MemoryIndex();

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

  public void testEndOffsetPositionWithCachingTokenFilter() throws Exception {
    MockRAMDirectory dir = new MockRAMDirectory();
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriter w = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    TokenStream stream = new CachingTokenFilter(analyzer.tokenStream("field", new StringReader("abcd   ")));
    Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS);
    doc.add(f);
    doc.add(f);
    w.addDocument(doc);
    w.close();

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

      source = analyzer.reusableTokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    CharTermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;


    boolean success = false;
    try {
      buffer.reset();
      success = true;
    } catch (IOException e) {
      // success==false if we hit an exception
    }
    if (success) {
      if (buffer.hasAttribute(CharTermAttribute.class)) {
        termAtt = buffer.getAttribute(CharTermAttribute.class);
      }
      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
      }
    }


    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;


    boolean hasMoreTokens = false;
    if (termAtt != null) {
      try {
        hasMoreTokens = buffer.incrementToken();
        while (hasMoreTokens) {
          numTokens++;
          int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
          hasMoreTokens = buffer.incrementToken();
        }
      } catch (IOException e) {
        // ignore
      }
    }
    try {
      // rewind the buffer stream
      buffer.reset();


      // close original stream - all tokens buffered
      source.close();
    }
    catch (IOException e) {
      // ignore
    }


    if (numTokens == 0)
      return null;
    else if (numTokens == 1) {
      String term = null;
      try {
        boolean hasNext = buffer.incrementToken();
        assert hasNext == true;
        term = termAtt.toString();
      } catch (IOException e) {
        // safe to ignore, because we know the number of tokens
      }
      // return newTermQuery(new Term(field, term));
      return new TermQuery(new Term(field, term));
    } else {
      if (severalTokensAtSamePosition) {
        if (positionCount == 1) {
          // no phrase query:
          // BooleanQuery q = newBooleanQuery(true);
          BooleanQuery q = new BooleanQuery(true);
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.toString();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            // Query currentQuery = newTermQuery(new Term(field, term));
            Query currentQuery = new TermQuery(new Term(field, term));
            q.add(currentQuery, BooleanClause.Occur.SHOULD);
          }
          return q;
        }
        else {
          // phrase query:
          // MultiPhraseQuery mpq = newMultiPhraseQuery();
          MultiPhraseQuery mpq = new MultiPhraseQuery();
          mpq.setSlop(phraseSlop);
          List multiTerms = new ArrayList();
          int position = -1;
          for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
              term = termAtt.toString();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }


            if (positionIncrement > 0 && multiTerms.size() > 0) {
              if (enablePositionIncrements) {
                mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
              } else {
                mpq.add((Term[])multiTerms.toArray(new Term[0]));
              }
              multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(new Term(field, term));
          }
          if (enablePositionIncrements) {
            mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
          } else {
            mpq.add((Term[])multiTerms.toArray(new Term[0]));
          }
          return mpq;
        }
      }
      else {
        // PhraseQuery pq = newPhraseQuery();
        PhraseQuery pq = new PhraseQuery();
        pq.setSlop(phraseSlop);
        int position = -1;




        for (int i = 0; i < numTokens; i++) {
          String term = null;
          int positionIncrement = 1;


          try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();
            if (posIncrAtt != null) {
              positionIncrement = posIncrAtt.getPositionIncrement();
            }

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

      Highlighter highlighter;
      if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
        // TODO: this is not always necessary - eventually we would like to avoid this wrap
        //       when it is not needed.
        if (maxCharsToAnalyze < 0) {
          tstream = new CachingTokenFilter(tstream);
        } else {
          tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
        }
        
        // get highlighter
        highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

        source = this.analyzer.tokenStream(field, new StringReader(text));
        source.reset();
      } catch (final IOException e1) {
        throw new RuntimeException(e1);
      }
      final CachingTokenFilter buffer = new CachingTokenFilter(source);


      int numTokens = 0;
      try {
        while (buffer.incrementToken()) {
          numTokens++;
        }
      } catch (final IOException e) {
        // ignore
      }


      try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
      } catch (final IOException e) {
        // ignore
      }


      if (!buffer.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
      }
      final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);


      if (numTokens == 0) {
        return new NoTokenFoundQueryNode();
      } else if (numTokens != 1) {
        // phrase query
        final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();


        for (int i = 0; i < numTokens; i++) {
          String term = null;


          try {
            final boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();


          } catch (final IOException e) {
            // safe to ignore, because we know the number of tokens

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

        source = analyzer.tokenStream(field, new StringReader(text));
        source.reset();
      } catch (final IOException e1) {
        throw new RuntimeException(e1);
      }
      final CachingTokenFilter buffer = new CachingTokenFilter(source);


      if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
      }


      try {
        while (buffer.incrementToken()) {
          numTokens++;
          final int positionIncrement = (posIncrAtt != null) ? posIncrAtt
              .getPositionIncrement() : 1;
          if (positionIncrement != 0) {
            positionCount += positionIncrement;
          } else {
            severalTokensAtSamePosition = true;
          }
        }
      } catch (final IOException e) {
        // ignore
      }


      try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
      } catch (final IOException e) {
        // ignore
      }


      if (!buffer.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
      }
      final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);


      if (numTokens == 0) {
        if (nbTwigs != 0) { // Twig special case
          return new WildcardNodeQueryNode();
        }
        return new NoTokenFoundQueryNode();
      }
      else if (numTokens == 1) {
        String term = null;
        try {
          boolean hasNext;
          hasNext = buffer.incrementToken();
          assert hasNext == true;
          term = termAtt.toString();
        } catch (final IOException e) {
          // safe to ignore, because we know the number of tokens
        }
        fieldNode.setText(term);
        return fieldNode;
      }
      else {
        // no phrase query:
        final LinkedList<QueryNode> children = new LinkedList<QueryNode>();


        int position = -1;


        for (int i = 0; i < numTokens; i++) {
          String term = null;
          final int positionIncrement = 1;


          try {
            final boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();


          } catch (final IOException e) {
            // safe to ignore, because we know the number of tokens

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

    return rv;
  }


  private IndexReader getReaderForField(String field) throws IOException {
    if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
      tokenStream = new CachingTokenFilter(tokenStream);
      cachedTokenStream = true;
    }
    IndexReader reader = (IndexReader) readers.get(field);
    if (reader == null) {
      MemoryIndex indexer = new MemoryIndex();

View Full Code Here

Examples of org.apache.lucene.analysis.CachingTokenFilter

    tokens.add(tokenFactory("tellus", 0, 1f, 5, 10, ShingleMatrixFilter.TokenPositioner.newRow));


    tls = new TokenListStream(tokens);


    ts = new PrefixAndSuffixAwareTokenFilter(new SingleTokenTokenStream(tokenFactory("^", 1, 100f, 0, 0)), tls, new SingleTokenTokenStream(tokenFactory("$", 1, 50f, 0, 0)));
    tls = new CachingTokenFilter(ts);


    // bi-grams, position increment, weight, start offset, end offset


    ts = new ShingleMatrixFilter(tls, 2, 2, new Character('_'), false);
//

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.