Package: org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.incrementToken()


    TokenStream tokenStream = a.tokenStream("test", "this is a Text with Whitespaces");
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

    tokenStream.incrementToken();
    String t2 = charTermAttribute.toString();
    tokenStream.incrementToken();
    String t3 = charTermAttribute.toString();
   
    assertEquals("Second Token did not match!", "text", t2);
    assertEquals("Third Token did not match!", "whitespaces", t3);
   
View Full Code Here


    // Fetch the attribute views (term text, offsets, position increment, type)
    // that incrementToken() repopulates in place on every call.
    // NOTE(review): TermAttribute is the pre-4.0 Lucene API (later replaced by
    // CharTermAttribute), so this excerpt targets an older Lucene version.
    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
    OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
    TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
    try {
      // Pull tokens until the requested depth 'token_deep' is reached or the
      // stream is exhausted. (Excerpt is truncated below; presumably the Token
      // is added to 'result' inside the loop -- cannot confirm from here.)
      while (result.size() < token_deep && ts.incrementToken()) {
        // Copy the per-call attribute state into a standalone Token, because
        // the attribute instances above are reused across incrementToken() calls.
        final Token token = new Token(
            termAtt.termBuffer(), 0, termAtt.termLength(),
            offsetAtt.startOffset(), offsetAtt.endOffset());
        token.setType(typeAtt.type());
        token.setPositionIncrement(posIncrAtt.getPositionIncrement());
View Full Code Here

      // Collect each token's text into 'result' via the (pre-4.0 Lucene)
      // TermAttribute view of the stream.
      TermAttribute ta = tokens.getAttribute(TermAttribute.class);
      try
      {
        String termText;
        while (tokens.incrementToken())
        {
          // ta.term() returning null is treated as end-of-stream; the check
          // looks defensive -- TODO confirm whether the producing analyzer
          // can actually emit a null term.
          if ((termText = ta.term()) == null)
            break;
          result.add(termText);
        }
View Full Code Here

            // Rewind the stream so incrementToken() may be called, per the
            // TokenStream workflow contract.
            stream.reset();

            int poz = 0;                         // character offset consumed so far
            boolean hasFulltextToken = false;
            StringBuilder token = new StringBuilder();
            while (stream.incrementToken()) {
                String term = termAtt.toString();
                int start = offsetAtt.startOffset();
                int end = offsetAtt.endOffset();
                // A gap between the last consumed offset and this token's start
                // means the analyzer skipped characters; walk over the gap
                // (loop body truncated in this excerpt).
                if (start > poz) {
                    for (int i = poz; i < start; i++) {
View Full Code Here

    /**
     * Tokenizes the text from {@code r} with the configured analyzer and
     * accumulates per-term counts into {@code termFreqMap} (visible portion
     * only counts tokens; the map update is in the truncated part below).
     *
     * @param r           source text to analyze
     * @param fieldName   field whose analysis chain is used
     * @param termFreqMap receives term -&gt; frequency counts
     * @throws IOException if the underlying stream fails
     */
    private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName)
            throws IOException {
        TokenStream ts = analyzer.tokenStream(fieldName, r);
        int tokenCount = 0;
        // for every token
        // NOTE(review): incrementToken() is called without a prior ts.reset(),
        // which the TokenStream contract requires in Lucene 4+ -- this excerpt
        // (using TermAttribute) likely targets an older Lucene. The stream is
        // also never closed in the visible portion.
        while (ts.incrementToken()) {
            // getAttribute() returns the same cached instance each iteration;
            // hoisting it out of the loop would be equivalent.
            TermAttribute term = ts.getAttribute(TermAttribute.class);
            String word =  term.term();
            tokenCount++;
            // Cap the work done on pathologically long inputs.
            if (tokenCount > maxNumTokensParsed) {
                break;
View Full Code Here

            // Rewind the stream so incrementToken() may be called, per the
            // TokenStream workflow contract. (This excerpt duplicates an
            // earlier snippet on this page.)
            stream.reset();

            int poz = 0;                         // character offset consumed so far
            boolean hasFulltextToken = false;
            StringBuilder token = new StringBuilder();
            while (stream.incrementToken()) {
                String term = termAtt.toString();
                int start = offsetAtt.startOffset();
                int end = offsetAtt.endOffset();
                // A gap between the last consumed offset and this token's start
                // means the analyzer skipped characters; walk over the gap
                // (loop body truncated in this excerpt).
                if (start > poz) {
                    for (int i = poz; i < start; i++) {
View Full Code Here

                                    // assume properties fields use SingleTokenStream
                                    // Read the single token plus its payload, then finish
                                    // and release the stream.
                                    // NOTE(review): no reset() before incrementToken() --
                                    // only valid for streams that do not require it
                                    // (pre-4.0 API, as TermAttribute indicates).
                                    TokenStream tokenStream = field.tokenStreamValue();
                                    TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
                                    PayloadAttribute payloadAttribute = tokenStream.addAttribute(PayloadAttribute.class);
                                    tokenStream.incrementToken();
                                    tokenStream.end();
                                    tokenStream.close();

                                    // Materialize the term text from the attribute's
                                    // buffer; the code relies on the attribute still being
                                    // readable after close().
                                    String value = new String(termAttribute.termBuffer(), 0, termAttribute.termLength());
                                    if (value.startsWith(namePrefix)) {
View Full Code Here

    try {
      // Analyze 'text' and collect every produced term into 'terms'.
      ts = analyzer.tokenStream(fieldName, text);
      // getBytesRef() returns a single reusable buffer that fillBytesRef()
      // overwrites in place on each token -- hence the deepCopyOf() below
      // before the value is stored.
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        terms.add(BytesRef.deepCopyOf(bytes));
      }
      // Finish the workflow; close() presumably happens in the truncated
      // finally block -- cannot confirm from this excerpt.
      ts.end();
    }
View Full Code Here

        TokenStream ts = null;
        try {
          // Analyze the user-supplied stop-word text with the field's analyzer
          // and add each resulting term to the stop-word set.
          ts = analyzer.tokenStream(field, stopWords);
          CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
          ts.reset();
          while (ts.incrementToken()) {
            // toString() copies the reused attribute buffer, so the stored
            // strings are independent of later tokens.
            stopWordsSet.add(termAtt.toString());
          }
          ts.end();
        } catch (IOException ioe) {
          // Wrap the low-level I/O failure in a parser-level exception.
          // NOTE(review): the statement is truncated here -- verify that the
          // cause 'ioe' is chained into the ParserException rather than dropped.
          throw new ParserException("IoException parsing stop words list in "
View Full Code Here

    try {
      // Analyze 'value' and build one SpanTermQuery clause per produced term.
      ts = analyzer.tokenStream(fieldName, value);
      // The BytesRef from getBytesRef() is a reused buffer updated in place by
      // fillBytesRef(), so each stored Term takes a deep copy.
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
        clausesList.add(stq);
      }
      // Finish the workflow; close() presumably happens in the truncated
      // finally block -- cannot confirm from this excerpt.
      ts.end();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.