Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.reset()
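
All of the snippets below follow the consumer workflow documented for TokenStream: call reset() before the first incrementToken(), end() after the last token is consumed, and close() when finished. For reference, here is a minimal, self-contained sketch of that workflow; it assumes the Lucene 4.x API used by the snippets, and the analyzer, field name, and input text are illustrative placeholders:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class TokenStreamResetExample {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream("body", "the quick brown fox");
    // Attributes may be added before reset(); incrementToken() fills them in.
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();                  // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();                    // record final state, e.g. the end offset
    } finally {
      ts.close();                  // always release the stream, even on failure
    }
  }
}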


    }

    TokenStream ts_2 = a.tokenStream("dummy", "danych");
    try {
      CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
      ts_2.reset();
      ts_2.incrementToken();
      assertEquals("second stream", "dany", termAtt_2.toString());
      ts_2.end();
    } finally {
      IOUtils.closeWhileHandlingException(ts_2);
View Full Code Here


  /** Test morphosyntactic annotations. */
  public final void testPOSAttribute() throws IOException {
    TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście");
    try {
      ts.reset();
      assertPOSToken(ts, "liście"
        "subst:sg:acc:n2",
        "subst:sg:nom:n2",
        "subst:sg:voc:n2");

View Full Code Here

    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int start = 0; start < codePointCount; ++start) {
      for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
        assertTrue(tk.incrementToken());
        assertEquals(0, offsetAtt.startOffset());
        assertEquals(s.length(), offsetAtt.endOffset());
View Full Code Here

    };
    final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
    grams.reset();
    for (int start = 0; start < codePoints.length; ++start) {
      nextGram:
      for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
        if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
          // not on an edge
View Full Code Here

    int num = 1000 * RANDOM_MULTIPLIER;
    for (int i = 0; i < num; i++) {
      String s = _TestUtil.randomUnicodeString(random());
      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        ts.reset();
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        while (ts.incrementToken()) {
          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
          for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
            cp = highlightedText.codePointAt(j);
View Full Code Here


    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
View Full Code Here

          if (!(o instanceof String)) {
              throw new IOException("Expected input to be chararray, but  got " + o.getClass().getName());
          }
          Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_43, new StringReader((String)o));
          TokenStream tokenstream = new LowerCaseEntityPreservingFilter(source);
          tokenstream.reset();
          while (tokenstream.incrementToken()) {
            String token = tokenstream.getAttribute(CharTermAttribute.class).toString();
            output.add(mTupleFactory.newTuple(token));
          }
          tokenstream.end();
          tokenstream.close();
          return output;
View Full Code Here

    try {
      TokenStream stream = analyzer.tokenStream("text", new StringReader(text));

      CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while(stream.incrementToken()) {
        String term = charTermAttribute.toString();
        result.add(term);
      }
    } catch (IOException e) {
View Full Code Here

        }
      }
      TokenStream ts = analyzer.tokenStream("text", new StringReader(app
              .getString(inputText, "text")));
      app.removeAll(resultsList);
      ts.reset();

      while (ts.incrementToken()) {
        Object row = app.create("item");
        app.setString(row, "text", ((CharTermAttribute)ts.getAttribute(CharTermAttribute.class)).toString());
        app.add(resultsList, row);
View Full Code Here
