Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.TokenStream.reset()
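
All of the snippets below follow the consumer workflow documented for TokenStream: call reset() before the first incrementToken(), end() after the last token is consumed, and close() when finished. For reference, here is a minimal, self-contained sketch of that workflow; it assumes the Lucene 4.x API used by the snippets, and the analyzer, field name, and input text are illustrative placeholders:

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class TokenStreamResetExample {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    TokenStream ts = analyzer.tokenStream("body", "the quick brown fox");
    // Attributes may be added before reset(); incrementToken() fills them in.
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();                  // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
      }
      ts.end();                    // record final state, e.g. the end offset
    } finally {
      ts.close();                  // always release the stream, even on failure
    }
  }
}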


    }

    TokenStream ts_2 = a.tokenStream("dummy", "danych");
    try {
      CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
      ts_2.reset();
      ts_2.incrementToken();
      assertEquals("second stream", "dany", termAtt_2.toString());
      ts_2.end();
    } finally {
      IOUtils.closeWhileHandlingException(ts_2);
View Full Code Here


  /** Test morphosyntactic annotations. */
  public final void testPOSAttribute() throws IOException {
    TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście");
    try {
      ts.reset();
      assertPOSToken(ts, "liście"
        "subst:sg:acc:n2",
        "subst:sg:nom:n2",
        "subst:sg:voc:n2");

View Full Code Here

    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int start = 0; start < codePointCount; ++start) {
      for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
        assertTrue(tk.incrementToken());
        assertEquals(0, offsetAtt.startOffset());
        assertEquals(s.length(), offsetAtt.endOffset());
View Full Code Here

    };
    final CharTermAttribute termAtt = grams.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncAtt = grams.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLenAtt = grams.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = grams.addAttribute(OffsetAttribute.class);
    grams.reset();
    for (int start = 0; start < codePoints.length; ++start) {
      nextGram:
      for (int end = start + minGram; end <= start + maxGram && end <= codePoints.length; ++end) {
        if (edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1])) {
          // not on an edge
View Full Code Here

    int num = 1000 * RANDOM_MULTIPLIER;
    for (int i = 0; i < num; i++) {
      String s = _TestUtil.randomUnicodeString(random());
      TokenStream ts = analyzer.tokenStream("foo", s);
      try {
        ts.reset();
        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
        while (ts.incrementToken()) {
          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
          for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
            cp = highlightedText.codePointAt(j);
View Full Code Here


    final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
    TokenStream tk = new KeywordTokenizer(new StringReader(s));
    tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
View Full Code Here

          if (!(o instanceof String)) {
              throw new IOException("Expected input to be chararray, but  got " + o.getClass().getName());
          }
          Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_43, new StringReader((String)o));
          TokenStream tokenstream = new LowerCaseEntityPreservingFilter(source);
          tokenstream.reset();
          while (tokenstream.incrementToken()) {
            String token = tokenstream.getAttribute(CharTermAttribute.class).toString();
            output.add(mTupleFactory.newTuple(token));
          }
          tokenstream.end();
          tokenstream.close();
          return output;
View Full Code Here

    try {
      TokenStream stream = analyzer.tokenStream("text", new StringReader(text));

      CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while(stream.incrementToken()) {
        String term = charTermAttribute.toString();
        result.add(term);
      }
    } catch (IOException e) {
View Full Code Here

        }
      }
      TokenStream ts = analyzer.tokenStream("text", new StringReader(app
              .getString(inputText, "text")));
      app.removeAll(resultsList);
      ts.reset();

      while (ts.incrementToken()) {
        Object row = app.create("item");
        app.setString(row, "text", ((CharTermAttribute)ts.getAttribute(CharTermAttribute.class)).toString());
        app.add(resultsList, row);
View Full Code Here
