Examples of incrementToken()
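
A TokenStream's incrementToken() advances the stream to its next token, returning false once the stream is exhausted. Attribute instances (terms, offsets, flags, payloads) are shared and reused across calls, so consumers read them immediately after each successful call. Since Lucene 4 the full consumer lifecycle is reset(), the incrementToken() loop, end(), close(). A minimal, self-contained consumer sketch (assuming Lucene 4.x; the WhitespaceTokenizer constructor and attribute APIs vary slightly across versions):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class TokenStreamContractDemo {
  public static void main(String[] args) throws IOException {
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_40,
        new StringReader("hello token stream"));
    // one shared attribute instance, refreshed in place on every incrementToken()
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                   // mandatory before the first incrementToken()
    while (ts.incrementToken()) { // false signals end of stream
      System.out.println(termAtt.toString());
    }
    ts.end();                     // finalizes offset state for the stream
    ts.close();
  }
}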


Examples of org.apache.lucene.analysis.core.KeywordTokenizer.incrementToken()

      // tail of a loop that steps i through the code points of the input string s
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
    assertFalse(tk.incrementToken());
  }

}
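
The fragment above asserts that KeywordTokenizer emits the entire input as a single token whose offsets span the whole string, stepping through it code point by code point. A stand-alone sketch of the same behavior (hypothetical class name; the KeywordTokenizer(Reader) constructor from Lucene 3.x/4.x is assumed, whereas 5.x+ uses setReader()):

import java.io.StringReader;

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class KeywordTokenizerDemo {
  public static void main(String[] args) throws Exception {
    String s = "one single token";
    KeywordTokenizer tk = new KeywordTokenizer(new StringReader(s));
    CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    while (tk.incrementToken()) {
      // exactly one token: the full input, spanning [0, s.length())
      System.out.println(termAtt + " [" + offsetAtt.startOffset()
          + "," + offsetAtt.endOffset() + ")");
    }
    tk.end();
    tk.close();
  }
}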

Examples of org.apache.lucene.analysis.icu.segmentation.ICUTokenizer.incrementToken()

        int lastOffset = 0;
        boolean foundWildcard = false;
        // Lucene tokenizers are really low level ...
        try {
          tokenizer.reset(); // since Lucene/Solr 4, reset() MUST be called before consuming
          while (tokenizer.incrementToken()) {
            // only interested in the start/end offsets of tokens; addAttribute()
            // returns the same cached instance, so it could be hoisted out of the loop
            OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
            if (lastAdded < 0) { // nothing added yet: start with this token
              lastAdded = offset.startOffset();
            }
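
The snippet consumes an ICUTokenizer purely for token offsets. A minimal sketch that drives the tokenizer directly (lucene-analyzers-icu module; the ICUTokenizer(Reader) constructor with the default segmentation config, as in Lucene 4.x, is assumed):

import java.io.StringReader;

import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class ICUTokenizerDemo {
  public static void main(String[] args) throws Exception {
    ICUTokenizer tokenizer = new ICUTokenizer(
        new StringReader("Unicode-aware word segmentation"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
    tokenizer.reset(); // required before the first incrementToken()
    while (tokenizer.incrementToken()) {
      System.out.println(term + " @ " + offset.startOffset() + "-" + offset.endOffset());
    }
    tokenizer.end();
    tokenizer.close();
  }
}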

Examples of org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter.incrementToken()

    ResourceLoader loader = new SolrResourceLoader(null, null);
    factory.inform(loader);

    TokenStream input = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("the|0.1 quick|0.1 red|0.1"));
    DelimitedPayloadTokenFilter tf = factory.create(input);
    while (tf.incrementToken()){
      PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
      assertTrue("payAttr is null and it shouldn't be", payAttr != null);
      byte[] payData = payAttr.getPayload().getData();
      assertTrue("payData is null and it shouldn't be", payData != null);
      assertTrue("payData is null and it shouldn't be", payData != null);
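
The test above builds the filter through a Solr factory. A hedged sketch that wires DelimitedPayloadTokenFilter directly, splitting "term|weight" tokens and decoding the float payload (Lucene 4.x assumed, where getPayload() returns a BytesRef; in 3.x it returns a Payload with getData()):

import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

public class DelimitedPayloadDemo {
  public static void main(String[] args) throws Exception {
    WhitespaceTokenizer input = new WhitespaceTokenizer(Version.LUCENE_40,
        new StringReader("the|0.1 quick|0.9 red|0.5"));
    DelimitedPayloadTokenFilter tf =
        new DelimitedPayloadTokenFilter(input, '|', new FloatEncoder());
    CharTermAttribute term = tf.addAttribute(CharTermAttribute.class);
    PayloadAttribute payAtt = tf.addAttribute(PayloadAttribute.class);
    tf.reset();
    while (tf.incrementToken()) {
      BytesRef payload = payAtt.getPayload();
      float weight = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
      System.out.println(term + " -> " + weight);
    }
    tf.end();
    tf.close();
  }
}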

Examples of org.apache.lucene.analysis.shingle.ShingleFilter.incrementToken()

        count++;
        ngrams.adjustOrPutValue(term, 1, 1);
      } else if (emitUnigrams && term.length() > 0) { // unigram
        unigrams.adjustOrPutValue(term, 1, 1);
      }
    } while (sf.incrementToken());
   
    try {
      final GramKey gramKey = new GramKey();
     
      ngrams.forEachPair(new ObjectIntProcedure<String>() {
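
The Mahout fragment above drains a ShingleFilter with a do/while loop, implying a first incrementToken() call before the loop body shown. A self-contained sketch of shingle (word n-gram) output (Lucene 4.x assumed; with maxShingleSize 2 the filter emits unigrams plus bigrams by default):

import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class ShingleDemo {
  public static void main(String[] args) throws Exception {
    WhitespaceTokenizer tok = new WhitespaceTokenizer(Version.LUCENE_40,
        new StringReader("the quick red fox"));
    ShingleFilter sf = new ShingleFilter(tok, 2); // bigrams, unigrams kept by default
    CharTermAttribute term = sf.addAttribute(CharTermAttribute.class);
    sf.reset();
    while (sf.incrementToken()) {
      System.out.println(term); // the, "the quick", quick, "quick red", ...
    }
    sf.end();
    sf.close();
  }
}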

Examples of org.apache.lucene.analysis.standard.StandardFilter.incrementToken()

      tokenizer = new StandardTokenizer(Version.LUCENE_35, new StringReader(finalTokenized.toString().trim()));
      tokenStream = new ArabicStemFilter(new ArabicNormalizationFilter(tokenizer));
      CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
      tokenStream.clearAttributes();
      try {
        while (tokenStream.incrementToken()) {
          String curToken = termAtt.toString();
          if ( vocab != null && vocab.get(curToken) <= 0) {
            countOOV++;
          }
          countAll++;
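
The fragment actually drains an Arabic stemming chain; a minimal sketch of the StandardTokenizer -> StandardFilter chain the heading names, using the same Version.LUCENE_35 as above:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class StandardFilterDemo {
  public static void main(String[] args) throws Exception {
    TokenStream ts = new StandardFilter(Version.LUCENE_35,
        new StandardTokenizer(Version.LUCENE_35, new StringReader("O'Neill's IBM")));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term);
    }
    ts.end();
    ts.close();
  }
}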

Examples of org.apache.lucene.analysis.standard.StandardTokenizer.incrementToken()

    result = new StopFilter(true, result, StandardAnalyzer.STOP_WORDS_SET);
   
    TermAttribute termAtt = (TermAttribute) result.addAttribute(TermAttribute.class);
    StringBuilder buf = new StringBuilder();
    try {
      while (result.incrementToken()) {
        if (termAtt.termLength() < 3) continue;
        String word = new String(termAtt.termBuffer(), 0, termAtt.termLength());
        Matcher m = alphabets.matcher(word);
       
        if (m.matches()) {
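
The snippet uses the pre-3.1 TermAttribute API, hence the termBuffer()/termLength() calls and the cast on addAttribute(). An equivalent sketch on the CharTermAttribute API that replaced it (Lucene 4.x signatures assumed):

import java.io.StringReader;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class StopFilterDemo {
  public static void main(String[] args) throws Exception {
    Pattern alphabets = Pattern.compile("[a-z]+");
    TokenStream result = new StandardTokenizer(Version.LUCENE_40,
        new StringReader("the quick red fox"));
    result = new StopFilter(Version.LUCENE_40, result, StandardAnalyzer.STOP_WORDS_SET);
    CharTermAttribute termAtt = result.addAttribute(CharTermAttribute.class);
    result.reset();
    while (result.incrementToken()) {
      if (termAtt.length() < 3) continue;           // skip very short tokens
      String word = termAtt.toString();
      if (alphabets.matcher(word).matches()) {      // keep purely alphabetic tokens
        System.out.println(word);
      }
    }
    result.end();
    result.close();
  }
}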

Examples of org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.incrementToken()

    int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0,
        0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 };
    FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class);
    tf.reset();
    for (int i = 0; i < expectedFlags.length; i++) {
      assertTrue(tf.incrementToken());
      assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags());
    }
    assertFalse(tf.incrementToken());
    tf.close();
  }
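
A stand-alone sketch of inspecting WikipediaTokenizer flags (Lucene 4.x assumed; UNTOKENIZED_TOKEN_FLAG is only set on the untokenized token variants the tokenizer emits in its BOTH/UNTOKENIZED modes, which tests like the one above configure):

import java.io.StringReader;

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;

public class WikipediaDemo {
  public static void main(String[] args) throws Exception {
    WikipediaTokenizer tf = new WikipediaTokenizer(
        new StringReader("[[Link|Display text]] plain text '''bold'''"));
    CharTermAttribute term = tf.addAttribute(CharTermAttribute.class);
    FlagsAttribute flags = tf.addAttribute(FlagsAttribute.class);
    tf.reset();
    while (tf.incrementToken()) {
      boolean untokenized =
          (flags.getFlags() & WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG) != 0;
      System.out.println(term + (untokenized ? " [untokenized]" : ""));
    }
    tf.end();
    tf.close();
  }
}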

Examples of org.apache.lucene.facet.index.streaming.CategoryAttributesStream.incrementToken()

  public void testCategoryAttributesStream() throws IOException {
    CategoryAttributesStream stream = new CategoryAttributesStream(
        categoryContainer);
    // count the number of tokens
    int nTokens;
    for (nTokens = 0; stream.incrementToken(); nTokens++) {
    }
    assertEquals("Wrong number of tokens", 3, nTokens);
  }

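CategoryAttributesStream comes from the old (pre-4.2) facet module, but it honors the ordinary TokenStream contract, so counting its tokens is the generic pattern below (a minimal helper sketch; the caller is assumed to handle reset()/end()/close()):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;

public final class TokenCounter {
  private TokenCounter() {}

  // Drains the stream and returns how many tokens it produced.
  public static int countTokens(TokenStream stream) throws IOException {
    int nTokens = 0;
    while (stream.incrementToken()) {
      nTokens++;
    }
    return nTokens;
  }
}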

Examples of org.apache.lucene.facet.index.streaming.CategoryParentsStream.incrementToken()

    CategoryParentsStream stream = new CategoryParentsStream(
        new CategoryAttributesStream(categoryContainer),
        taxonomyWriter, new DefaultFacetIndexingParams());

    // count the number of tokens
    int nTokens;
    for (nTokens = 0; stream.incrementToken(); nTokens++) {
    }
    // should be 6 - all categories and parents
    assertEquals("Wrong number of tokens", 6, nTokens);

    taxonomyWriter.close();
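
CategoryParentsStream decorates CategoryAttributesStream and emits each category plus its taxonomy parents, which is why three two-level categories yield six tokens. The decoration idea is plain TokenFilter composition; a hypothetical filter that duplicates every token illustrates how a wrapper can emit more tokens than it consumes:

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

// Emits every token from the wrapped stream twice. Because attribute instances are
// shared and only change when input.incrementToken() is called, returning true
// without advancing the input replays the previous token's state unchanged.
final class DuplicateEachTokenFilter extends TokenFilter {
  private boolean repeatPending = false;

  DuplicateEachTokenFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (repeatPending) {        // emit the duplicate of the token we just returned
      repeatPending = false;    // (a real filter would also adjust the position increment)
      return true;
    }
    if (!input.incrementToken()) {
      return false;             // inner stream exhausted
    }
    repeatPending = true;       // schedule one duplicate for the next call
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    repeatPending = false;
  }
}

CategoryParentsStream plays the same role, except the extra tokens it injects are the category's ancestors from the taxonomy rather than duplicates.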

Examples of org.apache.lucene.facet.index.streaming.CategoryTokenizer.incrementToken()

      categoryTerms.add(initialCatgeories[i]
          .toString(indexingParams.getFacetDelimChar()));
    }

    int nTokens;
    for (nTokens = 0; tokenizer.incrementToken(); nTokens++) {
      if (!categoryTerms.remove(tokenizer.termAttribute.toString())) {
        fail("Unexpected term: " + tokenizer.termAttribute.toString());
      }
    }
    assertTrue("all category terms should have been found", categoryTerms
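
The test drains the tokenizer and checks every emitted term against an expected set. That verification pattern generalizes to any stream exposing CharTermAttribute; a minimal collector sketch (hypothetical helper; the caller manages the stream lifecycle):

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class TermCollector {
  private TermCollector() {}

  // Drains the stream and returns the distinct terms it emitted; the String copy
  // matters because the CharTermAttribute instance is reused for every token.
  public static Set<String> collectTerms(TokenStream stream) throws IOException {
    Set<String> terms = new HashSet<String>();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
      terms.add(term.toString());
    }
    return terms;
  }
}

With such a helper, the assertion above reduces to comparing the collected terms against the expected category strings.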