Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.Tokenizer.incrementToken()
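
Most of the excerpts below follow the same pattern: obtain an attribute from the tokenizer, then loop on incrementToken() until it returns false. As a point of reference, here is a minimal self-contained sketch of that pattern (not taken from any project below; it assumes a Lucene 5+ style API, and StandardTokenizer plus the sample text are arbitrary choices for illustration):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class IncrementTokenSketch {
        public static void main(String[] args) throws IOException {
            Tokenizer tokenizer = new StandardTokenizer();
            tokenizer.setReader(new StringReader("Hello Lucene token stream"));
            // The attribute instance is registered once and reused for every token.
            CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);

            tokenizer.reset();                  // mandatory before the first incrementToken()
            while (tokenizer.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            tokenizer.end();                    // records end-of-stream state (final offset)
            tokenizer.close();                  // releases the underlying Reader
        }
    }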


      System.out.print("Text: ");
      String line = in.readLine();
      Tokenizer tokenizer = new NutchDocumentTokenizer(new StringReader(line));
      TermAttribute termAtt = tokenizer.getAttribute(TermAttribute.class);
      System.out.print("Tokens: ");
      while (tokenizer.incrementToken()) {
        System.out.print(termAtt.term());
        System.out.print(" ");
      }
      System.out.println();
    }


        int lastAdded = -1;
        int lastOffset = 0;
        boolean foundWildcard = false;
        //Lucene tokenizers are really low level ...
        try {
            while(tokenizer.incrementToken()){
                //only interested in the start/end indexes of tokens
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                if(lastAdded < 0){ //no span started yet, start one at this token's offset
                    lastAdded = offset.startOffset();
                }

        int lastOffset = 0;
        boolean foundWildcard = false;
        //Lucene tokenizers are really low level ...
        try {
            tokenizer.reset(); //starting with Solr 4, reset() MUST be called before consuming the stream
            while(tokenizer.incrementToken()){
                //only interested in the start/end indexes of tokens
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                if(lastAdded < 0){ //no span started yet, start one at this token's offset
                    lastAdded = offset.startOffset();
                }
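
The excerpt above is cut off by the listing; a rough self-contained version of the same offset-collecting idea, with the reset()/end()/close() lifecycle handled explicitly, could look like the following (WhitespaceTokenizer and the method name are placeholders, assuming a Lucene 5+ style API):

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    public class OffsetSketch {
        /** Returns the [start, end) character offsets of every token in the input. */
        public static List<int[]> collectOffsets(String text) throws IOException {
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader(text));
            // addAttribute() always returns the same instance, so it can be hoisted out of the loop.
            OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);

            List<int[]> spans = new ArrayList<>();
            try {
                tokenizer.reset();              // required before the first incrementToken()
                while (tokenizer.incrementToken()) {
                    spans.add(new int[] { offset.startOffset(), offset.endOffset() });
                }
                tokenizer.end();
            } finally {
                tokenizer.close();
            }
            return spans;
        }
    }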

        ArrayList<String> tokenList = new ArrayList<String>();

        if ( !indexerField.isKeyword() )
        {
            while ( ts.incrementToken() )
            {
                TermAttribute term = ts.getAttribute( TermAttribute.class );

                tokenList.add( term.term() );
            }
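
The snippet above uses the pre-4.0 TermAttribute API; TermAttribute and its term() method were removed in Lucene 4, where CharTermAttribute.toString() is the replacement. Here is a sketch of the equivalent token-collecting loop under that assumption (the class and method names are made up for illustration):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public final class TokenListSketch {
        /** Drains a not-yet-consumed TokenStream into a list of token strings. */
        public static List<String> tokenList(TokenStream ts) throws IOException {
            List<String> tokenList = new ArrayList<>();
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                tokenList.add(termAtt.toString());
            }
            ts.end();
            ts.close();
            return tokenList;
        }
    }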


 
  @Test
  public void testEmptyQuery() throws Exception {
    Tokenizer tokenizer = createTokenizer(
        new StringReader(""), TokenGenerator.DEFAULT_COMPOUND_NOUN_MIN_LENGTH);
    assertEquals(false, tokenizer.incrementToken());
    tokenizer.close();
  }
 
  @Test
  public void testEmptyMorphemes() throws Exception {

  @Test
  public void testEmptyMorphemes() throws Exception {
    Tokenizer tokenizer = createTokenizer(
        new StringReader("!@#$%^&*"),
        TokenGenerator.DEFAULT_COMPOUND_NOUN_MIN_LENGTH);
    assertEquals(false, tokenizer.incrementToken());
    tokenizer.close();
  }
 
  @Test
  public void testSemanticClassSentence() throws Exception {
