Package org.apache.lucene.analysis.icu.segmentation

Examples of org.apache.lucene.analysis.icu.segmentation.ICUTokenizer


/** Factory for {@link ICUTokenizer} */
public class ICUTokenizerFactory extends BaseTokenizerFactory {
  // TODO: add support for custom configs
  public Tokenizer create(Reader input) {
    return new ICUTokenizer(input);
  }
View Full Code Here


     * @throws IOException
     */
    private static QueryTerm[] parseWildcardQueryTerms(String value,boolean loewercaseWildcardTokens) {
        //This assumes that the Tokenizer does tokenize '*' and '?',
        //what makes it a little bit tricky.
        Tokenizer tokenizer = new ICUTokenizer(new StringReader(value),tokenizerConfig);
        Matcher m = WILDCARD_QUERY_CHAR_PATTERN.matcher(value);
        int next = m.find()?m.start()+1:-1;
        if(next < 0){ //No wildcard
            return new QueryTerm[]{new QueryTerm(value, false, true, true)};
        }
        ArrayList<QueryTerm> queryElements = new ArrayList<QueryTerm>(5);
        int lastAdded = -1;
        int lastOffset = 0;
        boolean foundWildcard = false;
        //Lucene tokenizer are really low level ...
        try {
          tokenizer.reset(); //starting with Solr4 reset MUST BE called before using
            while(tokenizer.incrementToken()){
                //only interested in the start/end indexes of tokens
                OffsetAttribute offset = tokenizer.addAttribute(OffsetAttribute.class);
                if(lastAdded < 0){ //rest with this token
                    lastAdded = offset.startOffset();
                }
                if(foundWildcard){ //wildcard present in the current token
                    //two cases: "wildcar? at the end", "wild?ard within the word"
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.icu.segmentation.ICUTokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.