Package org.elasticsearch.index.analysis

Source Code of org.elasticsearch.index.analysis.MMsegTokenizerFactory

package org.elasticsearch.index.analysis;

import com.chenlb.mmseg4j.*;
import com.chenlb.mmseg4j.analysis.MMSegTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;

import java.io.File;
import java.io.Reader;

/**
* Created by IntelliJ IDEA.
* User: Medcl'
* Date: 12-6-6
* Time: 下午3:59
*/
public class MMsegTokenizerFactory extends AbstractTokenizerFactory {

    Dictionary dic;
    private String seg_type;

    @Inject
    public MMsegTokenizerFactory(Index index, @IndexSettings Settings indexSettings,Environment env, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name, settings);
        String path=new File(env.configFile(),"mmseg").getPath();
        dic = Dictionary.getInstance(path);
        seg_type = settings.get("seg_type", "max_word");
    }

    @Override
    public Tokenizer create(Reader reader) {
        Seg seg_method=null;
        if(seg_type.equals("max_word")){
            seg_method = new MaxWordSeg(dic);
        }else if(seg_type.equals("complex")){
            seg_method = new ComplexSeg(dic);
        }else if(seg_type.equals("simple")){
            seg_method =new SimpleSeg(dic);
        }
        return  new MMSegTokenizer(seg_method,reader);
    }
}
TOP

Related Classes of org.elasticsearch.index.analysis.MMsegTokenizerFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.