Package com.chenlb.mmseg4j.analysis

Source Code of com.chenlb.mmseg4j.analysis.MMSegAnalyzer

package com.chenlb.mmseg4j.analysis;

import java.io.File;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

import com.chenlb.mmseg4j.Dictionary;
import com.chenlb.mmseg4j.MaxWordSeg;
import com.chenlb.mmseg4j.Seg;

/**
* 默认使用 max-word
*
* @see {@link SimpleAnalyzer}, {@link ComplexAnalyzer}, {@link MaxWordAnalyzer}
*
* @author chenlb
*/
public class MMSegAnalyzer extends Analyzer {

  protected Dictionary dic;
 
  /**
   * @see Dictionary#getInstance()
   */
  public MMSegAnalyzer() {
    dic = Dictionary.getInstance();
  }
 
  /**
   * @param path 词库路径
   * @see Dictionary#getInstance(String)
   */
  public MMSegAnalyzer(String path) {
    dic = Dictionary.getInstance(path);
  }
 
  /**
   * @param path 词库目录
   * @see Dictionary#getInstance(File)
   */
  public MMSegAnalyzer(File path) {
    dic = Dictionary.getInstance(path);
  }
 
  public MMSegAnalyzer(Dictionary dic) {
    super();
    this.dic = dic;
  }

  protected Seg newSeg() {
    return new MaxWordSeg(dic);
  }
 
  public Dictionary getDict() {
    return dic;
  }
 
  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader)
      throws IOException {
   
    MMSegTokenizer mmsegTokenizer = (MMSegTokenizer) getPreviousTokenStream();
    if(mmsegTokenizer == null) {
      mmsegTokenizer = new MMSegTokenizer(newSeg(), reader);
      setPreviousTokenStream(mmsegTokenizer)//保存实例
    } else {
      mmsegTokenizer.reset(reader);
    }
   
    return mmsegTokenizer;
  }

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream ts = new MMSegTokenizer(newSeg(), reader);
    return ts;
  }
}
TOP

Related Classes of com.chenlb.mmseg4j.analysis.MMSegAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.