Package org.nlpcn.commons.lang.tire

Source Code of org.nlpcn.commons.lang.tire.GetWord

package org.nlpcn.commons.lang.tire;


import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.WoodInterface;
import org.nlpcn.commons.lang.tire.library.Library;
import org.nlpcn.commons.lang.util.StringUtil;

import java.io.BufferedReader;
import java.io.StringReader;

public class GetWord {
  private static final String EMPTYSTRING = "";
  public int offe;
  byte status = 0;
  int root = 0;
  int i = this.root;
  boolean isBack = false;
  private Forest forest;
  WoodInterface branch = this.forest;
  private char[] chars;
  private String str;
  private int tempOffe;
  private String[] param;

  public GetWord(Forest forest, String content) {
    this.chars = content.toCharArray();
    this.forest = forest;
    this.branch = forest;
  }

  public GetWord(Forest forest, char[] chars) {
    this.chars = chars;
    this.forest = forest;
    this.branch = forest;
  }

  public static void main(String[] args) throws Exception {
    /**
     * 词典的构造.一行一个词后面是参数.可以从文件读取.可以是read流.
     */
    long start = System.currentTimeMillis();
    String dic = "android\t10\nc\t100\nC++\t10\nc++\t5\nc#\t100\nVC++\t100".toLowerCase();
    Forest forest = Library.makeForest(new BufferedReader(new StringReader(dic)));
    /**
     * 删除一个单词
     */
    Library.insertWord(forest, "中国");
    /**
     * 增加一个新词
     */
    Library.insertWord(forest, "中国人");
    String content = "Android--中国人";
    content = StringUtil.rmHtmlTag(content);

    for (int i = 0; i < 1; i++) {
      GetWord udg = forest.getWord(content.toLowerCase().toCharArray());

      String temp = null;
      while ((temp = udg.getFrontWords()) != null) {
        System.out.println(temp + "\t\t" + udg.getParam(0) + "\t\t" + udg.getParam(2));
      }
    }
    System.out.println(System.currentTimeMillis() - start);
  }

  public String getAllWords() {
    String temp = this.allWords();
    while (EMPTYSTRING.equals(temp)) {
      temp = this.allWords();
    }
    return temp;
  }

  public String getFrontWords() {
    String temp = this.frontWords();
    while (EMPTYSTRING.equals(temp)) {
      temp = this.frontWords();
    }
    return temp;
  }

  private String allWords() {
    if ((!this.isBack) || (this.i == this.chars.length - 1)) {
      this.i = (this.root - 1);
    }
    for (this.i += 1; this.i < this.chars.length; this.i = (this.i + 1)) {
      this.branch = this.branch.get(this.chars[this.i]);
      if (this.branch == null) {
        this.root += 1;
        this.branch = this.forest;
        this.i = (this.root - 1);
        this.isBack = false;
      } else {
        switch (this.branch.getStatus()) {
          case 2:
            this.isBack = true;
            this.offe = (this.tempOffe + this.root);
            this.param = this.branch.getParams();
            return new String(this.chars, this.root, this.i - this.root + 1);
          case 3:
            this.offe = (this.tempOffe + this.root);
            this.str = new String(this.chars, this.root, this.i - this.root + 1);
            this.param = this.branch.getParams();
            this.branch = this.forest;
            this.isBack = false;
            this.root += 1;
            return this.str;
        }
      }
    }
    this.tempOffe += this.chars.length;
    return null;
  }

  private String frontWords() {
    for (; this.i < this.chars.length + 1; this.i++) {
      if (i == chars.length) {
        this.branch = null;
      } else {
        this.branch = this.branch.get(this.chars[this.i]);
      }
      if (this.branch == null) {
        this.branch = this.forest;
        if (this.isBack) {
          this.offe = this.root;
          this.str = new String(this.chars, this.root, this.tempOffe);

          if ((this.root > 0) && (isE(this.chars[(this.root - 1)])) && (isE(this.str.charAt(0)))) {
            this.str = EMPTYSTRING;
          }

          if ((this.str.length() != 0) && (this.root + this.tempOffe < this.chars.length) && (isE(this.str.charAt(this.str.length() - 1)))
              && (isE(this.chars[(this.root + this.tempOffe)]))) {
            this.str = EMPTYSTRING;
          }
          if (this.str.length() == 0) {
            this.root += 1;
            this.i = this.root;
          } else {
            this.i = (this.root + this.tempOffe);
            this.root = this.i;
          }
          this.isBack = false;

          if (EMPTYSTRING.equals(this.str)) {
            return EMPTYSTRING;
          }
          return this.str;
        }
        this.i = this.root;
        this.root += 1;
      } else {
        switch (this.branch.getStatus()) {
          case 2:
            this.isBack = true;
            this.tempOffe = (this.i - this.root + 1);
            this.param = this.branch.getParams();
            break;
          case 3:
            this.offe = this.root;
            this.str = new String(this.chars, this.root, this.i - this.root + 1);
            String temp = this.str;

            if ((this.root > 0) && (isE(this.chars[(this.root - 1)])) && (isE(this.str.charAt(0)))) {
              this.str = EMPTYSTRING;
            }

            if ((this.str.length() != 0) && (this.i + 1 < this.chars.length) && (isE(this.str.charAt(this.str.length() - 1)))
                && (isE(this.chars[(this.i + 1)]))) {
              this.str = EMPTYSTRING;
            }
            this.param = this.branch.getParams();
            this.branch = this.forest;
            this.isBack = false;
            if (temp.length() > 0) {
              this.i += 1;
              this.root = this.i;
            } else {
              this.i = (this.root + 1);
            }
            if (EMPTYSTRING.equals(this.str)) {
              return EMPTYSTRING;
            }
            return this.str;
        }
      }
    }
    this.tempOffe += this.chars.length;
    return null;
  }

  public boolean isE(char c) {
    if (c == '.' || ((c >= 'a') && (c <= 'z'))) {
      return true;
    }
    return false;
  }

  public void reset(String content) {
    this.offe = 0;
    this.status = 0;
    this.root = 0;
    this.i = this.root;
    this.isBack = false;
    this.tempOffe = 0;
    this.chars = content.toCharArray();
    this.branch = this.forest;
  }

  public String getParam(int i) {
    if ((this.param == null) || (this.param.length < i + 1)) {
      return null;
    }
    return this.param[i];
  }

  /**
   * 得到全部参数
   *
   * @return
   */
  public String[] getParams() {
    return this.param;
  }
}
TOP

Related Classes of org.nlpcn.commons.lang.tire.GetWord

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.