Package net.paoding.analysis.dictionary

Examples of net.paoding.analysis.dictionary.Word


    }
    BufferedOutputStream out = new BufferedOutputStream(
        new FileOutputStream(dicFile), 1024 * 16);
   
    for (int i = 0; i < wordsSize; i++) {
      Word word = dictionary.get(i);
      out.write(word.getText().getBytes(charsetName));
      if (word.getModifiers() != Word.DEFAUL) {
        out.write("[m=".getBytes());
        out.write(String.valueOf(word.getModifiers()).getBytes());
        out.write(']');
      }
      out.write('\r');
      out.write('\n');
    }
View Full Code Here


      IOException, UnsupportedEncodingException {
    int vocabularySize = vocabularyDictionary.size();
    Word[] vocabularyWords = new Word[vocabularySize];
    char[] chs = new char[128];
    for (int i = 0; i < vocabularySize; i ++) {
      final Word curWord = vocabularyDictionary.get(i);
      curWord.getText().getChars(0, curWord.length(), chs, 0);
      chs[curWord.length()] = (char) -1;
      Beef beef = new Beef(chs, 0, curWord.length() + 1);
      final BitSet bs = new BitSet(curWord.length());
      knife.dissect(new Collector(){
        public void collect(String word, int offset, int end) {
          Hit hit = vocabularyDictionary.search(word, 0, word.length());
          if (hit.isHit() && hit.getWord().length() != curWord.length()) {
            for (int j = offset; j < end; j++) {
              bs.set(j, true);
            }
          }
        }
       
      }, beef, 0);
     
      for (int j = 0; j < curWord.length();j++) {
        if (!bs.get(j)) {
          vocabularyWords[i] = curWord;
          break;
        }
      }
View Full Code Here

    }
    BufferedOutputStream out = new BufferedOutputStream(
        new FileOutputStream(dicFile), 1024 * 16);
   
    for (int i = 0; i < wordsSize; i++) {
      Word word = dictionary.get(i);
      out.write(word.getText().getBytes(charsetName));
      if (word.getModifiers() != Word.DEFAUL) {
        out.write("[m=".getBytes());
        out.write(String.valueOf(word.getModifiers()).getBytes());
        out.write(']');
      }
      out.write('\r');
      out.write('\n');
    }
View Full Code Here

          if (curSearchOffset == offset
              && maxDicWordLength < numberSearchLength) {
            maxDicWordLength = numberSearchLength;
          }

          Word word = curSearch.getWord();
          if (!word.isNoise()) {
            dissectIsolated(collector, beef, curSearchOffset,
                curSearch.getIndex());
          }
          curSearchOffset = numberSearchEnd - 1;
          break;
        }
        if (curSearch.isUnclosed()) {
          continue;
        }

        // 通过词汇表判断,返回判断结果curSearch
        curSearch = vocabulary.search(beef, curSearchOffset,
            curSearchLength);

        // ---------------分析返回的判断结果--------------------------

        // 1)
        // 从词汇表中找到了该词语...
        if (curSearch.isHit()) {

          // 1.1)
          // 确认孤立字符串的结束位置=curSearchOffset,
          // 并调用子方法分解把从isolatedOffset开始的到curSearchOffset之间的孤立字符串
          // 孤立字符串分解完毕,将孤立字符串开始位置isolatedOffset清空
          if (isolatedOffset >= 0) {
            dissectIsolated(collector, beef, isolatedOffset,
                curSearchOffset);
            isolatedOffset = -1;
          }

          // 1.2)
          // 更新最大结束位置
          if (maxDicWordEnd < curSearchEnd) {
            maxDicWordEnd = curSearchEnd;
          }

          // 1.3)
          // 更新词语最大长度变量的值
          if (curSearchOffset == offset
              && maxDicWordLength < curSearchLength) {
            maxDicWordLength = curSearchLength;
          }

          // 1.2)
          // 通知collector本次找到的词语
          Word word = curSearch.getWord();
          if (!word.isNoise()) {
            collector.collect(word.getText(), curSearchOffset,
                curSearchEnd);
          }
        }

        // 若isolatedFound==true,表示词典没有该词语
View Full Code Here

      return Hit.UNDEFINED;
    }
   
    //部分含有中文数字,取这一部分出来
    //trick: 我们这里用index参数传递该部分中文的结束位置
    return new Hit(offset + endPos + 1, new Word(nums.toString()), null);
  }
View Full Code Here

    }
    BufferedOutputStream out = new BufferedOutputStream(
        new FileOutputStream(dicFile), 1024 * 16);
   
    for (int i = 0; i < wordsSize; i++) {
      Word word = dictionary.get(i);
      out.write(word.getText().getBytes(charsetName));
      if (word.getModifiers() != Word.DEFAUL) {
        out.write("[m=".getBytes());
        out.write(String.valueOf(word.getModifiers()).getBytes());
        out.write(']');
      }
      out.write('\r');
      out.write('\n');
    }
View Full Code Here

      IOException, UnsupportedEncodingException {
    int vocabularySize = vocabularyDictionary.size();
    Word[] vocabularyWords = new Word[vocabularySize];
    char[] chs = new char[128];
    for (int i = 0; i < vocabularySize; i ++) {
      final Word curWord = vocabularyDictionary.get(i);
      curWord.getText().getChars(0, curWord.length(), chs, 0);
      chs[curWord.length()] = (char) -1;
      Beef beef = new Beef(chs, 0, curWord.length() + 1);
      final BitSet bs = new BitSet(curWord.length());
      knife.dissect(new Collector(){
        public void collect(String word, int offset, int end) {
          Hit hit = vocabularyDictionary.search(word, 0, word.length());
          if (hit.isHit() && hit.getWord().length() != curWord.length()) {
            for (int j = offset; j < end; j++) {
              bs.set(j, true);
            }
          }
        }
       
      }, beef, 0);
     
      for (int j = 0; j < curWord.length();j++) {
        if (!bs.get(j)) {
          vocabularyWords[i] = curWord;
          break;
        }
      }
View Full Code Here

    }
    BufferedOutputStream out = new BufferedOutputStream(
        new FileOutputStream(dicFile), 1024 * 16);
   
    for (int i = 0; i < wordsSize; i++) {
      Word word = dictionary.get(i);
      out.write(word.getText().getBytes(charsetName));
      if (word.getModifiers() != Word.DEFAUL) {
        out.write("[m=".getBytes());
        out.write(String.valueOf(word.getModifiers()).getBytes());
        out.write(']');
      }
      out.write('\r');
      out.write('\n');
    }
View Full Code Here

            maxDicWordLength = curSearchLength;
          }
         
          // 1.2)
          // 通知collector本次找到的词语
          Word word = curSearch.getWord();
          if (!word.isNoise()) {
            collector.collect(word.getText(), curSearchOffset,
              curSearchEnd);
          }
        }

        // 若isolatedFound==true,表示词典没有该词语
View Full Code Here

        || wordText.charAt(0) == '-') {
      return;
    }
   
    if (!wordText.endsWith("]")) {
      words.add(new Word(wordText));
    }
    else {
      int index = wordText.indexOf('[');
      Word w = new Word(wordText.substring(0, index));
      int mindex = wordText.indexOf("m=", index);
      int mEndIndex = wordText.indexOf("]", mindex);
      String m = wordText.substring(mindex + "m=".length(), mEndIndex);
      w.setModifiers(Integer.parseInt(m));
      words.add(w);
    }
  }
View Full Code Here

TOP

Related Classes of net.paoding.analysis.dictionary.Word

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.