Package com.chenlb.mmseg4j

Examples of com.chenlb.mmseg4j.Word


  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here


     
    });
    // Total elapsed time over all files, in milliseconds. The measured span covers
    // both mmSeg.next() and the writes to the output file, but not bw.close().
    long time = 0;
    for(File txt : txts) {
      // NOTE(review): no charset is passed, so the platform default is used for reading; the input stream is not closed in the visible code.
      MMSeg mmSeg = new MMSeg(new InputStreamReader(new FileInputStream(txt)), seg);
      Word word = null;
      // Output file is the input's absolute path with ".<mode>.word" appended (platform default charset as well).
      OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(new File(txt.getAbsoluteFile()+"."+mode+".word")));
      BufferedWriter bw = new BufferedWriter(osw);
      long start = System.currentTimeMillis();
      while((word=mmSeg.next())!=null) {

        // One word per line, terminated with CRLF.
        bw.append(new String(word.getString())).append("\r\n");
      }
      time += System.currentTimeMillis() - start;
      // NOTE(review): if an exception is thrown above, this close is skipped and the writer leaks.
      bw.close();
    }
    System.out.println("use "+time+"ms");
View Full Code Here

 
  public String segWords(Reader input, String wordSpilt) throws IOException {
    StringBuilder sb = new StringBuilder();
    Seg seg = getSeg()//取得不同的分词具体算法
    MMSeg mmSeg = new MMSeg(input, seg);
    Word word = null;
    boolean first = true;
    while((word=mmSeg.next())!=null) {
      if(!first) {
        sb.append(wordSpilt);
      }
      String w = word.getString();
      sb.append(w);
      first = false;
     
    }
    return sb.toString();
View Full Code Here

  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here

  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here

  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here

 
  public String segWords(Reader input, String wordSpilt) throws IOException {
    StringBuilder sb = new StringBuilder();
    Seg seg = getSeg()//取得不同的分词具体算法
    MMSeg mmSeg = new MMSeg(input, seg);
    Word word = null;
    boolean first = true;
    while((word=mmSeg.next())!=null) {
      if(!first) {
        sb.append(wordSpilt);
      }
      String w = word.getString();
      sb.append(w);
      first = false;
     
    }
    return sb.toString();
View Full Code Here

  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here

  //lucene 2.9/3.0
  /**
   * Advances the stream by one token taken from {@code mmSeg}.
   * <p>
   * The attributes are cleared first, then the next {@link Word} is requested.
   * When a word is returned, its text, offsets and type are written to
   * {@code termAtt}, {@code offsetAtt} and {@code typeAtt}.
   *
   * @return {@code true} when a word was read and the attributes were filled;
   *         {@code false} when {@code mmSeg.next()} returned {@code null}
   *         (in which case {@code end()} is called before returning)
   * @throws IOException declared by this method; presumably raised by
   *         {@code mmSeg.next()} while reading the input (confirm)
   */
  @Override
  public final boolean incrementToken() throws IOException {
    clearAttributes();
    Word word = mmSeg.next();
    if(word != null) {
      //lucene 3.0
      //termAtt.setTermBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      //lucene 3.1
      // The term text is the slice [getWordOffset(), getWordOffset()+getLength()) of the buffer from getSen().
      termAtt.copyBuffer(word.getSen(), word.getWordOffset(), word.getLength());
      offsetAtt.setOffset(word.getStartOffset(), word.getEndOffset());
      typeAtt.setType(word.getType());
      return true;
    } else {
      // No more words. NOTE(review): end() is normally invoked by the stream's consumer; confirm calling it here is intended.
      end();
      return false;
    }
View Full Code Here

  }

  /**
   * Segments {@code txt} with the given {@code seg} and collects the string of
   * every word produced, in the order {@code MMSeg.next()} returns them.
   *
   * @param txt text to segment; it is wrapped in a {@link StringReader}
   * @param seg segmentation algorithm handed to {@code MMSeg}
   * @return not visible in this excerpt; presumably the collected
   *         {@code words} list (confirm against the full source)
   */
  public static List<String> toMMsegWords(String txt, Seg seg) {
    List<String> words = new ArrayList<String>();
    MMSeg mmSeg = new MMSeg(new StringReader(txt), seg);
    Word word = null;
    try {
      while ((word = mmSeg.next()) != null) {
        String w = word.getString();
        words.add(w);
      }
    } catch (IOException e) {
      // NOTE(review): the exception is only printed, so words collected before the failure are kept and the caller gets no error signal.
      e.printStackTrace();
    }
View Full Code Here

TOP

Related Classes of com.chenlb.mmseg4j.Word

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.