Package com.jgaap.languages

Source Code of com.jgaap.languages.ChineseJointMM

/*
* JGAAP -- a graphical program for stylometric authorship attribution
* Copyright (C) 2009,2011 by Patrick Juola
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package com.jgaap.languages;

import java.util.List;

import WordSegment.JointMM;
import WordSegment.WordSegment;

import com.jgaap.generics.Language;

/**
*
* Representation of Chinese in jgaap using GB2312 and a joint matching technique for parsing it into words.
* Joint matching compares the results of forward matching and reverse matching and takes the more likely result.
*
* @author Michael Ryan
*
*/
public class ChineseJointMM extends Language {

  public ChineseJointMM() {
    super("Chinese JointMM (GB2312)", "chinese", "GB2312");
    super.setParseable(true);
  }

  @Override
  public char[] parseLanguage(String document) {
    StringBuilder stringBuilder = new StringBuilder(document.length());
    WordSegment wordSegmenter = new WordSegment(new JointMM());
    List<String> segmented = wordSegmenter.segment(document);
    for (String word : segmented) {
      stringBuilder.append(word).append(" ");
    }
    return stringBuilder.toString().toCharArray();
  }
 
  public boolean showInGUI() {
    return true;
  }

}
TOP

Related Classes of com.jgaap.languages.ChineseJointMM

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.