Package org.fnlp.train.seg

Source Code of org.fnlp.train.seg.SegTrain

/**
*  This file is part of FNLP (formerly FudanNLP).
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  You should have received a copy of the GNU Lesser General Public License
*  along with FNLP.  If not, see <http://www.gnu.org/licenses/>.
*  Copyright 2009-2014 www.fnlp.org. All rights reserved.
*/

package org.fnlp.train.seg;

import org.fnlp.ml.types.alphabet.AlphabetFactory;
import org.fnlp.ml.types.alphabet.AlphabetFactory.Type;
import org.fnlp.nlp.pipe.Pipe;
import org.fnlp.nlp.pipe.SeriesPipes;
import org.fnlp.nlp.pipe.Target2Label;
import org.fnlp.nlp.pipe.WeightPipe;
import org.fnlp.nlp.pipe.seq.AddCharRange;
import org.fnlp.nlp.pipe.seq.Sequence2FeatureSequence;
import org.fnlp.nlp.pipe.seq.templet.BaseTemplet;
import org.fnlp.nlp.pipe.seq.templet.CharClassTemplet;
import org.fnlp.nlp.pipe.seq.templet.CustomTemplet;
import org.fnlp.nlp.pipe.seq.templet.Templet;
import org.fnlp.nlp.pipe.seq.templet.TempletGroup;
import org.fnlp.nlp.tag.AbstractTagger;
import org.fnlp.ontology.CharClassDictionary;

/**
* 序列标注器训练和测试程序
*
* @author xpqiu
*
*/
public class SegTrain extends AbstractTagger{

  @Override
  public Pipe createProcessor() throws Exception {

    if(cl!=null){
      factory = cl.getAlphabetFactory();
    }else{
      factory = AlphabetFactory.buildFactory();
      templets = new TempletGroup();
      templets.load(templateFile);
      for(Templet templet:templets){
        ((BaseTemplet) templet).minLen = 0;
      }
      //Dictionary d = new Dictionary();
      // d.loadWithWeigth("D:/xpqiu/项目/自选/CLP2010/CWS/av-b-lut.txt",
      // "AV");
      // templets.add(new DictionaryTemplet(d, gid++, 0, 1));
      // templets.add(new DictionaryTemplet(d, gid++, -1,0, 1));
      // templets.add(new DictionaryTemplet(d, gid++, -2,-1,0, 1));
//      CharClassDictionary dsurname = new CharClassDictionary();
//      dsurname.load("../data/knowledge/百家姓.txt", "姓");
//      templets.add(new CharClassTemplet(templets.gid++, new CharClassDictionary[]{dsurname}));
//      templets.add(new CustomTemplet(templets.gid++));
    }


    labels = factory.DefaultLabelAlphabet();

    //TODO: 修改字典类型
    AlphabetFactory.defaultFeatureType = Type.String;
    // 将样本通过Pipe抽取特征
    features = factory.DefaultFeatureAlphabet();
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);
    AddCharRange typePip = new AddCharRange();
    Pipe weightPipe = new WeightPipe(true);
    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels),  typePip, featurePipe, weightPipe  });
    return pipe;
  }
}
TOP

Related Classes of org.fnlp.train.seg.SegTrain

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.