Package org.fnlp.nlp.pipe

Examples of org.fnlp.nlp.pipe.Pipe


    // 特征集合
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });

    return pipe;
  }
View Full Code Here


    if (cl == null)
      loadFrom(model);

    long starttime = System.currentTimeMillis();
    // 将样本通过Pipe抽取特征
    Pipe pipe = createProcessor();

    // 测试集
    testSet = new InstanceSet(pipe);

    testSet.loadThruStagePipes(new SequenceReader(testfile,hasLabel,"utf8"));
View Full Code Here

  public void loadTrainingData() throws Exception{

    System.out.print("Loading training data ...");
    long beginTime = System.currentTimeMillis();

    Pipe pipe = createProcessor();

   
    trainSet = new InstanceSet(pipe, factory);
     labels = factory.DefaultLabelAlphabet();
    features = factory.DefaultFeatureAlphabet();
View Full Code Here

  }
 
 
  public void loadTestData() throws Exception{
    System.out.print("Loading test data ...");
    Pipe pipe = createProcessor();
   
   
    // /////////////////
    if (testfile != null) {
      boolean hasTarget = true;;
View Full Code Here

     */
    //建立字典管理器
    AlphabetFactory af = AlphabetFactory.buildFactory();

    //使用n元特征
    Pipe ngrampp = new NGram(new int[] {1,2});
    //将字符特征转换成字典索引
    Pipe indexpp = new StringArray2IndexArray(af);
    //将目标值对应的索引号作为类别
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());   

    //建立pipe组合
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,indexpp});
   
    SeriesPipes  pp2 = new SeriesPipes(new Pipe[]{prePipe, ngrampp,targetpp,indexpp});
View Full Code Here

  public void load(String modelFile) throws LoadModelException{
    pclassifier =Linear.loadFrom(modelFile);
  }

  public TPredict<String> classify(String str){
    Pipe p = pclassifier.getPipe();
    Instance inst = new Instance(str);
    try {
      //特征转换
      if(prePipe!=null)
        prePipe.addThruPipe(inst);
      p.addThruPipe(inst);
    } catch (Exception e) {
      e.printStackTrace();
    }
    TPredict<String> res = pclassifier.classify(inst,Type.STRING);
    return res;
View Full Code Here

    public void train() throws Exception {

      //建立字典管理器

     
      Pipe lpipe = new Target2Label(al);
      Pipe fpipe = new StringArray2IndexArray(factory, true);
      //构造转换器组
      SeriesPipes pipe = new SeriesPipes(new Pipe[]{lpipe,fpipe});


View Full Code Here

   */
  private static String bayesModelFile = dataPath+"modelBayes.gz";

  public static void main(String[] args) throws Exception {
    //分词
    Pipe removepp=new RemoveWords();
    CWSTagger tag = new CWSTagger("../models/seg.m");
    Pipe segpp=new CNPipe(tag);
    Pipe s2spp=new Strings2StringArray();
    /**
     * Bayes
     */
    //建立字典管理器
    AlphabetFactory af = AlphabetFactory.buildFactory();
    //使用n元特征
    Pipe ngrampp = new NGram(new int[] {2,3});
    //将字符特征转换成字典索引; 
    Pipe sparsepp=new StringArray2SV(af);
    //将目标值对应的索引号作为类别
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet())
    //建立pipe组合
    SeriesPipes pp = new SeriesPipes(new Pipe[]{removepp,segpp,s2spp,targetpp,sparsepp});

    System.out.print("\nReading data......\n");
    InstanceSet instset = new InstanceSet(pp,af)
View Full Code Here

   
    long start = System.currentTimeMillis();

    path = "./example-data/data-classification.txt";

    Pipe lpipe = new Target2Label(al);
    Pipe fpipe = new StringArray2SV(factory, true);
    //构造转换器组
    Pipe pipe = new SeriesPipes(new Pipe[]{lpipe,fpipe});
   
    //构建训练集
    train = new InstanceSet(pipe, factory);
    SimpleFileReader reader = new SimpleFileReader (path,true);
    train.loadThruStagePipes(reader);
View Full Code Here

   
    String train = "./example-data/sequence/train.txt";
    String testfile = "./example-data/sequence/test.txt";
    String templateFile="./example-data/sequence/template";
    AlphabetFactory factory;
    Pipe featurePipe;
    TempletGroup templets;

    templets = new TempletGroup();
    templets.load(templateFile);
    factory = AlphabetFactory.buildFactory();

    /**
     * 标签字典。转为0、1、2、...
     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    /**
     * 特征字典
     */
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    // 将样本通过Pipe抽取特征
   
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });


    System.out.print("读入训练数据 ...");
    InstanceSet trainSet = new InstanceSet(pipe, factory);

    // 训练集
    trainSet.loadThruStagePipes(new SequenceReader(train, true, "utf8"));
    System.out.println("训练样本个数 " + trainSet.size());
    System.out.println("标签个数: " + labels.size()); //
    System.out.println("特征个数" + features.size());

    // 冻结特征集
    features.setStopIncrement(true);
    labels.setStopIncrement(true);


    // viterbi解码
    HammingLoss loss = new HammingLoss();
    Inferencer inference = new LinearViterbi(templets, labels.size());
    Update update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);


    OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
        features.size(), 50,0.1f);

    Linear cl = trainer.train(trainSet);


    // test data没有标注
    Pipe tpipe = featurePipe;
    // 测试集
    InstanceSet testSet = new InstanceSet(tpipe);

    testSet.loadThruPipes(new SequenceReader(testfile, false, "utf8"));
    System.out.println("测试样本个数: " + testSet.size()); //
View Full Code Here

TOP

Related Classes of org.fnlp.nlp.pipe.Pipe

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.