Package org.fnlp.ml.types

Examples of org.fnlp.ml.types.Instance
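
The snippets below are drawn from the FNLP (FudanNLP) sources and demo programs. As a quick orientation, here is a minimal sketch, not taken from the library itself, that uses only the Instance constructors and accessors appearing in the examples (the one- and two-argument constructors, getData, getTarget, setTempData, getTempData); the class name InstanceSketch and the literal values are invented for illustration.

  import org.fnlp.ml.types.Instance;

  public class InstanceSketch {
    public static void main(String[] args) {
      // data first, target (label) second; both are stored as plain Objects
      Instance inst = new Instance("some raw text", "sports");

      // callers cast getData()/getTarget() to the concrete types they expect
      Object data = inst.getData();
      Object target = inst.getTarget();

      // temp data carries auxiliary information, e.g. the file a document came from
      inst.setTempData("origin-of-this-instance");
      System.out.println(data + " -> " + target + " (" + inst.getTempData() + ")");
    }
  }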


  private String str;
  public MyDocumentWriter(String str){
    this.str = str;
    fgr = new FileGroupReader(str);
    this.llist = new LinkedList<Instance>();
    Instance in = null; FeatureGeter fp = null;
    ARInstanceGetter arip = null;
    while(fgr.hasNext()){
      in = fgr.next();
      fp = new FeatureGeter(in);
      arip = new ARInstanceGetter(fp);
View Full Code Here


      out = new OutputStreamWriter(new FileOutputStream(f));
    } catch (FileNotFoundException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    Instance mi = null;
    while(it.hasNext()){
      mi = (Instance) it.next();
      try {   
        out.write(mi.getTarget()+" ");
        int[] dat = (int[]) mi.getData();
        for(int i=0;i<dat.length;i++)
          out.write(dat[i]+" ");
        out.write('\n');
        out.flush();
      } catch (IOException e) {
View Full Code Here

    System.out.print("..Loading model complete!\n");
   
    System.out.println("Testing Bayes...");
    int count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=bayes.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
//      String pred_label = bayes.getStringLabel(data);
      String gold_label = bayes.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetbayes.getInstance(i).getTempData());
        count++;
      }
      else{
//        System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//        for(int j=0;j<3;j++)
//          System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
      }
    }
    int bayesCount=count;
    System.out.println("..Testing Bayes complete!");
    System.out.println("Bayes Precision:"+((float)bayesCount/testset.size())+"("+bayesCount+"/"+testset.size()+")");


    /**
     * Knn
     */
    System.out.print("\nKnn\n");
    //Build the alphabet (dictionary) manager
    AlphabetFactory af2 = AlphabetFactory.buildFactory();
    //Use n-gram features
    ngrampp = new NGram(new int[] {2,3});
    //Convert character features into dictionary indices
    sparsepp=new StringArray2SV(af2);
    //Use the index of the target value as the class label
    targetpp = new Target2Label(af2.DefaultLabelAlphabet());
    //Build the pipe combination
    pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,sparsepp});

    System.out.print("Init dataset...");
    trainset.setAlphabetFactory(af2);
    trainset.setPipes(pp);
    testset.setAlphabetFactory(af2);
    testset.setPipes(pp);     
    for(int i=0;i<trainset.size();i++){
      Instance inst=trainset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }   
    for(int i=0;i<testset.size();i++){
      Instance inst=testset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }

    System.out.print("complete!\n");
    System.out.print("Training Knn...\n");
    SparseVectorSimilarity sim=new SparseVectorSimilarity();
    pp.removeTargetPipe();
    KNNClassifier knn=new KNNClassifier(trainset, pp, sim, af2, 7);
    af2.setStopIncrement(true);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    knn.saveTo(knnModelFile);
    knn = null;
    System.out.print("..Saving model complete!\n");

   
    System.out.print("Loading model...\n");
    knn =KNNClassifier.loadFrom(knnModelFile);
    System.out.print("..Loading model compelte!\n");
    System.out.println("Testing Knn...\n");
    count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=(Predict<String>) knn.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
      String gold_label = knn.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetknn.getInstance(i).getTempData());
        count++;
      }
      else{
//        System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//        for(int j=0;j<3;j++)
//          System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
      }
    }
    int knnCount=count;
    System.out.println("..Testing Knn Complete");
    System.out.println("Bayes Precision:"+((float)bayesCount/testset.size())+"("+bayesCount+"/"+testset.size()+")");
    System.out.println("Knn Precision:"+((float)knnCount/testset.size())+"("+knnCount+"/"+testset.size()+")");
   
    //Build the alphabet (dictionary) manager
    AlphabetFactory af3 = AlphabetFactory.buildFactory();
    //Use n-gram features
    ngrampp = new NGram(new int[] {2,3 });
    //Convert character features into dictionary indices
    Pipe indexpp = new StringArray2IndexArray(af3);
    //Use the index of the target value as the class label
    targetpp = new Target2Label(af3.DefaultLabelAlphabet());

    //Build the pipe combination
    pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,indexpp});
   
    trainset.setAlphabetFactory(af3);
    trainset.setPipes(pp);
    testset.setAlphabetFactory(af3);
    testset.setPipes(pp);
    for(int i=0;i<trainset.size();i++){
      Instance inst=trainset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }   
    for(int i=0;i<testset.size();i++){
      Instance inst=testset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }     
   
    /**
     * Build the classifier
     */
    OnlineTrainer trainer3 = new OnlineTrainer(af3);
    Linear pclassifier = trainer3.train(trainset);
    pp.removeTargetPipe();
    pclassifier.setPipe(pp);
    af.setStopIncrement(true);
   
    //Save the classifier to a model file
    pclassifier.saveTo(linearModelFile);
    pclassifier = null;
   
    //Load the classifier from the model file
    Linear cl =Linear.loadFrom(linearModelFile);
   
    //Performance evaluation
    Evaluation eval = new Evaluation(testset);
    eval.eval(cl,1);
    /**
     * Test
     */
    System.out.println("类别 : 文本内容");
    System.out.println("===================");
    count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
     
      Integer gold = (Integer) data.getTarget();
      String pred_label = cl.getStringLabel(data);
      String gold_label = cl.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetliner.getInstance(i).getSource());
View Full Code Here

    String[][] labelsSet = new String[testSet.size()][];
    String[][] targetSet = new String[testSet.size()][];
    LabelAlphabet labels = cl.getAlphabetFactory().buildLabelAlphabet(
        "labels");
    for (int i = 0; i < testSet.size(); i++) {
      Instance carrier = testSet.get(i);
      int[] pred = (int[]) cl.classify(carrier).getLabel(0);
      if (acc) {
        len += pred.length;
        double e = loss.calc(carrier.getTarget(), pred);
        error += e;
        if(e != 0)
          senError++;
        //Evaluate the mixed Chinese-English portion of the corpus
        if(hasENG) {
          String[][] origin = (String[][])carrier.getSource();
          int[] target = (int[])carrier.getTarget();
          for(int j = 0; j < target.length; j++) {
            if(origin[j][0].contains("ENG")) {
              ENG_all++;
              if(target[j] == pred[j])
                ENG_right++;
            }
          }
        }
      }
      labelsSet[i] = labels.lookupString(pred);
      targetSet[i] = labels.lookupString((int[])carrier.getTarget());
    }

    long endtime = System.currentTimeMillis();
    System.out.println("totaltime\t" + (endtime - starttime) / 1000.0);
    System.out.println("feature\t" + (featuretime - starttime) / 1000.0);
View Full Code Here

    pool = Executors.newFixedThreadPool(numThread);
    f= new ArrayList<Future>();
  }

  public void classify(String c) throws Exception{
    Instance inst = new Instance(c);

    ClassifyTask t = new ClassifyTask(inst);
    f.add(pool.submit(t));   
  }
View Full Code Here

    String path=f.getPath();
    int pos=path.lastIndexOf("\\");
    path=path.substring(0, pos);
    pos=path.lastIndexOf("\\");
    path=path.substring(pos+1);
    cur = new Instance(buff.toString(), path);
    cur.setTempData(f.getPath());
    buff = null;
  }
View Full Code Here

     * Classifier usage
     */
    String str = "韦德:不拿冠军就是失败 詹皇:没拿也不意味失败";
    System.out.println("============\n分类:"+ str);
    Pipe p = bayes.getPipe();
    Instance inst = new Instance(str);
    try {
      //Feature transformation
      p.addThruPipe(inst);
    } catch (Exception e) {
      e.printStackTrace();
    }
    String res = bayes.getStringLabel(inst);
    System.out.println("xxx")
    System.out.println("类别:"+ res)
    //Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();
   
    //Use n-gram features
    Pipe ngrampp = new NGram(new int[] {1,2});
    //Word segmentation
//    CWSTagger tag = new CWSTagger("../models/seg.m");
//    Pipe segpp=new CNPipe(tag);
    //Convert character features into dictionary indices
    Pipe indexpp = new StringArray2IndexArray(af);
    Pipe sparsepp=new StringArray2SV(af);
    //Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    //Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,sparsepp});
   
    InstanceSet instset = new InstanceSet(pp,af);
   
    //Use the Reader that matches the file format
    Reader reader = new FileReader(trainDataPath,"UTF-8",".data");
   
    //Read in the data and run it through the processing pipes
    instset.loadThruStagePipes(reader);
    //Split the dataset into a training set and a test set
    float percent = 0.8f;
    InstanceSet[] splitsets = instset.split(percent);
   
    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];

    /**
     * Test
     */
    System.out.println("类别 : 文本内容");
    System.out.println("===================");
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
     
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=bayes.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
//      String pred_label = bayes.getStringLabel(data);
      String gold_label = bayes.getLabel(gold);
     
View Full Code Here

     
      beginTimeIter = System.currentTimeMillis();
      for (int ii = 0; ii < numSamples; ii++) {
       
        k++;
        Instance inst = trainset.getInstance(ii);
        Predict pred = (Predict) inferencer.getBest(inst,2);       
       
        float l = loss.calc(pred.getLabel(0), inst.getTarget());
        if (l > 0) {
          err += l;
          errtot++;
          update.update(inst, weights, k, extraweight, pred.getLabel(0), c);
         
        }else{
          if (pred.size() > 1)
            update.update(inst, weights, k, extraweight, pred.getLabel(1), c);
        }
        cnt += inst.length();
        cnttot++;       

        if (!simpleOutput && progress != 0 && ii % progress == 0) {
          System.out.print('.');
          progress += frac;
View Full Code Here

  public void evaluate(InstanceSet devset) {
    float err = 0;
    float errtot = 0;
    int total = 0;
    for (int i = 0; i < devset.size(); i++) {
      Instance inst = devset.getInstance(i);
      total += inst.length();
      Predict pred = (Predict) inferencer.getBest(inst);
      float l = loss.calc(pred.getLabel(0), inst.getTarget());
      if (l > 0) {
        errtot += 1.0;
        err += l;
      }
View Full Code Here

