Package: org.fnlp.ml.types.alphabet

Examples of org.fnlp.ml.types.alphabet.IFeatureAlphabet


    }

    //类别集合
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    // 特征集合
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });
View Full Code Here


   
    ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream (
        new GZIPOutputStream (new FileOutputStream(to))));
    AlphabetFactory factory = AlphabetFactory.buildFactory();
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();;
    String s;
    rd.readLine()//version
    List lst = new ArrayList();      //template
    while(true) {
      s = rd.readLine();
      if(s.isEmpty()) break;
      lst.add(s);
    }
    out.writeInt(lst.size());
    Iterator it1 = lst.iterator();
    while(it1.hasNext()) {
      out.writeObject(it1.next());
    }
   
    s = rd.readLine();          //#label
    int nLabel = Integer.parseInt(s);
    System.out.println(nLabel);
    for(int i=0; i<nLabel; i++) {
      s = rd.readLine();        //label
      labels.lookupIndex(s);
    }
    out.writeObject(labels);
    rd.readLine();            //blank line
    rd.readLine();            //#column
    rd.readLine();            //blank line
   
    TreeMap map = new TreeMap();
    String[] arr;
   
    s = rd.readLine();          //#feature
    int nFeature = Integer.parseInt(s);
    System.out.println(nFeature);
    for(int i=0; i<nFeature; i++) {
      s = rd.readLine();        //feature: string offset
      arr = s.split("\t");
      map.put(Integer.parseInt(arr[1]), arr[0]);
    }
   
    rd.readLine();            //blank line
    s = rd.readLine();          //#index
    int nIndex = Integer.parseInt(s);
    System.out.println(nIndex);
    int[] index = new int[nIndex];
    for(int i=0; i<nIndex; i++) {
      s = rd.readLine();        //index of feature weight
      index[i] = Integer.parseInt(s);
    }
   
    rd.readLine();            //blank line
    s = rd.readLine();          //#weight
    int nWeight = Integer.parseInt(s);
    System.out.println(nWeight);
    double[] wt = new double[nWeight];
    for(int i=0; i<nWeight; i++) {
      s = rd.readLine();        //weight
      wt[i] = Double.parseDouble(s);
    }
   
    Iterator<Entry> it2 = map.entrySet().iterator();
    Entry e1, e2;
    int key1 = 0, key2 = 0;
    String v1 = null, v2 = null;
    e1 = it2.next();
    while(e1 != null) {
     
      key1 = (Integer) e1.getKey();
      v1 = (String) e1.getValue();
     
      if(it2.hasNext()) {
        e2 = it2.next();
        key2 = (Integer) e2.getKey();
      } else {
        e2 = null;
        key2 = nIndex;
      }
     
      int ofs = features.lookupIndex(v1, key2-key1);
      e1 = e2;
      System.out.print(key1);
      System.out.print('\t');
      System.out.print(ofs);
      System.out.print('\t');
      System.out.println(v1);
    }
   
    System.out.println(features.size());
    out.writeObject(features);
   
    double[] weights = new double[nIndex];
    for(int i=0; i<nIndex; i++) {
      if(index[i] == -1)
View Full Code Here

      throw new LoadModelException("模型为空");
    }

    factory = getClassifier().getAlphabetFactory();
    labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    featurePipe = new Sequence2FeatureSequence(templets, features,
        labels);
  }
View Full Code Here

  public AbstractTagger(AbstractTagger tagger){
   
    setClassifier(tagger.getClassifier());
    factory = getClassifier().getAlphabetFactory();
    labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    featurePipe = new Sequence2FeatureSequence(templets, features,
        labels);
  }
View Full Code Here

      index.put(key, value);
    }
    int[] idx = index.keys();
    Arrays.sort(idx);
    int length = weights.length;
    IFeatureAlphabet newfeat = new StringFeatureAlphabet();
    cl.getAlphabetFactory().setDefaultFeatureAlphabet(newfeat);
    TFloatArrayList ww = new TFloatArrayList();
    float[] vars = new float[idx.length];
    float[] entropy = new float[idx.length];
    for (int i = 0; i < idx.length; i++) {
      int base = idx[i]; //一个特征段起始位置
      int end; //一个特征段结束位置
      if (i < idx.length - 1)
        end = idx[i + 1]; //对应下一个特征段起始位置
      else
        end  = length; //或者整个结束位置
      int interv = end - base;   //一个特征段长度
      float[] sw = new float[interv];
      for (int j = 0; j < interv; j++) {
        sw[j] = weights[base+j];
      }
      //计算方差
//      System.out.println(MyStrings.toString(sw, " "));
      vars[i] = MyArrays.viarance(sw);
      MyArrays.normalize(sw);
      MyArrays.normalize2Prop(sw);
      entropy[i] = MyArrays.entropy(sw);
      int[] maxe = new int[sw.length];
      for(int iii=0;iii<maxe.length;iii++){
        maxe[iii]=1;
      }
      float maxen = MyArrays.entropy(maxe);
      if (i==0||vars[i]>varsthresh&&entropy[i]<maxen*0.999) {
        String str = index.get(base);
        int id = newfeat.lookupIndex(str, interv);
        for (int j = 0; j < interv; j++) {
          ww.insert(id + j, weights[base + j]);
        }
      }else{
//                System.out.print("."); 
      }
    }
    System.out.println("方差均值:"+MyArrays.average(vars));
    System.out.println("方差非零个数:"+MyArrays.countNoneZero(vars));
    System.out.println("方差直方图:"+MyStrings.toString(MyArrays.histogram(vars, 10)));
//    MyArrays.normalize2Prop(entropy);
    System.out.println("熵均值:"+MyArrays.average(entropy));
    System.out.println("熵非零个数:"+MyArrays.countNoneZero(entropy));
    System.out.println("熵直方图:"+MyStrings.toString(MyArrays.histogram(entropy, 10)));
   
    newfeat.setStopIncrement(freeze);
    cl.setWeights(ww.toArray());

    float[] www = cl.getWeights();
    c = MyArrays.countNoneZero(www);

View Full Code Here

     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    /**
     * 特征字典
     */
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    // 将样本通过Pipe抽取特征
   
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });


    System.out.print("读入训练数据 ...");
    InstanceSet trainSet = new InstanceSet(pipe, factory);

    // 训练集
    trainSet.loadThruStagePipes(new SequenceReader(train, true, "utf8"));
    System.out.println("训练样本个数 " + trainSet.size());
    System.out.println("标签个数: " + labels.size()); //
    System.out.println("特征个数" + features.size());

    // 冻结特征集
    features.setStopIncrement(true);
    labels.setStopIncrement(true);


    // viterbi解码
    HammingLoss loss = new HammingLoss();
    Inferencer inference = new LinearViterbi(templets, labels.size());
    Update update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);


    OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
        features.size(), 50,0.1f);

    Linear cl = trainer.train(trainSet);


    // test data没有标注
View Full Code Here

     * 标签转为0、1、2、...
     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();

    // 将样本通过Pipe抽取特征
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });
    return pipe;
  }
View Full Code Here

    LabelAlphabet labels = factory.DefaultLabelAlphabet();

    // 将样本通过Pipe抽取特征
    // 这里用的重建特征,而Label不需要重建
    // 测试时不需要重建特征
    IFeatureAlphabet features = null;
    if(cl != null)
      features = factory.DefaultFeatureAlphabet();
    else
      features = factory.rebuildFeatureAlphabet("feature");
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);
View Full Code Here

    Pipe pipe = createProcessor(false);
    InstanceSet trainSet = new InstanceSet(pipe, factory);

    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    // 训练集
    trainSet.loadThruStagePipes(new SequenceReader(train,true, "utf8"));

    long endTime = System.currentTimeMillis();
    System.out.println(" done!");
    System.out
    .println("Time escape: " + (endTime - beginTime) / 1000 + "s");
    System.out.println();

    // 输出
    System.out.println("Training Number: " + trainSet.size());

    System.out.println("Label Number: " + labels.size()); // 标签个数
    System.out.println("Feature Number: " + features.size()); // 特征个数

    // 冻结特征集
    features.setStopIncrement(true);
    labels.setStopIncrement(true);

    InstanceSet testSet = null;
    // /////////////////
    if (testfile != null) {

      Pipe tpipe;
      if (false) {// 如果test data没有标注
        tpipe = new SeriesPipes(new Pipe[] { featurePipe });
      } else {
        tpipe = pipe;
      }

      // 测试集
      testSet = new InstanceSet(tpipe);

      testSet.loadThruStagePipes(new SequenceReader(testfile, true, "utf8"));
      System.out.println("Test Number: " + testSet.size()); // 样本个数
    }

    /**
     *
     * 更新参数的准则
     */
    Update update;
    // viterbi解码
    Inferencer inference;
    boolean standard = true;
    HammingLoss loss = new HammingLoss();
    if (standard) {
      inference = new LinearViterbi(templets, labels.size());
      update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);
    } else {
      inference = new HigherOrderViterbi(templets, labels.size());
      update = new HigherOrderViterbiPAUpdate(templets, labels.size(), true);
    }

    OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
        features.size(), iterNum, c1);
   
    trainer.innerOptimized = false;
    trainer.finalOptimized = true;

    cl = trainer.train(trainSet, testSet);
View Full Code Here

    buildInstanceList(dataFile);

    LabelAlphabet postagAlphabet = factory.buildLabelAlphabet("postag");

    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    SFGenerator generator = new SFGenerator();
    Linear[] models = new Linear[postagAlphabet.size()];
    int fsize = features.size();

    for (int i = 0; i < postagAlphabet.size(); i++) {
      String pos = postagAlphabet.lookupString(i);
      InstanceSet instset = readInstanceSet(pos);
      LabelAlphabet alphabet = factory.buildLabelAlphabet(pos);
View Full Code Here

TOP

Related Classes of org.fnlp.ml.types.alphabet.IFeatureAlphabet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.