Examples of LabelAlphabet


Examples of cc.mallet.types.LabelAlphabet

  /** Tests how serializing labels separately can lead to big losses.
   *   This currently fails.  I'm not sure what to do about this. -cas
   */
  public void testReadResolve () throws IOException, ClassNotFoundException
  {
    LabelAlphabet dict = new LabelAlphabet ();
    dict.lookupIndex ("TEST1");
    dict.lookupIndex ("TEST2");
    dict.lookupIndex ("TEST3");


    Label t1 = dict.lookupLabel ("TEST1");
    Labelee l = new Labelee (dict, t1);
    Labelee l2 = (Labelee) TestSerializable.cloneViaSerialization (l);

    assertTrue (l.dict == l2.dict);
    assertTrue (dict.lookupLabel("TEST1") == l.theLabel);
    assertTrue (dict.lookupLabel("TEST1") == l2.theLabel);
    assertTrue (l.theLabel == l2.theLabel);
  }
View Full Code Here

Examples of cc.mallet.types.LabelAlphabet

    double approxMatchThreshold;

    public ClusteringPipe(int[] exactMatchFields, int[] approxMatchFields,
        int[] substringMatchFields) {
      super(new Alphabet(), new LabelAlphabet());
      this.exactMatchFields = exactMatchFields;
      this.approxMatchFields = approxMatchFields;
      this.substringMatchFields = substringMatchFields;
    }
View Full Code Here

Examples of cc.mallet.types.LabelAlphabet

      features = addSubstringMatch(records, fieldAlph, valueAlph, features);
      carrier
          .setData(new FeatureVector(getDataAlphabet(), features,
              true));

      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      String label = (original.getLabel(cluster1[0]) == original
          .getLabel(cluster2[0])) ? "YES" : "NO";
      carrier.setTarget(ldict.lookupLabel(label));     
      return carrier;
    }
View Full Code Here

Examples of cc.mallet.types.LabelAlphabet

  }

  @Override
  public void print(PrintWriter out) {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();

    // Include the feature weights according to each label
    for (int li = 0; li < numLabels; li++) {
      out.println ("FEATURES FOR CLASS "+labelDict.lookupObject (li));
      out.println (" <default> "+parameters [li*numFeatures + defaultFeatureIndex]);
      for (int i = 0; i < defaultFeatureIndex; i++) {
        Object name = dict.lookupObject (i);
        double weight = parameters [li*numFeatures + i];
        out.println (" "+name+" "+weight);
View Full Code Here

Examples of cc.mallet.types.LabelAlphabet

  //printRank, added by Limin Yao
  public void printRank (PrintWriter out)
  {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();
  // Include the feature weights according to each label
    RankedFeatureVector rfv;
    double[] weights = new double[numFeatures-1]; // do not deal with the default feature
    for (int li = 0; li < numLabels; li++) {
      out.print ("FEATURES FOR CLASS "+labelDict.lookupObject (li) + " ");
      for (int i = 0; i < defaultFeatureIndex; i++) {
        double weight = parameters [li*numFeatures + i];
        weights[i] = weight;
      }
      rfv = new RankedFeatureVector(dict,weights);
View Full Code Here

Examples of cc.mallet.types.LabelAlphabet

  }

  public void printExtremeFeatures (PrintWriter out,int num)
  {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();

    // Include the feature weights according to each label
    RankedFeatureVector rfv;
    double[] weights = new double[numFeatures-1]; // do not deal with the default feature
    for (int li = 0; li < numLabels; li++) {
      out.print ("FEATURES FOR CLASS "+labelDict.lookupObject (li) + " ");
      for (int i = 0; i < defaultFeatureIndex; i++) {
        Object name = dict.lookupObject (i);
        double weight = parameters [li*numFeatures + i];
        weights[i] = weight;
      }
View Full Code Here

Examples of org.fnlp.ml.types.alphabet.LabelAlphabet

      templets = new TempletGroup();
      templets.load(templateFile);
    }

    //类别集合
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    // 特征集合
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();

    featurePipe = new Sequence2FeatureSequence(templets, features, labels);
View Full Code Here

Examples of org.fnlp.ml.types.alphabet.LabelAlphabet

          new InputStreamReader(new FileInputStream(from), "gbk"));
   
    ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream (
        new GZIPOutputStream (new FileOutputStream(to))));
    AlphabetFactory factory = AlphabetFactory.buildFactory();
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();;
    String s;
    rd.readLine()//version
    List lst = new ArrayList();      //template
    while(true) {
      s = rd.readLine();
      if(s.isEmpty()) break;
      lst.add(s);
    }
    out.writeInt(lst.size());
    Iterator it1 = lst.iterator();
    while(it1.hasNext()) {
      out.writeObject(it1.next());
    }
   
    s = rd.readLine();          //#label
    int nLabel = Integer.parseInt(s);
    System.out.println(nLabel);
    for(int i=0; i<nLabel; i++) {
      s = rd.readLine();        //label
      labels.lookupIndex(s);
    }
    out.writeObject(labels);
    rd.readLine();            //blank line
    rd.readLine();            //#column
    rd.readLine();            //blank line
View Full Code Here

Examples of org.fnlp.ml.types.alphabet.LabelAlphabet

    int len = 0;
    Loss loss = new HammingLoss();

    String[][] predictSet = new String[testSet.size()][];
    String[][] goldSet = new String[testSet.size()][];
    LabelAlphabet la = cl.getAlphabetFactory().DefaultLabelAlphabet();
    for (int i = 0; i < testSet.size(); i++) {
      Instance carrier = testSet.get(i);
      int[] pred = (int[]) cl.classify(carrier).getLabel(0);
      if (hasLabel) {
        len += pred.length;
        float e = loss.calc(carrier.getTarget(), pred);
        error += e;
        if(e != 0)
          senError++;

      }
      predictSet[i] = la.lookupString(pred);
      if(hasLabel)
        goldSet[i] = la.lookupString((int[])carrier.getTarget());
    }

    long endtime = System.currentTimeMillis();
    System.out.println("总时间:\t" + (endtime - starttime) / 1000.0);
    System.out.println("抽取特征时间:\t" + (featuretime - starttime) / 1000.0);
View Full Code Here

Examples of org.fnlp.ml.types.alphabet.LabelAlphabet

    factory = AlphabetFactory.buildFactory();

    /**
     * 标签字典。转为0、1、2、...
     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    /**
     * 特征字典
     */
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    // 将样本通过Pipe抽取特征
   
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });


    System.out.print("读入训练数据 ...");
    InstanceSet trainSet = new InstanceSet(pipe, factory);

    // 训练集
    trainSet.loadThruStagePipes(new SequenceReader(train, true, "utf8"));
    System.out.println("训练样本个数 " + trainSet.size());
    System.out.println("标签个数: " + labels.size()); //
    System.out.println("特征个数" + features.size());

    // 冻结特征集
    features.setStopIncrement(true);
    labels.setStopIncrement(true);


    // viterbi解码
    HammingLoss loss = new HammingLoss();
    Inferencer inference = new LinearViterbi(templets, labels.size());
    Update update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);


    OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
        features.size(), 50,0.1f);

    Linear cl = trainer.train(trainSet);


    // test data没有标注
    Pipe tpipe = featurePipe;
    // 测试集
    InstanceSet testSet = new InstanceSet(tpipe);

    testSet.loadThruPipes(new SequenceReader(testfile, false, "utf8"));
    System.out.println("测试样本个数: " + testSet.size()); //
    String[][] labelsSet = new String[testSet.size()][];
    for (int i = 0; i < testSet.size(); i++) {
      Instance carrier = testSet.get(i);
      int[] pred = (int[]) cl.classify(carrier).getLabel(0);
      labelsSet[i] = labels.lookupString(pred);
    }
   
    String s = SimpleFormatter.format(testSet, labelsSet);
    System.out.println(s);
    System.out.println("Done");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.