Package cc.mallet.types

Examples of cc.mallet.types.FeatureVector


      Variable var2 = lblseq.varOfIndex (t + 1, lvl2);
      assert var1 != null : "Couldn't get label factor "+lvl1+" time "+t;
      assert var2 != null : "Couldn't get label factor "+lvl2+" time "+(t+1);

      Variable[] vars = new Variable[] { var1, var2 };
      FeatureVector fv = fvs.getFeatureVector (t);
      ACRF.UnrolledVarSet vs = new ACRF.UnrolledVarSet (graph, this, vars, fv);
      graph.addClique (vs);
    }
  }
View Full Code Here


      PropertyList features = null;
      features = addExactMatch(records, fieldAlph, valueAlph, features);
      features = addApproxMatch(records, fieldAlph, valueAlph, features);
      features = addSubstringMatch(records, fieldAlph, valueAlph, features);
      carrier
          .setData(new FeatureVector(getDataAlphabet(), features,
              true));

      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      String label = (original.getLabel(cluster1[0]) == original
          .getLabel(cluster2[0])) ? "YES" : "NO";
View Full Code Here

      for (int fi = 0; fi < exactMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && exactMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(exactMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(exactMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                if (valsj.contains(valueAlph.lookupObject(valsi
                    .indexAtLocation(ii)))) {
                  matches++;
                  break;
                }
              }
View Full Code Here

      for (int fi = 0; fi < approxMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && approxMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(approxMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(approxMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                String si = (String) valueAlph
                    .lookupObject(valsi.indexAtLocation(ii));
                for (int jj = 0; jj < valsj.numLocations(); jj++) {
                  String sj = (String) valueAlph
                      .lookupObject(valsj
                          .indexAtLocation(jj));
                  if (Strings.levenshteinDistance(si, sj) < approxMatchThreshold) {
                    matches++;
                    break;
                  }
View Full Code Here

      for (int fi = 0; fi < substringMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && substringMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(substringMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(substringMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                String si = (String) valueAlph
                .lookupObject(valsi.indexAtLocation(ii));
                if (si.length() < 2) break;
                for (int jj = 0; jj < valsj.numLocations(); jj++) {
                  String sj = (String) valueAlph
                      .lookupObject(valsj
                          .indexAtLocation(jj));
                  if (sj.length() > 2 && (si.contains(si) || sj.contains(si))) {
                    matches++;
                    break;
                  }
View Full Code Here

      return getLeaf (node.child0, fv);
  }

  public Classification classify (Instance instance)
  {
    FeatureVector fv = (FeatureVector) instance.getData ();
    assert (instancePipe == null || fv.getAlphabet () == this.instancePipe.getDataAlphabet ());
   
    Node leaf = getLeaf (root, fv);
    return new Classification (instance, this, leaf.labeling);
  }
View Full Code Here

        throw new IllegalStateException ("Frozen.  Cannot split.");
      InstanceList ilist0 = new InstanceList (ilist.getPipe());
      InstanceList ilist1 = new InstanceList (ilist.getPipe());
      for (int i = 0; i < ilist.size(); i++) {
        Instance instance = ilist.get(i);
        FeatureVector fv = (FeatureVector) instance.getData ();
        // xxx What test should this be?  What to do with negative values?
          // Whatever is decided here should also go in InfoGain.calcInfoGains()
        if (fv.value (featureIndex) != 0) {
          //System.out.println ("list1 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i));
          ilist1.add (instance, ilist.getInstanceWeight(i));
        } else {
          //System.out.println ("list0 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i));
          ilist0.add (instance, ilist.getInstanceWeight(i));
View Full Code Here

      // loop through all instances
      for (int ii = 0; ii < trainingList.size(); ii++) {
        Instance inst = trainingList.get(ii);
        Labeling labeling = inst.getLabeling ();
        FeatureVector fv = (FeatureVector) inst.getData();
        int fvisize = fv.numLocations();
        int correctIndex = labeling.getBestIndex();
        Arrays.fill(results, 0);

        // compute dot(x, wi) for each class i
        for(int lpos = 0; lpos < numLabels; lpos++) {
          for(int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            double vi = fv.valueAtLocation(fvi);
            results[lpos] += vi * m_weights[lpos][fi];
          }

          // This extra value comes from the extra
          // "feature" present in all examples
          results[lpos] += m_weights[lpos][numFeats];
        }

        // Get indices of the classes with the 2 highest dot products
        int predictedIndex = 0;
        int secondHighestIndex = 0;
        double max = Double.MIN_VALUE;
        double secondMax = Double.MIN_VALUE;
        for (int i = 0; i < numLabels; i++) {
          if (results[i] > max) {
            secondMax = max;
            max = results[i];
            secondHighestIndex = predictedIndex;
            predictedIndex = i;
          }
          else if (results[i] > secondMax) {
            secondMax = results[i];
            secondHighestIndex = i;
          }
        }

        // Adjust weights if this example is mispredicted
        // or just barely correct
        if (predictedIndex != correctIndex) {
          for (int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            m_weights[predictedIndex][fi] *= (1 - epsilon);
            m_weights[correctIndex][fi] *= (1 + epsilon);
          }
          m_weights[predictedIndex][numFeats] *= (1 - epsilon);
          m_weights[correctIndex][numFeats] *= (1 + epsilon);
        }
        else if (max/secondMax - 1 < m_delta) {
          for (int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            m_weights[secondHighestIndex][fi] *= (1 - epsilon);
            m_weights[correctIndex][fi] *= (1 + epsilon);
          }
          m_weights[secondHighestIndex][numFeats] *= (1 - epsilon);
          m_weights[correctIndex][numFeats] *= (1 + epsilon);
View Full Code Here

    //        int numFeatures = getAlphabet().size() + 1;
    int numFeatures = this.defaultFeatureIndex + 1;

    int numLabels = getLabelAlphabet().size();
    assert (scores.length == numLabels);
    FeatureVector fv = (FeatureVector) instance.getData ();
    // Make sure the feature vector's feature dictionary matches
    // what we are expecting from our data pipe (and thus our notion
    // of feature probabilities.
    assert (fv.getAlphabet ()
        == this.instancePipe.getDataAlphabet ());

    // Include the feature weights according to each label
    for (int li = 0; li < numLabels; li++) {
      scores[li] = parameters[li*numFeatures + defaultFeatureIndex]
View Full Code Here

            {
               StringBuffer buf = new StringBuffer();
              for (int a = 0; a < k; a++)
                 buf.append(outputs[a].get(j).toString()).append(" ");
              if (includeInput) {
                FeatureVector fv = (FeatureVector)input.get(j);
                buf.append(fv.toString(true));               
              }
              System.out.println(buf.toString());
            }
            System.out.println();
          }
View Full Code Here

TOP

Related Classes of cc.mallet.types.FeatureVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.