Package cc.mallet.types

Examples of cc.mallet.types.FeatureVector


      Variable var2 = lblseq.varOfIndex (t + 1, lvl2);
      assert var1 != null : "Couldn't get label factor "+lvl1+" time "+t;
      assert var2 != null : "Couldn't get label factor "+lvl2+" time "+(t+1);

      Variable[] vars = new Variable[] { var1, var2 };
      FeatureVector fv = fvs.getFeatureVector (t);
      ACRF.UnrolledVarSet vs = new ACRF.UnrolledVarSet (graph, this, vars, fv);
      graph.addClique (vs);
    }
  }
View Full Code Here


      PropertyList features = null;
      features = addExactMatch(records, fieldAlph, valueAlph, features);
      features = addApproxMatch(records, fieldAlph, valueAlph, features);
      features = addSubstringMatch(records, fieldAlph, valueAlph, features);
      carrier
          .setData(new FeatureVector(getDataAlphabet(), features,
              true));

      LabelAlphabet ldict = (LabelAlphabet) getTargetAlphabet();
      String label = (original.getLabel(cluster1[0]) == original
          .getLabel(cluster2[0])) ? "YES" : "NO";
View Full Code Here

      for (int fi = 0; fi < exactMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && exactMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(exactMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(exactMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                if (valsj.contains(valueAlph.lookupObject(valsi
                    .indexAtLocation(ii)))) {
                  matches++;
                  break;
                }
              }
View Full Code Here

      for (int fi = 0; fi < approxMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && approxMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(approxMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(approxMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                String si = (String) valueAlph
                    .lookupObject(valsi.indexAtLocation(ii));
                for (int jj = 0; jj < valsj.numLocations(); jj++) {
                  String sj = (String) valueAlph
                      .lookupObject(valsj
                          .indexAtLocation(jj));
                  if (Strings.levenshteinDistance(si, sj) < approxMatchThreshold) {
                    matches++;
                    break;
                  }
View Full Code Here

      for (int fi = 0; fi < substringMatchFields.length; fi++) {
        int matches = 0;
        int comparisons = 0;
        for (int i = 0; i < records.length
            && substringMatchFields.length > 0; i++) {
          FeatureVector valsi = records[i]
              .values(substringMatchFields[fi]);
          for (int j = i + 1; j < records.length && valsi != null; j++) {
            FeatureVector valsj = records[j]
                .values(substringMatchFields[fi]);
            if (valsj != null) {
              comparisons++;
              for (int ii = 0; ii < valsi.numLocations(); ii++) {
                String si = (String) valueAlph
                .lookupObject(valsi.indexAtLocation(ii));
                if (si.length() < 2) break;
                for (int jj = 0; jj < valsj.numLocations(); jj++) {
                  String sj = (String) valueAlph
                      .lookupObject(valsj
                          .indexAtLocation(jj));
                  if (sj.length() > 2 && (si.contains(si) || sj.contains(si))) {
                    matches++;
                    break;
                  }
View Full Code Here

      return getLeaf (node.child0, fv);
  }

  public Classification classify (Instance instance)
  {
    FeatureVector fv = (FeatureVector) instance.getData ();
    assert (instancePipe == null || fv.getAlphabet () == this.instancePipe.getDataAlphabet ());
   
    Node leaf = getLeaf (root, fv);
    return new Classification (instance, this, leaf.labeling);
  }
View Full Code Here

        throw new IllegalStateException ("Frozen.  Cannot split.");
      InstanceList ilist0 = new InstanceList (ilist.getPipe());
      InstanceList ilist1 = new InstanceList (ilist.getPipe());
      for (int i = 0; i < ilist.size(); i++) {
        Instance instance = ilist.get(i);
        FeatureVector fv = (FeatureVector) instance.getData ();
        // xxx What test should this be?  What to do with negative values?
          // Whatever is decided here should also go in InfoGain.calcInfoGains()
        if (fv.value (featureIndex) != 0) {
          //System.out.println ("list1 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i));
          ilist1.add (instance, ilist.getInstanceWeight(i));
        } else {
          //System.out.println ("list0 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i));
          ilist0.add (instance, ilist.getInstanceWeight(i));
View Full Code Here

      // loop through all instances
      for (int ii = 0; ii < trainingList.size(); ii++) {
        Instance inst = trainingList.get(ii);
        Labeling labeling = inst.getLabeling ();
        FeatureVector fv = (FeatureVector) inst.getData();
        int fvisize = fv.numLocations();
        int correctIndex = labeling.getBestIndex();
        Arrays.fill(results, 0);

        // compute dot(x, wi) for each class i
        for(int lpos = 0; lpos < numLabels; lpos++) {
          for(int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            double vi = fv.valueAtLocation(fvi);
            results[lpos] += vi * m_weights[lpos][fi];
          }

          // This extra value comes from the extra
          // "feature" present in all examples
          results[lpos] += m_weights[lpos][numFeats];
        }

        // Get indices of the classes with the 2 highest dot products
        int predictedIndex = 0;
        int secondHighestIndex = 0;
        double max = Double.MIN_VALUE;
        double secondMax = Double.MIN_VALUE;
        for (int i = 0; i < numLabels; i++) {
          if (results[i] > max) {
            secondMax = max;
            max = results[i];
            secondHighestIndex = predictedIndex;
            predictedIndex = i;
          }
          else if (results[i] > secondMax) {
            secondMax = results[i];
            secondHighestIndex = i;
          }
        }

        // Adjust weights if this example is mispredicted
        // or just barely correct
        if (predictedIndex != correctIndex) {
          for (int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            m_weights[predictedIndex][fi] *= (1 - epsilon);
            m_weights[correctIndex][fi] *= (1 + epsilon);
          }
          m_weights[predictedIndex][numFeats] *= (1 - epsilon);
          m_weights[correctIndex][numFeats] *= (1 + epsilon);
        }
        else if (max/secondMax - 1 < m_delta) {
          for (int fvi = 0; fvi < fvisize; fvi++) {
            int fi = fv.indexAtLocation(fvi);
            m_weights[secondHighestIndex][fi] *= (1 - epsilon);
            m_weights[correctIndex][fi] *= (1 + epsilon);
          }
          m_weights[secondHighestIndex][numFeats] *= (1 - epsilon);
          m_weights[correctIndex][numFeats] *= (1 + epsilon);
View Full Code Here

    //        int numFeatures = getAlphabet().size() + 1;
    int numFeatures = this.defaultFeatureIndex + 1;

    int numLabels = getLabelAlphabet().size();
    assert (scores.length == numLabels);
    FeatureVector fv = (FeatureVector) instance.getData ();
    // Make sure the feature vector's feature dictionary matches
    // what we are expecting from our data pipe (and thus our notion
    // of feature probabilities.
    assert (fv.getAlphabet ()
        == this.instancePipe.getDataAlphabet ());

    // Include the feature weights according to each label
    for (int li = 0; li < numLabels; li++) {
      scores[li] = parameters[li*numFeatures + defaultFeatureIndex]
View Full Code Here

            {
               StringBuffer buf = new StringBuffer();
              for (int a = 0; a < k; a++)
                 buf.append(outputs[a].get(j).toString()).append(" ");
              if (includeInput) {
                FeatureVector fv = (FeatureVector)input.get(j);
                buf.append(fv.toString(true));               
              }
              System.out.println(buf.toString());
            }
            System.out.println();
          }
View Full Code Here

TOP

Related Classes of cc.mallet.types.FeatureVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.