Package cc.mallet.types

Examples of cc.mallet.types.InstanceList


  // Tests that setWeightsDimensionDensely respects featureSelections
  public void testDenseFeatureSelection() {
    Pipe p = makeSpacePredictionPipe();

    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));

    // Test that dense observation weights aren't added for
    // "default-feature" edges.
    CRF crf1 = new CRF(p, null);
    crf1.addOrderNStates(instances, new int[] { 0 }, null, "start", null,
View Full Code Here


  }

  public void testXis() {
    Pipe p = makeSpacePredictionPipe();

    InstanceList instances = new InstanceList(p);
    instances.addThruPipe(new ArrayIterator(data));

    CRF crf1 = new CRF(p, null);
    crf1.addFullyConnectedStatesForLabels();
    CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood(
        crf1);
    crft1.train(instances, 10); // Let's get some parameters

    Instance inst = instances.get(0);
    Sequence input = (Sequence) inst.getData();
    SumLatticeDefault lattice = new SumLatticeDefault(crf1, input,
        (Sequence) inst.getTarget(), null, true);
    for (int ip = 0; ip < lattice.length() - 1; ip++) {
      for (int i = 0; i < crf1.numStates(); i++) {
View Full Code Here

    double deltaPoints = (double) instances.size();
    int iterations = 0;
    SparseVector clusterMean;

    for (int c = 0; c < numClusters; c++) {
      instanceClusters.add(c, new InstanceList(instancePipe));
    }

    logger.info("Entering KMeans iteration");

    while (deltaMeans > MEANS_TOLERANCE && iterations < MAX_ITER
        && deltaPoints > instances.size() * POINTS_TOLERANCE) {

      iterations++;
      deltaPoints = 0;

      // For each instance, measure its distance to the current cluster
      // means, and subsequently assign it to the closest cluster
      // by adding it to an corresponding instance list
      // The mean of each cluster InstanceList is then updated.
      for (int n = 0; n < instances.size(); n++) {

        instClust = 0;
        instClustDist = Double.MAX_VALUE;

        for (int c = 0; c < numClusters; c++) {
          instDist = metric.distance(clusterMeans.get(c),
              (SparseVector) instances.get(n).getData());

          if (instDist < instClustDist) {
            instClust = c;
            instClustDist = instDist;
          }
        }
        // Add to closest cluster & label it such
        instanceClusters.get(instClust).add(instances.get(n));

        if (clusterLabels[n] != instClust) {
          clusterLabels[n] = instClust;
          deltaPoints++;
        }

      }

      deltaMeans = 0;

      for (int c = 0; c < numClusters; c++) {

        if (instanceClusters.get(c).size() > 0) {
          clusterMean = VectorStats.mean(instanceClusters.get(c));

          deltaMeans += metric.distance(clusterMeans.get(c), clusterMean);

          clusterMeans.set(c, clusterMean);

          instanceClusters.set(c, new InstanceList(instancePipe));

        } else {

          logger.info("Empty cluster found.");

          switch (emptyAction) {
            case EMPTY_ERROR:
              return null;
            case EMPTY_DROP:
              logger.fine("Removing cluster " + c);
              clusterMeans.remove(c);
              instanceClusters.remove(c);
              for (int n = 0; n < instances.size(); n++) {

                assert (clusterLabels[n] != c) : "Cluster size is "
                    + instanceClusters.get(c).size()
                    + "+ yet clusterLabels[n] is " + clusterLabels[n];

                if (clusterLabels[n] > c)
                  clusterLabels[n]--;
              }

              numClusters--;
              c--; // <-- note this trickiness. bad style? maybe.
              // it just means now that we've deleted the entry,
              // we have to repeat the index to get the next entry.
              break;

            case EMPTY_SINGLE:

              // Get the instance the furthest from any centroid
              // and make it a new centroid.

              double newCentroidDist = 0;
              int newCentroid = 0;
              InstanceList cacheList = null;

              for (int clusters = 0; clusters < clusterMeans.size(); clusters++) {
                SparseVector centroid = clusterMeans.get(clusters);
                InstanceList centInstances = instanceClusters.get(clusters);

                // Don't create new empty clusters.

                if (centInstances.size() <= 1)
                  continue;
                for (int n = 0; n < centInstances.size(); n++) {
                  double currentDist = metric.distance(centroid,
                      (SparseVector) centInstances.get(n).getData());
                  if (currentDist > newCentroidDist) {
                    newCentroid = n;
                    newCentroidDist = currentDist;
                    cacheList = centInstances;

View Full Code Here

          MEMM.State source = (MEMM.State) ti.getSourceState();
          if (count != 0) {
            // Create the source state's trainingSet if it doesn't exist yet.
            if (source.trainingSet == null)
              // New InstanceList with a null pipe, because it doesn't do any processing of input.
              source.trainingSet = new InstanceList (null);
            // TODO We should make sure we don't add duplicates (through a second call to setWeightsDimenstion..!
            // TODO Note that when the training data still allows ambiguous outgoing transitions
            // this will add the same FV more than once to the source state's trainingSet, each
            // with >1.0 weight.  Not incorrect, but inefficient.
//            System.out.println ("From: "+source.getName()+" ---> "+getOutput()+" : "+getInput());
View Full Code Here

  public void printInstanceLists ()
  {
    for (int i = 0; i < memm.numStates(); i++) {
      State state = (State) memm.getState (i);
      InstanceList training = state.trainingSet;
      System.out.println ("State "+i+" : "+state.getName());
      if (training == null) {
        System.out.println ("No data");
        continue;
      }
      for (int j = 0; j < training.size(); j++) {
        Instance inst = training.get (j);
        System.out.println ("From : "+state.getName()+" To : "+inst.getTarget());
        System.out.println ("Instance "+j);
        System.out.println (inst.getTarget());
        System.out.println (inst.getData());
      }
View Full Code Here

 
  public InstanceList getInstances () { return this.instances; }

  /** Return an list of instances with a particular label. */
  public InstanceList getCluster(int label) {   
    InstanceList cluster = new InstanceList(instances.getPipe());   
    for (int n=0 ; n<instances.size() ; n++)
      if (labels[n] == label)
        cluster.add(instances.get(n));     
    return cluster;
  }
View Full Code Here

  @Override
  public double[] getEvaluationScores(Clustering truth, Clustering predicted) {
    double precision = 0.0;
    double recall = 0.0;

    InstanceList instances = truth.getInstances();

    for (int i = 0; i < instances.size(); i++) {
      int trueLabel = truth.getLabel(i);
      int predLabel = predicted.getLabel(i);
      int[] trueIndices = truth.getIndicesWithLabel(trueLabel);
      int[] predIndices = predicted.getIndicesWithLabel(predLabel);

      int correct = 0;
      for (int j = 0; j < predIndices.length; j++) {
        for (int k = 0; k < trueIndices.length; k++)
          if (trueIndices[k] == predIndices[j])
            correct++;
      }     
      precision += (double)correct / predIndices.length;
      recall += (double)correct / trueIndices.length;   
    }

    macroPrecision += precision;
    macroRecall += recall;
    macroNumInstances += instances.size();

    precision /= instances.size();
    recall /= instances.size();
    return new double[]{precision, recall, (2 * precision * recall / (precision + recall))};
 
View Full Code Here

   * @param lj
   * @return A new {@link InstanceList} where <code>lj</code> is appended to <code>li</code>.
   */
  public static InstanceList combineLists (InstanceList li,
                                           InstanceList lj) {
    InstanceList newList = new InstanceList(li.getPipe());
    for (int i = 0; i < li.size(); i++)
      newList.add(li.get(i));
    for (int i = 0; i < lj.size(); i++)
      newList.add(lj.get(i));
    return newList;
  }
View Full Code Here

                                          int labeli, int labelj) {
    if (labeli == labelj)
      return clustering;
   
    // Set all labelj labels to labeli.
    InstanceList instances = clustering.getInstances();   
    for (int i = 0; i < instances.size(); i++) {
      int idx = clustering.getLabel(i);
      if (idx == labelj)
        clustering.setLabel(i, labeli);
    }
    clustering.setNumLabels(clustering.getNumClusters() - 1);

    // Decrement cluster indices that are greater than the number of clusters.
    for (int i = 0; i < instances.size(); i++) {
      int idx = clustering.getLabel(i);
      if (idx > labelj)
        clustering.setLabel(i, idx - 1);
    }
   
View Full Code Here

   * @param i
   * @param j
   * @return A new {@link InstanceList} containing the two argument {@link Instance}s.
   */
  public static InstanceList makeList (Instance i, Instance j) {
    InstanceList list = new InstanceList(new Noop(i.getDataAlphabet(), i.getTargetAlphabet()));
    list.add(i);
    list.add(j);
    return list;
  }
View Full Code Here

TOP

Related Classes of cc.mallet.types.InstanceList

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.