Package cc.mallet.types

Examples of cc.mallet.types.Alphabet


  }

  public MaxEntOptimizableByLabelDistribution (InstanceList trainingSet, MaxEnt initialClassifier)
  {
    this.trainingList = trainingSet;
    Alphabet fd = trainingSet.getDataAlphabet();
    LabelAlphabet ld = (LabelAlphabet) trainingSet.getTargetAlphabet();
    // Don't fd.stopGrowth, because someone might want to do feature induction
    ld.stopGrowth();
    // Add one feature for the "default feature".
    this.numLabels = ld.size();
    this.numFeatures = fd.size() + 1;
    this.defaultFeatureIndex = numFeatures-1;
    this.parameters = new double [numLabels * numFeatures];
    this.constraints = new double [numLabels * numFeatures];
    this.cachedGradient = new double [numLabels * numFeatures];
    Arrays.fill (parameters, 0.0);
    Arrays.fill (constraints, 0.0);
    Arrays.fill (cachedGradient, 0.0);
    this.featureSelection = trainingSet.getFeatureSelection();
    this.perLabelFeatureSelection = trainingSet.getPerLabelFeatureSelection();
    // Add the default feature index to the selection
    if (featureSelection != null)
      featureSelection.add (defaultFeatureIndex);
    if (perLabelFeatureSelection != null)
      for (int i = 0; i < perLabelFeatureSelection.length; i++)
        perLabelFeatureSelection[i].add (defaultFeatureIndex);
    // xxx Later change this to allow both to be set, but select which one to use by a boolean flag?
    assert (featureSelection == null || perLabelFeatureSelection == null);
    if (initialClassifier != null) {
      this.theClassifier = initialClassifier;
      this.parameters = theClassifier.parameters;
      this.featureSelection = theClassifier.featureSelection;
      this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection;
      this.defaultFeatureIndex = theClassifier.defaultFeatureIndex;
      assert (initialClassifier.getInstancePipe() == trainingSet.getPipe());
    }
    else if (this.theClassifier == null) {
      this.theClassifier = new MaxEnt (trainingSet.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
    }
    cachedValueStale = true;
    cachedGradientStale = true;

    // Initialize the constraints
    logger.fine("Number of instances in training list = " + trainingList.size());
    for (Instance inst : trainingList) {
      double instanceWeight = trainingList.getInstanceWeight(inst);
      Labeling labeling = inst.getLabeling ();
      if (labeling == null)
        continue;
      //logger.fine ("Instance "+ii+" labeling="+labeling);
      FeatureVector fv = (FeatureVector) inst.getData ();
      Alphabet fdict = fv.getAlphabet();
      assert (fv.getAlphabet() == fd);

      // Here is the difference between this code and the single label
      //  version: rather than only picking out the "best" index,
      //  loop over all label indices.
     
      for (int pos = 0; pos < labeling.numLocations(); pos++){
        MatrixOps.rowPlusEquals (constraints, numFeatures,
                     labeling.indexAtLocation(pos),
                     fv,
                     instanceWeight*labeling.valueAtLocation(pos));
      }

      assert(!Double.isNaN(instanceWeight)) : "instanceWeight is NaN";

      boolean hasNaN = false;
      for (int i = 0; i < fv.numLocations(); i++) {
        if (Double.isNaN(fv.valueAtLocation(i))) {
          logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString());
          hasNaN = true;
        }
      }
      if (hasNaN)
        logger.info("NaN in instance: " + inst.getName());
View Full Code Here


    print(System.out);
  }

  public void print (PrintStream out)
  {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();

    // Include the feature weights according to each label
    for (int li = 0; li < numLabels; li++) {
      out.println ("FEATURES FOR CLASS "+labelDict.lookupObject (li));
      out.println (" <default> "+parameters [li*numFeatures + defaultFeatureIndex]);
      for (int i = 0; i < defaultFeatureIndex; i++) {
        Object name = dict.lookupObject (i);
        double weight = parameters [li*numFeatures + i];
        out.println (" "+name+" "+weight);
      }
    }
  }
View Full Code Here

  }

  //printRank, added by Limin Yao
  public void printRank (PrintWriter out)
  {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();
  // Include the feature weights according to each label
    RankedFeatureVector rfv;
    double[] weights = new double[numFeatures-1]; // do not deal with the default feature
    for (int li = 0; li < numLabels; li++) {
View Full Code Here

    }
  }

  public void printExtremeFeatures (PrintWriter out,int num)
  {
    final Alphabet dict = getAlphabet();
    final LabelAlphabet labelDict = getLabelAlphabet();

    int numFeatures = dict.size() + 1;
    int numLabels = labelDict.size();

    // Include the feature weights according to each label
    RankedFeatureVector rfv;
    double[] weights = new double[numFeatures-1]; // do not deal with the default feature
    for (int li = 0; li < numLabels; li++) {
      out.print ("FEATURES FOR CLASS "+labelDict.lookupObject (li) + " ");
      for (int i = 0; i < defaultFeatureIndex; i++) {
        Object name = dict.lookupObject (i);
        double weight = parameters [li*numFeatures + i];
        weights[i] = weight;
      }
      rfv = new RankedFeatureVector(dict,weights);
      rfv.printTopK(out,num);
View Full Code Here

        model.setTopicDisplay(0,0);
        model.setNumIterations(2000);
        model.estimate();

        // Get the results
        Alphabet dataAlphabet = instances.getDataAlphabet();
        ArrayList<TopicAssignment> assignments = model.getData();

        // Convert the results into comprehensible topics
        for (int topicNum = 0; topicNum < model.getNumTopics(); topicNum++) {
            TreeSet<IDSorter> sortedWords = model.getSortedWords().get(topicNum);
View Full Code Here

TOP

Related Classes of cc.mallet.types.Alphabet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.