Package cc.mallet.types

Examples of cc.mallet.types.Alphabet


        LinkedHashSet<Integer> stateIndices = new LinkedHashSet<Integer>();
        stateIndices.add(labelIndex);
        labelToState.put(labelIndex, stateIndices);
      }
    } else {
      stateAlphabet = new Alphabet();

      labelIter = labelAlphabet.iterator();
      while (labelIter.hasNext()) {
        String label = (String) labelIter.next();
        labelToState.put(labelAlphabet.lookupIndex(label, false),
View Full Code Here


   
   
   
    if (p.isTargetProcessing())
    {
      Alphabet targets = p.getTargetAlphabet();
      StringBuffer buf = new StringBuffer("Labels:");
      for (int i = 0; i < targets.size(); i++)
        buf.append(" ").append(targets.lookupObject(i).toString());
      logger.info(buf.toString());
    }
    if (trainOption.value)
    {
      crf = train(trainingData, testData, eval,
View Full Code Here

     * Creates a new
     * <code>SimpleTaggerSentence2FeatureVectorSequence</code> instance.
     */
    public SimpleTaggerSentence2FeatureVectorSequence ()
    {
      // Pass a newly created Alphabet and a newly created LabelAlphabet to the
      // superclass constructor.
      // NOTE(review): presumably these become the data and target alphabets,
      // in that order -- confirm against the superclass constructor.
      super (new Alphabet(), new LabelAlphabet());
    }
View Full Code Here

    }

    public Instance pipe (Instance carrier)
    {
      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;
      if (inputData instanceof String)
        tokens = parseSentence((String)inputData);
      else if (inputData instanceof String[][])
        tokens = (String[][])inputData;
      else
        throw new IllegalArgumentException("Not a String or String[][]; got "+inputData);
      FeatureVector[] fvs = new FeatureVector[tokens.length];
      if (isTargetProcessing())
      {
        labels = (LabelAlphabet)getTargetAlphabet();
        target = new LabelSequence (labels, tokens.length);
      }
      for (int l = 0; l < tokens.length; l++) {
        int nFeatures;
        if (isTargetProcessing())
        {
          if (tokens[l].length < 1)
            throw new IllegalStateException ("Missing label at line " + l + " instance "+carrier.getName ());
          nFeatures = tokens[l].length - 1;
          target.add(tokens[l][nFeatures]);
        }
        else nFeatures = tokens[l].length;
        ArrayList<Integer> featureIndices = new ArrayList<Integer>();
        for (int f = 0; f < nFeatures; f++) {
          int featureIndex = features.lookupIndex(tokens[l][f]);
          // gdruck
          // If the data alphabet's growth is stopped, featureIndex
          // will be -1.  Ignore these features.
          if (featureIndex >= 0) {
            featureIndices.add(featureIndex);
View Full Code Here

public class SvmLight2FeatureVectorAndLabel extends Pipe {

  private static final long serialVersionUID = 1L;
 
  /**
   * Creates a new <code>SvmLight2FeatureVectorAndLabel</code> pipe, passing a
   * newly created Alphabet and a newly created LabelAlphabet to the
   * superclass constructor.
   */
  public SvmLight2FeatureVectorAndLabel () {
    // NOTE(review): presumably the first argument is the data alphabet and the
    // second the target alphabet -- confirm against Pipe's constructor.
    super (new Alphabet(), new LabelAlphabet());
  }
View Full Code Here

   
   
   
    if (p.isTargetProcessing())
    {
      Alphabet targets = p.getTargetAlphabet();
      StringBuffer buf = new StringBuffer("Labels:");
      for (int i = 0; i < targets.size(); i++)
        buf.append(" ").append(targets.lookupObject(i).toString());
      logger.info(buf.toString());
    }
    if (trainOption.value)
    {
      crf = train(trainingData, testData, eval,
View Full Code Here

   * @return ArrayList with the int indices of the selected features.
   */
  public static ArrayList<Integer> selectTopLDAFeatures(int numSelFeatures, ParallelTopicModel lda, Alphabet alphabet) {
    ArrayList<Integer> features = new ArrayList<Integer>();

    Alphabet seqAlphabet = lda.getAlphabet();
   
    int numTopics = lda.getNumTopics();
   
    Object[][] sorted = lda.getTopWords(seqAlphabet.size());

    for (int pos = 0; pos < seqAlphabet.size(); pos++) {
      for (int ti = 0; ti < numTopics; ti++) {
        Object feat = sorted[ti][pos].toString();
        int fi = alphabet.lookupIndex(feat,false);
        if ((fi >=0) && (!features.contains(fi))) {
          logger.info("Selected feature: " + feat);
View Full Code Here

     * Creates a new
     * <code>SimpleTaggerSentence2FeatureVectorSequence</code> instance.
     */
    public SimpleTaggerSentence2FeatureVectorSequence ()
    {
      // Pass a newly created Alphabet and a newly created LabelAlphabet to the
      // superclass constructor.
      // NOTE(review): presumably these become the data and target alphabets,
      // in that order -- confirm against the superclass constructor.
      super (new Alphabet(), new LabelAlphabet());
    }
View Full Code Here

    }

    public Instance pipe (Instance carrier)
    {
      Object inputData = carrier.getData();
      Alphabet features = getDataAlphabet();
      LabelAlphabet labels;
      LabelSequence target = null;
      String [][] tokens;
      if (inputData instanceof String)
        tokens = parseSentence((String)inputData);
      else if (inputData instanceof String[][])
        tokens = (String[][])inputData;
      else
        throw new IllegalArgumentException("Not a String or String[][]; got "+inputData);
      FeatureVector[] fvs = new FeatureVector[tokens.length];
      if (isTargetProcessing())
      {
        labels = (LabelAlphabet)getTargetAlphabet();
        target = new LabelSequence (labels, tokens.length);
      }
      for (int l = 0; l < tokens.length; l++) {
        int nFeatures;
        if (isTargetProcessing())
        {
          if (tokens[l].length < 1)
            throw new IllegalStateException ("Missing label at line " + l + " instance "+carrier.getName ());
          nFeatures = tokens[l].length - 1;
          target.add(tokens[l][nFeatures]);
        }
        else nFeatures = tokens[l].length;
        ArrayList<Integer> featureIndices = new ArrayList<Integer>();
        for (int f = 0; f < nFeatures; f++) {
          int featureIndex = features.lookupIndex(tokens[l][f]);
          // gdruck
          // If the data alphabet's growth is stopped, featureIndex
          // will be -1.  Ignore these features.
          if (featureIndex >= 0) {
            featureIndices.add(featureIndex);
View Full Code Here

    public double [] initialWeights; // indexed by state index
    public double [] finalWeights; // indexed by state index
   
    /**
     * Construct a new empty Factors with a new empty weightAlphabet,
     * 0-length initialWeights and finalWeights, and the other arrays null.
     */
    public Factors () {
      // Newly created alphabet assigned to the weightAlphabet field.
      weightAlphabet = new Alphabet();
      // Both per-state weight arrays start out with length zero.
      initialWeights = new double[0];
      finalWeights = new double[0];
      // Leave the rest as null.  They will get set later by addState() and addWeight()
      // Alternatively, we could create zero-length arrays
    }
View Full Code Here

TOP

Related Classes of cc.mallet.types.Alphabet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.