Package cc.mallet.fst.semi_supervised.constraints

Source Code of cc.mallet.fst.semi_supervised.constraints.TwoLabelGEConstraints

/* Copyright (C) 2010 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.fst.semi_supervised.constraints;

import gnu.trove.TIntArrayList;
import gnu.trove.TIntIntHashMap;

import java.util.ArrayList;
import java.util.BitSet;

import cc.mallet.fst.SumLattice;
import cc.mallet.fst.semi_supervised.StateLabelMap;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;

/**
* A set of constraints on distributions over pairs of consecutive
* labels conditioned on the presence of input features.   
*
* Subclasses are to be used with GE.
*
* Multiple constraints are grouped together here
* to make things more efficient.
*
* @author Gregory Druck
*/
public abstract class TwoLabelGEConstraints implements GEConstraint {

  protected ArrayList<TwoLabelGEConstraint> constraintsList;
  protected TIntIntHashMap constraintsMap;
  protected StateLabelMap map;
  protected TIntArrayList cache;

  public TwoLabelGEConstraints() {
    this.constraintsList = new ArrayList<TwoLabelGEConstraint>();
    this.constraintsMap = new TIntIntHashMap();
    this.map = null;
    this.cache = new TIntArrayList();
  }
 
  protected TwoLabelGEConstraints(ArrayList<TwoLabelGEConstraint> constraintsList, TIntIntHashMap constraintsMap, StateLabelMap map) {
    this.constraintsList = constraintsList;
    this.constraintsMap = constraintsMap;
    this.map = map;
    this.cache = new TIntArrayList();
  }
 
  /**
   * @param fi Input feature index
   * @param target Target distribution over pairs of labels
   * @param weight Weight of this constraint
   */
  public abstract void addConstraint(int fi, double[][] target, double weight);
 
  public boolean isOneStateConstraint() {
    return false;
  }
 
  public void setStateLabelMap(StateLabelMap map) {
    this.map = map;
  }
 
  public void preProcess(FeatureVector fv) {
    cache.resetQuick();
    int fi;
    for (int loc = 0; loc < fv.numLocations(); loc++) {
      fi = fv.indexAtLocation(loc);
      if (constraintsMap.containsKey(fi)) {
        cache.add(constraintsMap.get(fi));
      }
    }
  }
 
  public BitSet preProcess(InstanceList data) {
    // count
    BitSet bitSet = new BitSet(data.size());
    int ii = 0;
    for (Instance instance : data) {
      FeatureVectorSequence fvs = (FeatureVectorSequence)instance.getData();
      for (int ip = 1; ip < fvs.size(); ip++) {
        for (int fi : constraintsMap.keys()) {
          // binary constraint features
          if (fvs.get(ip).location(fi) >= 0) {
            constraintsList.get(constraintsMap.get(fi)).count += 1;
            bitSet.set(ii);
          }
        }
      }
      ii++;
    }
    return bitSet;
  }   
 
  public double getCompositeConstraintFeatureValue(FeatureVector fv, int ip, int si1, int si2) {
    // to avoid complications with the start state,
    // only consider transitions into states at
    // position >= 1
    if (ip == 0) {
      return 0;
    }
   
    double value = 0;
    int li1 = map.getLabelIndex(si1);
    if (li1 == StateLabelMap.START_LABEL) {
      return 0;
    }
   
    int li2 = map.getLabelIndex(si2);
    for (int i = 0; i < cache.size(); i++) {
      value += constraintsList.get(cache.getQuick(i)).getValue(li1,li2);
    }
    return value;
  }

  public abstract double getValue();

  public void zeroExpectations() {
    for (TwoLabelGEConstraint constraint : constraintsList) {
      constraint.expectation = new double[map.getNumLabels()][map.getNumLabels()];
    }
  }
 
  public void computeExpectations(ArrayList<SumLattice> lattices) {
    double[][][] xis;
    TIntArrayList cache = new TIntArrayList();
    for (int i = 0; i < lattices.size(); i++) {
      if (lattices.get(i) == null) { continue; }
      FeatureVectorSequence fvs = (FeatureVectorSequence)lattices.get(i).getInput();
      SumLattice lattice = lattices.get(i);
      xis = lattice.getXis();
      for (int ip = 1; ip < fvs.size(); ++ip) {
        cache.resetQuick();
        FeatureVector fv = fvs.getFeatureVector(ip);
        int fi;
        for (int loc = 0; loc < fv.numLocations(); loc++) {
          fi = fv.indexAtLocation(loc);
          // binary constraint features
          if (constraintsMap.containsKey(fi)) {
            cache.add(constraintsMap.get(fi));
          }
        }
        for (int prev = 0; prev < map.getNumStates(); ++prev) {
          int liPrev = map.getLabelIndex(prev);
          if (liPrev != StateLabelMap.START_LABEL) {
            for (int curr = 0; curr < map.getNumStates(); ++curr) {
              int liCurr = map.getLabelIndex(curr);
              if (liCurr != StateLabelMap.START_LABEL) {
                double prob = Math.exp(xis[ip][prev][curr]);
                for (int j = 0; j < cache.size(); j++) {
                  constraintsList.get(cache.getQuick(j)).expectation[liPrev][liCurr] += prob;
                }
              }
            }
          }
        }
      }
    }
  }
 
  protected abstract class TwoLabelGEConstraint {
   
    protected double[][] target;
    protected double[][] expectation;
    protected double count;
    protected double weight;
   
    public TwoLabelGEConstraint(double[][] target, double weight) {
      this.target = target;
      this.weight = weight;
      this.expectation = null;
      this.count = 0;
    }
   
    public abstract double getValue(int liPrev, int liCurr);
  }
}
TOP

Related Classes of cc.mallet.fst.semi_supervised.constraints.TwoLabelGEConstraints

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.