Package cc.mallet.pipe.tsf

Source Code of cc.mallet.pipe.tsf.OffsetPropertyConjunctions

/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */




/**
   Create new features from all possible conjunctions with other
   (possibly position-offset) features.

   @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/

package cc.mallet.pipe.tsf;

import java.io.*;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import cc.mallet.util.PropertyList;

public class OffsetPropertyConjunctions extends Pipe implements Serializable
{
  int[][] conjunctions;
  boolean includeOriginalSingletons;
  String propertyKey;
 
  // To include all the old previous singleton features, pass {{0}}
  // For a conjunction at the current time step, pass {{0,0}}
  // For a conjunction of current and previous, pass {{0,-1}}
  // For a conjunction of the current and next two, pass {{0,1,2}}
  private OffsetPropertyConjunctions (boolean includeOriginalSingletons, String propertyKey, int[][] conjunctions)
  {
    this.conjunctions = conjunctions;
    this.includeOriginalSingletons = includeOriginalSingletons;
    this.propertyKey = propertyKey;
  }

  public OffsetPropertyConjunctions (boolean includeOriginalSingletons, int[][] conjunctions)
  {
    this (includeOriginalSingletons, null, conjunctions);
  }
   
  public OffsetPropertyConjunctions (int[][] conjunctions)
  {
    this (true, conjunctions);
  }
 
  public Instance pipe (Instance carrier)
  {
    TokenSequence ts = (TokenSequence) carrier.getData();
    int tsSize = ts.size();
    PropertyList[] oldfs = new PropertyList[ts.size()];
    PropertyList[] newfs = new PropertyList[ts.size()];
    for (int i = 0; i < tsSize; i++)
      oldfs[i] = ts.get(i).getFeatures ();
    if (includeOriginalSingletons)
      for (int i = 0; i < tsSize; i++)
        newfs[i] = ts.get(i).getFeatures ();

    for (int i = 0; i < ts.size(); i++) {
      //System.out.println ("OffsetPropertyConjunctions: ts index="+i+", conjunction =");
      conjunctionList: for (int j = 0; j < conjunctions.length; j++) {
        // Make sure that the offsets in the conjunction are all available at this position
        for (int k = 0; k < conjunctions[j].length; k++) {
          if (conjunctions[j][k] + i < 0
              || conjunctions[j][k] + i > tsSize-1
              || oldfs[i+conjunctions[j][k]] == null)
            continue conjunctionList;
          //System.out.print (" "+conjunctions[j][k]);
        }
        //System.out.print ("\n");

        // Add the features for this conjunction
        if (conjunctions[j].length == 1) {
          int offset = conjunctions[j][0];
          if (offset == 0 && includeOriginalSingletons)
            throw new IllegalArgumentException ("Original singletons already there.");
          PropertyList.Iterator iter = oldfs[i+offset].iterator();
          while (iter.hasNext()) {
            iter.next();
            if (propertyKey != null && !propertyKey.equals(iter.getKey()))
              continue;
            String key = iter.getKey() + (offset==0 ? "" : "@"+offset);
            newfs[i] = PropertyList.add (key, iter.getNumericValue(), newfs[i]);
          }

        } else if (conjunctions[j].length == 2) {
          //System.out.println ("token="+ts.getToken(i).getText()+" conjunctionIndex="+j);
          int offset0 = conjunctions[j][0];
          int offset1 = conjunctions[j][1];
          PropertyList.Iterator iter0 = oldfs[i+offset0].iterator();
          int iter0i = -1;
          while (iter0.hasNext()) {
            iter0i++;
            iter0.next();
            if (propertyKey != null && !propertyKey.equals(iter0.getKey()))
              continue;
            PropertyList.Iterator iter1 = oldfs[i+offset1].iterator();
            int iter1i = -1;
            while (iter1.hasNext()) {
              iter1i++;
              iter1.next();
              if (propertyKey != null && !propertyKey.equals(iter1.getKey()))
                continue;
              // Avoid redundant doubling of feature space; include only upper triangle
              //System.out.println ("off0="+offset0+" off1="+offset1+" iter0i="+iter0i+" iter1i="+iter1i);
              if (offset0 == offset1 && iter1i <= iter0i) continue;
              //System.out.println (">off0="+offset0+" off1="+offset1+" iter0i="+iter0i+" iter1i="+iter1i);
              String key = iter0.getKey() + (offset0==0 ? "" : "@"+offset0)
                           +"&"+iter1.getKey() + (offset1==0 ? "" : "@"+offset1);
              newfs[i] = PropertyList.add (key, iter0.getNumericValue() * iter1.getNumericValue(), newfs[i]);
            }
          }

        } else if (conjunctions[j].length == 3) {
          int offset0 = conjunctions[j][0];
          int offset1 = conjunctions[j][1];
          int offset2 = conjunctions[j][2];
          PropertyList.Iterator iter0 = oldfs[i+offset0].iterator();
          int iter0i = -1;
          while (iter0.hasNext()) {
            iter0i++;
            iter0.next();
            if (propertyKey != null && !propertyKey.equals(iter0.getKey()))
              continue;
            PropertyList.Iterator iter1 = oldfs[i+offset1].iterator();
            int iter1i = -1;
            while (iter1.hasNext()) {
              iter1i++;
              iter1.next();
              if (propertyKey != null && !propertyKey.equals(iter1.getKey()))
                continue;
              // Avoid redundant doubling of feature space; include only upper triangle
              if (offset0 == offset1 && iter1i <= iter0i) continue;
              PropertyList.Iterator iter2 = oldfs[i+offset2].iterator();
              int iter2i = -1;
              while (iter2.hasNext()) {
                iter2i++;
                iter2.next();
                if (propertyKey != null && !propertyKey.equals(iter2.getKey()))
                  continue;
                // Avoid redundant doubling of feature space; include only upper triangle
                if (offset1 == offset2 && iter2i <= iter1i) continue;
                String key = iter0.getKey() + (offset0==0 ? "" : "@"+offset0)
                             +"&"+iter1.getKey() + (offset1==0 ? "" : "@"+offset1)
                             +"&"+iter2.getKey() + (offset2==0 ? "" : "@"+offset2);
                newfs[i] = PropertyList.add (key, iter0.getNumericValue() * iter1.getNumericValue()
                                             * iter2.getNumericValue(), newfs[i]);
              }
            }
          }
        } else {
          throw new UnsupportedOperationException ("Conjunctions of length 4 or more not yet implemented.");
        }
      }
    }

    // Put the new PropertyLists in place
    for (int i = 0; i < ts.size(); i++)
      ts.get(i).setFeatures (newfs[i]);
    return carrier;
  }

  // Serialization
 
  private static final long serialVersionUID = 1;
  private static final int CURRENT_SERIAL_VERSION = 0;
  private static final int NULL_INTEGER = -1;
 
  private void writeObject (ObjectOutputStream out) throws IOException {
    out.writeInt (CURRENT_SERIAL_VERSION);
    int size1, size2;
    size1 = (conjunctions == null) ? NULL_INTEGER : conjunctions.length;
    out.writeInt(size1);
    if (size1 != NULL_INTEGER) {
      for (int i = 0; i <size1; i++) {
        size2 = (conjunctions[i] == null) ? NULL_INTEGER: conjunctions.length;
        out.writeInt(size2);
        if (size2 != NULL_INTEGER) {
          for (int j = 0; j <size2; j++) {
            out.writeInt(conjunctions[i][j]);
          }
        }
      }
    }
    out.writeBoolean(includeOriginalSingletons);
  }
 
  private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
    int size1, size2;
    int version = in.readInt ();
    size1 = in.readInt();;
    if (size1 == NULL_INTEGER) {
      conjunctions = null;
    }
    else {
      conjunctions = new int[size1][];
      for (int i = 0; i < size1; i++) {
        size2 = in.readInt();
        if (size2 == NULL_INTEGER) {
          conjunctions[i] = null;
        }
        else {
          conjunctions[i] = new int[size2];
          for (int j = 0; j < size2; j++) {
            conjunctions[i][j] = in.readInt();
          }
        }
      }
    }
    includeOriginalSingletons = in.readBoolean();
  }

}
TOP

Related Classes of cc.mallet.pipe.tsf.OffsetPropertyConjunctions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.