Package weka.filters.unsupervised.attribute

Source Code of weka.filters.unsupervised.attribute.PropositionalToMultiInstance

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* PropositionalToMultiInstance.java
* Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
*
*/

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RelationalLocator;
import weka.core.RevisionUtils;
import weka.core.StringLocator;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
<!-- globalinfo-start -->
* Converts the propositional instance dataset into multi-instance dataset (with relational attribute). When normalize or standardize a multi-instance dataset, a MIToSingleInstance filter can be applied first to convert the multi-instance dataset into propositional instance dataset. After normalization or standardization, may use this PropositionalToMultiInstance filter to convert the data back to multi-instance format.<br/>
* <br/>
* Note: the first attribute of the original propositional instance dataset must be a nominal attribute which is expected to be bagId attribute.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -S &lt;num&gt;
*  The seed for the randomization of the order of bags. (default 1)</pre>
*
* <pre> -R
*  Randomizes the order of the produced bags after the generation. (default off)</pre>
*
<!-- options-end -->
*
* @author Lin Dong (ld21@cs.waikato.ac.nz)
* @version $Revision: 1.7 $
* @see MultiInstanceToPropositional
*/
public class PropositionalToMultiInstance
  extends Filter
  implements OptionHandler, UnsupervisedFilter {

  /** for serialization */
  private static final long serialVersionUID = 5825873573912102482L;

  /** the seed for randomizing, default is 1 */
  protected int m_Seed = 1;
 
  /** whether to randomize the output data */
  protected boolean m_Randomize = false;

  /** Indices of string attributes in the bag */
  protected StringLocator m_BagStringAtts = null;

  /** Indices of relational attributes in the bag */
  protected RelationalLocator m_BagRelAtts = null;
 
  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return 
        "Converts the propositional instance dataset into multi-instance "
      + "dataset (with relational attribute). When normalize or standardize a "
      + "multi-instance dataset, a MIToSingleInstance filter can be applied "
      + "first to convert the multi-instance dataset into propositional "
      + "instance dataset. After normalization or standardization, may use "
      + "this PropositionalToMultiInstance filter to convert the data back to "
      + "multi-instance format.\n\n"
      + "Note: the first attribute of the original propositional instance "
      + "dataset must be a nominal attribute which is expected to be bagId "
      + "attribute.";

  }

  /**
   * Returns an enumeration describing the available options
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector result = new Vector();
 
    result.addElement(new Option(
        "\tThe seed for the randomization of the order of bags."
        + "\t(default 1)",
        "S", 1, "-S <num>"));
 
    result.addElement(new Option(
        "\tRandomizes the order of the produced bags after the generation."
        + "\t(default off)",
        "R", 0, "-R"));
 
    return result.elements();
  }


  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -S &lt;num&gt;
   *  The seed for the randomization of the order of bags. (default 1)</pre>
   *
   * <pre> -R
   *  Randomizes the order of the produced bags after the generation. (default off)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String        tmpStr;
   
    setRandomize(Utils.getFlag('R', options));
   
    tmpStr = Utils.getOption('S', options);
    if (tmpStr.length() != 0)
      setSeed(Integer.parseInt(tmpStr));
    else
      setSeed(1);
  }

  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
    Vector        result;
   
    result = new Vector();
   
    result.add("-S");
    result.add("" + getSeed());
   
    if (m_Randomize)
      result.add("-R");

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns the tip text for this property
   *
   * @return     tip text for this property suitable for
   *       displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The random seed used by the random number generator";
  }

  /**
   * Sets the new seed for randomizing the order of the generated data
   *
   * @param value     the new seed value
   */
  public void setSeed(int value) {
    m_Seed = value;
  }
 
  /**
   * Returns the current seed value for randomizing the order of the generated
   * data
   *
   * @return          the current seed value
   */
  public int getSeed() {
    return m_Seed;
  }
 
  /**
   * Sets whether the order of the generated data is randomized
   *
   * @param value     whether to randomize or not
   */
  public void setRandomize(boolean value) {
    m_Randomize = value;
  }
 
  /**
   * Gets whether the order of the generated is randomized
   *
   * @return      true if the order is randomized
   */
  public boolean getRandomize() {
    return m_Randomize;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String randomizeTipText() {
    return "Whether the order of the generated data is randomized.";
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.STRING_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);
   
    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);
   
    return result;
  }
 
  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input
   * instance structure (any instances contained in the object are
   * ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the input format can't be set
   * successfully
   */
  public boolean setInputFormat(Instances instanceInfo)
    throws Exception {

    if (instanceInfo.attribute(0).type()!= Attribute.NOMINAL) {
      throw new Exception("The first attribute type of the original propositional instance dataset must be Nominal!");
    }
    super.setInputFormat(instanceInfo);

    /* create a new output format (multi-instance format) */
    Instances newData = instanceInfo.stringFreeStructure();
    Attribute attBagIndex = (Attribute) newData.attribute(0).copy();
    Attribute attClass = (Attribute) newData.classAttribute().copy();
    // remove the bagIndex attribute
    newData.deleteAttributeAt(0);
    // remove the class attribute
    newData.setClassIndex(-1);
    newData.deleteAttributeAt(newData.numAttributes() - 1);

    FastVector attInfo = new FastVector(3);
    attInfo.addElement(attBagIndex);
    attInfo.addElement(new Attribute("bag", newData)); // relation-valued attribute
    attInfo.addElement(attClass);
    Instances data = new Instances("Multi-Instance-Dataset", attInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    super.setOutputFormat(data.stringFreeStructure());

    m_BagStringAtts = new StringLocator(data.attribute(1).relation());
    m_BagRelAtts    = new RelationalLocator(data.attribute(1).relation());
   
    return true;
  }

  /**
   * adds a new bag out of the given data and adds it to the output
   *
   * @param input       the intput dataset
   * @param output      the dataset this bag is added to
   * @param bagInsts    the instances in this bag
   * @param bagIndex    the bagIndex of this bag
   * @param classValue  the associated class value
   * @param bagWeight   the weight of the bag
   */
  protected void addBag(
      Instances input,
      Instances output,
      Instances bagInsts,
      int bagIndex,
      double classValue,
      double bagWeight) {
   
    // copy strings/relational values
    for (int i = 0; i < bagInsts.numInstances(); i++) {
      RelationalLocator.copyRelationalValues(
    bagInsts.instance(i), false,
    input, m_InputRelAtts,
    bagInsts, m_BagRelAtts);

      StringLocator.copyStringValues(
    bagInsts.instance(i), false,
    input, m_InputStringAtts,
    bagInsts, m_BagStringAtts);
    }
   
    int value = output.attribute(1).addRelation(bagInsts);
    Instance newBag = new Instance(output.numAttributes());       
    newBag.setValue(0, bagIndex);
    newBag.setValue(2, classValue);
    newBag.setValue(1, value);
    newBag.setWeight(bagWeight);
    newBag.setDataset(output);
    output.add(newBag);
  }

  /**
   * Adds an output instance to the queue. The derived class should use this
   * method for each output instance it makes available.
   *
   * @param instance the instance to be added to the queue.
   */
  protected void push(Instance instance) {
    if (instance != null) {
      super.push(instance);
      // set correct references
    }
  }
 
  /**
   * Signify that this batch of input to the filter is finished.
   * If the filter requires all instances prior to filtering,
   * output() may now be called to retrieve the filtered instances.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   */
  public boolean batchFinished() {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    Instances input = getInputFormat();
    input.sort(0);   // make sure that bagID is sorted
    Instances output = getOutputFormat();
    Instances bagInsts = output.attribute(1).relation();
    Instance inst = new Instance(bagInsts.numAttributes());
    inst.setDataset(bagInsts);

    double bagIndex   = input.instance(0).value(0);
    double classValue = input.instance(0).classValue();
    double bagWeight  = 0.0;

    // Convert pending input instances
    for(int i = 0; i < input.numInstances(); i++) {
      double currentBagIndex = input.instance(i).value(0);

      // copy the propositional instance value, except the bagIndex and the class value
      for (int j = 0; j < input.numAttributes() - 2; j++)
        inst.setValue(j, input.instance(i).value(j + 1));
      inst.setWeight(input.instance(i).weight());

      if (currentBagIndex == bagIndex){
        bagInsts.add(inst);
        bagWeight += inst.weight();
      }
      else{
        addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

        bagInsts   = bagInsts.stringFreeStructure()
        bagInsts.add(inst);
        bagIndex   = currentBagIndex;
        classValue = input.instance(i).classValue();
        bagWeight  = inst.weight();
      }
    }

    // reach the last instance, create and add the last bag
    addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

    if (getRandomize())
      output.randomize(new Random(getSeed()));
   
    for (int i = 0; i < output.numInstances(); i++)
      push(output.instance(i));
   
    // Free memory
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
   
    return (numPendingOutput() != 0);
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.7 $");
  }

  /**
   * Main method for running this filter.
   *
   * @param args should contain arguments to the filter:
   * use -h for help
   */
  public static void main(String[] args) {
    runFilter(new PropositionalToMultiInstance(), args);
  }
}
TOP

Related Classes of weka.filters.unsupervised.attribute.PropositionalToMultiInstance

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.