Package weka.filters.unsupervised.instance

Source Code of weka.filters.unsupervised.instance.NonSparseToSparse

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
*    NonSparseToSparse.java
*    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
*
*/


package weka.filters.unsupervised.instance;

import java.util.Enumeration;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;

/**
<!-- globalinfo-start -->
* An instance filter that converts all incoming instances into sparse format.
* <p/>
<!-- globalinfo-end -->
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision: 5987 $
*/
public class NonSparseToSparse
  extends Filter
  implements UnsupervisedFilter, StreamableFilter, OptionHandler {

  /** for serialization */
  static final long serialVersionUID = 4694489111366063852L;
 
  protected boolean m_encodeMissingAsZero = false;
 
  protected boolean m_insertDummyNominalFirstValue = false;
 
  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "An instance filter that converts all incoming instances"
      + " into sparse format.";
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);
   
    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);
   
    return result;
  }
 
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector result;
   
    result = new Vector();
    result.add(new Option("\tTreat missing values as zero.",
        "M", 0, "-M"));
    result.add(new Option("\tAdd a dummy first value for nominal attributes.",
        "F", 0, "-F"));
   
    return result.elements();
  }
 
  public void setOptions(String[] options) throws Exception {
    m_encodeMissingAsZero = Utils.getFlag('M', options);
    m_insertDummyNominalFirstValue = Utils.getFlag('F', options);
  }
 
  public String[] getOptions() {
    Vector result = new Vector();
   
    if (m_encodeMissingAsZero) {
      result.add("-M");     
    }
   
    if (m_insertDummyNominalFirstValue) {
      result.add("-F");
    }
   
    return (String[]) result.toArray(new String[result.size()]);
  }
 
  /**
   * Set whether missing values should be treated in the same
   * way as zeros
   *
   * @param m true if missing values are to be treated the same
   * as zeros
   */
  public void setTreatMissingValuesAsZero(boolean m) {
    m_encodeMissingAsZero = m;
  }
 
  /**
   * Get whether missing values are to be treated in the same
   * way as zeros
   *
   * @return true if missing values are to be treated in the
   * same way as zeros
   */
  public boolean getTreatMissingValuesAsZero() {
    return m_encodeMissingAsZero;
  }
 
  /**
   * Returns the tip text for this property
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String treatMissingValuesAsZeroTipText() {
    return "Treat missing values in the same way as zeros.";
  }
 
  /**
   * Set whether to insert a dummy first value in the definition
   * for each nominal attribute or not.
   *
   * @param d true if a dummy value is to be inserted for
   * each nominal attribute.
   */
  public void setInsertDummyNominalFirstValue(boolean d) {
    m_insertDummyNominalFirstValue = d;
  }
 
  /**
   * Get whether a dummy first value will be inserted in the definition
   * of each nominal attribute.
   *
   * @return true if a dummy first value will be inserted for each nominal
   * attribute.
   */
  public boolean getInsertDummyNominalFirstValue() {
    return m_insertDummyNominalFirstValue;
  }
 
  /**
   * Returns the tip text for this property
   *
   * @return            tip text for this property suitable for
   *                    displaying in the explorer/experimenter gui
   */
  public String insertDummyNominalFirstValueTipText() {
    return "Insert a dummy value before the first declared value "
    + "for all nominal attributes. Useful when converting market "
    + "basket data that has been encoded for Apriori to sparse format. "
    + "Typically used in conjuction with treat missing values as zero.";
   
       
  }

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance
   * structure (any instances contained in the object are ignored - only the
   * structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if format cannot be processed
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    super.setInputFormat(instanceInfo);
    Instances instNew = instanceInfo;
   
    if (m_insertDummyNominalFirstValue) {
      FastVector atts = new FastVector();
      for (int i = 0; i < instanceInfo.numAttributes(); i++) {
        if (instanceInfo.attribute(i).isNominal()) {
          FastVector labels = new FastVector();
          labels.addElement("_d");
          for (int j = 0; j < instanceInfo.attribute(j).numValues(); j++) {
            labels.addElement(instanceInfo.attribute(i).value(j));
          }
          Attribute newAtt = new Attribute(instanceInfo.attribute(i).name(),
              labels);
          atts.addElement(newAtt);
        } else {
          atts.addElement(instanceInfo.attribute(i));
        }
      }
      instNew = new Instances(instanceInfo.relationName(), atts, 0);
    }
   
    setOutputFormat(instNew);
    return true;
  }


  /**
   * Input an instance for filtering. Ordinarily the instance is processed
   * and made available for output immediately. Some filters require all
   * instances be read before producing output.
   *
   * @param instance the input instance.
   * @return true if the filtered instance may now be
   * collected with output().
   * @throws IllegalStateException if no input format has been set.
   */
  public boolean input(Instance instance) {

    Instance newInstance = null;
   
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }
   
    if (m_encodeMissingAsZero && !m_insertDummyNominalFirstValue) {
      Instance tempInst = (Instance)instance.copy();
      tempInst.setDataset(getInputFormat());
     
      for (int i = 0; i < tempInst.numAttributes(); i++) {
        if (tempInst.isMissing(i)) {
          tempInst.setValue(i, 0);
        }
      }
      instance = tempInst;
    }
   
    if (m_insertDummyNominalFirstValue) {
      double[] values = instance.toDoubleArray();     
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (instance.attribute(i).isNominal()) {
          if (!Utils.isMissingValue(values[i])) {
            values[i]++;
          }
        }
        if (m_encodeMissingAsZero && Utils.isMissingValue(values[i])) {
          values[i] = 0;
        }
      }
      newInstance = new SparseInstance(instance.weight(), values);
      newInstance.setDataset(getOutputFormat());
      push(newInstance);
    } else {
      newInstance = new SparseInstance(instance);
      newInstance.setDataset(instance.dataset());
      push(newInstance);
    }
   
    /*Instance inst = new SparseInstance(instance);
    inst.setDataset(instance.dataset());
    push(inst); */
    return true;
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String [] argv) {
    runFilter(new NonSparseToSparse(), argv);
  }
}
TOP

Related Classes of weka.filters.unsupervised.instance.NonSparseToSparse

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.