Package weka.attributeSelection

Source Code of weka.attributeSelection.ConsistencySubsetEval$hashKey

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
*    ConsistencySubsetEval.java
*    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
*
*/

package  weka.attributeSelection;

import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.filters.Filter;
import weka.filters.supervised.attribute.Discretize;

import java.io.Serializable;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;

/**
<!-- globalinfo-start -->
* ConsistencySubsetEval :<br/>
* <br/>
* Evaluates the worth of a subset of attributes by the level of consistency in the class values when the training instances are projected onto the subset of attributes. <br/>
* <br/>
* Consistency of any subset can never be lower than that of the full set of attributes, hence the usual practice is to use this subset evaluator in conjunction with a Random or Exhaustive search which looks for the smallest subset with consistency equal to that of the full set of attributes.<br/>
* <br/>
* For more information see:<br/>
* <br/>
* H. Liu, R. Setiono: A probabilistic approach to feature selection - A filter solution. In: 13th International Conference on Machine Learning, 319-327, 1996.
* <p/>
<!-- globalinfo-end -->
*
<!-- technical-bibtex-start -->
* BibTeX:
* <pre>
* &#64;inproceedings{Liu1996,
*    author = {H. Liu and R. Setiono},
*    booktitle = {13th International Conference on Machine Learning},
*    pages = {319-327},
*    title = {A probabilistic approach to feature selection - A filter solution},
*    year = {1996}
* }
* </pre>
* <p/>
<!-- technical-bibtex-end -->
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.17 $
* @see Discretize
*/
public class ConsistencySubsetEval
  extends ASEvaluation
  implements SubsetEvaluator,
             TechnicalInformationHandler {
 
  /** for serialization */
  static final long serialVersionUID = -2880323763295270402L;
 
  /** training instances */
  private Instances m_trainInstances;

  /** class index */
  private int m_classIndex;

  /** number of attributes in the training data */
  private int m_numAttribs;

  /** number of instances in the training data */
  private int m_numInstances;

  /** Discretise numeric attributes */
  private Discretize m_disTransform;

  /** Hash table for evaluating feature subsets */
  private Hashtable m_table;

  /**
   * Class providing keys to the hash table.
   */
  public class hashKey
    implements Serializable, RevisionHandler {
   
    /** for serialization */
    static final long serialVersionUID = 6144138512017017408L;
   
    /** Array of attribute values for an instance */
    private double [] attributes;
   
    /** True for an index if the corresponding attribute value is missing. */
    private boolean [] missing;

    /** The key */
    private int key;

    /**
     * Constructor for a hashKey
     *
     * @param t an instance from which to generate a key
     * @param numAtts the number of attributes
     * @throws Exception if something goes wrong
     */
    public hashKey(Instance t, int numAtts) throws Exception {

      int i;
      int cindex = t.classIndex();

      key = -999;
      attributes = new double [numAtts];
      missing = new boolean [numAtts];
      for (i=0;i<numAtts;i++) {
  if (i == cindex) {
    missing[i] = true;
  } else {
    if ((missing[i] = t.isMissing(i)) == false) {
      attributes[i] = t.value(i);
    }
  }
      }
    }

    /**
     * Convert a hash entry to a string
     *
     * @param t the set of instances
     * @param maxColWidth width to make the fields
     * @return the hash entry as string
     */
    public String toString(Instances t, int maxColWidth) {

      int i;
      int cindex = t.classIndex();
      StringBuffer text = new StringBuffer();
     
      for (i=0;i<attributes.length;i++) {
  if (i != cindex) {
    if (missing[i]) {
      text.append("?");
      for (int j=0;j<maxColWidth;j++) {
        text.append(" ");
      }
    } else {
      String ss = t.attribute(i).value((int)attributes[i]);
      StringBuffer sb = new StringBuffer(ss);
     
      for (int j=0;j < (maxColWidth-ss.length()+1); j++) {
    sb.append(" ");
      }
      text.append(sb);
    }
  }
      }
      return text.toString();
    }

    /**
     * Constructor for a hashKey
     *
     * @param t an array of feature values
     */
    public hashKey(double [] t) {

      int i;
      int l = t.length;

      key = -999;
      attributes = new double [l];
      missing = new boolean [l];
      for (i=0;i<l;i++) {
  if (t[i] == Double.MAX_VALUE) {
    missing[i] = true;
  } else {
    missing[i] = false;
    attributes[i] = t[i];
  }
      }
    }
   
    /**
     * Calculates a hash code
     *
     * @return the hash code as an integer
     */
    public int hashCode() {

      int hv = 0;
     
      if (key != -999)
  return key;
      for (int i=0;i<attributes.length;i++) {
  if (missing[i]) {
    hv += (i*13);
  } else {
    hv += (i * 5 * (attributes[i]+1));
  }
      }
      if (key == -999) {
  key = hv;
      }
      return hv;
    }

    /**
     * Tests if two instances are equal
     *
     * @param b a key to compare with
     * @return true if the objects are equal
     */
    public boolean equals(Object b) {
     
      if ((b == null) || !(b.getClass().equals(this.getClass()))) {
        return false;
      }
      boolean ok = true;
      boolean l;
      if (b instanceof hashKey) {
  hashKey n = (hashKey)b;
  for (int i=0;i<attributes.length;i++) {
    l = n.missing[i];
    if (missing[i] || l) {
      if ((missing[i] && !l) || (!missing[i] && l)) {
        ok = false;
        break;
      }
    } else {
      if (attributes[i] != n.attributes[i]) {
        ok = false;
        break;
      }
    }
  }
      } else {
  return false;
      }
      return ok;
    }
   
    /**
     * Prints the hash code
     */
    public void print_hash_code() {
     
      System.out.println("Hash val: "+hashCode());
    }
   
    /**
     * Returns the revision string.
     *
     * @return    the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.17 $");
    }
  }

  /**
   * Returns a string describing this search method
   * @return a description of the search suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "ConsistencySubsetEval :\n\nEvaluates the worth of a subset of "
      +"attributes by the level of consistency in the class values when the "
      +"training instances are projected onto the subset of attributes. "
      +"\n\nConsistency of any subset can never be lower than that of the "
      +"full set of attributes, hence the usual practice is to use this "
      +"subset evaluator in conjunction with a Random or Exhaustive search "
      +"which looks for the smallest subset with consistency equal to that "
      +"of the full set of attributes.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation   result;
   
    result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR, "H. Liu and R. Setiono");
    result.setValue(Field.TITLE, "A probabilistic approach to feature selection - A filter solution");
    result.setValue(Field.BOOKTITLE, "13th International Conference on Machine Learning");
    result.setValue(Field.YEAR, "1996");
    result.setValue(Field.PAGES, "319-327");
   
    return result;
  }

  /**
   * Constructor. Calls restOptions to set default options
   **/
  public ConsistencySubsetEval () {
    resetOptions();
  }

  /**
   * reset to defaults
   */
  private void resetOptions () {
    m_trainInstances = null;
  }

  /**
   * Returns the capabilities of this evaluator.
   *
   * @return            the capabilities of this evaluator
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
   
    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);
   
    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
   
    return result;
  }

  /**
   * Generates a attribute evaluator. Has to initialize all fields of the
   * evaluator that are not being set via options.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the evaluator has not been
   * generated successfully
   */
  public void buildEvaluator (Instances data) throws Exception {
   
    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = new Instances(data);
    m_trainInstances.deleteWithMissingClass();
    m_classIndex = m_trainInstances.classIndex();
    m_numAttribs = m_trainInstances.numAttributes();
    m_numInstances = m_trainInstances.numInstances();

    m_disTransform = new Discretize();
    m_disTransform.setUseBetterEncoding(true);
    m_disTransform.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, m_disTransform);
  }

  /**
   * Evaluates a subset of attributes
   *
   * @param subset a bitset representing the attribute subset to be
   * evaluated
   * @throws Exception if the subset could not be evaluated
   */
  public double evaluateSubset (BitSet subset) throws Exception {
    int [] fs;
    int i;
    int count = 0;

    for (i=0;i<m_numAttribs;i++) {
      if (subset.get(i)) {
  count++;
      }
    }

    double [] instArray = new double[count];
    int index = 0;
    fs = new int[count];
    for (i=0;i<m_numAttribs;i++) {
      if (subset.get(i)) {
  fs[index++] = i;
      }
    }
   
    // create new hash table
    m_table = new Hashtable((int)(m_numInstances * 1.5));
   
    for (i=0;i<m_numInstances;i++) {
      Instance inst = m_trainInstances.instance(i);
      for (int j=0;j<fs.length;j++) {
  if (fs[j] == m_classIndex) {
    throw new Exception("A subset should not contain the class!");
  }
  if (inst.isMissing(fs[j])) {
    instArray[j] = Double.MAX_VALUE;
  } else {
    instArray[j] = inst.value(fs[j]);
  }
      }
      insertIntoTable(inst, instArray);
    }

    return consistencyCount();
  }

  /**
   * calculates the level of consistency in a dataset using a subset of
   * features. The consistency of a hash table entry is the total number
   * of instances hashed to that location minus the number of instances in
   * the largest class hashed to that location. The total consistency is
   * 1.0 minus the sum of the individual consistencies divided by the
   * total number of instances.
   * @return the consistency of the hash table as a value between 0 and 1.
   */
  private double consistencyCount() {
    Enumeration e = m_table.keys();
    double [] classDist;
    double count = 0.0;
   
    while (e.hasMoreElements()) {
      hashKey tt = (hashKey)e.nextElement();
      classDist = (double []) m_table.get(tt);
      count += Utils.sum(classDist);
      int max = Utils.maxIndex(classDist);
      count -= classDist[max];
    }

    count /= (double)m_numInstances;
    return (1.0 - count);
  }

  /**
   * Inserts an instance into the hash table
   *
   * @param inst instance to be inserted
   * @param instA the instance to be inserted as an array of attribute
   * values.
   * @throws Exception if the instance can't be inserted
   */
  private void insertIntoTable(Instance inst, double [] instA)
       throws Exception {

    double [] tempClassDist2;
    double [] newDist;
    hashKey thekey;

    thekey = new hashKey(instA);

    // see if this one is already in the table
    tempClassDist2 = (double []) m_table.get(thekey);
    if (tempClassDist2 == null) {
      newDist = new double [m_trainInstances.classAttribute().numValues()];
      newDist[(int)inst.classValue()] = inst.weight();
     
      // add to the table
      m_table.put(thekey, newDist);
    } else {
      // update the distribution for this instance
      tempClassDist2[(int)inst.classValue()]+=inst.weight();
     
      // update the table
      m_table.put(thekey, tempClassDist2);
    }
  }

  /**
   * returns a description of the evaluator
   * @return a description of the evaluator as a String.
   */
  public String toString() {
    StringBuffer text = new StringBuffer();

    if (m_trainInstances == null) {
      text.append("\tConsistency subset evaluator has not been built yet\n");
    }
    else {
      text.append("\tConsistency Subset Evaluator\n");
    }

    return text.toString();
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.17 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    runEvaluator(new ConsistencySubsetEval(), args);
  }
}
TOP

Related Classes of weka.attributeSelection.ConsistencySubsetEval$hashKey

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.