/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* SubsetByExpression.java
* Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
*/
package weka.filters.unsupervised.instance;
import weka.core.Capabilities;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.SimpleBatchFilter;
import weka.filters.unsupervised.instance.subsetbyexpression.Parser;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* Filters instances according to a user-specified expression.<br/>
* <br/>
* Grammar:<br/>
* <br/>
* boolexpr_list ::= boolexpr_list boolexpr_part | boolexpr_part;<br/>
* <br/>
* boolexpr_part ::= boolexpr:e {: parser.setResult(e); :} ;<br/>
* <br/>
* boolexpr ::= BOOLEAN <br/>
* | true<br/>
* | false<br/>
* | expr < expr<br/>
* | expr <= expr<br/>
* | expr > expr<br/>
* | expr >= expr<br/>
* | expr = expr<br/>
* | ( boolexpr )<br/>
* | not boolexpr<br/>
* | boolexpr and boolexpr<br/>
* | boolexpr or boolexpr<br/>
* | ATTRIBUTE is STRING<br/>
* ;<br/>
* <br/>
* expr ::= NUMBER<br/>
* | ATTRIBUTE<br/>
* | ( expr )<br/>
* | opexpr<br/>
* | funcexpr<br/>
* ;<br/>
* <br/>
* opexpr ::= expr + expr<br/>
* | expr - expr<br/>
* | expr * expr<br/>
* | expr / expr<br/>
* ;<br/>
* <br/>
* funcexpr ::= abs ( expr )<br/>
* | sqrt ( expr )<br/>
* | log ( expr )<br/>
* | exp ( expr )<br/>
* | sin ( expr )<br/>
* | cos ( expr )<br/>
* | tan ( expr )<br/>
* | rint ( expr )<br/>
* | floor ( expr )<br/>
* | pow ( expr for base , expr for exponent )<br/>
* | ceil ( expr )<br/>
* ;<br/>
* <br/>
* Notes:<br/>
* - NUMBER<br/>
* any integer or floating point number <br/>
* (but not in scientific notation!)<br/>
* - STRING<br/>
* any string surrounded by single quotes; <br/>
* the string may not contain a single quote though.<br/>
* - ATTRIBUTE<br/>
* the following placeholders are recognized for <br/>
* attribute values:<br/>
* - CLASS for the class value in case a class attribute is set.<br/>
* - ATTxyz with xyz a number from 1 to # of attributes in the<br/>
* dataset, representing the value of indexed attribute.<br/>
* <br/>
* Examples:<br/>
* - extracting only mammals and birds from the 'zoo' UCI dataset:<br/>
* (CLASS is 'mammal') or (CLASS is 'bird')<br/>
* - extracting only animals with at least 2 legs from the 'zoo' UCI dataset:<br/>
* (ATT14 >= 2)<br/>
* - extracting only instances with non-missing 'wage-increase-second-year'<br/>
* from the 'labor' UCI dataset:<br/>
* not ismissing(ATT3)<br/>
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -E <expr>
* The expression to use for filtering
* (default: true).</pre>
*
<!-- options-end -->
*
* @author fracpete (fracpete at waikato dot ac dot nz)
* @version $Revision: 6113 $
*/
public class SubsetByExpression
extends SimpleBatchFilter {
/** for serialization. */
private static final long serialVersionUID = 5628686110979589602L;
/** the expresion to use for filtering. */
protected String m_Expression = "true";
/**
* Returns a string describing this filter.
*
* @return a description of the filter suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return
"Filters instances according to a user-specified expression.\n\n"
+ "Grammar:\n\n"
+ "boolexpr_list ::= boolexpr_list boolexpr_part | boolexpr_part;\n"
+ "\n"
+ "boolexpr_part ::= boolexpr:e {: parser.setResult(e); :} ;\n"
+ "\n"
+ "boolexpr ::= BOOLEAN \n"
+ " | true\n"
+ " | false\n"
+ " | expr < expr\n"
+ " | expr <= expr\n"
+ " | expr > expr\n"
+ " | expr >= expr\n"
+ " | expr = expr\n"
+ " | ( boolexpr )\n"
+ " | not boolexpr\n"
+ " | boolexpr and boolexpr\n"
+ " | boolexpr or boolexpr\n"
+ " | ATTRIBUTE is STRING\n"
+ " ;\n"
+ "\n"
+ "expr ::= NUMBER\n"
+ " | ATTRIBUTE\n"
+ " | ( expr )\n"
+ " | opexpr\n"
+ " | funcexpr\n"
+ " ;\n"
+ "\n"
+ "opexpr ::= expr + expr\n"
+ " | expr - expr\n"
+ " | expr * expr\n"
+ " | expr / expr\n"
+ " ;\n"
+ "\n"
+ "funcexpr ::= abs ( expr )\n"
+ " | sqrt ( expr )\n"
+ " | log ( expr )\n"
+ " | exp ( expr )\n"
+ " | sin ( expr )\n"
+ " | cos ( expr )\n"
+ " | tan ( expr )\n"
+ " | rint ( expr )\n"
+ " | floor ( expr )\n"
+ " | pow ( expr for base , expr for exponent )\n"
+ " | ceil ( expr )\n"
+ " ;\n"
+ "\n"
+ "Notes:\n"
+ "- NUMBER\n"
+ " any integer or floating point number \n"
+ " (but not in scientific notation!)\n"
+ "- STRING\n"
+ " any string surrounded by single quotes; \n"
+ " the string may not contain a single quote though.\n"
+ "- ATTRIBUTE\n"
+ " the following placeholders are recognized for \n"
+ " attribute values:\n"
+ " - CLASS for the class value in case a class attribute is set.\n"
+ " - ATTxyz with xyz a number from 1 to # of attributes in the\n"
+ " dataset, representing the value of indexed attribute.\n"
+ "\n"
+ "Examples:\n"
+ "- extracting only mammals and birds from the 'zoo' UCI dataset:\n"
+ " (CLASS is 'mammal') or (CLASS is 'bird')\n"
+ "- extracting only animals with at least 2 legs from the 'zoo' UCI dataset:\n"
+ " (ATT14 >= 2)\n"
+ "- extracting only instances with non-missing 'wage-increase-second-year'\n"
+ " from the 'labor' UCI dataset:\n"
+ " not ismissing(ATT3)\n"
;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector result;
result = new Vector();
result.addElement(new Option(
"\tThe expression to use for filtering\n"
+ "\t(default: true).",
"E", 1, "-E <expr>"));
return result.elements();
}
/**
* Parses a given list of options. <p/>
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -E <expr>
* The expression to use for filtering
* (default: true).</pre>
*
<!-- options-end -->
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String tmpStr;
tmpStr = Utils.getOption('E', options);
if (tmpStr.length() != 0)
setExpression(tmpStr);
else
setExpression("true");
if (getInputFormat() != null)
setInputFormat(getInputFormat());
}
/**
* Gets the current settings of the filter.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
Vector<String> result;
result = new Vector();
result.add("-E");
result.add("" + getExpression());
return result.toArray(new String[result.size()]);
}
/**
* Returns the Capabilities of this filter.
*
* @return the capabilities of this object
* @see Capabilities
*/
public Capabilities getCapabilities() {
Capabilities result = super.getCapabilities();
result.disableAll();
// attributes
result.enable(Capability.NOMINAL_ATTRIBUTES);
result.enable(Capability.NUMERIC_ATTRIBUTES);
result.enable(Capability.DATE_ATTRIBUTES);
result.enable(Capability.MISSING_VALUES);
// class
result.enable(Capability.NOMINAL_CLASS);
result.enable(Capability.NUMERIC_CLASS);
result.enable(Capability.DATE_CLASS);
result.enable(Capability.MISSING_CLASS_VALUES);
result.enable(Capability.NO_CLASS);
return result;
}
/**
* Sets the expression used for filtering.
*
* @param value the expression
*/
public void setExpression(String value) {
m_Expression = value;
}
/**
* Returns the expression used for filtering.
*
* @return the expression
*/
public String getExpression() {
return m_Expression;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String expressionTipText() {
return "The expression to used for filtering the dataset.";
}
/**
* Determines the output format based on the input format and returns
* this.
*
* @param inputFormat the input format to base the output format on
* @return the output format
* @throws Exception in case the determination goes wrong
*/
protected Instances determineOutputFormat(Instances inputFormat)
throws Exception {
return new Instances(inputFormat, 0);
}
/**
* Processes the given data (may change the provided dataset) and returns
* the modified version. This method is called in batchFinished().
*
* @param instances the data to process
* @return the modified data
* @throws Exception in case the processing goes wrong
* @see #batchFinished()
*/
protected Instances process(Instances instances) throws Exception {
if (!isFirstBatchDone())
return Parser.filter(m_Expression, instances);
else
return instances;
}
/**
* Returns the revision string.
*
* @return the revision
*/
public String getRevision() {
return RevisionUtils.extract("$Revision: 6113 $");
}
/**
* Main method for running this filter.
*
* @param args arguments for the filter: use -h for help
*/
public static void main(String[] args) {
runFilter(new SubsetByExpression(), args);
}
}