/*
* Encog(tm) Core v3.0 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2011 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.wizard;
import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.encog.app.analyst.AnalystError;
import org.encog.app.analyst.AnalystFileFormat;
import org.encog.app.analyst.AnalystGoal;
import org.encog.app.analyst.EncogAnalyst;
import org.encog.app.analyst.missing.DiscardMissing;
import org.encog.app.analyst.missing.HandleMissingValues;
import org.encog.app.analyst.script.AnalystScript;
import org.encog.app.analyst.script.DataField;
import org.encog.app.analyst.script.normalize.AnalystField;
import org.encog.app.analyst.script.prop.ScriptProperties;
import org.encog.app.analyst.script.segregate.AnalystSegregateTarget;
import org.encog.app.analyst.script.task.AnalystTask;
import org.encog.ml.factory.MLMethodFactory;
import org.encog.ml.factory.MLTrainFactory;
import org.encog.util.arrayutil.NormalizationAction;
import org.encog.util.file.FileUtil;
/**
* The Encog Analyst Wizard can be used to create Encog Analyst script files
* from a CSV file. This class is typically used by the Encog Workbench, but it
* can easily be used from any program to create a starting point for an Encog
* Analyst Script.
*
* Several items must be provided to the wizard.
*
* Desired Machine Learning Method: This is the machine learning method that you
* would like the wizard to use. This might be a neural network, SVM or other
* supported method.
*
* Normalization Range: This is the range that the data should be normalized
* into. Some machine learning methods perform better with different ranges. The
* two ranges supported by the wizard are -1 to 1 and 0 to 1.
*
* Goal: What are we trying to accomplish. Is this a classification, regression
* or autoassociation problem.
*
*/
public class AnalystWizard {
/**
* The default training percent.
*/
public static final int DEFAULT_TRAIN_PERCENT = 75;
/**
* The default evaluation percent.
*/
public static final int DEFAULT_EVAL_PERCENT = 25;
/**
* The default training error.
*/
public static final double DEFAULT_TRAIN_ERROR = 0.05;
/**
* The raw file.
*/
public static final String FILE_RAW = "FILE_RAW";
/**
* The normalized file.
*/
public static final String FILE_NORMALIZE = "FILE_NORMALIZE";
/**
* The randomized file.
*/
public static final String FILE_RANDOM = "FILE_RANDOMIZE";
/**
* The training file.
*/
public static final String FILE_TRAIN = "FILE_TRAIN";
/**
* The evaluation file.
*/
public static final String FILE_EVAL = "FILE_EVAL";
/**
* The eval file normalization file.
*/
public static final String FILE_EVAL_NORM = "FILE_EVAL_NORM";
/**
* The training set.
*/
public static final String FILE_TRAINSET = "FILE_TRAINSET";
/**
* The machine learning file.
*/
public static final String FILE_ML = "FILE_ML";
/**
* The output file.
*/
public static final String FILE_OUTPUT = "FILE_OUTPUT";
/**
* The balanced file.
*/
public static final String FILE_BALANCE = "FILE_BALANCE";
/**
* The clustered file.
*/
public static final String FILE_CLUSTER = "FILE_CLUSTER";
/**
* The raw filename.
*/
private String filenameRaw;
/**
* The normalized filename.
*/
private String filenameNorm;
/**
* The random file name.
*/
private String filenameRandom;
/**
* The training filename.
*/
private String filenameTrain;
/**
* The evaluation filename.
*/
private String filenameEval;
/**
* The normalization eval file name.
*/
private String filenameEvalNorm;
/**
* The training set filename.
*/
private String filenameTrainSet;
/**
* The machine learning file name.
*/
private String filenameML;
/**
* The output filename.
*/
private String filenameOutput;
/**
* The balance filename.
*/
private String filenameBalance;
/**
* The cluster filename.
*/
private String filenameCluster;
/**
* The analyst script.
*/
private final AnalystScript script;
/**
* The analyst.
*/
private final EncogAnalyst analyst;
/**
* The machine learning method that we will be using.
*/
private WizardMethodType methodType;
/**
* Are we using single-field(direct) classification.
*/
private boolean directClassification = false;
/**
* The target field, or "" to detect.
*/
private String targetField;
/**
* The analyst goal.
*/
private AnalystGoal goal;
/**
* The size of the lag window, if we are doing time-series.
*/
private int lagWindowSize;
/**
* The size of the lead window, if we are doing time-series.
*/
private int leadWindowSize;
/**
* Should the target field be included int he input, if we are doing
* time-series.
*/
private boolean includeTargetField;
/**
* True if we are doing time-series.
*/
private boolean timeSeries;
/**
* True if the segregate command should be generated.
*/
private boolean taskSegregate = true;
/**
* True if the randomize command should be generated.
*/
private boolean taskRandomize = true;
/**
* True if the normalize command should be generated.
*/
private boolean taskNormalize = true;
/**
* True if the balance command should be generated.
*/
private boolean taskBalance = false;
/**
* True if the cluster command should be generated.
*/
private boolean taskCluster = true;
/**
* The normalization range.
*/
private NormalizeRange range = NormalizeRange.NegOne2One;
private HandleMissingValues missing = new DiscardMissing();
/**
* Construct the analyst wizard.
* @param theAnalyst The analyst to use.
*/
public AnalystWizard(final EncogAnalyst theAnalyst) {
this.analyst = theAnalyst;
this.script = analyst.getScript();
this.methodType = WizardMethodType.FeedForward;
this.targetField = "";
this.goal = AnalystGoal.Classification;
this.leadWindowSize = 0;
this.lagWindowSize = 0;
this.includeTargetField = false;
}
/**
* Create a "set" command to add to a task.
* @param setTarget The target.
* @param setSource The source.
* @return The "set" command.
*/
private String createSet(final String setTarget, final String setSource) {
final StringBuilder result = new StringBuilder();
result.append("set ");
result.append(ScriptProperties.toDots(setTarget));
result.append("=\"");
result.append(setSource);
result.append("\"");
return result.toString();
}
/**
* Determine the type of classification used.
*/
private void determineClassification() {
this.directClassification = false;
if ((this.methodType == WizardMethodType.SVM)
|| (this.methodType == WizardMethodType.SOM)) {
this.directClassification = true;
}
}
/**
* Determine the target field.
*/
private void determineTargetField() {
final List<AnalystField> fields = this.script.getNormalize()
.getNormalizedFields();
if (this.targetField.trim().length() == 0) {
boolean success = false;
if (this.goal == AnalystGoal.Classification) {
// first try to the last classify field
for (final AnalystField field : fields) {
final DataField df = this.script.findDataField(field
.getName());
if (field.getAction().isClassify() && df.isClass()) {
this.targetField = field.getName();
success = true;
}
}
} else {
// otherwise, just return the last regression field
for (final AnalystField field : fields) {
final DataField df = this.script.findDataField(field
.getName());
if (!df.isClass() && (df.isReal() || df.isInteger())) {
this.targetField = field.getName();
success = true;
}
}
}
if (!success) {
throw new AnalystError(
"Can't determine target field automatically, "
+ "please specify one.\nThis can also happen if you "
+ "specified the wrong file format.");
}
} else {
if (this.script.findDataField(this.targetField) == null) {
throw new AnalystError("Invalid target field: "
+ this.targetField);
}
}
this.script.getProperties().setProperty(
ScriptProperties.DATA_CONFIG_GOAL, this.goal);
if (!this.timeSeries && this.taskBalance) {
this.script.getProperties().setProperty(
ScriptProperties.BALANCE_CONFIG_BALANCE_FIELD,
this.targetField);
final DataField field = this.analyst.getScript().findDataField(
this.targetField);
if ((field != null) && field.isClass()) {
final int countPer = field.getMinClassCount();
this.script.getProperties().setProperty(
ScriptProperties.BALANCE_CONFIG_COUNT_PER, countPer);
}
}
// now that the target field has been determined, set the analyst fields
AnalystField af = null;
for (final AnalystField field : this.analyst.getScript().getNormalize()
.getNormalizedFields()) {
if ((field.getAction() != NormalizationAction.Ignore)
&& field.getName().equalsIgnoreCase(this.targetField)) {
if ((af == null) || (af.getTimeSlice() < field.getTimeSlice())) {
af = field;
}
}
}
if (af != null) {
af.setOutput(true);
}
// set the clusters count
if (this.taskCluster) {
if ((this.targetField.length() == 0)
|| (this.goal != AnalystGoal.Classification)) {
this.script.getProperties().setProperty(
ScriptProperties.CLUSTER_CONFIG_CLUSTERS, 2);
} else {
final DataField tf = this.script
.findDataField(this.targetField);
this.script.getProperties().setProperty(
ScriptProperties.CLUSTER_CONFIG_CLUSTERS,
tf.getClassMembers().size());
}
}
}
/**
* Expand the time-series fields.
*/
private void expandTimeSlices() {
final List<AnalystField> oldList = this.script.getNormalize()
.getNormalizedFields();
final List<AnalystField> newList = new ArrayList<AnalystField>();
// generate the inputs for the new list
for (final AnalystField field : oldList) {
if (!field.isIgnored()) {
if (this.includeTargetField || field.isInput()) {
for (int i = 0; i < this.lagWindowSize; i++) {
final AnalystField newField = new AnalystField(field);
newField.setTimeSlice(-i);
newField.setOutput(false);
newList.add(newField);
}
}
} else {
newList.add(field);
}
}
// generate the outputs for the new list
for (final AnalystField field : oldList) {
if (!field.isIgnored()) {
if (field.isOutput()) {
for (int i = 1; i <= this.leadWindowSize; i++) {
final AnalystField newField = new AnalystField(field);
newField.setTimeSlice(i);
newList.add(newField);
}
}
}
}
// generate the ignores for the new list
for (final AnalystField field : oldList) {
if (field.isIgnored()) {
newList.add(field);
}
}
// swap back in
oldList.clear();
oldList.addAll(newList);
}
/**
* Generate a feed forward machine learning method.
* @param inputColumns The input column count.
* @param outputColumns The output column count.
*/
private void generateFeedForward(final int inputColumns,
final int outputColumns) {
final int hidden = (int) ((inputColumns) * 1.5);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_TYPE,
MLMethodFactory.TYPE_FEEDFORWARD);
if (this.range == NormalizeRange.NegOne2One) {
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_ARCHITECTURE,
"?:B->TANH->" + hidden + ":B->TANH->?");
} else {
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_ARCHITECTURE,
"?:B->SIGMOID->" + hidden + ":B->SIGMOID->?");
}
this.script.getProperties().setProperty(ScriptProperties.ML_TRAIN_TYPE,
"rprop");
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_TARGET_ERROR, DEFAULT_TRAIN_ERROR);
}
/**
* Generate filenames.
* @param rawFile The raw filename.
*/
private void generateFilenames(final File rawFile) {
this.filenameRaw = rawFile.getName();
this.filenameNorm = FileUtil.addFilenameBase(rawFile, "_norm")
.getName();
this.filenameRandom = FileUtil.addFilenameBase(rawFile, "_random")
.getName();
this.filenameTrain = FileUtil.addFilenameBase(rawFile, "_train")
.getName();
this.filenameEval = FileUtil.addFilenameBase(rawFile, "_eval")
.getName();
this.filenameEvalNorm = FileUtil.addFilenameBase(rawFile, "_eval_norm")
.getName();
this.filenameTrainSet = FileUtil.forceExtension(this.filenameTrain,
"egb");
this.filenameML = FileUtil.forceExtension(this.filenameTrain, "eg");
this.filenameOutput = FileUtil.addFilenameBase(rawFile, "_output")
.getName();
this.filenameBalance = FileUtil.addFilenameBase(rawFile, "_balance")
.getName();
this.filenameCluster = FileUtil.addFilenameBase(rawFile, "_cluster")
.getName();
final ScriptProperties p = this.script.getProperties();
p.setFilename(AnalystWizard.FILE_RAW, this.filenameRaw);
if (this.taskNormalize) {
p.setFilename(AnalystWizard.FILE_NORMALIZE, this.filenameNorm);
}
if (this.taskRandomize) {
p.setFilename(AnalystWizard.FILE_RANDOM, this.filenameRandom);
}
if (this.taskCluster) {
p.setFilename(AnalystWizard.FILE_CLUSTER, this.filenameCluster);
}
if (this.taskSegregate) {
p.setFilename(AnalystWizard.FILE_TRAIN, this.filenameTrain);
p.setFilename(AnalystWizard.FILE_EVAL, this.filenameEval);
p.setFilename(AnalystWizard.FILE_EVAL_NORM, this.filenameEvalNorm);
}
if (this.taskBalance) {
p.setFilename(AnalystWizard.FILE_BALANCE, this.filenameBalance);
}
p.setFilename(AnalystWizard.FILE_TRAINSET, this.filenameTrainSet);
p.setFilename(AnalystWizard.FILE_ML, this.filenameML);
p.setFilename(AnalystWizard.FILE_OUTPUT, this.filenameOutput);
}
/**
* Generate the generate task.
*/
private void generateGenerate() {
determineTargetField();
if (this.targetField == null) {
throw new AnalystError(
"Failed to find normalized version of target field: "
+ this.targetField);
}
final int inputColumns = this.script.getNormalize()
.calculateInputColumns();
final int idealColumns = this.script.getNormalize()
.calculateOutputColumns();
switch (this.methodType) {
case FeedForward:
generateFeedForward(inputColumns, idealColumns);
break;
case SVM:
generateSVM(inputColumns, idealColumns);
break;
case RBF:
generateRBF(inputColumns, idealColumns);
break;
case SOM:
generateSOM(inputColumns);
break;
default:
throw new AnalystError("Unknown method type");
}
}
/**
* Generate the normalized fields.
*/
private void generateNormalizedFields() {
final List<AnalystField> norm = this.script.getNormalize()
.getNormalizedFields();
norm.clear();
final DataField[] dataFields = this.script.getFields();
for (int i = 0; i < this.script.getFields().length; i++) {
final DataField f = dataFields[i];
NormalizationAction action;
final boolean isLast = i == this.script.getFields().length - 1;
if ((f.isInteger() || f.isReal()) && !f.isClass()) {
action = NormalizationAction.Normalize;
AnalystField af;
if (this.range == NormalizeRange.NegOne2One) {
af = new AnalystField(f.getName(), action, 1, -1);
} else {
af = new AnalystField(f.getName(), action, 1, 0);
}
norm.add(af);
af.setActualHigh(f.getMax());
af.setActualLow(f.getMin());
} else if (f.isClass()) {
if (isLast && this.directClassification) {
action = NormalizationAction.SingleField;
} else if (f.getClassMembers().size() > 2) {
action = NormalizationAction.Equilateral;
} else {
action = NormalizationAction.OneOf;
}
if (this.range == NormalizeRange.NegOne2One) {
norm.add(new AnalystField(f.getName(), action, 1, -1));
} else {
norm.add(new AnalystField(f.getName(), action, 1, 0));
}
} else {
action = NormalizationAction.Ignore;
norm.add(new AnalystField(action, f.getName()));
}
}
this.script.getNormalize().init(this.script);
}
/**
* Generate a RBF machine learning method.
* @param inputColumns The number of input columns.
* @param outputColumns The number of output columns.
*/
private void generateRBF(final int inputColumns, final int outputColumns) {
final int hidden = (int) ((inputColumns) * 1.5);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_TYPE,
MLMethodFactory.TYPE_RBFNETWORK);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_ARCHITECTURE,
"?->GAUSSIAN(c=" + hidden + ")->?");
if (outputColumns > 1) {
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_TYPE, "rprop");
} else {
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_TYPE, "svd");
}
this.script.getProperties().setProperty(ScriptProperties.ML_TRAIN_TYPE,
DEFAULT_TRAIN_ERROR);
}
/**
* Generate the segregate task.
*/
private void generateSegregate() {
if (this.taskSegregate) {
final AnalystSegregateTarget[] array
= new AnalystSegregateTarget[2];
array[0] = new AnalystSegregateTarget(
AnalystWizard.FILE_TRAIN, DEFAULT_TRAIN_PERCENT);
array[1] = new AnalystSegregateTarget(
AnalystWizard.FILE_EVAL, DEFAULT_EVAL_PERCENT);
this.script.getSegregate().setSegregateTargets(array);
} else {
final AnalystSegregateTarget[] array
= new AnalystSegregateTarget[0];
this.script.getSegregate().setSegregateTargets(array);
}
}
/**
* Generate the settings.
*/
private void generateSettings() {
String target;
String evalSource;
// starting point
target = AnalystWizard.FILE_RAW;
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_RAW_FILE,
target);
// randomize
if (!this.timeSeries && this.taskRandomize) {
this.script.getProperties().setProperty(
ScriptProperties.RANDOMIZE_CONFIG_SOURCE_FILE,
AnalystWizard.FILE_RAW);
target = AnalystWizard.FILE_RANDOM;
this.script.getProperties().setProperty(
ScriptProperties.RANDOMIZE_CONFIG_TARGET_FILE,
target);
}
// balance
if (!this.timeSeries && this.taskBalance) {
this.script.getProperties().setProperty(
ScriptProperties.BALANCE_CONFIG_SOURCE_FILE, target);
target = AnalystWizard.FILE_BALANCE;
this.script.getProperties().setProperty(
ScriptProperties.BALANCE_CONFIG_TARGET_FILE,
target);
}
// segregate
if (this.taskSegregate) {
this.script.getProperties().setProperty(
ScriptProperties.SEGREGATE_CONFIG_SOURCE_FILE, target);
target = AnalystWizard.FILE_TRAIN;
}
// normalize
if (this.taskNormalize) {
this.script.getProperties().setProperty(
ScriptProperties.NORMALIZE_CONFIG_SOURCE_FILE, target);
target = AnalystWizard.FILE_NORMALIZE;
this.script.getProperties().setProperty(
ScriptProperties.NORMALIZE_CONFIG_TARGET_FILE, target);
this.script.getNormalize().setMissingValues(this.missing);
}
if (this.taskSegregate) {
evalSource = AnalystWizard.FILE_EVAL;
} else {
evalSource = target;
}
// cluster
if (this.taskCluster) {
this.script.getProperties().setProperty(
ScriptProperties.CLUSTER_CONFIG_SOURCE_FILE,
evalSource);
this.script.getProperties().setProperty(
ScriptProperties.CLUSTER_CONFIG_TARGET_FILE,
AnalystWizard.FILE_CLUSTER);
this.script.getProperties().setProperty(
ScriptProperties.CLUSTER_CONFIG_TYPE, "kmeans");
}
// generate
this.script.getProperties().setProperty(
ScriptProperties.GENERATE_CONFIG_SOURCE_FILE, target);
this.script.getProperties().setProperty(
ScriptProperties.GENERATE_CONFIG_TARGET_FILE,
AnalystWizard.FILE_TRAINSET);
// ML
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_TRAINING_FILE,
AnalystWizard.FILE_TRAINSET);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_MACHINE_LEARNING_FILE,
AnalystWizard.FILE_ML);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_OUTPUT_FILE,
AnalystWizard.FILE_OUTPUT);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_EVAL_FILE, evalSource);
// other
this.script.getProperties().setProperty(
ScriptProperties.SETUP_CONFIG_CSV_FORMAT,
AnalystFileFormat.DECPNT_COMMA);
}
/**
* Generate a SOM machine learning method.
* @param inputColumns The number of input columns.
*/
private void generateSOM(final int inputColumns) {
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_TYPE, MLMethodFactory.TYPE_SOM);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_ARCHITECTURE, "?->?");
this.script.getProperties().setProperty(ScriptProperties.ML_TRAIN_TYPE,
MLTrainFactory.TYPE_SOM_NEIGHBORHOOD);
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_ARGUMENTS,
"ITERATIONS=1000,NEIGHBORHOOD=rbf1d,RBF_TYPE=gaussian");
// ScriptProperties.ML_TRAIN_arguments
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_TARGET_ERROR, DEFAULT_TRAIN_ERROR);
}
/**
* Generate a SVM machine learning method.
* @param inputColumns The number of input columns.
* @param outputColumns The number of ideal columns.
*/
private void generateSVM(final int inputColumns, final int outputColumns) {
StringBuilder arch = new StringBuilder();
arch.append("?->");
if (this.goal == AnalystGoal.Classification) {
arch.append("C");
} else {
arch.append("R");
}
arch.append("(type=new,kernel=rbf)->?");
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_TYPE, MLMethodFactory.TYPE_SVM);
this.script.getProperties().setProperty(
ScriptProperties.ML_CONFIG_ARCHITECTURE,
arch.toString());
this.script.getProperties().setProperty(ScriptProperties.ML_TRAIN_TYPE,
MLTrainFactory.TYPE_SVM_SEARCH);
this.script.getProperties().setProperty(
ScriptProperties.ML_TRAIN_TARGET_ERROR, DEFAULT_TRAIN_ERROR);
}
/**
* Generate the tasks.
*/
private void generateTasks() {
final AnalystTask task1 = new AnalystTask(EncogAnalyst.TASK_FULL);
if (!this.timeSeries && this.taskRandomize) {
task1.getLines().add("randomize");
}
if (!this.timeSeries && this.taskBalance) {
task1.getLines().add("balance");
}
if (this.taskSegregate) {
task1.getLines().add("segregate");
}
if (this.taskNormalize) {
task1.getLines().add("normalize");
}
task1.getLines().add("generate");
task1.getLines().add("create");
task1.getLines().add("train");
task1.getLines().add("evaluate");
final AnalystTask task2 = new AnalystTask("task-generate");
if (!this.timeSeries && this.taskRandomize) {
task2.getLines().add("randomize");
}
if (this.taskSegregate) {
task2.getLines().add("segregate");
}
if (this.taskNormalize) {
task2.getLines().add("normalize");
}
task2.getLines().add("generate");
final AnalystTask task3 = new AnalystTask("task-evaluate-raw");
task3.getLines().add(
createSet(ScriptProperties.ML_CONFIG_EVAL_FILE,
AnalystWizard.FILE_EVAL_NORM));
task3.getLines().add(
createSet(ScriptProperties.NORMALIZE_CONFIG_SOURCE_FILE,
AnalystWizard.FILE_EVAL));
task3.getLines().add(
createSet(ScriptProperties.NORMALIZE_CONFIG_TARGET_FILE,
AnalystWizard.FILE_EVAL_NORM));
task3.getLines().add("normalize");
task3.getLines().add("evaluate-raw");
final AnalystTask task4 = new AnalystTask("task-create");
task4.getLines().add("create");
final AnalystTask task5 = new AnalystTask("task-train");
task5.getLines().add("train");
final AnalystTask task6 = new AnalystTask("task-evaluate");
task6.getLines().add("evaluate");
final AnalystTask task7 = new AnalystTask("task-cluster");
task7.getLines().add("cluster");
this.script.addTask(task1);
this.script.addTask(task2);
this.script.addTask(task3);
this.script.addTask(task4);
this.script.addTask(task5);
this.script.addTask(task6);
this.script.addTask(task7);
}
/**
* @return The analyst goal.
*/
public final AnalystGoal getGoal() {
return this.goal;
}
/**
* @return the lagWindowSize
*/
public final int getLagWindowSize() {
return this.lagWindowSize;
}
/**
* @return the leadWindowSize
*/
public final int getLeadWindowSize() {
return this.leadWindowSize;
}
/**
* @return the methodType
*/
public final WizardMethodType getMethodType() {
return this.methodType;
}
/**
* @return the range
*/
public final NormalizeRange getRange() {
return this.range;
}
/**
* @return Get the target field.
*/
public final String getTargetField() {
return this.targetField;
}
/**
* @return the includeTargetField
*/
public final boolean isIncludeTargetField() {
return this.includeTargetField;
}
/**
* @return the taskBalance
*/
public final boolean isTaskBalance() {
return this.taskBalance;
}
/**
* @return the taskCluster
*/
public final boolean isTaskCluster() {
return this.taskCluster;
}
/**
* @return the taskNormalize
*/
public final boolean isTaskNormalize() {
return this.taskNormalize;
}
/**
* @return the taskRandomize
*/
public final boolean isTaskRandomize() {
return this.taskRandomize;
}
/**
* @return the taskSegregate
*/
public final boolean isTaskSegregate() {
return this.taskSegregate;
}
/**
* Reanalyze column ranges.
*/
public final void reanalyze() {
final String rawID = this.script.getProperties().getPropertyFile(
ScriptProperties.HEADER_DATASOURCE_RAW_FILE);
final File rawFilename = this.analyst.getScript()
.resolveFilename(rawID);
this.analyst.analyze(
rawFilename,
this.script.getProperties().getPropertyBoolean(
ScriptProperties.SETUP_CONFIG_INPUT_HEADERS),
this.script.getProperties().getPropertyFormat(
ScriptProperties.SETUP_CONFIG_CSV_FORMAT));
}
/**
* Set the goal.
* @param theGoal The goal.
*/
public final void setGoal(final AnalystGoal theGoal) {
this.goal = theGoal;
}
/**
* @param theIncludeTargetField
* the includeTargetField to set
*/
public final void setIncludeTargetField(
final boolean theIncludeTargetField) {
this.includeTargetField = theIncludeTargetField;
}
/**
* @param theLagWindowSize
* the lagWindowSize to set
*/
public final void setLagWindowSize(final int theLagWindowSize) {
this.lagWindowSize = theLagWindowSize;
}
/**
* @param theLeadWindowSize
* the leadWindowSize to set
*/
public final void setLeadWindowSize(final int theLeadWindowSize) {
this.leadWindowSize = theLeadWindowSize;
}
/**
* @param theMethodType
* the methodType to set
*/
public final void setMethodType(final WizardMethodType theMethodType) {
this.methodType = theMethodType;
}
/**
* @param theRange
* the range to set
*/
public final void setRange(final NormalizeRange theRange) {
this.range = theRange;
}
/**
* Set the target field.
* @param theTargetField The target field.
*/
public final void setTargetField(final String theTargetField) {
this.targetField = theTargetField;
}
/**
* @param theTaskBalance
* the taskBalance to set
*/
public final void setTaskBalance(final boolean theTaskBalance) {
this.taskBalance = theTaskBalance;
}
/**
* @param theTaskCluster
* the taskCluster to set
*/
public final void setTaskCluster(final boolean theTaskCluster) {
this.taskCluster = theTaskCluster;
}
/**
* @param theTaskNormalize
* the taskNormalize to set
*/
public final void setTaskNormalize(final boolean theTaskNormalize) {
this.taskNormalize = theTaskNormalize;
}
/**
* @param theTaskRandomize
* the taskRandomize to set
*/
public final void setTaskRandomize(final boolean theTaskRandomize) {
this.taskRandomize = theTaskRandomize;
}
/**
* @param theTaskSegregate
* the taskSegregate to set
*/
public final void setTaskSegregate(final boolean theTaskSegregate) {
this.taskSegregate = theTaskSegregate;
}
/**
* Analyze a file.
* @param analyzeFile The file to analyze.
* @param b True if there are headers.
* @param format The file format.
*/
public final void wizard(final File analyzeFile, final boolean b,
final AnalystFileFormat format) {
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_SOURCE_FORMAT, format);
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_SOURCE_HEADERS, b);
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_RAW_FILE, analyzeFile);
this.timeSeries = ((this.lagWindowSize > 0)
|| (this.leadWindowSize > 0));
determineClassification();
generateFilenames(analyzeFile);
generateSettings();
this.analyst.analyze(analyzeFile, b, format);
generateNormalizedFields();
generateSegregate();
generateGenerate();
generateTasks();
if (this.timeSeries && (this.lagWindowSize > 0)
&& (this.leadWindowSize > 0)) {
expandTimeSlices();
}
}
/**
* Analyze a file at the specified URL.
* @param url The URL to analyze.
* @param saveFile The save file.
* @param analyzeFile The Encog analyst file.
* @param b True if there are headers.
* @param format The file format.
*/
public final void wizard(final URL url, final File saveFile,
final File analyzeFile, final boolean b,
final AnalystFileFormat format) {
this.script.setBasePath(saveFile.getParent());
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_SOURCE_FILE, url);
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_SOURCE_FORMAT, format);
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_SOURCE_HEADERS, b);
this.script.getProperties().setProperty(
ScriptProperties.HEADER_DATASOURCE_RAW_FILE, analyzeFile);
generateFilenames(analyzeFile);
generateSettings();
this.analyst.download();
wizard(analyzeFile, b, format);
}
/**
* @return the missing
*/
public HandleMissingValues getMissing() {
return missing;
}
/**
* @param missing the missing to set
*/
public void setMissing(HandleMissingValues missing) {
this.missing = missing;
}
}