/*
* Copyright (c) 2010 Pentaho Corporation. All rights reserved.
* This software was developed by Pentaho Corporation and is provided under the terms
* of the GNU Lesser General Public License, Version 2.1. You may not use
* this file except in compliance with the license. If you need a copy of the license,
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Time Series
* Forecasting. The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. Please refer to
* the license for the specific language governing your rights and limitations.
*/
/**
* ErrorBasedConfidenceIntervalEstimator.java
* Copyright (C) 2010 Pentaho Corporation
*/
package weka.classifiers.timeseries.core;
import java.io.PrintStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.timeseries.AbstractForecaster;
import weka.classifiers.timeseries.TSForecaster;
import weka.classifiers.timeseries.eval.ErrorModule;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
/**
* Class that computes confidence intervals for a time series forecaster
* using errors computed on the training data.
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 50211 $
*
*/
public class ErrorBasedConfidenceIntervalEstimator implements Serializable {
/** For serialization */
private static final long serialVersionUID = -2748314799535071043L;
/**
* Holds confidence bound offsets for targets at a certain level.
* The outer list corresponds to the fields to forecast (in the same
* order as supplied to the TSForecaster.setFieldsToForecast() method. The inner
* lists hold two element arrays of doubles (upper, lower bounds). The
* first index of these lists are bounds for to a one-step-ahead forecast,
* the second a two-step-ahead forecast, and so on
*/
protected List<List<double[]>> m_confidenceLimitsForTargets;
/** The names of the target field(s) */
protected List<String> m_targetFields;
/** The confidence level for the limits */
protected double m_confidenceLevel;
/**
* Get the confidence bound offsets for each target at the supplied
* confidence level
*
* @param confidenceLevel the confidence level to use
* @return a List of confidence offsets - one for each target. Each
* target's confidence limits are stored in a two element array,
* where the first element stores the upper bound and the second
* the lower bound (both are expressed as an offset)
* @throws Exception if the confidence limits can't be computed
* for some reason
*/
public List<double[]> getConfidenceOffsets(double confidenceLevel,
List<List<NumericPrediction>> predictions)
throws Exception {
if (predictions == null || predictions.get(0).size() == 0) {
throw new Exception("No predictions have been seen yet!");
}
List<double[]> result = new ArrayList<double[]>();
for (int i = 0; i < m_targetFields.size(); i++) {
List<NumericPrediction> preds = predictions.get(i);
// need to separate the positive and negative errors
// into two separate lists
List<Double> posErrs = new ArrayList<Double>();
List<Double> negErrs = new ArrayList<Double>();
for (NumericPrediction p : preds) {
if (!Utils.isMissingValue(p.error())) {
if (p.error() < 0) {
negErrs.add(new Double(Math.abs(p.error())));
}
if (p.error() > 0) {
posErrs.add(new Double(p.error()));
}
}
}
// sort into ascending order
Collections.sort(posErrs);
Collections.sort(negErrs);
double[] bounds = new double[2];
bounds[0] = Utils.missingValue();
bounds[1] = Utils.missingValue();
if (posErrs.size() > 0 && negErrs.size() > 0) {
double cL = 1.0 - confidenceLevel;
int posPosition = (int)Math.round(posErrs.size() * cL);
if (posPosition < 1) {
posPosition = 1;
}
int negPosition = (int)Math.round(negErrs.size() * cL);
if (negPosition < 1) {
negPosition = 1;
}
//double upperBound = posErrs.get(posErrs.size() - posPosition);
double upperBound = negErrs.get(negErrs.size() - negPosition);
//double lowerBound = negErrs.get(negErrs.size() - negPosition);
double lowerBound = posErrs.get(posErrs.size() - posPosition);
lowerBound = -lowerBound;
bounds[0] = lowerBound;
bounds[1] = upperBound;
/*System.err.println("Neg pos " + negPosition);
System.err.println("lower: " + lowerBound + " Upper " + upperBound);*/
}
result.add(bounds);
}
return result;
}
/**
* Creates overlay data for "future" instances taken from the training
* data. All targets are set to missing value in this data. This method
* is used when the forecaster is being trained using overlay fields.
*
* @param forecaster the forecaster being used
* @param source the source data to create a set of overlay instances from
* @param start the index of the instance in the source data that will
* be the first instance in the overlay data
* @param numSteps the number of steps to be forecast
* @return the overlay data as an Instances object
*/
protected Instances createOverlayForecastData(TSForecaster forecaster,
Instances source, int start, int numSteps) {
int toCopy = Math.min(numSteps, source.numInstances() - start);
Instances overlay = new Instances(source, start, toCopy);
// set all targets to missing
List<String> fieldsToForecast =
AbstractForecaster.stringToList(forecaster.getFieldsToForecast());
for (int i = 0; i < overlay.numInstances(); i++) {
Instance current = overlay.instance(i);
for (String target : fieldsToForecast) {
current.setValue(overlay.attribute(target), Utils.missingValue());
}
}
return overlay;
}
/**
* Computes confidence intervals using the supplied forecster and
* training data.
*
* @param forecaster the forecaster to use
* @param insts the training data to use
* @param numPrime the number of instances to prime the forecaster with
* @param numSteps the number of steps to forecast (and hence compute
* intervals for)
* @param confidenceLevel the confidence level to use
* @param progress PrintStream objects to report progress to
* @throws Exception if a problem occurs
*/
public void calculateConfidenceOffsets(TSForecaster forecaster,
Instances insts, int numPrime, int numSteps, double confidenceLevel,
PrintStream... progress) throws Exception {
calculateConfidenceOffsets(forecaster, insts, numPrime, -1, numSteps,
confidenceLevel, progress);
}
// artificialTimeStartValue is assumed to be the time value for
// the first instance in the supplied set of instances
/**
* Computes confidence intervals using the supplied forecster and
* training data.
*
* @param forecaster the forecaster to use
* @param insts the training data
* @param numPrime the number of instances to prime with
* @param artificialTimeStartValue start value for the artificial time stamp
* (if one is being used or -1 otherwise)
* @param numSteps number of time steps to compute confidence intervals for
* @param confidenceLevel the confidence level to use
* @param progress varargs PrintStream object(s) to report progress to
* @throws Exception if something goes wrong.
*/
public void calculateConfidenceOffsets(TSForecaster forecaster,
Instances insts, int numPrime, int artificialTimeStartValue,
int numSteps, double confidenceLevel, PrintStream... progress)
throws Exception {
if (insts.numInstances() < (numPrime + numSteps)) {
throw new Exception("We need at least " + (numPrime + numSteps)
+ " instances in order to calculate confidence limits!");
}
if (confidenceLevel < 0 || confidenceLevel > 1) {
throw new Exception("Confidence level must lie between 0 and 1");
}
m_targetFields =
AbstractForecaster.stringToList(forecaster.getFieldsToForecast());
m_confidenceLevel = confidenceLevel;
List<ErrorModule> confidenceCalculators = new ArrayList<ErrorModule>();
for (int i = 0; i < numSteps; i++) {
ErrorModule m = new ErrorModule();
m.setTargetFields(m_targetFields);
confidenceCalculators.add(m);
}
Instances primeInsts = new Instances(insts, 0, numPrime);
/* for (int i = 0; i < numPrime; i++) {
primeInsts.add(insts.instance(i));
} */
primeInsts.compactify();
if (forecaster instanceof TSLagUser && artificialTimeStartValue >= 0) {
((TSLagUser)forecaster).getTSLagMaker().
setArtificialTimeStartValue(artificialTimeStartValue - 1 + numPrime);
}
for (int i = numPrime; i < insts.numInstances(); i++) {
forecaster.primeForecaster(primeInsts);
if (i % 10 == 0) {
for (PrintStream p : progress) {
p.println("Computing confidence intervals: processed " + i + " instances...");
}
}
List<List<NumericPrediction>> forecastForSteps = null;
if (forecaster instanceof OverlayForecaster &&
((OverlayForecaster)forecaster).isUsingOverlayData()) {
// can only generate forecasts for remaining training data that
// we can use as overlay data
Instances overlay =
createOverlayForecastData(forecaster, insts, i, numSteps);
forecastForSteps =
((OverlayForecaster)forecaster).forecast(numSteps, overlay);
} else {
forecastForSteps = forecaster.forecast(numSteps);
}
// single target only at present
//List<NumericPrediction> preds = forecastForTargets.get(0);
// update the error modules
for (int j = 0; j < numSteps &&
(i + j < insts.numInstances()); j++) {
Instance toPredict = insts.instance(i + j);
// double[] forecastsForStepJ = new double[m_targetFields.size()];
List<NumericPrediction> predsForTargets = forecastForSteps.get(j);
/* for (int k = 0; k < m_targetFields.size(); k++) {
forecastsForStepJ[k] = predsForTargets.get(k).predicted();
} */
confidenceCalculators.get(j).evaluateForInstance(predsForTargets, toPredict);
}
// remove the first instance from the primeInsts and then add instance i
// to the end
primeInsts.delete(0);
primeInsts.add(insts.instance(i));
primeInsts.compactify();
}
m_confidenceLimitsForTargets = new ArrayList<List<double[]>>();
for (int j = 0; j < m_targetFields.size(); j++) {
ArrayList<double[]> limitsForSingleTarget = new ArrayList<double[]>();
for (int i = 0; i < numSteps; i++) {
List<List<NumericPrediction>> predsForStepI =
confidenceCalculators.get(i).getPredictionsForAllTargets();
List<double[]> confOffsetsForStepI =
getConfidenceOffsets(confidenceLevel, predsForStepI);
double[] limitsAtStepI = confOffsetsForStepI.get(j);
limitsForSingleTarget.add(limitsAtStepI);
}
m_confidenceLimitsForTargets.add(limitsForSingleTarget);
}
}
/**
* Get the confidence level in use
*
* @return the confidence level
*/
public double getConfidenceLevel() {
return m_confidenceLevel;
}
/**
* Get the confidence limits (upper and lower bounds) for the named target
* at the given step number
*
* @param targetName the name of the target to return the limits for
* @param targetValue the predicted target value
* @param stepNum the step number to return the bounds for this target
* @return an array containing the lower and upper bounds for the supplied
* target value in elements 0 and 1 respectively.
*
* @throws Exception if a problem occurs while computing the bounds.
*/
public double[] getConfidenceLimitsForTarget(String targetName,
double targetValue, int stepNum)
throws Exception {
int index = m_targetFields.indexOf(targetName);
if (index < 0) {
throw new Exception("[ErrorBasedConfidenceLimitEstimator] " +
"unknown target: " + targetName);
}
List<double[]> confForTarget = m_confidenceLimitsForTargets.get(index);
if (stepNum > confForTarget.size()) {
throw new Exception("[ErrorBasedConfidenceLimitEstimator] no limits availalbe for" +
"requested step number: " + stepNum);
}
double[] offsets = confForTarget.get(stepNum - 1);
double[] limits = new double[2];
limits[0] = targetValue + offsets[0];
limits[1] = targetValue + offsets[1];
//return confForTarget.get(stepNum - 1);
return limits;
}
}