Package aima.core.learning.neural

Source Code of aima.core.learning.neural.NNDataSet

package aima.core.learning.neural;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import aima.core.learning.data.DataResource;
import aima.core.learning.framework.DataSet;
import aima.core.learning.framework.Example;
import aima.core.util.Util;
import aima.core.util.datastructure.Pair;

/**
* @author Ravi Mohan
*
*/
public abstract class NNDataSet {
  /*
   * This class represents a source of examples to the rest of the nn
   * framework. Assumes only one function approximator works on an instance at
   * a given point in time
   */
  /*
   * the parsed and preprocessed form of the dataset.
   */
  private List<NNExample> dataset;
  /*
   * a copy from which examples are drawn.
   */
  private List<NNExample> presentlyProcessed = new ArrayList<NNExample>();;

  /*
   * list of mean Values for all components of raw data set
   */
  private List<Double> means;

  /*
   * list of stdev Values for all components of raw data set
   */
  private List<Double> stdevs;
  /*
   * the normalized data set
   */
  protected List<List<Double>> nds;

  /*
   * the column numbers of the "target"
   */

  protected List<Integer> targetColumnNumbers;

  /*
   * population delegated to subclass because only subclass knows which
   * column(s) is target
   */
  public abstract void setTargetColumns();

  /*
   * create a normalized data "table" from the data in the file. At this
   * stage, the data isnot split into input pattern and tragets
   */
  public void createNormalizedDataFromFile(String filename) throws Exception {

    List<List<Double>> rds = new ArrayList<List<Double>>();

    // create raw data set
    BufferedReader reader = new BufferedReader(new InputStreamReader(
        DataResource.class.getResourceAsStream(filename + ".csv")));
    String line;
    while ((line = reader.readLine()) != null) {
      rds.add(exampleFromString(line, ","));
    }

    // normalize raw dataset
    nds = normalize(rds);
  }

  /*
   * create a normalized data "table" from the DataSet using numerizer. At
   * this stage, the data isnot split into input pattern and targets TODO
   * remove redundancy of recreating the target columns. the numerizer has
   * already isolated the targets
   */
  public void createNormalizedDataFromDataSet(DataSet ds, Numerizer numerizer)
      throws Exception {

    List<List<Double>> rds = rawExamplesFromDataSet(ds, numerizer);
    // normalize raw dataset
    nds = normalize(rds);
  }

  /*
   * Gets (and removes) a random example from the 'presentlyProcessed'
   */
  public NNExample getExampleAtRandom() {

    int i = Util.randomNumberBetween(0, (presentlyProcessed.size() - 1));
    return presentlyProcessed.remove(i);
  }

  /*
   * Gets (and removes) a random example from the 'presentlyProcessed'
   */
  public NNExample getExample(int index) {

    return presentlyProcessed.remove(index);
  }

  /*
   * check if any more examples remain to be processed
   */
  public boolean hasMoreExamples() {
    return presentlyProcessed.size() > 0;
  }

  /*
   * check how many examples remain to be processed
   */
  public int howManyExamplesLeft() {
    return presentlyProcessed.size();
  }

  /*
   * refreshes the presentlyProcessed dataset so it can be used for a new
   * epoch of training.
   */
  public void refreshDataset() {
    presentlyProcessed = new ArrayList<NNExample>();
    for (NNExample e : dataset) {
      presentlyProcessed.add(e.copyExample());
    }
  }

  /*
   * method called by clients to set up data set and make it ready for
   * processing
   */
  public void createExamplesFromFile(String filename) throws Exception {
    createNormalizedDataFromFile(filename);
    setTargetColumns();
    createExamples();

  }

  /*
   * method called by clients to set up data set and make it ready for
   * processing
   */
  public void createExamplesFromDataSet(DataSet ds, Numerizer numerizer)
      throws Exception {
    createNormalizedDataFromDataSet(ds, numerizer);
    setTargetColumns();
    createExamples();

  }

  public List<List<Double>> getNormalizedData() {
    return nds;
  }

  public List<Double> getMeans() {
    return means;
  }

  public List<Double> getStdevs() {
    return stdevs;
  }

  //
  // PRIVATE METHODS
  //

  /*
   * create Example instances from a normalized data "table".
   */
  private void createExamples() {
    dataset = new ArrayList<NNExample>();
    for (List<Double> dataLine : nds) {
      List<Double> input = new ArrayList<Double>();
      List<Double> target = new ArrayList<Double>();
      for (int i = 0; i < dataLine.size(); i++) {
        if (targetColumnNumbers.contains(i)) {
          target.add(dataLine.get(i));
        } else {
          input.add(dataLine.get(i));
        }
      }
      dataset.add(new NNExample(input, target));
    }
    refreshDataset();// to populate the preentlyProcessed dataset
  }

  private List<List<Double>> normalize(List<List<Double>> rds) {
    int rawDataLength = rds.get(0).size();
    List<List<Double>> nds = new ArrayList<List<Double>>();

    means = new ArrayList<Double>();
    stdevs = new ArrayList<Double>();

    List<List<Double>> normalizedColumns = new ArrayList<List<Double>>();
    // clculate means for each coponent of example data
    for (int i = 0; i < rawDataLength; i++) {
      List<Double> columnValues = new ArrayList<Double>();
      for (List<Double> rawDatum : rds) {
        columnValues.add(rawDatum.get(i));
      }
      double mean = Util.calculateMean(columnValues);
      means.add(mean);

      double stdev = Util.calculateStDev(columnValues, mean);
      stdevs.add(stdev);

      normalizedColumns.add(Util.normalizeFromMeanAndStdev(columnValues,
          mean, stdev));

    }
    // re arrange data from columns
    // TODO Assert normalized columns have same size etc

    int columnLength = normalizedColumns.get(0).size();
    int numberOfColumns = normalizedColumns.size();
    for (int i = 0; i < columnLength; i++) {
      List<Double> lst = new ArrayList<Double>();
      for (int j = 0; j < numberOfColumns; j++) {
        lst.add(normalizedColumns.get(j).get(i));
      }
      nds.add(lst);
    }
    return nds;
  }

  private List<Double> exampleFromString(String line, String separator) {
    // assumes all values for inout and target are doubles
    List<Double> rexample = new ArrayList<Double>();
    List<String> attributeValues = Arrays.asList(line.split(separator));
    for (String valString : attributeValues) {
      rexample.add(Double.parseDouble(valString));
    }
    return rexample;
  }

  private List<List<Double>> rawExamplesFromDataSet(DataSet ds,
      Numerizer numerizer) {
    // assumes all values for inout and target are doubles
    List<List<Double>> rds = new ArrayList<List<Double>>();
    for (int i = 0; i < ds.size(); i++) {
      List<Double> rexample = new ArrayList<Double>();
      Example e = ds.getExample(i);
      Pair<List<Double>, List<Double>> p = numerizer.numerize(e);
      List<Double> attributes = p.getFirst();
      for (Double d : attributes) {
        rexample.add(d);
      }
      List<Double> targets = p.getSecond();
      for (Double d : targets) {
        rexample.add(d);
      }
      rds.add(rexample);
    }
    return rds;
  }
}
TOP

Related Classes of aima.core.learning.neural.NNDataSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.