Source Code of org.encog.ml.data.buffer.BufferedNeuralDataSet

/*
 * Encog(tm) Core v3.0 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 
 * Copyright 2008-2011 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *   
 * For more information on Heaton Research copyrights, licenses 
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.ml.data.buffer;


import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.ml.data.MLlDataError;
import org.encog.ml.data.basic.BasicMLDataSet;


/**
 * This class is not memory based, so very long files can be used, without
 * running out of memory. This dataset uses a Encog binary training file as a
 * buffer.
 * 
 * When used with a slower access dataset, such as CSV, XML or SQL, where
 * parsing must occur, this dataset can be used to load from the slower dataset
 * and train at much higher speeds.
 * 
 * This class makes use of Java file channels for maximum file access
 * performance.
 * 
 * If you are going to create a binary file, by using the add methods, you must
 * call beginLoad to cause Encog to open an output file. Once the data has been
 * loaded, call endLoad. You can also use the BinaryDataLoader class, with a
 * CODEC, to load many other popular external formats.
 * 
 * The binary files produced by this class are in the Encog binary training
 * format, and can be used with any Encog platform. Encog binary files are
 * stored using "little endian" numbers.
 */
public class BufferedNeuralDataSet implements
  MLDataSet, Serializable {


  /**
   * The version.
   */
  private static final long serialVersionUID = 2577778772598513566L;


  /**
   * Error message for ADD.
   */
  public static final String ERROR_ADD 
    = "Add can only be used after calling beginLoad.";


  /**
   * Error message for REMOVE.
   */
  public static final String ERROR_REMOVE 
    = "Remove is not supported for BufferedNeuralDataSet.";


  /**
   * True, if we are in the process of loading.
   */
  private transient boolean loading;


  /**
   * The file being used.
   */
  private File file;


  /**
   * The EGB file we are working wtih.
   */
  private transient EncogEGBFile egb;


  /**
   * Additional sets that were opened.
   */
  private transient List<BufferedNeuralDataSet> additional 
    = new ArrayList<BufferedNeuralDataSet>();


  /**
   * The owner.
   */
  private transient BufferedNeuralDataSet owner;


  /**
   * Construct the dataset using the specified binary file.
   * 
   * @param binaryFile
   *            The file to use.
   */
  public BufferedNeuralDataSet(final File binaryFile) {
    this.file = binaryFile;
    this.egb = new EncogEGBFile(binaryFile);
    if (file.exists()) {
      this.egb.open();
    }
  }


  /**
   * Open the binary file for reading.
   */
  public final void open() {
    this.egb.open();
  }


  /**
   * @return An iterator.
   */
  @Override
  public final Iterator<MLDataPair> iterator() {
    return new BufferedDataSetIterator(this);
  }


  /**
   * @return The record count.
   */
  @Override
  public final long getRecordCount() {
    if (this.egb == null) {
      return 0;
    } else {
      return this.egb.getNumberOfRecords();
    }
  }


  /**
   * Read an individual record.
   * 
   * @param index
   *            The zero-based index. Specify 0 for the first record, 1 for
   *            the second, and so on.
   * @param pair
   *            THe data to read.
   */
  @Override
  public final void getRecord(final long index, final MLDataPair pair) {
    this.egb.setLocation((int) index);
    double[] inputTarget = pair.getInputArray();
    this.egb.read(inputTarget);


    if (pair.getIdealArray() != null) {
      double[] idealTarget = pair.getIdealArray();
      this.egb.read(idealTarget);
    }
    
    this.egb.read();
  }


  /**
   * @return An additional training set.
   */
  @Override
  public final BufferedNeuralDataSet openAdditional() {


    BufferedNeuralDataSet result = new BufferedNeuralDataSet(this.file);
    result.setOwner(this);
    this.additional.add(result);
    return result;
  }


  /**
   * Add only input data, for an unsupervised dataset.
   * 
   * @param data1
   *            The data to be added.
   */
  public final void add(final MLData data1) {
    if (!this.loading) {
      throw new MLlDataError(BufferedNeuralDataSet.ERROR_ADD);
    }


    egb.write(data1.getData());
    egb.write(1.0);
  }


  /**
   * Add both the input and ideal data.
   * 
   * @param inputData
   *            The input data.
   * @param idealData
   *            The ideal data.
   */
  public final void add(final MLData inputData, final MLData idealData) {


    if (!this.loading) {
      throw new MLlDataError(BufferedNeuralDataSet.ERROR_ADD);
    }


    this.egb.write(inputData.getData());
    this.egb.write(idealData.getData());
    this.egb.write((double)1.0);
  }


  /**
   * Add a data pair of both input and ideal data.
   * 
   * @param pair
   *            The pair to add.
   */
  public final void add(final MLDataPair pair) {
    if (!this.loading) {
      throw new MLlDataError(BufferedNeuralDataSet.ERROR_ADD);
    }


    this.egb.write(pair.getInputArray());
    this.egb.write(pair.getIdealArray());
    this.egb.write(pair.getSignificance());


  }


  /**
   * Close the dataset.
   */
  @Override
  public final void close() {


    Object[] obj = this.additional.toArray();


    for (int i = 0; i < obj.length; i++) {
      BufferedNeuralDataSet set = (BufferedNeuralDataSet) obj[i];
      set.close();
    }


    this.additional.clear();


    if (this.owner != null) {
      this.owner.removeAdditional(this);
    }


    this.egb.close();
    this.egb = null;
  }


  /**
   * @return The ideal data size.
   */
  @Override
  public final int getIdealSize() {
    if (this.egb == null) {
      return 0;
    } else {
      return this.egb.getIdealCount();
    }
  }


  /**
   * @return The input data size.
   */
  @Override
  public final int getInputSize() {
    if (this.egb == null) {
      return 0;
    } else {
      return this.egb.getInputCount();
    }
  }


  /**
   * @return True if this dataset is supervised.
   */
  @Override
  public final boolean isSupervised() {
    if (this.egb == null) {
      return false;
    } else {
      return this.egb.getIdealCount() > 0;
    }
  }


  /**
   * @return If this dataset was created by openAdditional, the set that
   *         created this object is the owner. Return the owner.
   */
  public final BufferedNeuralDataSet getOwner() {
    return owner;
  }


  /**
   * Set the owner of this dataset.
   * 
   * @param theOwner
   *            The owner.
   */
  public final void setOwner(final BufferedNeuralDataSet theOwner) {
    this.owner = theOwner;
  }


  /**
   * Remove an additional dataset that was created.
   * 
   * @param child
   *            The additional dataset to remove.
   */
  public final void removeAdditional(final BufferedNeuralDataSet child) {
    synchronized (this) {
      this.additional.remove(child);
    }
  }


  /**
   * Begin loading to the binary file. After calling this method the add
   * methods may be called.
   * 
   * @param inputSize
   *            The input size.
   * @param idealSize
   *            The ideal size.
   */
  public final void beginLoad(final int inputSize, final int idealSize) {
    this.egb.create(inputSize, idealSize);
    this.loading = true;
  }


  /**
   * This method should be called once all the data has been loaded. The
   * underlying file will be closed. The binary fill will then be opened for
   * reading.
   */
  public final void endLoad() {
    if (!this.loading) {
      throw new BufferedDataError("Must call beginLoad, before endLoad.");
    }


    this.egb.close();


    open();


  }


  /**
   * @return The binary file used.
   */
  public final File getFile() {
    return this.file;
  }


  /**
   * @return The EGB file to use.
   */
  public final EncogEGBFile getEGB() {
    return this.egb;
  }


  /**
   * Load the binary dataset to memory. Memory access is faster.
   * 
   * @return A memory dataset.
   */
  public final MLDataSet loadToMemory() {
    BasicMLDataSet result = new BasicMLDataSet();


    for (MLDataPair pair : this) {
      result.add(pair);
    }


    return result;
  }


  /**
   * Load the specified training set.
   * 
   * @param training
   *            The training set to load.
   */
  public final void load(final MLDataSet training) {
    beginLoad(training.getInputSize(), training.getIdealSize());
    for (final MLDataPair pair : training) {
      add(pair);
    }
    endLoad();


  }
}
Source Code of org.encog.ml.data.buffer.BufferedNeuralDataSet

Related Classes of org.encog.ml.data.buffer.BufferedNeuralDataSet