Package org.encog.app.analyst.analyze

Source Code of org.encog.app.analyst.analyze.AnalyzedField

/*
* Encog(tm) Core v3.3 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2014 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*  
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.analyze;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.encog.Encog;
import org.encog.app.analyst.script.AnalystClassItem;
import org.encog.app.analyst.script.AnalystScript;
import org.encog.app.analyst.script.DataField;
import org.encog.app.analyst.script.prop.ScriptProperties;
import org.encog.util.csv.CSVFormat;

/**
* This class represents a field that the Encog Analyst is in the process of
* analyzing. This class is used to track statistical information on the field
* that will help the Encog analyst determine what type of field this is, and
* how to normalize it.
*
*/
public class AnalyzedField extends DataField {

  /**
   * Tge sum of all values of this field.
   */
  private double total;
 
  /**
   * The number of instances of this field.
   */
  private int instances;
 
  /**
   * The total for standard deviation calculation.
   */
  private double devTotal;
 
  /**
   * A mapping between the class names that the class items.
   */
  private final Map<String, AnalystClassItem> classMap
    = new HashMap<String, AnalystClassItem>();
 
  /**
   * The analyst script that the results are saved to.
   */
  private final AnalystScript script;

  /**
   * The numeric format.
   */
  private CSVFormat fmt;

  /**
   * Construct an analyzed field.
   * @param theScript The script being analyzed.
   * @param name The name of the field.
   */
  public AnalyzedField(final AnalystScript theScript, final String name) {
    super(name);
    this.instances = 0;
    this.script = theScript;
    this.fmt = this.script.determineFormat();
  }

  /**
   * Perform a pass one analysis of this field.
   * @param v The current value.
   */
  public void analyze1(final String v) {

    boolean accountedFor = false;
    String str = v.trim();

    if (str.trim().length() == 0 || str.equals("?")) {
      setComplete(false);
      return;
    }

    this.instances++;

    if (isReal()) {
      if (this.fmt.isValid(str)) {
        final double d = this.fmt.parse(str);
        setMax(Math.max(d, getMax()));
        setMin(Math.min(d, getMin()));
        this.total += d;
        accountedFor = true;
      } else {
        setReal(false);
        if (!isInteger()) {
          setMax(0);
          setMin(0);
          setStandardDeviation(0);
        }
      }
    }

    if (isInteger()) {
      try {
        final int i = Integer.parseInt(str);
        setMax(Math.max(i, getMax()));
        setMin(Math.min(i, getMin()));
        if (!accountedFor) {
          this.total += i;
        }
      } catch (final NumberFormatException ex) {
        setInteger(false);
        if (!isReal()) {
          setMax(0);
          setMin(0);
          setStandardDeviation(0);
        }
      }
    }

    if (isClass()) {
      AnalystClassItem item;

      // is this a new class?
      if (!this.classMap.containsKey(str)) {
        item = new AnalystClassItem(str, str, 1);
        this.classMap.put(str, item);

        // do we have too many different classes?
        final int max = this.script.getProperties().getPropertyInt(
            ScriptProperties.SETUP_CONFIG_MAX_CLASS_COUNT);
        if (this.classMap.size() > max) {
          setClass(false);
        }
      } else {
        item = this.classMap.get(str);
        item.increaseCount();
      }

    }
  }

  /**
   * Perform a pass two analysis of this field.
   * @param str The current value.
   */
  public void analyze2(final String str) {
    if (str.trim().length() == 0) {
      return;
    }

    if (isReal() || isInteger()) {
      if (!str.equals("") && !str.equals("?")) {
        final double d = this.fmt.parse(str);
        this.devTotal += Math.pow((d - getMean()), 2);
      }
    }
  }

  /**
   * Complete pass 1.
   */
  public void completePass1() {

    this.devTotal = 0;

    if (this.instances == 0) {
      setMean(0);
    } else {
      setMean(this.total / this.instances);
    }
  }

  /**
   * Complete pass 2.
   */
  public void completePass2() {
    setStandardDeviation(Math.sqrt(this.devTotal / this.instances));
  }

  /**
   * Finalize the field, and create a DataField.
   * @return The new DataField.
   */
  public DataField finalizeField() {
    final DataField result = new DataField(getName());

    // if max and min are the same, we are dealing with a zero-sized range,
    // which will cause other issues.  This is caused by ever number in the
    // column having exactly (or nearly exactly) the same value.  Provide a
    // small range around that value so that every value in this column normalizes
    // to the midpoint of the desired normalization range, typically 0 or 0.5.
    if( Math.abs(getMax()-getMin())<Encog.DEFAULT_DOUBLE_EQUAL ) {
      result.setMin(getMin()-0.0001);
      result.setMax(getMin()+0.0001);
    } else {
      result.setMin(getMin());
      result.setMax(getMax());     
    }
   
    result.setName(getName());
    result.setMean(getMean());
    result.setStandardDeviation(getStandardDeviation());
    result.setInteger(isInteger());
    result.setReal(isReal());
    result.setClass(isClass());
    result.setComplete(isComplete());
    result.setSource(getSource());

    result.getClassMembers().clear();

    if (result.isClass()) {
      final List<AnalystClassItem> list = getAnalyzedClassMembers();
      result.getClassMembers().addAll(list);
    }

    return result;
  }

  /**
   * Get the class members.
   * @return The class members.
   */
  public List<AnalystClassItem> getAnalyzedClassMembers() {
    final List<String> sorted = new ArrayList<String>();
    sorted.addAll(this.classMap.keySet());
    Collections.sort(sorted);

    final List<AnalystClassItem> result = new ArrayList<AnalystClassItem>();
    for (final String str : sorted) {
      result.add(this.classMap.get(str));
    }

    return result;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    final StringBuilder result = new StringBuilder("[");
    result.append(getClass().getSimpleName());
    result.append(" total=");
    result.append(this.total);
    result.append(", instances=");
    result.append(this.instances);
    result.append("]");
    return result.toString();
  }
}
TOP

Related Classes of org.encog.app.analyst.analyze.AnalyzedField

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.