Package weka.core.converters

Source Code of weka.core.converters.DatabaseLoader

/**
*
*  LICENSE: see COPYING file
*
**/

/*
*    DatabaseLoader.java
*    Copyright (C) 2004 Stefan Mutter
*
*/

package weka.core.converters;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.Date;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Vector;

import org.apache.log4j.Logger;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
* Reads from a database. Can read a database in batch or incremental mode. In
* inremental mode MySQL and HSQLDB are supported. For all other DBMS set a
* pseudoincremental mode is used: In pseudo incremental mode the instances are
* read into main memory all at once and then incrementally provided to the
* user. For incremental loading the rows in the database table have to be
* ordered uniquely. The reason for this is that every time only a single row is
* fetched by extending the user" query by a LIMIT clause. If this extension is
* impossible instances will be loaded pseudoincrementally. To ensure that every
* row is fetched exaclty once, they have to ordered. Therefore a (primary) key
* is necessary.This approach is chosen, instead of using JDBC driver
* facilities, because the latter one differ betweeen different drivers. If you
* use the DatabaseSaver and save instances by generating automatically a
* primary key (its name is defined in DtabaseUtils), this primary key will be
* used for ordering but will not be part of the output. The user defined SQL
* query to extract the instances should not contain LIMIT and ORDER BY clauses
* (see -Q option). In addition, for incremental loading, you can define in the
* DatabaseUtils file how many distinct values a nominal attribute is allowed to
* have. If this number is exceeded, the column will become a string attribute.
* In batch mode no string attributes will be created.
*
* Available options are: -Q the query to specify which tuples to load<br>
* The query must have the form: SELECT *|<column-list> FROM
* <table>
* [WHERE} (default: SELECT * FROM Results0).
* <p>
*
* -P comma separted list of columns that are a unqiue key <br>
* Only needed for incremental loading, if it cannot be detected automatically
* <p>
*
* -I <br>
* Sets incremental loading
*
* WARNING:This class was taken from the weka_src.jar and modified in order to
* put some logs and to make possible the connection to the DB through SpagoBI
*
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
* @version $Revision: 1.3 $
* @see Loader
*/
public class DatabaseLoader extends AbstractLoader implements BatchConverter,
    IncrementalConverter, DatabaseConverter, OptionHandler {

  /**
   * The header information that is retrieved in the beginning of incremental
   * loading
   */
  protected Instances m_structure;

  /**
   * Used in pseudoincremental mode. The whole dataset from which instances
   * will be read incrementally.
   */
  private Instances m_datasetPseudoInc;

  /**
   * Set of instances that equals m_structure except that the
   * auto_generated_id column is not included as an attribute
   */
  private Instances m_oldStructure;

  /** The database connection */
  private DatabaseConnection m_DataBaseConnection;

  /**
   * The user defined query to load instances. (form: SELECT *|<column-list>
   * FROM
   * <table>
   * [WHERE <condition>])
   */
  private String m_query = "Select * from Results0";;

  /**
   * Flag indicating that pseudo incremental mode is used (all instances load
   * at once into main memeory and then incrementally from main memory instead
   * of the database)
   */
  private boolean m_pseudoIncremental;

  /**
   * Limit when an attribute is treated as string attribute and not as a
   * nominal one because it has to many values.
   */
  private int m_nominalToStringLimit;

  /**
   * The number of rows obtained by m_query, eg the size of the ResultSet to
   * load
   */
  private int m_rowCount;

  /** Indicates how many rows has already been loaded incrementally */
  private int m_counter;

  /**
   * Decides which SQL statement to limit the number of rows should be used.
   * DBMS dependent. Algorithm just tries several possibilities.
   */
  private int m_choice;

  /** Flag indicating that incremental process wants to read first instance */
  private boolean m_firstTime;

  /**
   * Flag indicating that incremental mode is chosen (for command line use
   * only)
   */
  private boolean m_inc;

  /**
   * Contains the name of the columns that uniquely define a row in the
   * ResultSet. Ensures a unique ordering of instances for indremental
   * loading.
   */
  private FastVector m_orderBy;

  /** Stores the index of a nominal value */
  private Hashtable[] m_nominalIndexes;

  /** Stores the nominal value */
  private FastVector[] m_nominalStrings;

  /**
   * Name of the primary key column that will allow unique ordering necessary
   * for incremental loading. The name is specified in the DatabaseUtils file.
   */
  private String m_idColumn;

  /* Type mapping used for reading */
  public static final int STRING = 0;
  public static final int BOOL = 1;
  public static final int DOUBLE = 2;
  public static final int BYTE = 3;
  public static final int SHORT = 4;
  public static final int INTEGER = 5;
  public static final int LONG = 6;
  public static final int FLOAT = 7;
  public static final int DATE = 8;

  /** The property file for the database connection */
  protected static String PROPERTY_FILE = "weka/experiment/DatabaseUtils.props";

  /** Properties associated with the database connection */
  protected static Properties PROPERTIES;

  private static transient Logger logger = Logger
      .getLogger(DatabaseLoader.class);

  /** reads the property file */
  static {

    try {
      PROPERTIES = Utils.readProperties(PROPERTY_FILE);
    } catch (Exception ex) {
      logger.error("Problem reading properties. Fix before continuing.",
          ex);
    }
  }

  /**
   * Constructor.
   *
   * @throws Exception
   *             the exception
   */
  public DatabaseLoader() throws Exception {

    reset();
    m_pseudoIncremental = false;
    String props = PROPERTIES.getProperty("nominalToStringLimit");
    m_nominalToStringLimit = Integer.parseInt(props);
    m_idColumn = PROPERTIES.getProperty("idColumn");
  }

  /**
   * Returns a string describing this Loader.
   *
   * @return a description of the Loader suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    return "Reads Instances from a Database";
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.16 $");
  }

  /**
   * Resets the Loader ready to read a new data set.
   *
   * @throws Exception
   *             if an error occurs while disconnecting from the database
   */
  public void reset() throws Exception {
    logger.debug("IN");
    resetStructure();
    if (m_DataBaseConnection != null && m_DataBaseConnection.isConnected())
      m_DataBaseConnection.disconnectFromDatabase();
    m_DataBaseConnection = new DatabaseConnection();
    m_orderBy = new FastVector();
    // m_query = "Select * from Results0";
    m_inc = false;
    logger.debug("OUT");

  }

  /**
   * Resets the structure of instances.
   */
  public void resetStructure() {

    m_structure = null;
    m_datasetPseudoInc = null;
    m_oldStructure = null;
    m_rowCount = 0;
    m_counter = 0;
    m_choice = 0;
    m_firstTime = true;
    setRetrieval(NONE);
  }

  /**
   * Sets the query to execute against the database.
   *
   * @param q
   *            the query to execute
   */
  public void setQuery(String q) {
    q = q.replaceAll("[fF][rR][oO][mM]", "FROM");
    q = q.replaceFirst("[sS][eE][lL][eE][cC][tT]", "SELECT");
    logger.debug("Set query [" + q + "]");
    m_query = q;
  }

  /**
   * Gets the query to execute against the database.
   *
   * @return the query
   */
  public String getQuery() {
    return m_query;
  }

  /**
   * the tip text for this property.
   *
   * @return the tip text
   */
  public String queryTipText() {

    return "The query that should load the instances."
        + "\n The query has to be of the form SELECT <column-list>|* FROM <table> [WHERE <conditions>]";
  }

  /**
   * Sets the key columns of a database table.
   *
   * @param keys
   *            a String containing the key columns in a comma separated list.
   */
  public void setKeys(String keys) {

    m_orderBy.removeAllElements();
    StringTokenizer st = new StringTokenizer(keys, ",");
    while (st.hasMoreTokens()) {
      String column = st.nextToken();
      column = column.replaceAll(" ", "");
      m_orderBy.addElement(column);
    }
  }

  /**
   * Gets the key columns' name.
   *
   * @return name of the key columns'
   */
  public String getKeys() {

    StringBuffer key = new StringBuffer();
    for (int i = 0; i < m_orderBy.size(); i++) {
      key.append((String) m_orderBy.elementAt(i));
      if (i != m_orderBy.size() - 1)
        key.append(", ");
    }
    return key.toString();
  }

  /**
   * the tip text for this property.
   *
   * @return the tip text
   */
  public String keysTipText() {

    return "For incremental loading a unique identiefer has to be specified."
        + "\nIf the query includes all columns of a table (SELECT *...) a primary key"
        + "\ncan be detected automatically depending on the JDBC driver. If that is not possible"
        + "\nspecify the key columns here in a comma separated list.";
  }

  /**
   * Sets the database URL.
   *
   * @param url
   *            the url
   */
  public void setUrl(String url) {

    m_DataBaseConnection.setDatabaseURL(url);

  }

  /**
   * Gets the URL.
   *
   * @return the URL
   */
  public String getUrl() {

    return m_DataBaseConnection.getDatabaseURL();
  }

  /**
   * the tip text for this property.
   *
   * @return the tip text
   */
  public String urlTipText() {

    return "The URL of the database";
  }

  /**
   * Sets the database user.
   *
   * @param user
   *            the user
   */
  public void setUser(String user) {

    m_DataBaseConnection.setUsername(user);
  }

  /**
   * Gets the user name.
   *
   * @return name of database user
   */
  public String getUser() {

    return m_DataBaseConnection.getUsername();
  }

  /**
   * the tip text for this property.
   *
   * @return the tip text
   */
  public String userTipText() {

    return "The user name for the database";
  }

  /**
   * Sets user password for the database.
   *
   * @param password
   *            the password
   */
  public void setPassword(String password) {

    m_DataBaseConnection.setPassword(password);
  }

  /**
   * the tip text for this property.
   *
   * @return the tip text
   */
  public String passwordTipText() {

    return "The database password";
  }

  /**
   * Sets the database url.
   *
   * @param url
   *            the database url
   * @param userName
   *            the user name
   * @param password
   *            the password
   */
  public void setSource(String url, String userName, String password) {
    logger.debug("IN");
    try {
      m_DataBaseConnection = new DatabaseConnection();
      m_DataBaseConnection.setDatabaseURL(url);
      m_DataBaseConnection.setUsername(userName);
      m_DataBaseConnection.setPassword(password);
    } catch (Exception ex) {
      printException(ex);
    }
    logger.debug("OUT");
  }

  /**
   * Sets the database url using the given connection.
   *
   * @param connection
   *            the connection
   */
  public void setSource(Connection connection) {
    logger.debug("IN");
    try {
      m_DataBaseConnection = new DatabaseConnection();
      m_DataBaseConnection.setConnection(connection);
    } catch (Exception ex) {
      printException(ex);
    }
    logger.debug("OUT");
  }

  /**
   * Sets the database url.
   *
   * @param url
   *            the database url
   */
  public void setSource(String url) {
    logger.debug("IN");
    try {
      m_DataBaseConnection = new DatabaseConnection();
      m_DataBaseConnection.setDatabaseURL(url);
    } catch (Exception ex) {
      printException(ex);
    }
    logger.debug("OUT");
  }

  /**
   * Sets the database url using the DatabaseUtils file.
   *
   * @throws Exception
   *             the exception
   */
  public void setSource() throws Exception {
    logger.debug("IN");
    m_DataBaseConnection = new DatabaseConnection();
    logger.debug("OUT");
  }

  /**
   * Opens a connection to the database.
   */
  public void connectToDatabase() {
    logger.debug("IN");
    try {
      if (!m_DataBaseConnection.isConnected()) {
        logger.debug("Was not Connected");
        m_DataBaseConnection.connectToDatabase();
        logger.debug("Now is connected");
      }
      logger.debug("OUT");
    } catch (Exception ex) {
      logger.error(ex);
      printException(ex);
    }
  }

  /**
   * Returns the table name or all after the FROM clause of the user specified
   * query to retrieve instances.
   *
   * @param onlyTableName
   *            true if only the table name should be returned, false
   *            otherwise
   * @return the end of the query
   */
  private String endOfQuery(boolean onlyTableName) {
    String table;
    int beginIndex, endIndex;

    beginIndex = m_query.indexOf("FROM ") + 5;
    while (m_query.charAt(beginIndex) == ' ')
      beginIndex++;
    endIndex = m_query.indexOf(" ", beginIndex);
    if (endIndex != -1 && onlyTableName)
      table = m_query.substring(beginIndex, endIndex);
    else
      table = m_query.substring(beginIndex);
    logger.debug("table [" + table + "]");
    if (m_DataBaseConnection.getUpperCase()) {
      logger.debug("convert table names to uppercase");
      table = table.toUpperCase();
    }
    return table;
  }

  /**
   * Checks for a unique key using the JDBC driver's method: getPrimaryKey(),
   * getBestRowIdentifier(). Depending on their implementation a key can be
   * detected. The key is needed to order the instances uniquely for an
   * inremental loading. If an existing key cannot be detected, use -P option.
   *
   * @throws Exception
   *             if database error occurs
   * @return true, if a key could have been detected, false otherwise
   */
  private boolean checkForKey() throws Exception {
    logger.debug("IN");
    String query = m_query;

    query = query.replaceAll(" +", " ");
    // query has to use all columns
    if (!query.startsWith("SELECT *"))
      return false;
    m_orderBy.removeAllElements();
    if (!m_DataBaseConnection.isConnected())
      m_DataBaseConnection.connectToDatabase();
    DatabaseMetaData dmd = m_DataBaseConnection.getMetaData();
    String table = endOfQuery(true);
    ResultSet rs = dmd.getPrimaryKeys(null, null, table);
    while (rs.next()) {
      m_orderBy.addElement(rs.getString(4));
    }
    rs.close();
    if (m_orderBy.size() != 0)
      return true;
    // check for unique keys
    rs = dmd.getBestRowIdentifier(null, null, table,
        DatabaseMetaData.bestRowSession, false);
    ResultSetMetaData rmd = rs.getMetaData();
    int help = 0;
    while (rs.next()) {
      m_orderBy.addElement(rs.getString(2));
      help++;
    }
    rs.close();
    if (help == rmd.getColumnCount()) {
      m_orderBy.removeAllElements();
    }
    if (m_orderBy.size() != 0)
      return true;
    logger.debug("OUT");
    return false;
  }

  /**
   * Converts string attribute into nominal ones for an instance read during
   * incremental loading
   *
   * @param rs
   *            The result set
   * @param i
   *            the index of the nominal value
   * @throws Exception
   *             exception if it cannot be converted
   */
  private void stringToNominal(ResultSet rs, int i) throws Exception {

    while (rs.next()) {
      String str = rs.getString(1);
      if (!rs.wasNull()) {
        Double index = (Double) m_nominalIndexes[i - 1].get(str);
        if (index == null) {
          index = new Double(m_nominalStrings[i - 1].size());
          m_nominalIndexes[i - 1].put(str, index);
          m_nominalStrings[i - 1].addElement(str);
        }
      }
    }
  }

  /**
   * Used in incremental loading. Modifies the SQL statement, so that only one
   * instance per time is tretieved and the instances are ordered uniquely.
   *
   * @param query
   *            the query to modify for incremental loading
   * @param offset
   *            sets which tuple out of the uniquely ordered ones should be
   *            returned
   * @param choice
   *            the kind of query that is suitable for the used DBMS
   * @return the modified query that returns only one result tuple.
   */
  private String limitQuery(String query, int offset, int choice) {

    String limitedQuery;
    StringBuffer order = new StringBuffer();
    String orderByString = "";

    if (m_orderBy.size() != 0) {
      order.append(" ORDER BY ");
      for (int i = 0; i < m_orderBy.size() - 1; i++) {
        if (m_DataBaseConnection.getUpperCase())
          order.append(((String) m_orderBy.elementAt(i))
              .toUpperCase());
        else
          order.append((String) m_orderBy.elementAt(i));
        order.append(", ");
      }
      if (m_DataBaseConnection.getUpperCase())
        order.append(((String) m_orderBy
            .elementAt(m_orderBy.size() - 1)).toUpperCase());
      else
        order
            .append((String) m_orderBy
                .elementAt(m_orderBy.size() - 1));
      orderByString = order.toString();
    }
    if (choice == 0) {
      limitedQuery = query.replaceFirst("SELECT", "SELECT LIMIT "
          + offset + " 1");
      limitedQuery = limitedQuery.concat(orderByString);
      return limitedQuery;
    }
    if (choice == 1) {
      limitedQuery = query.concat(orderByString + " LIMIT 1 OFFSET "
          + offset);
      return limitedQuery;
    }
    limitedQuery = query.concat(orderByString + " LIMIT " + offset + ", 1");
    return limitedQuery;
  }

  /**
   * Counts the number of rows that are loaded from the database
   *
   * @throws Exception
   *             if the number of rows cannot be calculated
   * @return the entire number of rows
   */
  private int getRowCount() throws Exception {

    String query = "SELECT COUNT(*) FROM " + endOfQuery(false);
    if (m_DataBaseConnection.execute(query) == false) {
      throw new Exception("Cannot count results tuples.");
    }
    ResultSet rs = m_DataBaseConnection.getResultSet();
    rs.next();
    int i = rs.getInt(1);
    rs.close();
    return i;
  }

  /**
   * Determines and returns (if possible) the structure (internally the
   * header) of the data set as an empty set of instances.
   *
   * @return the structure of the data set as an empty set of Instances
   *
   * @throws IOException
   *             Signals that an I/O exception has occurred.
   *
   * @exception IOException
   *                if an error occurs
   */
  public Instances getStructure() throws IOException {
    logger.debug("IN");
    if (m_DataBaseConnection == null) {
      throw new IOException("No source database has been specified");
    }
    connectToDatabase();
    logger.debug("Connected To Database");
    pseudo: try {
      if (m_pseudoIncremental && m_structure == null) {
        if (getRetrieval() == BATCH) {
          throw new IOException(
              "Cannot mix getting instances in both incremental and batch modes");
        }
        setRetrieval(NONE);
        m_datasetPseudoInc = getDataSet();
        logger.debug("Dataset Retrieved");
        m_structure = new Instances(m_datasetPseudoInc, 0);
        setRetrieval(NONE);
        return m_structure;
      }
      if (m_structure == null) {
        if (!m_DataBaseConnection.tableExists(endOfQuery(true)))
          throw new IOException("Table does not exist.");
        // finds out which SQL statement to use for the DBMS to limit
        // the number of resulting rows to one
        int choice = 0;
        boolean rightChoice = false;
        while (!rightChoice) {
          try {
            if (m_DataBaseConnection.execute(limitQuery(m_query, 0,
                choice)) == false) {
              logger.error("Query didn't produce results");
              throw new IOException(
                  "Query didn't produce results");
            }
            m_choice = choice;
            rightChoice = true;
          } catch (SQLException ex) {
            choice++;
            if (choice == 3) {
              m_pseudoIncremental = true;
              break pseudo;
            }
            logger.error("Sql exception", ex);
          }
        }
        String end = endOfQuery(false);
        ResultSet rs = m_DataBaseConnection.getResultSet();
        logger.debug("ResultSet Retrieved");
        ResultSetMetaData md = rs.getMetaData();
        rs.close();
        int numAttributes = md.getColumnCount();
        int[] attributeTypes = new int[numAttributes];
        m_nominalIndexes = new Hashtable[numAttributes];
        m_nominalStrings = new FastVector[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
          switch (m_DataBaseConnection.translateDBColumnType(md
              .getColumnTypeName(i))) {
          case STRING:
            ResultSet rs1;
            String columnName = md.getColumnName(i);
            if (m_DataBaseConnection.getUpperCase())
              columnName = columnName.toUpperCase();
            m_nominalIndexes[i - 1] = new Hashtable();
            m_nominalStrings[i - 1] = new FastVector();
            String query = "SELECT COUNT(DISTINCT( " + columnName
                + " )) FROM " + end;
            if (m_DataBaseConnection.execute(query) == true) {
              rs1 = m_DataBaseConnection.getResultSet();
              rs1.next();
              int count = rs1.getInt(1);
              rs1.close();
              if (count > m_nominalToStringLimit
                  || m_DataBaseConnection
                      .execute("SELECT DISTINCT ( "
                          + columnName + " ) FROM "
                          + end) == false) {
                attributeTypes[i - 1] = Attribute.STRING;
                break;
              }
              rs1 = m_DataBaseConnection.getResultSet();
            } else {
              attributeTypes[i - 1] = Attribute.STRING;
              break;
            }
            attributeTypes[i - 1] = Attribute.NOMINAL;
            stringToNominal(rs1, i);
            rs1.close();
            break;
          case BOOL:
            attributeTypes[i - 1] = Attribute.NOMINAL;
            m_nominalIndexes[i - 1] = new Hashtable();
            m_nominalIndexes[i - 1].put("false", new Double(0));
            m_nominalIndexes[i - 1].put("true", new Double(1));
            m_nominalStrings[i - 1] = new FastVector();
            m_nominalStrings[i - 1].addElement("false");
            m_nominalStrings[i - 1].addElement("true");
            break;
          case DOUBLE:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case BYTE:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case SHORT:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case INTEGER:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case LONG:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case FLOAT:
            attributeTypes[i - 1] = Attribute.NUMERIC;
            break;
          case DATE:
            attributeTypes[i - 1] = Attribute.DATE;
            break;
          default:
            attributeTypes[i - 1] = Attribute.STRING;
          }
        }
        FastVector attribInfo = new FastVector();
        for (int i = 0; i < numAttributes; i++) {
          /* Fix for databases that uppercase column names */
          // String attribName = attributeCaseFix(md.getColumnName(i +
          // 1));
          String attribName = md.getColumnName(i + 1);
          switch (attributeTypes[i]) {
          case Attribute.NOMINAL:
            attribInfo.addElement(new Attribute(attribName,
                m_nominalStrings[i]));
            break;
          case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
          case Attribute.STRING:
            attribInfo.addElement(new Attribute(attribName,
                (FastVector) null));
            break;
          case Attribute.DATE:
            attribInfo.addElement(new Attribute(attribName,
                (String) null));
            break;
          default:
            throw new IOException("Unknown attribute type");
          }
        }
        m_structure = new Instances(endOfQuery(true), attribInfo, 0);
        // get rid of m_idColumn
        if (m_DataBaseConnection.getUpperCase())
          m_idColumn = m_idColumn.toUpperCase();
        if (m_structure.attribute(0).name().equals(m_idColumn)) {
          m_oldStructure = new Instances(m_structure, 0);
          m_oldStructure.deleteAttributeAt(0);
        } else
          m_oldStructure = new Instances(m_structure, 0);
      } else {
        if (m_oldStructure == null)
          m_oldStructure = new Instances(m_structure, 0);
      }
      m_DataBaseConnection.disconnectFromDatabase();
    } catch (Exception ex) {
      ex.printStackTrace();
      printException(ex);
    }
    logger.debug("OUT");
    return m_oldStructure;

  }

  /**
   * Return the full data set in batch mode (header and all intances at once).
   *
   * @return the structure of the data set as an empty set of Instances
   *
   * @throws IOException
   *             Signals that an I/O exception has occurred.
   *
   * @exception IOException
   *                if there is no source or parsing fails
   */
  public Instances getDataSet() throws IOException {
    logger.debug("IN");
    if (m_DataBaseConnection == null) {
      logger.error("No source database has been specified");
      throw new IOException("No source database has been specified");
    }
    if (getRetrieval() == INCREMENTAL) {
      logger
          .error("Cannot mix getting Instances in both incremental and batch modes");
      throw new IOException(
          "Cannot mix getting Instances in both incremental and batch modes");
    }
    setRetrieval(BATCH);
    connectToDatabase();

    Instances result = null;
    try {
      if (m_DataBaseConnection.execute(m_query) == false) {
        logger.error("Query didn't produce results");
        throw new Exception("Query didn't produce results");
      }
      ResultSet rs = m_DataBaseConnection.getResultSet();
      logger.debug("RS: " + (rs != null ? rs.toString() : "null"));
      ResultSetMetaData md = rs.getMetaData();
      // Determine structure of the instances
      int numAttributes = md.getColumnCount();
      int[] attributeTypes = new int[numAttributes];
      m_nominalIndexes = new Hashtable[numAttributes];
      m_nominalStrings = new FastVector[numAttributes];
      for (int i = 1; i <= numAttributes; i++) {
        switch (m_DataBaseConnection.translateDBColumnType(md
            .getColumnTypeName(i))) {

        case STRING:
          ResultSet rs1;
          String columnName = md.getColumnName(i);
          logger.debug("Column Name: "
              + (columnName != null ? columnName : "null"));
          if (m_DataBaseConnection.getUpperCase())
            columnName = columnName.toUpperCase();
          String end = endOfQuery(false);
          m_nominalIndexes[i - 1] = new Hashtable();
          m_nominalStrings[i - 1] = new FastVector();
          if (m_DataBaseConnection.execute("SELECT DISTINCT ( "
              + columnName + " ) FROM " + end) == false) {
            logger.error("Nominal values cannot be retrieved");
            throw new Exception(
                "Nominal values cannot be retrieved");
          }
          rs1 = m_DataBaseConnection.getResultSet();
          attributeTypes[i - 1] = Attribute.NOMINAL;
          stringToNominal(rs1, i);
          rs1.close();
          break;
        case BOOL:
          attributeTypes[i - 1] = Attribute.NOMINAL;
          m_nominalIndexes[i - 1] = new Hashtable();
          m_nominalIndexes[i - 1].put("false", new Double(0));
          m_nominalIndexes[i - 1].put("true", new Double(1));
          m_nominalStrings[i - 1] = new FastVector();
          m_nominalStrings[i - 1].addElement("false");
          m_nominalStrings[i - 1].addElement("true");
          break;
        case DOUBLE:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case BYTE:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case SHORT:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case INTEGER:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case LONG:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case FLOAT:
          attributeTypes[i - 1] = Attribute.NUMERIC;
          break;
        case DATE:
          attributeTypes[i - 1] = Attribute.DATE;
          break;
        default:
          attributeTypes[i - 1] = Attribute.STRING;
        }
      }

      // Step through the tuples
      FastVector instances = new FastVector();
      int ii = 0;
      while (rs.next()) {
        /*
        logger.debug("Row num: " + ii++);
        if(ii == 384){
          logger.debug("We are in trouble");
        }
        */
        double[] vals = new double[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
          //logger.debug("Column num: " + i);
          switch (m_DataBaseConnection.translateDBColumnType(md.getColumnTypeName(i))) {
          case STRING:
            String str = rs.getString(i);

            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              Double index = (Double) m_nominalIndexes[i - 1]
                  .get(str);
              if (index == null) {
                // Null pointer is here
                index = new Double(m_structure.attribute(i - 1)
                    .addStringValue(str));
              }
              vals[i - 1] = index.doubleValue();
            }
            break;
          case BOOL:
            boolean boo = rs.getBoolean(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (boo ? 1.0 : 0.0);
            }
            break;
          case DOUBLE:
            double dd = rs.getDouble(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = dd;
            }
            break;
          case BYTE:
            byte by = rs.getByte(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (double) by;
            }
            break;
          case SHORT:
            short sh = rs.getByte(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (double) sh;
            }
            break;
          case INTEGER:
            int in = rs.getInt(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (double) in;
            }
            break;
          case LONG:
            long lo = rs.getLong(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (double) lo;
            }
            break;
          case FLOAT:
            float fl = rs.getFloat(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              vals[i - 1] = (double) fl;
            }
            break;
          case DATE:
            Date date = rs.getDate(i);
            if (rs.wasNull()) {
              vals[i - 1] = Instance.missingValue();
            } else {
              // TODO: Do a value check here.
              vals[i - 1] = (double) date.getTime();
            }
            break;
          default:
            vals[i - 1] = Instance.missingValue();
          }
        }
        Instance newInst;
        newInst = new Instance(1.0, vals);
        instances.addElement(newInst);
      }

      // Create the header and add the instances to the dataset
      FastVector attribInfo = new FastVector();
      for (int i = 0; i < numAttributes; i++) {
        /* Fix for databases that uppercase column names */
        // String attribName = attributeCaseFix(md.getColumnName(i +
        // 1));
        String attribName = md.getColumnName(i + 1);
        switch (attributeTypes[i]) {
        case Attribute.NOMINAL:
          attribInfo.addElement(new Attribute(attribName,
              m_nominalStrings[i]));
          break;
        case Attribute.NUMERIC:
          attribInfo.addElement(new Attribute(attribName));
          break;
        case Attribute.STRING:
          attribInfo.addElement(new Attribute(attribName,
              (FastVector) null));
          break;
        case Attribute.DATE:
          attribInfo.addElement(new Attribute(attribName,
              (String) null));
          break;
        default:
          throw new IOException("Unknown attribute type");
        }
      }
      result = new Instances(endOfQuery(true), attribInfo, instances
          .size());
      for (int i = 0; i < instances.size(); i++) {
        result.add((Instance) instances.elementAt(i));
      }
      rs.close();
      m_DataBaseConnection.disconnectFromDatabase();
      // get rid of m_idColumn
      if (m_DataBaseConnection.getUpperCase())
        m_idColumn = m_idColumn.toUpperCase();
      if (result.attribute(0).name().equals(m_idColumn)) {
        result.deleteAttributeAt(0);
      }
      m_structure = new Instances(result, 0);
    } catch (Exception ex) {
      logger.error(ex);
      printException(ex);
      StringBuffer text = new StringBuffer();
      if (m_query.equals("Select * from Results0")) {
        text.append("\n\nDatabaseLoader options:\n");
        Enumeration enumi = listOptions();
        while (enumi.hasMoreElements()) {
          Option option = (Option) enumi.nextElement();
          text.append(option.synopsis() + '\n');
          text.append(option.description() + '\n');
        }
      }
    }
    logger.debug("OUT: " + result);
    return result;
  }

  /**
   * Reads an instance from a database.
   *
   * @param rs
   *            the ReusltSet to load
   * @throws Exception
   *             if instance cannot be read
   * @return an instance read from the database
   */
  private Instance readInstance(ResultSet rs) throws Exception {
    logger.debug("IN");
    FastVector instances = new FastVector();
    ResultSetMetaData md = rs.getMetaData();
    int numAttributes = md.getColumnCount();
    double[] vals = new double[numAttributes];
    m_structure.delete();
    for (int i = 1; i <= numAttributes; i++) {
      switch (m_DataBaseConnection.translateDBColumnType(md
          .getColumnTypeName(i))) {
      case STRING:
        String str = rs.getString(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          Double index = (Double) m_nominalIndexes[i - 1].get(str);
          if (index == null) {
            index = new Double(m_structure.attribute(i - 1)
                .addStringValue(str));
          }
          vals[i - 1] = index.doubleValue();
        }
        break;
      case BOOL:
        boolean boo = rs.getBoolean(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (boo ? 1.0 : 0.0);
        }
        break;
      case DOUBLE:
        // BigDecimal bd = rs.getBigDecimal(i, 4);
        double dd = rs.getDouble(i);
        // Use the column precision instead of 4?
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          // newInst.setValue(i - 1, bd.doubleValue());
          vals[i - 1] = dd;
        }
        break;
      case BYTE:
        byte by = rs.getByte(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (double) by;
        }
        break;
      case SHORT:
        short sh = rs.getByte(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (double) sh;
        }
        break;
      case INTEGER:
        int in = rs.getInt(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (double) in;
        }
        break;
      case LONG:
        long lo = rs.getLong(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (double) lo;
        }
        break;
      case FLOAT:
        float fl = rs.getFloat(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          vals[i - 1] = (double) fl;
        }
        break;
      case DATE:
        Date date = rs.getDate(i);
        if (rs.wasNull()) {
          vals[i - 1] = Instance.missingValue();
        } else {
          // TODO: Do a value check here.
          vals[i - 1] = (double) date.getTime();
        }
        break;
      default:
        vals[i - 1] = Instance.missingValue();
      }
    }
    Instance inst = new Instance(1.0, vals);
    // get rid of m_idColumn
    if (m_DataBaseConnection.getUpperCase())
      m_idColumn = m_idColumn.toUpperCase();
    if (m_structure.attribute(0).name().equals(m_idColumn)) {
      inst.deleteAttributeAt(0);
      m_oldStructure.add(inst);
      inst = m_oldStructure.instance(0);
      m_oldStructure.delete(0);
    } else {
      // instances is added to and deleted from the structure to get the
      // true nominal values instead of the index of the values.
      m_structure.add(inst);
      inst = m_structure.instance(0);
      m_structure.delete(0);
    }
    logger.debug("OUT");
    return inst;

  }

  /**
   * Read the data set incrementally---get the next instance in the data set
   * or returns null if there are no more instances to get. If the structure
   * hasn't yet been determined by a call to getStructure then method does so
   * before returning the next instance in the data set.
   *
   * @param structure
   *            the dataset header information, will get updated in case of
   *            string or relational attributes
   * @return the next instance in the data set as an Instance object or null
   *         if there are no more instances to be read
   * @throws IOException
   *             if there is an error during parsing
   */
  public Instance getNextInstance(Instances structure) throws IOException {
    logger.debug("IN");
    m_structure = structure;

    if (m_DataBaseConnection == null)
      throw new IOException("No source database has been specified");
    if (getRetrieval() == BATCH) {
      throw new IOException(
          "Cannot mix getting Instances in both incremental and batch modes");
    }
    // pseudoInremental: Load all instances into main memory in batch mode
    // and give them incrementally to user
    if (m_pseudoIncremental) {
      setRetrieval(INCREMENTAL);
      if (m_datasetPseudoInc.numInstances() > 0) {
        Instance current = m_datasetPseudoInc.instance(0);
        m_datasetPseudoInc.delete(0);
        return current;
      } else {
        resetStructure();
        return null;
      }
    }
    // real incremental mode. At the moment(version 1.0) only for MySQL and
    // HSQLDB (Postgres not tested, should work)
    setRetrieval(INCREMENTAL);
    try {
      if (!m_DataBaseConnection.isConnected())
        connectToDatabase();
      // if no key columns specified by user, try to detect automatically
      if (m_firstTime && m_orderBy.size() == 0) {
        if (!checkForKey())
          throw new Exception(
              "A unique order cannot be detected automatically.\nYou have to use SELECT * in your query to enable this feature.\nMaybe JDBC driver is not able to detect key.\nDefine primary key in your database or use -P option (command line) or enter key columns in the GUI.");
      }
      if (m_firstTime) {
        m_firstTime = false;
        m_rowCount = getRowCount();
      }
      // as long as not all rows has been loaded
      if (m_counter < m_rowCount) {
        if (m_DataBaseConnection.execute(limitQuery(m_query, m_counter,
            m_choice)) == false) {
          throw new Exception("Tuple could not be retrieved.");
        }
        m_counter++;
        ResultSet rs = m_DataBaseConnection.getResultSet();
        rs.next();
        Instance current = readInstance(rs);
        rs.close();
        return current;
      } else {
        m_DataBaseConnection.disconnectFromDatabase();
        resetStructure();
        return null;
      }

    } catch (Exception ex) {
      printException(ex);
    }
    logger.debug("OUT");
    return null;
  }

  /**
   * Gets the setting.
   *
   * @return the current setting
   */
  public String[] getOptions() {

    Vector options = new Vector();

    options.add("-Q");
    options.add(getQuery());

    StringBuffer text = new StringBuffer();
    for (int i = 0; i < m_orderBy.size(); i++) {
      if (i > 0)
        text.append(", ");
      text.append((String) m_orderBy.elementAt(i));
    }
    options.add("-P");
    options.add(text.toString());

    if (m_inc)
      options.add("-I");

    return (String[]) options.toArray(new String[options.size()]);
  }

  /**
   * Lists the available options.
   *
   * @return an enumeration of the available options
   */
  public java.util.Enumeration listOptions() {

    FastVector newVector = new FastVector(3);

    newVector
        .addElement(new Option(
            "\tSQL query of the form SELECT <list of columns>|* FROM <table> [WHERE] to execute (default Select * From Results0).",
            "Q", 1, "-Q <query>"));
    newVector
        .addElement(new Option(
            "\tList of column names uniquely defining a DB row (separated by ', ').\n\tUsed for incremental loading."
                + "\n\tIf not specified, the key will be determined automatically, if possible with the used JDBC driver.\n\tThe auto ID column created by the DatabaseSaver won't be loaded.",
            "P", 1, "-P<list of column names>"));
    newVector.addElement(new Option("\tSets incremental loading", "I", 0,
        "-I"));

    return newVector.elements();
  }

  /**
   * Sets the options.
   *
   * Available options are: -Q the query to specify which tuples to load<br>
   * The query must have the form: SELECT *|<column-list> FROM
   * <table>
   * [WHERE} (default: SELECT * FROM Results0).
   * <p>
   *
   * -P comma separted list of columns that are a unqiue key <br>
   * Only needed for incremental loading, if it cannot be detected
   * automatically
   * <p>
   *
   * -I <br>
   * Sets incremental loading
   *
   * @param options
   *            the options
   *
   * @throws Exception
   *             if options cannot be set
   */
  public void setOptions(String[] options) throws Exception {

    String optionString, keyString;
    optionString = Utils.getOption('Q', options);
    keyString = Utils.getOption('P', options);
    reset();
    if (optionString.length() != 0)
      setQuery(optionString);
    m_orderBy.removeAllElements();
    m_inc = Utils.getFlag('I', options);
    if (m_inc) {
      StringTokenizer st = new StringTokenizer(keyString, ",");
      while (st.hasMoreTokens()) {
        String column = st.nextToken();
        column = column.replaceAll(" ", "");
        m_orderBy.addElement(column);
      }
    }
  }

  /**
   * Returns the database password
   *
   * @return the database password
   */
  public String getPassword() {
    return m_DataBaseConnection.getPassword();
  }

  /**
   * Prints an exception
   *
   * @param ex
   *            the exception to print
   */
  private void printException(Exception ex) {

    while (ex != null) {
      if (ex instanceof SQLException) {
        ex = ((SQLException) ex).getNextException();
      } else
        ex = null;
    }

  }

  /**
   * Main method.
   *
   * @param options
   *            the options
   */
  public static void main(String[] options) {

    DatabaseLoader atf;
    String a = "";
    try {
      atf = new DatabaseLoader();
      atf.setOptions(options);
      atf.setSource(atf.getUrl(), atf.getUser(), atf.getPassword());
      if (!atf.m_inc)
        System.out.println(atf.getDataSet());
      else {
        Instances structure = atf.getStructure();
        System.out.println(structure);
        Instance temp;
        do {
          temp = atf.getNextInstance(structure);
          if (temp != null) {
            System.out.println(temp);
          }
        } while (temp != null);
      }
    } catch (Exception e) {
      e.printStackTrace();
      System.out.println("\n" + e.getMessage());
    }
  }

}
TOP

Related Classes of weka.core.converters.DatabaseLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.