Package co.nubetech.hiho.job

Source Code of co.nubetech.hiho.job.DBQueryInputJob

/**
* Copyright 2010 Nube Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package co.nubetech.hiho.job;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import co.nubetech.hiho.mapreduce.lib.db.apache.DBConfiguration;
import co.nubetech.hiho.mapreduce.lib.db.apache.DataDrivenDBInputFormat;
import co.nubetech.hiho.mapreduce.lib.db.apache.MRJobConfig;
import co.nubetech.hiho.avro.DBMapper;
import co.nubetech.hiho.common.HIHOConf;
import co.nubetech.hiho.common.HIHOException;
import co.nubetech.hiho.common.OutputStrategyEnum;
import co.nubetech.hiho.hive.HiveUtility;
//import co.nubetech.hiho.mapred.avro.GenericRecordAvroOutputFormat;
import co.nubetech.hiho.mapreduce.DBInputAvroMapper;
import co.nubetech.hiho.mapreduce.DBInputDelimMapper;
import co.nubetech.hiho.mapreduce.lib.db.DBQueryInputFormat;
import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable;
import co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat;
import co.nubetech.hiho.pig.PigUtility;

public class DBQueryInputJob extends Configured implements Tool {

  private final static Logger logger = Logger
      .getLogger(co.nubetech.hiho.job.DBQueryInputJob.class);

  private ArrayList params;

  public void populateConfiguration(String[] args, Configuration conf) {
    for (int i = 0; i < args.length - 1; i++) {
      if ("-jdbcDriver".equals(args[i])) {
        conf.set(DBConfiguration.DRIVER_CLASS_PROPERTY, args[++i]);
      } else if ("-jdbcUrl".equals(args[i])) {
        conf.set(DBConfiguration.URL_PROPERTY, args[++i]);
      } else if ("-jdbcUsername".equals(args[i])) {
        conf.set(DBConfiguration.USERNAME_PROPERTY, args[++i]);
      } else if ("-jdbcPassword".equals(args[i])) {
        conf.set(DBConfiguration.PASSWORD_PROPERTY, args[++i]);
      } else if ("-inputQuery".equals(args[i])) {
        conf.set(DBConfiguration.INPUT_QUERY, args[++i]);
      } else if ("-inputBoundingQuery".equals(args[i])) {
        conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, args[++i]);
      } else if ("-outputPath".equals(args[i])) {
        conf.set(HIHOConf.INPUT_OUTPUT_PATH, args[++i]);
      } else if ("-outputStrategy".equals(args[i])) {
        conf.set(HIHOConf.INPUT_OUTPUT_STRATEGY, args[++i]);
      } else if ("-delimiter".equals(args[i])) {
        conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, args[++i]);
      } else if ("-numberOfMappers".equals(args[i])) {
        conf.set(HIHOConf.NUMBER_MAPPERS, args[++i]);
      } else if ("-inputTableName".equals(args[i])) {
        conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, args[++i]);
      } else if ("-inputFieldNames".equals(args[i])) {
        conf.set(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, args[++i]);
      } else if ("-inputOrderBy".equals(args[i])) {
        conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, args[++i]);
      } else if ("-inputLoadTo".equals(args[i])) {
        conf.set(HIHOConf.INPUT_OUTPUT_LOADTO, args[++i]);
      } else if ("-inputLoadToPath".equals(args[i])) {
        conf.set(HIHOConf.INPUT_OUTPUT_LOADTO_PATH, args[++i]);
      } else if ("-hiveDriver".equals(args[i])) {
        conf.set(HIHOConf.HIVE_DRIVER, args[++i]);
      } else if ("-hiveUrl".equals(args[i])) {
        conf.set(HIHOConf.HIVE_URL, args[++i]);
      } else if ("-hiveUsername".equals(args[i])) {
        conf.set(HIHOConf.HIVE_USR_NAME, args[++i]);
      } else if ("-hivePassword".equals(args[i])) {
        conf.set(HIHOConf.HIVE_PASSWORD, args[++i]);
      } else if ("-hivePartitionBy".equals(args[i])) {
        conf.set(HIHOConf.HIVE_PARTITION_BY, args[++i]);
      } else if ("-hiveIfNotExists".equals(args[i])) {
        conf.set(HIHOConf.HIVE_TABLE_OVERWRITE, args[++i]);
      } else if ("-hiveTableName".equals(args[i])) {
        conf.set(HIHOConf.HIVE_TABLE_NAME, args[++i]);
      } else if ("-hiveSortedBy".equals(args[i])) {
        conf.set(HIHOConf.HIVE_SORTED_BY, args[++i]);
      } else if ("-hiveClusteredBy".equals(args[i])) {
        conf.set(HIHOConf.HIVE_CLUSTERED_BY, args[++i]);
      }
    }
  }
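
  // Illustrative sketch, not part of the original job: shows how the flags parsed by
  // populateConfiguration() land in the Configuration. The driver, url, credentials,
  // table and path values below are assumptions used only for demonstration.
  public static Configuration buildExampleConfiguration() {
    Configuration conf = new Configuration();
    String[] exampleArgs = new String[] {
        "-jdbcDriver", "com.mysql.jdbc.Driver",
        "-jdbcUrl", "jdbc:mysql://localhost:3306/hiho_demo",
        "-jdbcUsername", "hiho", "-jdbcPassword", "hiho",
        "-inputTableName", "employee",
        "-outputPath", "/tmp/hiho/employee",
        "-outputStrategy", "DELIMITED",
        "-delimiter", "," };
    new DBQueryInputJob().populateConfiguration(exampleArgs, conf);
    // After parsing, e.g. conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) is "employee"
    // and conf.get(HIHOConf.INPUT_OUTPUT_PATH) is "/tmp/hiho/employee".
    return conf;
  }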

  public void checkMandatoryConfs(Configuration conf) throws HIHOException {
    if (conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY) == null) {
      throw new HIHOException(
          "JDBC driver configuration is not specified,please specify JDBC driver class");
    }
    if (conf.get(DBConfiguration.URL_PROPERTY) == null) {
      throw new HIHOException(
          "JDBC url path configuration is empty,please specify JDBC url path");
    }
    if (!conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY).contains("hsqldb")) {
      if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
        throw new HIHOException(
            "JDBC user name configuration is empty,please specify JDBC user name");
      }
      if (conf.get(DBConfiguration.PASSWORD_PROPERTY) == null) {
        throw new HIHOException(
            "JDBC password configuration is empty,please specify JDBC password");
      }
    }
    if (conf.get(HIHOConf.INPUT_OUTPUT_PATH) == null) {
      throw new HIHOException(
          "Output path is not specified,please specify output path");
    }
    if (conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY) != null
        && conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY).equals("DELIMITED")) {
      if (conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER) == null) {
        throw new HIHOException(
            "Delimiter is not specified,please specify delimiter");
      }

    }
    if (conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) == null
        && conf.get(DBConfiguration.INPUT_QUERY) == null) {
      throw new HIHOException(
          "Input table name and input query both configurations are empty, please specify anyone of them");
    }
    if (conf.get(DBConfiguration.INPUT_QUERY) != null
        && conf.get(DBConfiguration.INPUT_BOUNDING_QUERY) == null) {
      throw new HIHOException(
          "Please specify input bounding query as it is mandatory to be defined with input query ");
    }
    if (conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) != null
        && conf.get(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY) == null) {
      conf.set(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "*");
    }

    if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null
        && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH) == null) {
      throw new HIHOException(
          "Load to path configuration is empty, please specify path to load script in loadTOPath configuration");
    }
    if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null
        && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) {
      if (conf.get(HIHOConf.HIVE_URL) == null) {
        throw new HIHOException(
            "The Hive url is not defined, please specify hive url");
      }
      if (conf.get(HIHOConf.HIVE_DRIVER) == null) {
        throw new HIHOException(
            "The Hive driver is not defined, please specify hive driver");
      }
      if (checkForMultiplePartition(conf.get(HIHOConf.HIVE_PARTITION_BY))
          && conf.get(HIHOConf.HIVE_TABLE_NAME) == null) {
        throw new HIHOException("please specify hive table name");
      }

    }

  }
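
  // Illustrative helper, not part of the original job: wraps checkMandatoryConfs() to show
  // how the validation is typically used. A configuration missing, say, the output path
  // (-outputPath / HIHOConf.INPUT_OUTPUT_PATH) makes this return false.
  public static boolean isConfigurationComplete(Configuration conf) {
    try {
      new DBQueryInputJob().checkMandatoryConfs(conf);
      return true;
    } catch (HIHOException e) {
      return false;
    }
  }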

  @Override
  public int run(String[] args) throws IOException {

    Configuration conf = getConf();
    populateConfiguration(args, conf);

    boolean isMultiplePartition = false;

    if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
      if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) {
        conf.set("hadoop.job.history.user.location", "none");
        if (conf.get(HIHOConf.HIVE_PARTITION_BY) != null) {
          try {
            isMultiplePartition = checkForMultiplePartition(conf
                .get(HIHOConf.HIVE_PARTITION_BY));
          } catch (HIHOException e) {
            e.printStackTrace();
          }

        }
      }

      if (isMultiplePartition
          && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) {
        populateHiveConfigurationForMultiplePartition(conf);
      } else {
        runJobs(conf, 0);
      }

    } else {
      runJobs(conf, 0);
    }
    return 0;
  }

  private boolean checkForMultiplePartition(String partitionBy)
      throws HIHOException {
    // ArrayList<PartitionBy> partitionByValues= new
    // ArrayList<PartitionBy>();
    boolean isMultiplePartition = false;
    StringTokenizer partitionToken = new StringTokenizer(partitionBy, ";");
    StringTokenizer partitionData = new StringTokenizer(
        partitionToken.nextToken(), ":");
    try {
      partitionData.nextToken();
      partitionData.nextToken();
    } catch (NoSuchElementException e) {
      throw new HIHOException(
          "Data not defined properly in partitionBy configuration");
    }
    if (partitionData.hasMoreTokens()) {
      int index = partitionData.nextToken().indexOf(",");
      if (index > -1) {
        isMultiplePartition = true;
        if (partitionToken.hasMoreTokens()) {
          throw new HIHOException(
              "Data not defined properly in partitionBy configuration");

        }

      }
    }
    return isMultiplePartition;
  }
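
  // Illustrative sketch with assumed values: the hivePartitionBy string is expected in the
  // form "column:type:value[,value...]". A single value such as "country:string:US" keeps
  // the job in single-partition mode, while a comma-separated list such as
  // "country:string:US,IN" selects the multiple-partition path handled by
  // populateHiveConfigurationForMultiplePartition().
  private boolean demoPartitionCheck() throws HIHOException {
    boolean single = checkForMultiplePartition("country:string:US");      // false
    boolean multiple = checkForMultiplePartition("country:string:US,IN"); // true
    return !single && multiple;
  }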

  private void generatePigScript(Configuration conf, Job job)
      throws HIHOException {
    // see if import to pig or hive
    if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("pig")) {
      try {
        String pigScript = PigUtility.getLoadScript(
            HIHOConf.INPUT_OUTPUT_PATH, getDBWritable(conf));
        // //jobId
        File file = new File(new File(
            conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH)),
            "pigScript" + ".txt");
        FileOutputStream fos = new FileOutputStream(file);
        BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
            fos));
        w.write(pigScript);
        w.close();
        fos.close();
      } catch (Exception h) {
        throw new HIHOException("Unable to generate Pig script", h);
      }
    }

  }

  private void generateHiveScript(Configuration conf, Job job, int jobCounter)
      throws HIHOException {
    // see if import to pig or hive
    if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) {
      try {
        HiveUtility.createTable(conf, job, getDBWritable(conf),
            jobCounter);
      } catch (Exception h) {
        throw new HIHOException("Unable to generate Hive script", h);
      }
    }

  }

  public static void main(String[] args) throws Exception {
    // setUp();
    DBQueryInputJob job = new DBQueryInputJob();
    // ArrayList params = new ArrayList();
    // params.add(false);
    // job.setParams(params);
    int res = ToolRunner.run(new Configuration(), job, args);
    System.exit(res);
  }
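
  // Hypothetical invocation sketch: the same flags are normally passed on the command
  // line, e.g. "hadoop jar hiho.jar co.nubetech.hiho.job.DBQueryInputJob -jdbcDriver ...".
  // ToolRunner wires the Configuration into run() exactly as main() does above; the
  // hsqldb url and paths here are assumptions for demonstration only (hsqldb is also the
  // one driver for which checkMandatoryConfs() does not insist on credentials).
  public static int runExampleImport() throws Exception {
    String[] exampleArgs = new String[] {
        "-jdbcDriver", "org.hsqldb.jdbcDriver",
        "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/xdb",
        "-inputTableName", "employee",
        "-outputPath", "/tmp/hiho/employee",
        "-outputStrategy", "DELIMITED",
        "-delimiter", "|" };
    return ToolRunner.run(new Configuration(), new DBQueryInputJob(), exampleArgs);
  }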

  public ArrayList getParams() {
    return params;
  }

  public void setParams(ArrayList params) {
    this.params = params;
  }

  /*
   * this will move to the junit once everything is properly done
   *
   * public static void setUp() { // set up the database String db =
   * "cumulus"; String root = "root"; String pwd = "newpwd";
   *
   * String user = "tester"; String password = "password";
   *
   * Connection conn; String url = "jdbc:mysql://localhost:3306/"; String
   * driverName = "com.mysql.jdbc.Driver";
   *
   * try { Class.forName(driverName).newInstance(); conn =
   * DriverManager.getConnection(url, root, pwd); try { Statement st =
   * conn.createStatement();
   *
   * String dbDrop = "drop database if exists " + db;
   * st.executeUpdate(dbDrop); logger.debug("Dropped database");
   *
   * String dbCreate = "create database " + db; st.executeUpdate(dbCreate);
   * logger.debug("Created database");
   *
   * // Register a new user named tester on the // database named cumulus with
   * a password // password enabling several different // privileges.
   * st.executeUpdate("GRANT SELECT,INSERT,UPDATE,DELETE," + "CREATE,DROP " +
   * "ON " + db + ".* TO '" + user + "'@'localhost' " + "IDENTIFIED BY '" +
   * password + "';"); logger.debug("Created user tester"); st.close();
   *
   * // now connect to the relevant db and create the schema conn =
   * DriverManager.getConnection(url + db, root, pwd); st =
   * conn.createStatement();
   *
   * String desTable =
   * "Create table if not exists designations(id integer, designation varchar(30));\n"
   * ; st = conn.createStatement(); st.executeUpdate(desTable);
   *
   * logger.debug(desTable);
   *
   * String desTableData =
   * "insert into designations(id, designation) values("; desTableData +=
   * "0, 'Manager');\n"; st.executeUpdate(desTableData); desTableData =
   * "insert into designations(id, designation) values("; desTableData +=
   * "1, 'Accountant');\n"; st.executeUpdate(desTableData); desTableData =
   * "insert into designations(id, designation) values("; desTableData +=
   * "2, 'Assistant');\n"; st.executeUpdate(desTableData); desTableData =
   * "insert into designations(id, designation) values("; desTableData +=
   * "3, 'Sr. Manager');\n"; logger.debug(desTableData);
   * st.executeUpdate(desTableData);
   * logger.debug("Data inserte4d into designations");
   *
   * String table =
   * "CREATE TABLE if not exists extractJobEmployee(id integer, name varchar(50), age integer"
   * ; table += ", isMarried boolean, salary double, designationId integer);";
   * st = conn.createStatement(); st.executeUpdate(table);
   *
   * logger.debug("Schema creation process successfull!");
   * logger.debug("Inserting table data");
   *
   * for (int i = 0; i < 50; ++i) { int designation = i % 4; String tableData
   * =
   * "Insert into extractJobEmployee(id, name, age, isMarried, salary, designationId) values("
   * ; tableData += i + ", 'Employee" + i; tableData += "', 25, false, 349.9,"
   * + designation + ");\n"; logger.debug(tableData); st =
   * conn.createStatement(); st.executeUpdate(tableData); } // conn.commit();
   * logger.debug("Table data insertion process successfull!"); } catch
   * (SQLException s) { s.printStackTrace(); } conn.close(); } catch
   * (Exception e) { e.printStackTrace(); }
   *
   * }
   */

  public static GenericDBWritable getDBWritable(Configuration conf)
      throws HIHOException {
    try {
      String driverName = conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY);
      String url = conf.get(DBConfiguration.URL_PROPERTY);
      String user = conf.get(DBConfiguration.USERNAME_PROPERTY);
      String password = conf.get(DBConfiguration.PASSWORD_PROPERTY);
      Class.forName(driverName).newInstance();
      Connection conn = DriverManager.getConnection(url, user, password);
      DatabaseMetaData dbMeta = conn.getMetaData();
      String dbProductName = dbMeta.getDatabaseProductName()
          .toUpperCase();
      String query = getSelectQuery(conf, dbProductName);
      PreparedStatement st = conn.prepareStatement(query);
      logger.warn("\n Query for GenericDBWritable is " + query);
      GenericDBWritable writable = new GenericDBWritable(
          GenericDBWritable.populateColumnInfo(st.getMetaData()),
          null);
      return writable;

    } catch (Exception e) {
      e.printStackTrace();
      throw new HIHOException("Unable to get metadata for the query", e);
    }
  }

  /**
   * Returns the query for selecting the records. Subclasses can override this
   * for custom behaviour. Much of this is copied from DataDrivenDBRecordReader.
   */
  @SuppressWarnings("unchecked")
  public static String getSelectQuery(Configuration conf, String dbProductName)
      throws HIHOException {

    StringBuilder query = new StringBuilder();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String[] fieldNames = dbConf.getInputFieldNames();
    String tableName = dbConf.getInputTableName();
    String conditions = dbConf.getInputConditions();
    StringBuilder conditionClauses = new StringBuilder();

    if (dbConf.getInputQuery() == null) {
      // We need to generate the entire query.
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length - 1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);
      if (!dbProductName.startsWith("ORACLE")) {
        // Seems to be necessary for hsqldb? Oracle explicitly does
        // *not*
        // use this clause.
        query.append(" AS ").append(tableName);
      }

    } else {
      // User provided the query. We replace the special token with our
      // WHERE clause.
      String inputQuery = dbConf.getInputQuery();
      if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
        logger.error("Could not find the clause substitution token "
            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN
            + " in the query: [" + inputQuery
            + "]. Parallel splits may not work correctly.");
      }
      // bad bad hack, but we don't have the split here..
      conditionClauses.append("( 1=1 )");

      query.append(inputQuery.replace(
          DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
          conditionClauses.toString()));
    }

    return query.toString();

  }
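
  // Illustrative sketch (assumed table and column names): with only a table configured,
  // getSelectQuery() produces something like "SELECT id, name FROM employee AS employee"
  // for non-Oracle databases (Oracle omits the trailing alias). When an explicit input
  // query is configured instead, the DataDrivenDBInputFormat substitution token in it is
  // replaced with the placeholder clause "( 1=1 )".
  public static String exampleGeneratedQuery() throws HIHOException {
    Configuration conf = new Configuration();
    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, "employee");
    conf.set(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id,name");
    return getSelectQuery(conf, "MYSQL");
  }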

  public void populateHiveConfigurationForMultiplePartition(Configuration conf)
      throws IOException {

    String columnName = null, columnType = null;
    ArrayList columnsValues = new ArrayList();

    ArrayList query = new ArrayList();
    ArrayList table = new ArrayList();

    String partitionByData = conf.get(HIHOConf.HIVE_PARTITION_BY);
    boolean isInputQueryDelimited = false;
    String queries = null;
    if (conf.get(DBConfiguration.INPUT_QUERY) != null) {
      isInputQueryDelimited = true;
      queries = conf.get(DBConfiguration.INPUT_QUERY);
    }
    // /
    StringTokenizer partitionByTokens = new StringTokenizer(
        partitionByData, ":");
    columnName = partitionByTokens.nextToken();
    columnType = partitionByTokens.nextToken();
    StringTokenizer partitionByValues = new StringTokenizer(
        partitionByTokens.nextToken(), ",");
    int counter = 0;
    int tokenCounts = partitionByValues.countTokens();
    while (partitionByValues.hasMoreTokens()) {
      columnsValues.add(counter, partitionByValues.nextToken());
      counter++;
    }
    // /
    if (isInputQueryDelimited) {
      StringTokenizer queryTokens = new StringTokenizer(queries, ";");
      counter = 0;
      while (queryTokens.hasMoreTokens()) {
        query.add(counter, queryTokens.nextToken());
        counter++;
      }
    } else {
      StringTokenizer tableTokens = new StringTokenizer(
          conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY), ";");
      counter = 0;
      while (tableTokens.hasMoreTokens()) {
        table.add(counter, tableTokens.nextToken());
        counter++;
      }

    }
    for (int jobCounter = 0; jobCounter < tokenCounts; jobCounter++) {
      if (isInputQueryDelimited) {
        conf.set(DBConfiguration.INPUT_QUERY, query.get(jobCounter)
            .toString());
      } else {
        conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY,
            table.get(jobCounter).toString());
      }
      conf.set(HIHOConf.INPUT_OUTPUT_PATH,
          conf.get(HIHOConf.INPUT_OUTPUT_PATH) + jobCounter);
      // conf.set(HIHOConf.HIVE_TABLE_OVERWRITE, "true");
      String partitionBy = columnName + ":" + columnType + ":"
          + columnsValues.get(jobCounter).toString();
      conf.set(HIHOConf.HIVE_PARTITION_BY, partitionBy);
      runJobs(conf, jobCounter);

    }

  }
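
  /*
   * Illustrative walk-through with assumed values: given
   *   hivePartitionBy = "country:string:US,IN"
   *   inputTableName  = "emp_us;emp_in"
   *   outputPath      = "/tmp/hiho/emp"
   * the loop above fans out into two runJobs() calls: the first reads emp_us with
   * partitionBy "country:string:US", the second reads emp_in with partitionBy
   * "country:string:IN", and each run appends its jobCounter to the current output
   * path before the job is launched.
   */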

  public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {
      checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
      logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
        conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
      throw new IllegalArgumentException(
          "Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
      switch (os) {

      case DUMP: {
        // job.setMapperClass(DBImportMapper.class);
        break;
      }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         *
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
      case DELIMITED: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break;
      }
      case JSON: {
        // job.setMapperClass(DBImportJsonMapper.class);
        // job.setJarByClass(ObjectMapper.class);
        break;
      }
      default: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break;
      }
      }

      String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
      String inputBoundingQuery = conf
          .get(DBConfiguration.INPUT_BOUNDING_QUERY);
      logger.debug("About to set the params");
      DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery,
          params);
      logger.debug("Set the params");

      job.setNumReduceTasks(0);

      try {
        // job.setJarByClass(Class.forName(conf.get(
        // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
        logger.debug("OUTPUT format class is "
            + job.getOutputFormatClass());

        /*
         * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
         * ReflectionUtils.newInstance(job.getOutputFormatClass(),
         * job.getConfiguration()); output.checkOutputSpecs(job);
         */
        logger.debug("Class is "
            + ReflectionUtils
                .newInstance(job.getOutputFormatClass(),
                    job.getConfiguration()).getClass()
                .getName());
        job.waitForCompletion(false);
        if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
          generateHiveScript(conf, job, jobCounter);
          generatePigScript(conf, job);
        }

      }
      /*
       * catch (HIHOException h) { h.printStackTrace(); }
       */
      catch (HIHOException e) {
        e.printStackTrace();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    /*else {
      String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
      String inputBoundingQuery = conf
          .get(DBConfiguration.INPUT_BOUNDING_QUERY);
      logger.debug("About to set the params");
      // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
      // inputQuery, inputBoundingQuery, params);
      logger.debug("Set the params");

      JobConf jobConf = new JobConf(conf);

      try {
        GenericDBWritable queryWritable = getDBWritable(jobConf);
        Schema pair = DBMapper
            .getPairSchema(queryWritable.getColumns());

        AvroJob.setMapOutputSchema(jobConf, pair);
        GenericRecordAvroOutputFormat.setOutputPath(jobConf, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

        DBQueryInputFormat.setInput(
            jobConf, inputQuery, inputBoundingQuery, params);
        jobConf.setInputFormat(DBQueryInputFormat.class);
        jobConf.setMapperClass(DBInputAvroMapper.class);
        jobConf.setMapOutputKeyClass(NullWritable.class);
        jobConf.setMapOutputValueClass(AvroValue.class);
        jobConf.setOutputKeyClass(NullWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
        jobConf.setJarByClass(DBQueryInputJob.class);
        jobConf.setStrings(
            "io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
        jobConf.setNumReduceTasks(0);
        /*
         * jobConf.setOutputFormat(org.apache.hadoop.mapred.
         * SequenceFileOutputFormat.class);
         * org.apache.hadoop.mapred.SequenceFileOutputFormat
         * .setOutputPath(jobConf, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        
        JobClient.runJob(jobConf);
      } catch (Throwable e) {
        e.printStackTrace();
      }

    }*/

  }
}
