Examples of org.apache.hadoop.mapreduce.Job
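
The snippets below come from the HIHO (Hadoop-In Hadoop-Out) project by Nubetech and show common ways to configure and launch a Job. For orientation, here is a minimal, self-contained driver sketch against the same API; note that in Hadoop 2.x and later, Job.getInstance(conf) is preferred over the deprecated new Job(conf) constructor that the snippets use.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalWordCount {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf); // copies conf; preferred over new Job(conf)
    job.setJobName("MinimalWordCount");
    job.setJarByClass(MinimalWordCount.class); // locates the jar to ship to the cluster

    job.setMapperClass(TokenCounterMapper.class); // library mapper: emits (token, 1)
    job.setReducerClass(IntSumReducer.class);     // library reducer: sums the counts
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    // Blocks until the job finishes; true means print progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}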


public class TestDelimitedTextInputFormat {
  @Test
  public void testSetProperties() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    DelimitedTextInputFormat.setProperties(job, ",", 1);
    assertEquals(
        ",",
        job.getConfiguration().get(
            DelimitedTextInputFormat.DELIMITER_CONF));
    assertEquals("1",
        job.getConfiguration()
            .get(DelimitedTextInputFormat.COLUMN_CONF));
  }
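The test above pins down the two configuration keys, but the HIHO implementation itself is not shown on this page. A plausible reconstruction of setProperties, consistent with the assertions:

  // Hypothetical sketch, not the actual HIHO source.
  public static void setProperties(Job job, String delimiter, int column) {
    job.getConfiguration().set(DELIMITER_CONF, delimiter);
    // Configuration.setInt() stores the value as its decimal string,
    // which is why the test asserts the string "1".
    job.getConfiguration().setInt(COLUMN_CONF, column);
  }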


      checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }   
    Job job = new Job(conf);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
        conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    job.setJobName("HihoDBExport");

    job.setMapperClass(GenericDBLoadDataMapper.class);
    job.setJarByClass(ExportToDB.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    GenericDBOutputFormat.setOutput(job, tableName, columnNames);

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
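The 0/1 result of waitForCompletion(true) is meant to flow back through Tool.run() to the shell as the process exit code. A typical launcher sketch, assuming ExportToDB follows the Configured/Tool pattern used by the other HIHO drivers on this page:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class ExportToDBLauncher {
  public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-D, -files, -libjars, ...)
    // from args before handing the rest to run().
    System.exit(ToolRunner.run(new Configuration(), new ExportToDB(), args));
  }
}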

    for (Entry<String, String> entry : conf) {
      logger.debug("key, value " + entry.getKey() + "="
          + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
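With two reduce tasks, the default HashPartitioner splits the keys between two output files (part-r-00000 and part-r-00001). One common refinement, not present in the HIHO snippet: because IntSumReducer is associative and commutative, it can also run map-side as a combiner to shrink the shuffle.

    // Optional: pre-aggregate counts on the map side before the shuffle.
    job.setCombinerClass(IntSumReducer.class);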

      checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
      logger.debug("key, value " + entry.getKey() + "="
          + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
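The debug lines above assume the URL, username and password properties were already loaded into the Configuration. With the stock org.apache.hadoop.mapreduce.lib.db.DBConfiguration they can be populated in one call; the connection details below are hypothetical:

    // Sets URL_PROPERTY, USERNAME_PROPERTY and PASSWORD_PROPERTY in one go.
    DBConfiguration.configureDB(job.getConfiguration(),
        "com.mysql.jdbc.Driver",                 // JDBC driver class
        "jdbc:mysql://localhost:3306/hiho_demo", // hypothetical URL
        "hiho_user",                             // hypothetical user
        "secret");                               // hypothetical password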

public class ExportDelimitedToDB extends Configured implements Tool {

  public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
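The visible portion of the snippet selects DBOutputFormat but never tells it which table and columns to write to. With the stock org.apache.hadoop.mapreduce.lib.db.DBOutputFormat that is done via setOutput; the table and column names below are hypothetical:

    // Required by DBOutputFormat: the target table and its column names.
    DBOutputFormat.setOutput(job, "employees", "id", "name", "salary");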

    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("SalesforceLoading");
    //job.setMapperClass(SalesForceLoadMapper.class);
    //job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapperClass(ExportSalesforceMapper.class);
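    // setJarByClass only identifies which jar to ship to the cluster; any
    // class bundled in the HIHO jar works, though the driver class is the
    // usual choice.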
    job.setJarByClass(FileStreamInputFormat.class);
    job.setReducerClass(ExportSalesforceReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;

      .getLogger(co.nubetech.hiho.similarity.ngram.ScoreJob.class);

  @Override
  public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("Score job");
    job.setJarByClass(ScoreJob.class);

    Class inputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat");
    Class outputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class inputValueClass = Class
        .forName("org.apache.hadoop.io.IntWritable");
    Class outputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class
        .forName("org.apache.hadoop.io.LongWritable");

    job.setMapperClass(ScoreMapper.class);
    job.setReducerClass(ScoreReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, "outputOfNGramJob");
    FileOutputFormat.setOutputPath(job, new Path("outputOfScoreJob"));

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
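The Class.forName lookups above bind the format and key/value classes at runtime and leave the references raw. When the classes are available at compile time, the equivalent direct wiring is shorter and type-checked; fully qualified names are used here to stay self-contained:

    job.setInputFormatClass(
        org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(co.nubetech.hiho.similarity.ngram.ValuePair.class);
    job.setMapOutputValueClass(org.apache.hadoop.io.IntWritable.class);
    job.setOutputKeyClass(co.nubetech.hiho.similarity.ngram.ValuePair.class);
    job.setOutputValueClass(org.apache.hadoop.io.LongWritable.class);
    job.setOutputFormatClass(
        org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);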

      checkMandatoryConfs();
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("NGram job");
    job.setJarByClass(NGramJob.class);

    Class inputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat");
    Class outputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("org.apache.hadoop.io.Text");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.Text");
    Class outputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class
        .forName("org.apache.hadoop.io.IntWritable");

    job.setMapperClass(NGramMapper.class);
    job.setReducerClass(NGramReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path("outputOfNGramJob"));

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
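KeyValueTextInputFormat splits each input line into key and value at the first tab by default. The separator is configurable; the property name below is from the Hadoop 2 mapreduce API (older releases used key.value.separator.in.input.line):

    // Override the default '\t' separator on the job's own Configuration copy.
    job.getConfiguration().set(
        "mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");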

    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
      logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
        conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
      throw new IllegalArgumentException(
          "Invalid output strategy: " + strategy);
    }
    if (os != OutputStrategyEnum.AVRO) {
      switch (os) {

      case DUMP: {
        // job.setMapperClass(DBImportMapper.class);
        break;
      }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         *
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
      case DELIMITED: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break; // without this, DELIMITED fell through into the JSON case
      }
      case JSON: {
        // job.setMapperClass(DBImportJsonMapper.class);
        // job.setJarByClass(ObjectMapper.class);
        break;
      }
      default: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break;
      }
      }

      String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
      String inputBoundingQuery = conf
          .get(DBConfiguration.INPUT_BOUNDING_QUERY);
      logger.debug("About to set the params");
      DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery,
          params);
      logger.debug("Set the params");

      job.setNumReduceTasks(0);

      try {
        // job.setJarByClass(Class.forName(conf.get(
        // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
        logger.debug("OUTPUT format class is "
            + job.getOutputFormatClass());

        /*
         * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
         * ReflectionUtils.newInstance(job.getOutputFormatClass(),
         * job.getConfiguration()); output.checkOutputSpecs(job);
         */
        logger.debug("Class is "
            + ReflectionUtils
                .newInstance(job.getOutputFormatClass(),
                    job.getConfiguration()).getClass()
                .getName());
        job.waitForCompletion(false);
        if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
          generateHiveScript(conf, job, jobCounter);
          generatePigScript(conf, job);
        }

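DBQueryInputFormat.setInput appears to mirror the stock data-driven DB input API while adding bind parameters. With plain Hadoop the equivalent is DataDrivenDBInputFormat.setInput: the bounding query returns the low and high values of the split column, and Hadoop rewrites the $CONDITIONS token into a per-split range predicate. The record class and queries below are illustrative:

    // MyRecord is a hypothetical DBWritable describing one result row.
    DataDrivenDBInputFormat.setInput(job, MyRecord.class,
        "SELECT id, name FROM employees WHERE $CONDITIONS",
        "SELECT MIN(id), MAX(id) FROM employees");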

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
      job.setMapperClass(MergeKeyMapper.class);
      job.setReducerClass(MergeKeyReducer.class);

    } else if (mergeBy.equals("value")) {
      job.setMapperClass(MergeValueMapper.class);
      job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
      logger.debug("Output format class is " + job.getOutputFormatClass());
      logger.debug("Class is "
          + ReflectionUtils
              .newInstance(job.getOutputFormatClass(),
                  job.getConfiguration()).getClass()
              .getName());
      job.waitForCompletion(false);
      if (job.isComplete()) {
        Counters counters = job.getCounters();
        totalRecordsOld = counters.findCounter(
            MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
        totalRecordsNew = counters.findCounter(
            MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
        badRecords = counters.findCounter(
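One caveat on the counter harvesting above: after waitForCompletion returns, isComplete() is true even when the job failed; isSuccessful() is the stricter guard. A safer sketch:

    job.waitForCompletion(false);
    // isComplete() only says the job reached a terminal state;
    // isSuccessful() says it actually succeeded.
    if (job.isSuccessful()) {
      Counters counters = job.getCounters();
      long totalRecordsOld = counters.findCounter(
          MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
    }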
