Examples of org.apache.hadoop.mapreduce.Job
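
The snippets below come from the HIHO (Hadoop-In Hadoop-Out) project by Nubetech and show common ways to configure and launch a Job. For orientation, here is a minimal, self-contained driver sketch against the same API; note that in Hadoop 2.x and later, Job.getInstance(conf) is preferred over the deprecated new Job(conf) constructor that the snippets use.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalWordCount {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf); // copies conf; preferred over new Job(conf)
    job.setJobName("MinimalWordCount");
    job.setJarByClass(MinimalWordCount.class); // locates the jar to ship to the cluster

    job.setMapperClass(TokenCounterMapper.class); // library mapper: emits (token, 1)
    job.setReducerClass(IntSumReducer.class);     // library reducer: sums the counts
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    TextOutputFormat.setOutputPath(job, new Path(args[1]));

    // Blocks until the job finishes; true means print progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}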


public class TestDelimitedTextInputFormat {
  @Test
  public void testSetProperties() throws IOException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    DelimitedTextInputFormat.setProperties(job, ",", 1);
    assertEquals(
        ",",
        job.getConfiguration().get(
            DelimitedTextInputFormat.DELIMITER_CONF));
    assertEquals("1",
        job.getConfiguration()
            .get(DelimitedTextInputFormat.COLUMN_CONF));
  }
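The test above pins down the two configuration keys, but the HIHO implementation itself is not shown on this page. A plausible reconstruction of setProperties, consistent with the assertions:

  // Hypothetical sketch, not the actual HIHO source.
  public static void setProperties(Job job, String delimiter, int column) {
    job.getConfiguration().set(DELIMITER_CONF, delimiter);
    // Configuration.setInt() stores the value as its decimal string,
    // which is why the test asserts the string "1".
    job.getConfiguration().setInt(COLUMN_CONF, column);
  }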


      checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }   
    Job job = new Job(conf);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
        conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    job.setJobName("HihoDBExport");

    job.setMapperClass(GenericDBLoadDataMapper.class);
    job.setJarByClass(ExportToDB.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    GenericDBOutputFormat.setOutput(job, tableName, columnNames);

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
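The 0/1 result of waitForCompletion(true) is meant to flow back through Tool.run() to the shell as the process exit code. A typical launcher sketch, assuming ExportToDB follows the Configured/Tool pattern used by the other HIHO drivers on this page:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class ExportToDBLauncher {
  public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-D, -files, -libjars, ...)
    // from args before handing the rest to run().
    System.exit(ToolRunner.run(new Configuration(), new ExportToDB(), args));
  }
}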

    for (Entry<String, String> entry : conf) {
      logger.debug("key, value " + entry.getKey() + "="
          + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
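With two reduce tasks, the default HashPartitioner splits the keys between two output files (part-r-00000 and part-r-00001). One common refinement, not present in the HIHO snippet: because IntSumReducer is associative and commutative, it can also run map-side as a combiner to shrink the shuffle.

    // Optional: pre-aggregate counts on the map side before the shuffle.
    job.setCombinerClass(IntSumReducer.class);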

      checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
      logger.debug("key, value " + entry.getKey() + "="
          + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));

    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
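The debug lines above assume the URL, username and password properties were already loaded into the Configuration. With the stock org.apache.hadoop.mapreduce.lib.db.DBConfiguration they can be populated in one call; the connection details below are hypothetical:

    // Sets URL_PROPERTY, USERNAME_PROPERTY and PASSWORD_PROPERTY in one go.
    DBConfiguration.configureDB(job.getConfiguration(),
        "com.mysql.jdbc.Driver",                 // JDBC driver class
        "jdbc:mysql://localhost:3306/hiho_demo", // hypothetical URL
        "hiho_user",                             // hypothetical user
        "secret");                               // hypothetical password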

public class ExportDelimitedToDB extends Configured implements Tool {

  public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
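The visible portion of the snippet selects DBOutputFormat but never tells it which table and columns to write to. With the stock org.apache.hadoop.mapreduce.lib.db.DBOutputFormat that is done via setOutput; the table and column names below are hypothetical:

    // Required by DBOutputFormat: the target table and its column names.
    DBOutputFormat.setOutput(job, "employees", "id", "name", "salary");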

    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }

    Job job = new Job(conf);
    job.setJobName("SalesforceLoading");
    //job.setMapperClass(SalesForceLoadMapper.class);
    //job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapperClass(ExportSalesforceMapper.class);
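    // setJarByClass only identifies which jar to ship to the cluster; any
    // class bundled in the HIHO jar works, though the driver class is the
    // usual choice.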
    job.setJarByClass(FileStreamInputFormat.class);
    job.setReducerClass(ExportSalesforceReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputFormatClass(NullOutputFormat.class);

    int ret = 0;

    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;

      .getLogger(co.nubetech.hiho.similarity.ngram.ScoreJob.class);

  @Override
  public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("Score job");
    job.setJarByClass(ScoreJob.class);

    Class inputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat");
    Class outputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class inputValueClass = Class
        .forName("org.apache.hadoop.io.IntWritable");
    Class outputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class
        .forName("org.apache.hadoop.io.LongWritable");

    job.setMapperClass(ScoreMapper.class);
    job.setReducerClass(ScoreReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, "outputOfNGramJob");
    FileOutputFormat.setOutputPath(job, new Path("outputOfScoreJob"));

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
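The Class.forName lookups above bind the format and key/value classes at runtime and leave the references raw. When the classes are available at compile time, the equivalent direct wiring is shorter and type-checked; fully qualified names are used here to stay self-contained:

    job.setInputFormatClass(
        org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(co.nubetech.hiho.similarity.ngram.ValuePair.class);
    job.setMapOutputValueClass(org.apache.hadoop.io.IntWritable.class);
    job.setOutputKeyClass(co.nubetech.hiho.similarity.ngram.ValuePair.class);
    job.setOutputValueClass(org.apache.hadoop.io.LongWritable.class);
    job.setOutputFormatClass(
        org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);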

      checkMandatoryConfs();
    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("NGram job");
    job.setJarByClass(NGramJob.class);

    Class inputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat");
    Class outputFormatClass = Class
        .forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("org.apache.hadoop.io.Text");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.Text");
    Class outputKeyClass = Class
        .forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class
        .forName("org.apache.hadoop.io.IntWritable");

    job.setMapperClass(NGramMapper.class);
    job.setReducerClass(NGramReducer.class);

    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path("outputOfNGramJob"));

    int ret = 0;
    try {
      ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
      e.printStackTrace();
    }
    return ret;
  }
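KeyValueTextInputFormat splits each input line into key and value at the first tab by default. The separator is configurable; the property name below is from the Hadoop 2 mapreduce API (older releases used key.value.separator.in.input.line):

    // Override the default '\t' separator on the job's own Configuration copy.
    job.getConfiguration().set(
        "mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");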

    } catch (HIHOException e1) {
      e1.printStackTrace();
      throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
      logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS,
        conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
      throw new IllegalArgumentException(
          "Invalid output strategy: " + strategy);
    }
    if (os != OutputStrategyEnum.AVRO) {
      switch (os) {

      case DUMP: {
        // job.setMapperClass(DBImportMapper.class);
        break;
      }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         *
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
      case DELIMITED: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break; // without this, DELIMITED fell through into the JSON case
      }
      case JSON: {
        // job.setMapperClass(DBImportJsonMapper.class);
        // job.setJarByClass(ObjectMapper.class);
        break;
      }
      default: {
        job.setMapperClass(DBInputDelimMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

        NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(
            getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        break;
      }
      }

      String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
      String inputBoundingQuery = conf
          .get(DBConfiguration.INPUT_BOUNDING_QUERY);
      logger.debug("About to set the params");
      DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery,
          params);
      logger.debug("Set the params");

      job.setNumReduceTasks(0);

      try {
        // job.setJarByClass(Class.forName(conf.get(
        // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
        logger.debug("OUTPUT format class is "
            + job.getOutputFormatClass());

        /*
         * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
         * ReflectionUtils.newInstance(job.getOutputFormatClass(),
         * job.getConfiguration()); output.checkOutputSpecs(job);
         */
        logger.debug("Class is "
            + ReflectionUtils
                .newInstance(job.getOutputFormatClass(),
                    job.getConfiguration()).getClass()
                .getName());
        job.waitForCompletion(false);
        if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
          generateHiveScript(conf, job, jobCounter);
          generatePigScript(conf, job);
        }

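DBQueryInputFormat.setInput appears to mirror the stock data-driven DB input API while adding bind parameters. With plain Hadoop the equivalent is DataDrivenDBInputFormat.setInput: the bounding query returns the low and high values of the split column, and Hadoop rewrites the $CONDITIONS token into a per-split range predicate. The record class and queries below are illustrative:

    // MyRecord is a hypothetical DBWritable describing one result row.
    DataDrivenDBInputFormat.setInput(job, MyRecord.class,
        "SELECT id, name FROM employees WHERE $CONDITIONS",
        "SELECT MIN(id), MAX(id) FROM employees");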

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
      job.setMapperClass(MergeKeyMapper.class);
      job.setReducerClass(MergeKeyReducer.class);

    } else if (mergeBy.equals("value")) {
      job.setMapperClass(MergeValueMapper.class);
      job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
      logger.debug("Output format class is " + job.getOutputFormatClass());
      logger.debug("Class is "
          + ReflectionUtils
              .newInstance(job.getOutputFormatClass(),
                  job.getConfiguration()).getClass()
              .getName());
      job.waitForCompletion(false);
      if (job.isComplete()) {
        Counters counters = job.getCounters();
        totalRecordsOld = counters.findCounter(
            MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
        totalRecordsNew = counters.findCounter(
            MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
        badRecords = counters.findCounter(
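One caveat on the counter harvesting above: after waitForCompletion returns, isComplete() is true even when the job failed; isSuccessful() is the stricter guard. A safer sketch:

    job.waitForCompletion(false);
    // isComplete() only says the job reached a terminal state;
    // isSuccessful() says it actually succeeded.
    if (job.isSuccessful()) {
      Counters counters = job.getCounters();
      long totalRecordsOld = counters.findCounter(
          MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
    }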
