Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.Job

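All of the examples below follow the same basic pattern: build a Configuration, construct a Job, set the mapper, reducer, key/value classes, input/output formats and paths, then submit the job with waitForCompletion(). The minimal, self-contained sketch below shows that pattern on its own; it uses only the TokenCounterMapper and IntSumReducer helper classes bundled with Hadoop, and the driver class name and argument handling are illustrative rather than taken from any of the snippets.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalJobDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "minimal job example"); // newer releases prefer Job.getInstance(conf, name)
    job.setJarByClass(MinimalJobDriver.class);      // ship the jar containing this driver class

    job.setMapperClass(TokenCounterMapper.class);   // library mapper: emits (token, 1) per word
    job.setCombinerClass(IntSumReducer.class);      // library reducer reused as a combiner
    job.setReducerClass(IntSumReducer.class);       // sums the counts for each token

    job.setOutputKeyClass(Text.class);              // reducer output key type
    job.setOutputValueClass(IntWritable.class);     // reducer output value type

    FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory; must not exist yet

    // Submit and block until the job finishes; 'true' streams progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}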

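        // Word count over a Cassandra column family: one Job per test column, with the
        // reducer writing either to the filesystem or back to Cassandra via ColumnFamilyOutputFormat.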
        for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
        {
            String columnName = "text" + i;
            getConf().set(CONF_COLUMN_NAME, columnName);

            Job job = new Job(getConf(), "wordcount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(TokenizerMapper.class);

            if (outputReducerType.equalsIgnoreCase("filesystem"))
            {
                job.setCombinerClass(ReducerToFilesystem.class);
                job.setReducerClass(ReducerToFilesystem.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
            }
            else
            {
                job.setReducerClass(ReducerToCassandra.class);

                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                job.setOutputKeyClass(ByteBuffer.class);
                job.setOutputValueClass(List.class);

                job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

                ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            }

            job.setInputFormatClass(ColumnFamilyInputFormat.class);


            ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
            ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
            ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
            SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
            ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

            job.waitForCompletion(true);
        }
        return 0;
    }


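    // Standard WordCount driver: IntSumReducer serves as both combiner and reducer,
    // and every argument except the last is added as an input path.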
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job,
      new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

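    // Map-only job (zero reduce tasks): the dataset is distributed via the DistributedCache
    // and the mapper output is written directly as a SequenceFile.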
    // put the dataset into the DistributedCache
    // use setCacheFiles() to overwrite the first-step cache files
    URI[] files = {datasetPath.toUri()};
    DistributedCache.setCacheFiles(files, conf);
   
    Job job = new Job(conf);
    job.setJarByClass(Step0Job.class);
   
    FileInputFormat.setInputPaths(job, dataPath);
    FileOutputFormat.setOutputPath(job, outputPath);
   
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Step0Output.class);
   
    job.setMapperClass(Step0Mapper.class);
    job.setNumReduceTasks(0); // no reducers
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    // run the job
    job.waitForCompletion(true);
   
    return parseOutput(job);
  }

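    // Aggregation pass: reads SequenceFile input and emits Text / TopKStringPatterns pairs,
    // with AggregatorReducer doubling as the combiner.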
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
   
    String input = params.get("output") + "/fpgrowth";
    Job job = new Job(conf, "PFP Aggregator Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/frequentPatterns");
    FileOutputFormat.setOutputPath(job, outPath);
   
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(AggregatorMapper.class);
    job.setCombinerClass(AggregatorReducer.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    job.waitForCompletion(true);
  }

   
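    // Parallel counting pass: text input, ParallelCountingReducer used as both combiner and
    // reducer, Text / LongWritable counts written as a SequenceFile.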
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
   
    String input = params.get("input");
    Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/parallelcounting");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelCountingMapper.class);
    job.setCombinerClass(ParallelCountingReducer.class);
    job.setReducerClass(ParallelCountingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
   
  }

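    // Transaction sorting pass: maps text input to LongWritable / TransactionTree pairs and
    // writes the sorted result as a SequenceFile.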
    params.set("gList", "");
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    String input = params.get("input");
    Job job = new Job(conf, "PFP Transaction Sorting running over input" + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);
   
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(TransactionTree.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/sortedoutput");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TransactionSortingMapper.class);
    job.setReducerClass(TransactionSortingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
    params.set("gList", gList);
  }

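    // Parallel FP-Growth pass: SequenceFile in and out, with a dedicated combiner class
    // (ParallelFPGrowthCombiner) distinct from the reducer.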
    Configuration conf = new Configuration();
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    String input = params.get("output") + "/sortedoutput";
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/fpgrowth");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
  }

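    // Decision forest builder: the dataset goes into the DistributedCache and the detailed
    // job configuration is delegated to configureJob().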
    setOobEstimate(conf, callback != null);
   
    // put the dataset into the DistributedCache
    DistributedCache.addCacheFile(datasetPath.toUri(), conf);
   
    Job job = new Job(conf, "decision forest builder");
   
    log.debug("Configuring the job...");
    configureJob(job, nbTrees, callback != null);
   
    log.debug("Running the job...");

   
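    // Frequency counting job: text input, SequenceFile output; the driver parses the counts
    // from the job output and then deletes the output path.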
    // put the dataset into the DistributedCache
    URI[] files = {datasetPath.toUri()};
    DistributedCache.setCacheFiles(files, conf);
   
    Job job = new Job(conf);
    job.setJarByClass(FrequenciesJob.class);
   
    FileInputFormat.setInputPaths(job, dataPath);
    FileOutputFormat.setOutputPath(job, outputPath);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Frequencies.class);
   
    job.setMapperClass(FrequenciesMapper.class);
    job.setReducerClass(FrequenciesReducer.class);
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    // run the job
    job.waitForCompletion(true);
   
    int[][] counts = parseOutput(job);
   
    // delete the output path
    fs.delete(outputPath, true);

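    // Key-based grouping job: gzip-compressed map output, Java and Writable serializations
    // enabled, results written as text grouped by key.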
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
   
    String input = params.get("input");
    Job job = new Job(conf, "Generating dataset based from input" + input);
    job.setJarByClass(KeyBasedStringTupleGrouper.class);
   
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output"));
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(KeyBasedStringTupleMapper.class);
    job.setCombinerClass(KeyBasedStringTupleCombiner.class);
    job.setReducerClass(KeyBasedStringTupleReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
   
    job.waitForCompletion(true);
  }