Package org.apache.hadoop.mapreduce

Examples of org.apache.hadoop.mapreduce.Job

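All of the examples below follow the same basic pattern: build a Configuration, construct a Job, set the mapper, reducer, key/value classes, input/output formats and paths, then submit the job with waitForCompletion(). The minimal, self-contained sketch below shows that pattern on its own; it uses only the TokenCounterMapper and IntSumReducer helper classes bundled with Hadoop, and the driver class name and argument handling are illustrative rather than taken from any of the snippets.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalJobDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "minimal job example"); // newer releases prefer Job.getInstance(conf, name)
    job.setJarByClass(MinimalJobDriver.class);      // ship the jar containing this driver class

    job.setMapperClass(TokenCounterMapper.class);   // library mapper: emits (token, 1) per word
    job.setCombinerClass(IntSumReducer.class);      // library reducer reused as a combiner
    job.setReducerClass(IntSumReducer.class);       // sums the counts for each token

    job.setOutputKeyClass(Text.class);              // reducer output key type
    job.setOutputValueClass(IntWritable.class);     // reducer output value type

    FileInputFormat.addInputPath(job, new Path(args[0]));   // input directory
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output directory; must not exist yet

    // Submit and block until the job finishes; 'true' streams progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}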

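        // Word count over a Cassandra column family: one Job per test column, with the
        // reducer writing either to the filesystem or back to Cassandra via ColumnFamilyOutputFormat.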
        for (int i = 0; i < WordCountSetup.TEST_COUNT; i++)
        {
            String columnName = "text" + i;
            getConf().set(CONF_COLUMN_NAME, columnName);

            Job job = new Job(getConf(), "wordcount");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(TokenizerMapper.class);

            if (outputReducerType.equalsIgnoreCase("filesystem"))
            {
                job.setCombinerClass(ReducerToFilesystem.class);
                job.setReducerClass(ReducerToFilesystem.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
            }
            else
            {
                job.setReducerClass(ReducerToCassandra.class);

                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                job.setOutputKeyClass(ByteBuffer.class);
                job.setOutputValueClass(List.class);

                job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

                ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            }

            job.setInputFormatClass(ColumnFamilyInputFormat.class);


            ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
            ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
            ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
            SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
            ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

            job.waitForCompletion(true);
        }
        return 0;
    }


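    // Standard WordCount driver: IntSumReducer serves as both combiner and reducer,
    // and every argument except the last is added as an input path.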
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job,
      new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

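    // Map-only job (zero reduce tasks): the dataset is distributed via the DistributedCache
    // and the mapper output is written directly as a SequenceFile.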
    // put the dataset into the DistributedCache
    // use setCacheFiles() to overwrite the first-step cache files
    URI[] files = {datasetPath.toUri()};
    DistributedCache.setCacheFiles(files, conf);
   
    Job job = new Job(conf);
    job.setJarByClass(Step0Job.class);
   
    FileInputFormat.setInputPaths(job, dataPath);
    FileOutputFormat.setOutputPath(job, outputPath);
   
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Step0Output.class);
   
    job.setMapperClass(Step0Mapper.class);
    job.setNumReduceTasks(0); // no reducers
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    // run the job
    job.waitForCompletion(true);
   
    return parseOutput(job);
  }

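    // Aggregation pass: reads SequenceFile input and emits Text / TopKStringPatterns pairs,
    // with AggregatorReducer doubling as the combiner.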
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
   
    String input = params.get("output") + "/fpgrowth";
    Job job = new Job(conf, "PFP Aggregator Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/frequentPatterns");
    FileOutputFormat.setOutputPath(job, outPath);
   
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(AggregatorMapper.class);
    job.setCombinerClass(AggregatorReducer.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
    job.waitForCompletion(true);
  }

   
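    // Parallel counting pass: text input, ParallelCountingReducer used as both combiner and
    // reducer, Text / LongWritable counts written as a SequenceFile.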
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
   
    String input = params.get("input");
    Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/parallelcounting");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelCountingMapper.class);
    job.setCombinerClass(ParallelCountingReducer.class);
    job.setReducerClass(ParallelCountingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
   
  }

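    // Transaction sorting pass: maps text input to LongWritable / TransactionTree pairs and
    // writes the sorted result as a SequenceFile.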
    params.set("gList", "");
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    String input = params.get("input");
    Job job = new Job(conf, "PFP Transaction Sorting running over input" + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);
   
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(TransactionTree.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/sortedoutput");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TransactionSortingMapper.class);
    job.setReducerClass(TransactionSortingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
    params.set("gList", gList);
  }

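    // Parallel FP-Growth pass: SequenceFile in and out, with a dedicated combiner class
    // (ParallelFPGrowthCombiner) distinct from the reducer.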
    Configuration conf = new Configuration();
    conf.set("pfp.parameters", params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    String input = params.get("output") + "/sortedoutput";
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output") + "/fpgrowth");
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    job.waitForCompletion(true);
  }

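    // Decision forest builder: the dataset goes into the DistributedCache and the detailed
    // job configuration is delegated to configureJob().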
    setOobEstimate(conf, callback != null);
   
    // put the dataset into the DistributedCache
    DistributedCache.addCacheFile(datasetPath.toUri(), conf);
   
    Job job = new Job(conf, "decision forest builder");
   
    log.debug("Configuring the job...");
    configureJob(job, nbTrees, callback != null);
   
    log.debug("Running the job...");

   
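    // Frequency counting job: text input, SequenceFile output; the driver parses the counts
    // from the job output and then deletes the output path.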
    // put the dataset into the DistributedCache
    URI[] files = {datasetPath.toUri()};
    DistributedCache.setCacheFiles(files, conf);
   
    Job job = new Job(conf);
    job.setJarByClass(FrequenciesJob.class);
   
    FileInputFormat.setInputPaths(job, dataPath);
    FileOutputFormat.setOutputPath(job, outputPath);
   
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Frequencies.class);
   
    job.setMapperClass(FrequenciesMapper.class);
    job.setReducerClass(FrequenciesReducer.class);
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
   
    // run the job
    job.waitForCompletion(true);
   
    int[][] counts = parseOutput(job);
   
    // delete the output path
    fs.delete(outputPath, true);

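    // Key-based grouping job: gzip-compressed map output, Java and Writable serializations
    // enabled, results written as text grouped by key.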
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
                                  + "org.apache.hadoop.io.serializer.WritableSerialization");
   
    String input = params.get("input");
    Job job = new Job(conf, "Generating dataset based from input" + input);
    job.setJarByClass(KeyBasedStringTupleGrouper.class);
   
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
   
    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get("output"));
    FileOutputFormat.setOutputPath(job, outPath);
   
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
      dfs.delete(outPath, true);
    }
   
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(KeyBasedStringTupleMapper.class);
    job.setCombinerClass(KeyBasedStringTupleCombiner.class);
    job.setReducerClass(KeyBasedStringTupleReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
   
    job.waitForCompletion(true);
  }