Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.JobConf
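
For orientation, before the project-specific snippets below, here is a minimal, self-contained sketch of a job driven entirely through JobConf (the classic org.apache.hadoop.mapred API): an identity pass-through job that copies text input to text output. The class name MinimalJobConfExample and the command-line paths are illustrative placeholders, not taken from any of the snippets that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MinimalJobConfExample {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(MinimalJobConfExample.class);
    conf.setJobName("minimal-jobconf-example");

    // TextInputFormat produces (LongWritable offset, Text line) pairs,
    // and the identity mapper/reducer pass them through unchanged.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // Submit the job and block until it finishes.
    JobClient.runJob(conf);
  }
}

Packaged into a jar, this would be submitted in the usual way (hadoop jar <jar> MinimalJobConfExample <input dir> <output dir>).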


    /**
     * Run the job
     */
    public void build() {
        try {
            JobConf conf = new JobConf(config);
            conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
            conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
            conf.set("stores.xml",
                     new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
            conf.setBoolean("save.keys", saveKeys);
            conf.setBoolean("reducer.per.bucket", reducerPerBucket);
            if(!isAvro) {
                conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
                conf.setMapperClass(mapperClass);
                conf.setMapOutputKeyClass(BytesWritable.class);
                conf.setMapOutputValueClass(BytesWritable.class);
                if(reducerPerBucket) {
                    conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class);
                } else {
                    conf.setReducerClass(HadoopStoreBuilderReducer.class);
                }
            }
            conf.setInputFormat(inputFormatClass);
            conf.setOutputFormat(SequenceFileOutputFormat.class);
            conf.setOutputKeyClass(BytesWritable.class);
            conf.setOutputValueClass(BytesWritable.class);
            conf.setJarByClass(getClass());
            conf.setReduceSpeculativeExecution(false);
            FileInputFormat.setInputPaths(conf, inputPath);
            conf.set("final.output.dir", outputDir.toString());
            conf.set("checksum.type", CheckSum.toString(checkSumType));
            FileOutputFormat.setOutputPath(conf, tempDir);

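            // Refuse to run if the final output directory already exists.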
            FileSystem outputFs = outputDir.getFileSystem(conf);
            if(outputFs.exists(outputDir)) {
                throw new IOException("Final output directory already exists.");
            }

            // Delete the temp directory if it already exists
            FileSystem tempFs = tempDir.getFileSystem(conf);
            tempFs.delete(tempDir, true);

            long size = sizeOfPath(tempFs, inputPath);
            logger.info("Data size = " + size + ", replication factor = "
                        + storeDef.getReplicationFactor() + ", numNodes = "
                        + cluster.getNumberOfNodes() + ", chunk size = " + chunkSizeBytes);

            // Derive "rough" number of chunks and reducers
            int numReducers;
            if(saveKeys) {

                if(this.numChunks == -1) {
                    this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                                                     / cluster.getNumberOfPartitions()
                                                     / storeDef.getReplicationFactor() / chunkSizeBytes),
                                              1);
                } else {
                    logger.info("Overriding chunk size byte and taking num chunks ("
                                + this.numChunks + ") directly");
                }

                if(reducerPerBucket) {
                    numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor();
                } else {
                    numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor()
                                  * numChunks;
                }
            } else {

                if(this.numChunks == -1) {
                    this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                                                     / cluster.getNumberOfPartitions() / chunkSizeBytes),
                                              1);
                } else {
                    logger.info("Overriding chunk size byte and taking num chunks ("
                                + this.numChunks + ") directly");
                }

                if(reducerPerBucket) {
                    numReducers = cluster.getNumberOfPartitions();
                } else {
                    numReducers = cluster.getNumberOfPartitions() * numChunks;
                }
            }
            conf.setInt("num.chunks", numChunks);
            conf.setNumReduceTasks(numReducers);

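            // Avro path: switch to the Avro partitioner, schemas, and Avro-specific mapper/reducer wiring.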
            if(isAvro) {
                conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
                // conf.setMapperClass(mapperClass);
                conf.setMapOutputKeyClass(ByteBuffer.class);
                conf.setMapOutputValueClass(ByteBuffer.class);

                conf.setInputFormat(inputFormatClass);

                conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
                conf.setOutputKeyClass(ByteBuffer.class);
                conf.setOutputValueClass(ByteBuffer.class);

                // AvroJob confs for the avro mapper
                AvroJob.setInputSchema(conf, Schema.parse(config.get("avro.rec.schema")));

                AvroJob.setOutputSchema(conf,
                                        Pair.getPairSchema(Schema.create(Schema.Type.BYTES),
                                                           Schema.create(Schema.Type.BYTES)));

                AvroJob.setMapperClass(conf, mapperClass);

                if(reducerPerBucket) {
                    conf.setReducerClass(AvroStoreBuilderReducerPerBucket.class);
                } else {
                    conf.setReducerClass(AvroStoreBuilderReducer.class);
                }

            }

            logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers
View Full Code Here


  @Override
  public RecordReader<NullWritable, NullWritable> getRecordReader(InputSplit arg0, JobConf arg1,
        Reporter arg2) throws IOException
  {
    final String filename = ((FileSplit)arg0).getPath().toString();
    final JobConf job = arg1;
   
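    // Anonymous RecordReader that emits a single empty record per split; all of the real work happens in next().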
    return new RecordReader<NullWritable, NullWritable>()
    {
      private boolean unread = true;

      @Override
      public void close() throws IOException
      {}

      @Override
      public NullWritable createKey()
      {
        return NullWritable.get();
      }

      @Override
      public NullWritable createValue()
      {
        return NullWritable.get();
      }

      @Override
      public long getPos() throws IOException
      {
        return 0;
      }

      @Override
      public float getProgress() throws IOException
      {
        return unread ? 0 : 1;
      }

      @Override
      /* spawn a cpimport process for each input file */
      public boolean next(NullWritable arg0, NullWritable arg1) throws IOException
      {
        InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
        String schemaName = dbConf.getOutputSchemaName();
        String tableName = (filename.substring(filename.lastIndexOf('/')+1, filename.length()));
        tableName = tableName.substring(0, tableName.lastIndexOf('.'));
        String output = job.get("mapred.output.dir");
        if (unread)
        {
          try
          {
            StringBuilder loadCmdStr = new StringBuilder();
            loadCmdStr.append(dbConf.getInfiniDBHome());
            loadCmdStr.append("/bin/");
            loadCmdStr.append("infinidoop_load.sh ");
            loadCmdStr.append(filename);
            loadCmdStr.append(" ");
            loadCmdStr.append(schemaName);
            loadCmdStr.append(" ");
            loadCmdStr.append(tableName);

            Process lChldProc = Runtime.getRuntime().exec(loadCmdStr.toString());
            
            // Wait for the child to exit
            lChldProc.waitFor();
            BufferedReader lChldProcOutStream = new BufferedReader(new InputStreamReader(lChldProc.getInputStream()));
            BufferedReader stdError = new BufferedReader(new InputStreamReader(lChldProc.getErrorStream()));
           
            String lChldProcOutPutStr = null;
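            // Build the path of the per-table load log under the job's output directory.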
            StringBuffer outpath = new StringBuffer();
            outpath.append(job.getWorkingDirectory());
            outpath.append("/");
            outpath.append(output);
            outpath.append("/");
            outpath.append(tableName);
            outpath.append(".log");
            // ... (remainder of the method truncated)

public class InfiniDBOutputDriver extends Configured implements Tool
{
  public int run (String[] args) throws Exception
  {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf,
             "com.mysql.jdbc.Driver",
             "jdbc:mysql://srvswint4/tpch1","root", "");
    String [] fields = { "n_nationkey", "n_name" };
    String [] outFields = {"id", "name"};
    jobconf.setInputFormat(IDBFileInputFormat.class);
    jobconf.setOutputFormat(InfiniDBOutputFormat.class);
    jobconf.setOutputKeyClass(NullWritable.class);
    jobconf.setOutputValueClass(Text.class);
    InfiniDBOutputFormat.setOutput(jobconf, "db", outFields);
    InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf);
    idbconf.setInputPath("input");
    idbconf.setOutputPath("output");
    idbconf.setInfiniDBHome("/usr/local/Calpont");

    jobconf.setMapperClass(InfiniDoopMapper.class);
    jobconf.setNumMapTasks(1);
    jobconf.setNumReduceTasks(2);
    JobClient client = new JobClient();
    client.setConf(jobconf);
    try {
      JobClient.runJob(jobconf);
    } catch (Exception e) {
      // ... (remainder truncated)

public class InfiniDoopDriver extends Configured implements Tool
{
  public int run (String[] args) throws Exception
  {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf,
             "com.mysql.jdbc.Driver",
             "jdbc:mysql://srvswint4/tpch1","root", "");
    String [] fields = { "n_nationkey", "n_name" };
    jobconf.setInputFormat(InfiniDBInputFormat.class);

    jobconf.setOutputKeyClass(LongWritable.class);
    jobconf.setOutputValueClass(Text.class);

    InfiniDBInputFormat.setInput(jobconf, InfiniDoopRecord.class, "nation",
       null,  "n_nationkey", fields);

    InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf);
    idbconf.setOutputPath("output2");
    jobconf.setMapperClass(InfiniDoopInputMapper.class);
    jobconf.setNumMapTasks(4);
    jobconf.setNumReduceTasks(1);
    jobconf.set("mapred.textoutputformat.separator", "|");
    JobClient client = new JobClient();

    client.setConf(jobconf);
    try {
      JobClient.runJob(jobconf);
      // ... (remainder truncated)

public class InfiniDBOutputDriver extends Configured implements Tool
{
  public int run (String[] args) throws Exception
  {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf,
             "com.mysql.jdbc.Driver",
             "jdbc:mysql://srvswint4/tpch1", "root", "");
    String [] fields = { "n_nationkey", "n_name" };
    String [] outFields = {"id", "name"};
    jobconf.setInputFormat(IDBFileInputFormat.class);
    jobconf.setOutputFormat(InfiniDBOutputFormat.class);
    //Class.forName(jobconf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
    //Connection connection = DriverManager.getConnection("jdbc:mysql://srvswint4/tpch1?user=root");

    //Job job = new Job(jobconf, "infinidb_driver");
    //jobconf.setOutputKeyClass(InfiniDoopRecord.class);
    jobconf.setOutputKeyClass(NullWritable.class);
    jobconf.setOutputValueClass(NullWritable.class);
    //jobconf.setMapOutputKeyClass(InfiniDoopRecord.class);
    //jobconf.setMapOutputValueClass(LongWritable.class);

    //InfiniDBInputFormat.setInput(jobconf, InfiniDoopRecord.class, "nation",
    //    null, "n_nationkey", fields);
    InfiniDBOutputFormat.setOutput(jobconf, "t1", outFields);
    //InfiniDBInputFormat.setInput(jobconf, InfiniDBRecord.class, "select * from nation", "select count(*) from nation");
    Job job = new Job(jobconf);
    jobconf.set("mapred.input.dir", "output1");
    jobconf.set("mapred.output.dir", "output");

    jobconf.setMapperClass(InfiniDoopMapper.class);
    jobconf.setNumMapTasks(1);
    jobconf.setNumReduceTasks(2);
    JobClient client = new JobClient();
    //System.out.println(jobconf.getUser());
    //System.out.println(FileOutputFormat.getOutputPath(jobconf));
    jobconf.setInt("mapred.min.split.size", 2147483647);

    //int i = (job.waitForCompletion(true)? 1: 0);
    //return i;
    client.setConf(jobconf);
    try {
      // ... (remainder truncated)

public class InfiniDoopDriver extends Configured implements Tool
{
  public int run (String[] args) throws Exception
  {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf,
             "com.mysql.jdbc.Driver",
             "jdbc:mysql://srvswint4/tpch1", "root", "");
    String [] fields = { "n_nationkey", "n_name" };
    String [] outFields = {"id", "name"};
    jobconf.setInputFormat(InfiniDBInputFormat.class);
    //Class.forName(jobconf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
    //Connection connection = DriverManager.getConnection("jdbc:mysql://srvswint4/tpch1?user=root");

    //Job job = new Job(jobconf, "infinidb_driver");
    jobconf.setOutputKeyClass(LongWritable.class);
    jobconf.setOutputValueClass(Text.class);
    //jobconf.setMapOutputKeyClass(LongWritable.class);
    //jobconf.setMapOutputValueClass(LongWritable.class);

    InfiniDBInputFormat.setInput(jobconf, InfiniDoopRecord.class, "nation",
        null, "n_nationkey", fields);

    //InfiniDBInputFormat.setInput(jobconf, InfiniDBRecord.class, "select * from nation", "select count(*) from nation");
    Job job = new Job(jobconf);
    jobconf.set("mapred.output.dir", "output1");
    jobconf.setMapperClass(InfiniDoopInputMapper.class);
    jobconf.setNumMapTasks(4);
    jobconf.setNumReduceTasks(8);
    JobClient client = new JobClient();
    //System.out.println(jobconf.getUser());
    //System.out.println(FileOutputFormat.getOutputPath(jobconf));

    //int i = (job.waitForCompletion(true)? 1: 0);
    // ... (remainder truncated)

public class InfiniDBOutputDriver extends Configured implements Tool
{
  public int run (String[] args) throws Exception
  {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf,
             "com.mysql.jdbc.Driver",
             "jdbc:mysql://srvswint4/tpch1", "root", "");
    String [] fields = { "n_nationkey", "n_name" };
    String [] outFields = {"id", "name"};
    jobconf.setInputFormat(IDBFileInputFormat.class);
    jobconf.setOutputFormat(InfiniDBOutputFormat.class);
    //Class.forName(jobconf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
    //Connection connection = DriverManager.getConnection("jdbc:mysql://srvswint4/tpch1?user=root");

    //Job job = new Job(jobconf, "infinidb_driver");
    //jobconf.setOutputKeyClass(InfiniDoopRecord.class);
    jobconf.setOutputKeyClass(NullWritable.class);
    jobconf.setOutputValueClass(Text.class);
    //jobconf.setMapOutputKeyClass(InfiniDoopRecord.class);
    //jobconf.setMapOutputValueClass(LongWritable.class);

    //InfiniDBInputFormat.setInput(jobconf, InfiniDoopRecord.class, "nation",
    //    null, "n_nationkey", fields);
    InfiniDBOutputFormat.setOutput(jobconf, "zz1", outFields);
    //InfiniDBInputFormat.setInput(jobconf, InfiniDBRecord.class, "select * from nation", "select count(*) from nation");
    Job job = new Job(jobconf);
    jobconf.set("mapred.input.dir", "output1");
    jobconf.set("mapred.output.dir", "output");

    jobconf.setMapperClass(InfiniDoopMapper.class);
    jobconf.setNumMapTasks(1);
    jobconf.setNumReduceTasks(2);
    JobClient client = new JobClient();
    //System.out.println(jobconf.getUser());
    //System.out.println(FileOutputFormat.getOutputPath(jobconf));
    jobconf.setInt("mapred.min.split.size", 2147483647);

    //int i = (job.waitForCompletion(true)? 1: 0);
    //return i;
    client.setConf(jobconf);
    try {
      // ... (remainder truncated)

  /**
   * The main driver for the word count map/reduce program.
   * Invoke this method to submit the map/reduce job.
   * @throws IOException when there are communication problems with the
   *                     job tracker.
   */
  public static void main(String[] args) throws IOException {
    Configuration defaults = new Configuration();
   
    JobConf conf = new JobConf(defaults, WordCount.class);
    conf.setJobName("wordcount");
    // the keys are words (strings)
    conf.setOutputKeyClass(UTF8.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);
   
    conf.setMapperClass(MapClass.class);       
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
   
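    // Parse the -m and -r options for the number of map and reduce tasks; anything else is treated as a path.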
    List other_args = new ArrayList();
    for(int i=0; i < args.length; ++i) {
      try {
        if ("-m".equals(args[i])) {
          conf.setNumMapTasks(Integer.parseInt(args[++i]));
        } else if ("-r".equals(args[i])) {
          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else {
          other_args.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " +
                           args[i-1]);
        printUsage(); // exits
      }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
      System.out.println("ERROR: Wrong number of parameters: " +
          other_args.size() + " instead of 2.");
      printUsage();
    }
    conf.setInputDir(new File((String) other_args.get(0)));
    conf.setOutputDir(new File((String) other_args.get(1)));
   
    // Uncomment to run locally in a single process
    // conf.set("mapred.job.tracker", "local");
   
    JobClient.runJob(conf);
  }

    File tempDir =
      new File("grep-temp-"+
               Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

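    // First job: run the regex over the input and count matches with LongSumReducer.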
    JobConf grepJob = new JobConf(defaults, Grep.class);
    grepJob.setJobName("grep-search");

    grepJob.setInputDir(new File(args[0]));

    grepJob.setMapperClass(RegexMapper.class);
    grepJob.set("mapred.mapper.regex", args[2]);
    if (args.length == 4)
      grepJob.set("mapred.mapper.regex.group", args[3]);
   
    grepJob.setCombinerClass(LongSumReducer.class);
    grepJob.setReducerClass(LongSumReducer.class);

    grepJob.setOutputDir(tempDir);
    grepJob.setOutputFormat(SequenceFileOutputFormat.class);
    grepJob.setOutputKeyClass(UTF8.class);
    grepJob.setOutputValueClass(LongWritable.class);

    JobClient.runJob(grepJob);

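    // Second job: invert the (match, count) pairs and sort by decreasing count into a single output file.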
    JobConf sortJob = new JobConf(defaults, Grep.class);
    sortJob.setJobName("grep-sort");

    sortJob.setInputDir(tempDir);
    sortJob.setInputFormat(SequenceFileInputFormat.class);
    sortJob.setInputKeyClass(UTF8.class);
    sortJob.setInputValueClass(LongWritable.class);

    sortJob.setMapperClass(InverseMapper.class);

    sortJob.setNumReduceTasks(1);                 // write a single file
    sortJob.setOutputDir(new File(args[1]));
    sortJob.setOutputKeyComparatorClass           // sort by decreasing freq
      (LongWritable.DecreasingComparator.class);

    JobClient.runJob(sortJob);

    new JobClient(defaults).getFs().delete(tempDir);
    // ... (remainder truncated)

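  /** Disable HDFS FileSystem caching so each lookup returns a freshly configured instance. */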
  protected void setRequiredServiceHadoopConf(Configuration conf) {
    conf.set("fs.hdfs.impl.disable.cache", "true");
  }

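  /** Build a JobConf by copying in the service-level defaults first, then the supplied configuration. */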
  protected JobConf createHadoopConf(Configuration conf) {
    JobConf hadoopConf = new JobConf();
    XConfiguration.copy(serviceHadoopConf, hadoopConf);
    XConfiguration.copy(conf, hadoopConf);
    return hadoopConf;
  }
