Examples of org.apache.hadoop.mapreduce.OutputFormat

org.apache.hadoop.mapreduce.OutputFormat
OutputFormat describes the output-specification for a Map-Reduce job.
The Map-Reduce framework relies on the OutputFormat of the job to:
1. Validate the output-specification of the job. For example, check that the output directory doesn't already exist.
2. Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}.
@see RecordWriter

    /*
     * Define the Input Format and the Output Format!
     */


    InputFormat inputFormat = new TupleTextInputFormat(schema, fieldsPos, false, null);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);


    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);

View Full Code Here

    /*
     * Define the Input Format and the Output Format!
     */


    InputFormat inputFormat = new TupleTextInputFormat(schema, fieldsPos, false, null);
    OutputFormat outputFormat = new TupleTextOutputFormat(schema, false, ' ',
        TupleTextOutputFormat.NO_QUOTE_CHARACTER, TupleTextOutputFormat.NO_ESCAPE_CHARACTER);


    builder.addInput(inPath, inputFormat, new IdentityTupleMapper());
    builder.setTupleReducer(new IdentityTupleReducer());
    builder.setOutput(outPath, outputFormat, ITuple.class, NullWritable.class);

View Full Code Here

    // If not in cache, create a new one
    if(context == null) {


      context = new OutputContext();


      OutputFormat mainOutputFormat;


      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }


      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));


      // The trick is to create a new Job for each output
      Job job = new Job(this.context.getConfiguration());
      job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
      job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext = new TaskAttemptContext(job.getConfiguration(),
          this.context.getTaskAttemptID());


      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded
      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;


      // Load the OutputFormat instance
      OutputFormat outputFormat = InstancesDistributor.loadInstance(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class,
          getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext = new JobContext(taskContext.getConfiguration(),
          taskContext.getJobID());
      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);


      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter,

View Full Code Here


    builder.setJarByClass(callingClass);
    // Define the output format


    TableSpec[] tbls = tableSpecs.toArray(new TableSpec[0]);
    OutputFormat outputFormat = null;
    try {
      outputFormat = OutputFormatFactory.getOutputFormat(tablespace.getEngine(), batchSize, tbls);
    } catch (Exception e) {
      System.err.println(e);
      throw new RuntimeException(e);

View Full Code Here

    // If not in cache, create a new one
    if(context == null) {


      context = new OutputContext();


      OutputFormat mainOutputFormat;


      try {
        mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(
            this.context.getOutputFormatClass(), this.context.getConfiguration()));
      } catch(ClassNotFoundException e1) {
        throw new RuntimeException(e1);
      }


      ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
          .getOutputCommitter(this.context));


      // The trick is to create a new Job for each output
      Job job = new Job(this.context.getConfiguration());
      job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
      job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
      // Check possible specific context for the output
      setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
      TaskAttemptContext taskContext = new TaskAttemptContext(job.getConfiguration(),
          this.context.getTaskAttemptID());


      // First we change the output dir for the new OutputFormat that we will
      // create
      // We put it inside the main output work path -> in case the Job fails,
      // everything will be discarded
      taskContext.getConfiguration().set("mapred.output.dir",
          baseOutputCommitter.getBaseDir() + "/" + baseFileName);
      context.taskAttemptContext = taskContext;


      // Load the OutputFormat instance
      OutputFormat outputFormat = DCUtils.loadSerializedObjectInDC(
          context.taskAttemptContext.getConfiguration(), OutputFormat.class,
          getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
      // We have to create a JobContext for meeting the contract of the
      // OutputFormat
      JobContext jobContext = new JobContext(taskContext.getConfiguration(),
          taskContext.getJobID());
      context.jobContext = jobContext;
      // The contract of the OutputFormat is to check the output specs
      outputFormat.checkOutputSpecs(jobContext);
      // We get the output committer so we can call it later
      context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
      // Save the RecordWriter to cache it
      context.recordWriter = outputFormat.getRecordWriter(taskContext);


      // if counters are enabled, wrap the writer with context
      // to increment counters
      if(countersEnabled) {
        context.recordWriter = new RecordWriterWithCounter(context.recordWriter,

View Full Code Here

            new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
        org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
            new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
        org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = 
            new TaskAttemptContextImpl(conf, taskAttemptID);
        OutputFormat outputFormat =
          ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
        committer = outputFormat.getOutputCommitter(taskContext);
      } else {
        committer = ReflectionUtils.newInstance(conf.getClass(
            "mapred.output.committer.class", FileOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class), conf);
      }

View Full Code Here

          .newTaskId(jobId, 0, TaskType.MAP);
      org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = MRBuilderUtils
          .newTaskAttemptId(taskID, 0);
      TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf,
          TypeConverter.fromYarn(attemptID));
      OutputFormat outputFormat;
      try {
        outputFormat = ReflectionUtils.newInstance(taskContext
            .getOutputFormatClass(), conf);
        committer = outputFormat.getOutputCommitter(taskContext);
      } catch (Exception e) {
        throw new YarnException(e);
      }
    } else {
      committer = ReflectionUtils.newInstance(conf.getClass(

View Full Code Here

    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}

View Full Code Here

      theRecordWriter.write(key2, val2);
    } finally {
      theRecordWriter.close(tContext);
    }
    
    OutputFormat outputFormat = ReflectionUtils.newInstance(
        tContext.getOutputFormatClass(), conf);
    OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
    committer.commitTask(tContext);
  }

View Full Code Here

    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}

View Full Code Here

0 1 2 3 4 5 6 7

TOP

Related Classes of org.apache.hadoop.mapreduce.OutputFormat

com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs

com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleTextInputOutputFormat

com.splout.db.hadoop.TablespaceGenerator

com.twitter.elephantbird.pig.load.TestRCFileProtobufStorage

com.twitter.elephantbird.pig.load.TestRCFileThriftStorage

org.apache.crunch.io.CrunchOutputs

org.apache.hadoop.mapred.LocalJobRunner$Job

org.apache.hadoop.mapred.LocalJobRunnerWithFix$Job

org.apache.hadoop.mapred.MRVertexOutputCommitter

org.apache.hadoop.mapreduce.v2.app.MRAppMaster

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.