Examples of org.apache.hadoop.mapred.OutputCollector

org.apache.hadoop.mapred.OutputCollector
Collects the <key, value> pairs output by Mappers and Reducers.
OutputCollector is the generalization of the facility provided by the Map-Reduce framework to collect data output by either the Mapper or the Reducer, i.e., intermediate outputs or the output of the job.


    @Override
    public void sink( FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall ) throws IOException {
        // it's ok to use NULL here so the collector does not write anything
        TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
        OutputCollector outputCollector = sinkCall.getOutput();
        if( updateBy != null )
        {
            Tuple allValues = tupleEntry.selectTuple( updateValueFields );
            Tuple updateValues = tupleEntry.selectTuple( updateByFields );


            allValues = cleanTuple( allValues );


            TupleRecord key = new TupleRecord( allValues );


            if( updateValues.equals( updateIfTuple ) )
                outputCollector.collect( key, null );
            else
                outputCollector.collect( key, key );


            return;
        }


        Tuple result = tupleEntry.selectTuple( getSinkFields() );


        result = cleanTuple( result );


        outputCollector.collect( new TupleRecord( result ), null );
    }

View Full Code Here

      // validate input split
      InputSplit split = splits[i];
      Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);


      // validate record reader
      OutputCollector collector = mock(OutputCollector.class);
      Reporter reporter = mock(Reporter.class);
      RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);


      // validate we can read all the data back
      ImmutableBytesWritable key = rr.createKey();

View Full Code Here

  private void runOldCombiner(final TezRawKeyValueIterator rawIter, final Writer writer) throws IOException {
    Class<? extends Reducer> reducerClazz = (Class<? extends Reducer>) conf.getClass("mapred.combiner.class", null, Reducer.class);
    
    Reducer combiner = ReflectionUtils.newInstance(reducerClazz, conf);
    
    OutputCollector collector = new OutputCollector() {
      @Override
      public void collect(Object key, Object value) throws IOException {
        writer.append(key, value);
      }
    };

View Full Code Here

    Reducer reducer =
        ReflectionUtils.newInstance(job.getReducerClass(), job);


    // make output collector


    OutputCollector collector =
        new OutputCollector() {
      public void collect(Object key, Object value)
          throws IOException {
        output.write(key, value);
      }
    };

View Full Code Here

    
    updateJobWithSplit(job, inputSplit);


    RecordReader in = new OldRecordReader(input);


    OutputCollector collector = new OldOutputCollector(output);


    MapRunnable runner =
        (MapRunnable)ReflectionUtils.newInstance(job.getMapRunnerClass(), job);


    runner.run(in, collector, (Reporter)reporter);

View Full Code Here

        // called at all in this task). If reducer still generates output,
        // which is very uncommon and we may not have to support this case.
        // So we don't write this output to HDFS, but we consume/collect
        // this output just to avoid reducer hanging forever.


        OutputCollector collector = new OutputCollector() {
          public void collect(Object key, Object value)
            throws IOException {
            //just consume it, no need to write the record anywhere
          }
        };

View Full Code Here

      // validate input split
      InputSplit split = splits[i];
      Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);


      // validate record reader
      OutputCollector collector = mock(OutputCollector.class);
      Reporter reporter = mock(Reporter.class);
      RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);


      // validate we can read all the data back
      ImmutableBytesWritable key = rr.createKey();

View Full Code Here

        // create a new record writer
        writer = outputFormat.getRecordWriter(FileSystem.get(outputConf), 
                                              outputConf, fileName, reporter);


        // return an output collector using the writer we just created.
        return new StoreFuncAdaptor(new OutputCollector() 
            {
                @SuppressWarnings({"unchecked"})
                public void collect(Object key, Object value) throws IOException {
                    writer.write(key,value);
                }

View Full Code Here

        // called at all in this task). If reducer still generates output,
        // which is very uncommon and we may not have to support this case.
        // So we don't write this output to HDFS, but we consume/collect
        // this output just to avoid reducer hanging forever.


        OutputCollector collector = new OutputCollector() {
          public void collect(Object key, Object value)
            throws IOException {
            //just consume it, no need to write the record anywhere
          }
        };

View Full Code Here

        // called at all in this task). If reducer still generates output,
        // which is very uncommon and we may not have to support this case.
        // So we don't write this output to HDFS, but we consume/collect
        // this output just to avoid reducer hanging forever.


        OutputCollector collector = new OutputCollector() {
          public void collect(Object key, Object value)
            throws IOException {
            //just consume it, no need to write the record anywhere
          }
        };

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.hadoop.mapred.OutputCollector

cascading.flow.tez.stream.element.TezBoundaryStage

cascading.flow.tez.stream.element.TezMergeGate

com.twitter.elephantbird.cascading2.scheme.LzoBinaryScheme

com.twitter.maple.hbase.HBaseScheme

com.twitter.maple.jdbc.JDBCScheme

org.apache.hadoop.hbase.mapred.TestTableSnapshotInputFormat

org.apache.hadoop.mapred.pipes.PipesReducer

org.apache.hadoop.streaming.PipeMapRed

org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReducePOStoreImpl

org.apache.tez.mapreduce.combine.MRCombiner

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.