Package org.apache.hadoop.mapred

Examples of org.apache.hadoop.mapred.OutputCollector
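OutputCollector is the output channel of the old (org.apache.hadoop.mapred) MapReduce API: mappers and reducers emit key/value pairs through its single collect(key, value) method, and the framework routes them to the shuffle or to the job's OutputFormat. As a minimal, self-contained sketch before the real-world excerpts below (the class and names here are illustrative, not taken from any of the excerpts):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Word-count style reducer: sums the counts seen for a word and emits
// one (word, total) pair per key through the OutputCollector.
public class SumReducer extends MapReduceBase
    implements Reducer<Text, IntWritable, Text, IntWritable> {

  public void reduce(Text key, Iterator<IntWritable> values,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    int sum = 0;
    while (values.hasNext())
      sum += values.next().get();
    output.collect(key, new IntWritable(sum));
  }
}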


        // If the reducer still generates output here, which is very uncommon
        // and a case we may not need to support, we don't write that output
        // to HDFS; we just consume/collect it so the reducer doesn't hang
        // forever.

        OutputCollector collector = new OutputCollector() {
          public void collect(Object key, Object value)
            throws IOException {
            // just consume it, no need to write the record anywhere
          }
        };
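
A collector like this is typically handed straight to the reducer in place of a real output channel. A hedged sketch of how the surrounding task code might invoke it (reducer, key and values are assumptions about context the excerpt does not show):

        // Hypothetical call site: run the reducer, discard anything it emits.
        reducer.reduce(key, values, collector, Reporter.NULL);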

  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();
    Tuple key = tupleEntry.selectTuple(keyField);
    ImmutableBytesWritable keyBytes = (ImmutableBytesWritable) key.getObject(0);

    if (useSalt) {
      keyBytes = HBaseSalter.addSaltPrefix(keyBytes);
    }

    Put put;
    if (this.timeStamp == 0L) {
      put = new Put(keyBytes.get());
    } else {
      put = new Put(keyBytes.get(), this.timeStamp);
    }
   
    for (int i = 0; i < valueFields.length; i++) {
      Fields fieldSelector = valueFields[i];
      TupleEntry values = tupleEntry.selectEntry(fieldSelector);

      for (int j = 0; j < values.getFields().size(); j++) {
        Fields fields = values.getFields();
        Tuple tuple = values.getTuple();

        ImmutableBytesWritable valueBytes = (ImmutableBytesWritable) tuple.getObject(j);
        if (valueBytes != null) {
          put.add(Bytes.toBytes(familyNames[i]), Bytes.toBytes((String) fields.get(j)), valueBytes.get());
        }
      }
    }

    outputCollector.collect(null, put);
  }
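
Passing null as the key works here because the sink is only a conduit for HBase Put objects; the HBase TableOutputFormat record writer that such a tap typically feeds ignores the key and just applies the Put to the table.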

  @SuppressWarnings("unchecked")
  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();
    Tuple key = tupleEntry.selectTuple(RowKeyField);
    Object okey = key.getObject(0);
    ImmutableBytesWritable keyBytes = getBytes(okey);
    Put put = new Put(keyBytes.get());
    Fields outFields = tupleEntry.getFields().subtract(RowKeyField);
    if (null != outFields) {
      TupleEntry values = tupleEntry.selectEntry(outFields);
      for (int n = 0; n < values.getFields().size(); n++) {
        Object o = values.get(n);
        ImmutableBytesWritable valueBytes = getBytes(o);
        Comparable field = outFields.get(n);
        ColumnName cn = parseColumn((String) field);
        if (null == cn.family) {
          if (n >= familyNames.length)
            cn.family = familyNames[familyNames.length - 1];
          else
            cn.family = familyNames[n];
        }
        if (null != o || writeNulls)
          put.add(Bytes.toBytes(cn.family), Bytes.toBytes(cn.name), valueBytes.get());
      }
    }
    outputCollector.collect(null, put);
  }
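
parseColumn is a helper the excerpt does not include. A plausible sketch, assuming column names follow HBase's usual family:qualifier convention and that ColumnName is a simple two-field holder (both assumptions, not part of the original source):

  // Hypothetical reconstruction of the helper used above.
  private static class ColumnName {
    String family;      // left null when no family prefix is given; filled in above
    final String name;

    ColumnName(String family, String name) {
      this.family = family;
      this.name = name;
    }
  }

  private ColumnName parseColumn(String column) {
    int colon = column.indexOf(':');
    if (colon < 0)
      return new ColumnName(null, column);
    return new ColumnName(column.substring(0, colon), column.substring(colon + 1));
  }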

    @Override
    public void sink( FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall ) throws IOException {
        // it's ok to use NULL here so the collector does not write anything
        TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
        OutputCollector outputCollector = sinkCall.getOutput();
        if( updateBy != null )
        {
            Tuple allValues = tupleEntry.selectTuple( updateValueFields );
            Tuple updateValues = tupleEntry.selectTuple( updateByFields );

            allValues = cleanTuple( allValues );

            TupleRecord key = new TupleRecord( allValues );

            if( updateValues.equals( updateIfTuple ) )
                outputCollector.collect( key, null );
            else
                outputCollector.collect( key, key );

            return;
        }

        Tuple result = tupleEntry.selectTuple( getSinkFields() );

        result = cleanTuple( result );

        outputCollector.collect( new TupleRecord( result ), null );
    }
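
Note the convention in the updateBy branch: the record always travels as the key, while the value acts as a marker for the underlying JDBC output format: (record, null) when the update-by fields already match updateIfTuple, and (record, record) when they differ, presumably signalling that an update is needed. Without an updateBy clause, every record takes the plain (record, null) path.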

    int count = 0;
    for( LogicalOutput logicalOutput : logicalOutputs )
      collectors[ count++ ] = new OldOutputCollector( logicalOutput );

    return new OutputCollector()
    {
    @Override
    public void collect( Object key, Object value ) throws IOException
      {
      for( OutputCollector outputCollector : collectors )
        outputCollector.collect( key, value );
      }
    };
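
The returned collector is a simple fan-out: each collect() call forwards the same key/value pair to every wrapped output, with OldOutputCollector apparently adapting each LogicalOutput (Tez's output abstraction) to the old mapred OutputCollector interface.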

  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();
    Tuple key = tupleEntry.selectTuple(keyField);
    ImmutableBytesWritable keyBytes = (ImmutableBytesWritable) key.getObject(0);
    Put put = new Put(keyBytes.get());

    for (int i = 0; i < valueFields.length; i++) {
      Fields fieldSelector = valueFields[i];
      TupleEntry values = tupleEntry.selectEntry(fieldSelector);

      for (int j = 0; j < values.getFields().size(); j++) {
        Fields fields = values.getFields();
        Tuple tuple = values.getTuple();

        ImmutableBytesWritable valueBytes = (ImmutableBytesWritable) tuple.getObject(j);
        put.add(Bytes.toBytes(familyNames[i]), Bytes.toBytes((String) fields.get(j)), valueBytes.get());
      }
    }

    outputCollector.collect(null, put);
  }

  private static final long serialVersionUID = -5011096855302946106L;

  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<T, OutputCollector> sinkCall)
    throws IOException {
    OutputCollector collector = sinkCall.getOutput();
    TupleEntry entry = sinkCall.getOutgoingEntry();
    T writable = sinkCall.getContext();
    writable.set((M) entry.getTuple().getObject(0));
    collector.collect(null, writable);
  }
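
Here each tuple carries a single Writable value: the sink copies it into the reusable writable kept in the sink call's context and collects it with a null key, presumably for an output format that tolerates or substitutes a null key (such as one writing NullWritable keys).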
