Package org.apache.avro.mapred.tether

Source Code of org.apache.avro.mapred.tether.TetherTask$Collector

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.avro.mapred.tether;

import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.net.InetSocketAddress;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.avro.Schema;
import org.apache.avro.util.Utf8;
import org.apache.avro.ipc.Transceiver;
import org.apache.avro.ipc.SocketTransceiver;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRequestor;

/** Base class for Java tether mapreduce programs.  Useless except for testing,
* since it's already possible to write Java MapReduce programs without
* tethering.  Also serves as an example of how a framework may be
* implemented. */
public abstract class TetherTask<IN,MID,OUT> {
  static final Logger LOG = LoggerFactory.getLogger(TetherTask.class);

  private Transceiver clientTransceiver;
  private OutputProtocol outputClient;

  private TaskType taskType;
  private int partitions;

  private DecoderFactory decoderFactory = DecoderFactory.defaultFactory();
  private BinaryDecoder decoder;

  private SpecificDatumReader<IN> inReader;
  private SpecificDatumReader<MID> midReader;
  private IN inRecord;
  private MID midRecord;
  private MID midRecordSpare;
  private Collector<MID> midCollector;
  private Collector<OUT> outCollector;

  private static class Buffer extends ByteArrayOutputStream {
    public ByteBuffer data() {
      return ByteBuffer.wrap(buf, 0, count);
    }
  }

  /** Collector for map and reduce output values. */
  public class Collector<T> {
    private SpecificDatumWriter<T> writer;
    private Buffer buffer = new Buffer();
    private BinaryEncoder encoder = new BinaryEncoder(buffer);
   
    private Collector(Schema schema) {
      this.writer = new SpecificDatumWriter<T>(schema);
    }

    /** Collect a map or reduce output value. */
    public void collect(T record) throws IOException {
      buffer.reset();
      writer.write(record, encoder);
      outputClient.output(buffer.data());
    }
   
    /** Collect a pre-partitioned map output value. */
    public void collect(T record, int partition) throws IOException {
      buffer.reset();
      writer.write(record, encoder);
      outputClient.outputPartitioned(partition, buffer.data());
    }
  }

  void open(int inputPort) throws IOException {
    // open output client, connecting to parent
    String clientPortString = System.getenv("AVRO_TETHER_OUTPUT_PORT");
    if (clientPortString == null)
      throw new RuntimeException("AVRO_TETHER_OUTPUT_PORT env var is null");
    int clientPort = Integer.parseInt(clientPortString);
    this.clientTransceiver =
      new SocketTransceiver(new InetSocketAddress(clientPort));
    this.outputClient = (OutputProtocol)
      SpecificRequestor.getClient(OutputProtocol.class, clientTransceiver);

    // send inputPort to parent
    outputClient.configure(inputPort);
  }

  void configure(TaskType taskType, CharSequence inSchemaText, CharSequence outSchemaText) {
    this.taskType = taskType;
    try {
      Schema inSchema = Schema.parse(inSchemaText.toString());
      Schema outSchema = Schema.parse(outSchemaText.toString());
      switch (taskType) {
      case MAP:
        this.inReader = new SpecificDatumReader<IN>(inSchema);
        this.midCollector = new Collector<MID>(outSchema);
        break;
      case REDUCE:
        this.midReader = new SpecificDatumReader<MID>(inSchema);
        this.outCollector = new Collector<OUT>(outSchema);
        break;
      }
    } catch (Throwable e) {
      fail(e.toString());
    }
  }

  void partitions(int partitions) { this.partitions = partitions; }

  /** Return the number of map output partitions of this job. */
  public int partitions() { return partitions; }

  void input(ByteBuffer data, long count) {
    try {
      decoder = decoderFactory.createBinaryDecoder(data.array(), decoder);
      for (long i = 0; i < count; i++) {
        switch (taskType) {
        case MAP:
          inRecord = inReader.read(inRecord, decoder);
          map(inRecord, midCollector);
          break;
        case REDUCE:
          MID prev = midRecord;
          midRecord = midReader.read(midRecordSpare, decoder);
          if (prev != null && !midRecord.equals(prev))
            reduceFlush(prev, outCollector);
          reduce(midRecord, outCollector);
          midRecordSpare = prev;
          break;
        }
      }
    } catch (Throwable e) {
      LOG.warn("failing: "+e, e);
      fail(e.toString());
    }
  }

  void complete() {
    if (taskType == TaskType.REDUCE && midRecord != null)
      try {
        reduceFlush(midRecord, outCollector);
      } catch (Throwable e) {
        LOG.warn("failing: "+e, e);
        fail(e.toString());
      }
    outputClient.complete();
  }

  /** Called with input values to generate intermediate values. */
  public abstract void map(IN record, Collector<MID> collector)
    throws IOException;
  /** Called with sorted intermediate values. */
  public abstract void reduce(MID record, Collector<OUT> collector)
    throws IOException;
  /** Called with the last intermediate value in each equivalence run. */
  public abstract void reduceFlush(MID record, Collector<OUT> collector)
    throws IOException;

  /** Call to update task status. */
  public void status(String message) {
    outputClient.status(new Utf8(message));
  }

  /** Call to increment a counter. */
  public void count(String group, String name, long amount) {
    outputClient.count(new Utf8(group), new Utf8(name), amount);
  }

  /** Call to fail the task. */
  public void fail(String message) {
    outputClient.fail(new Utf8(message));
    close();
  }

  void close() {
    if (clientTransceiver != null)
      try {
        clientTransceiver.close();
      } catch (IOException e) {}                  // ignore
  }

}
TOP

Related Classes of org.apache.avro.mapred.tether.TetherTask$Collector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.