Package

Source Code of MaxWidgetIdGenericAvro$MaxWidgetReducer

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroInputFormat;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroMapper;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.mapred.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MaxWidgetIdGenericAvro extends Configured implements Tool {
 
  public static class MaxWidgetMapper extends AvroMapper<GenericRecord, Pair<Long, GenericRecord>> {
   
    private GenericRecord maxWidget;
    private AvroCollector<Pair<Long, GenericRecord>> collector;
   
    @Override
    public void map(GenericRecord widget,
        AvroCollector<Pair<Long, GenericRecord>> collector, Reporter reporter)
        throws IOException {
      this.collector = collector;
      Integer id = (Integer) widget.get("id");
      if (id != null) {
        if (maxWidget == null
            || id > (Integer) maxWidget.get("id")) {
          maxWidget = widget;
        }
      }
    }
   
    @Override
    public void close() throws IOException {
      if (maxWidget != null) {
        collector.collect(new Pair<Long, GenericRecord>(0L, maxWidget));
      }
      super.close();
    }
   
  }
 
  static GenericRecord copy(GenericRecord record) {
    Schema schema = record.getSchema();
    GenericRecord copy = new GenericData.Record(schema);
    for (Schema.Field f : schema.getFields()) {
      copy.put(f.name(), record.get(f.name()));
    }
    return copy;
  }
 
  public static class MaxWidgetReducer extends AvroReducer<Long, GenericRecord, GenericRecord> {
   
    @Override
    public void reduce(Long key, Iterable<GenericRecord> values,
        AvroCollector<GenericRecord> collector, Reporter reporter) throws IOException {
      GenericRecord maxWidget = null;

      for (GenericRecord w : values) {
        if (maxWidget == null
            || (Integer) w.get("id") > (Integer) maxWidget.get("id")) {
          maxWidget = copy(w);
        }
      }

      if (maxWidget != null) {
        collector.collect(maxWidget);
      }
    }
  }

  public int run(String [] args) throws Exception {
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setJobName("Max temperature");
   
    Path inputDir = new Path("widgets");
    FileInputFormat.addInputPath(conf, inputDir);
    FileOutputFormat.setOutputPath(conf, new Path("maxwidget"));
   
    Schema schema = readSchema(inputDir, conf);
   
    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);
       
    AvroJob.setInputSchema(conf, schema);
    AvroJob.setMapOutputSchema(conf,
        Pair.getPairSchema(Schema.create(Schema.Type.LONG), schema));
    AvroJob.setOutputSchema(conf, schema);

    AvroJob.setMapperClass(conf, MaxWidgetMapper.class);
    AvroJob.setReducerClass(conf, MaxWidgetReducer.class);

    conf.setNumReduceTasks(1);

    JobClient.runJob(conf);
    return 0;
  }
 
  /**
   * Read the Avro schema from the first file in the input directory.
   */
  private Schema readSchema(Path inputDir, Configuration conf) throws IOException {
    FsInput fsInput = null;
    FileReader<Object> reader = null;
    try {
      fsInput = new FsInput(new Path(inputDir, "part-m-00000.avro"), conf);
      reader = DataFileReader.openReader(fsInput, new GenericDatumReader<Object>());
      return reader.getSchema();
    } finally {
      IOUtils.closeStream(fsInput);
      IOUtils.closeStream(reader);
    }
  }

  public static void main(String [] args) throws Exception {
    int ret = ToolRunner.run(new MaxWidgetIdGenericAvro(), args);
    System.exit(ret);
  }
}
TOP

Related Classes of MaxWidgetIdGenericAvro$MaxWidgetReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.