Package org.apache.crunch.io.avro

Source Code of org.apache.crunch.io.avro.AvroKeyValueIT$MapRedPersonMapper

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.apache.crunch.io.avro;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.io.Serializable;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.mapred.Pair;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.crunch.PTable;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.io.From;
import org.apache.crunch.test.CrunchTestSupport;
import org.apache.crunch.test.Person;
import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.avro.Avros;
import org.apache.crunch.types.avro.ReflectedPerson;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Test;

/**
* Tests for verifying behavior with Avro produced using the org.apache.avro.mapred.*
* and org.apache.avro.mapreduce.* APIs.
*/
public class AvroKeyValueIT extends CrunchTestSupport implements Serializable {

  @Test
  public void testInputFromMapReduceKeyValueFile_Generic() throws InterruptedException, IOException, ClassNotFoundException {

    Path keyValuePath = produceMapReduceOutputFile();

    Pipeline pipeline = new MRPipeline(AvroKeyValueIT.class, tempDir.getDefaultConfiguration());
    PTable<Person, Integer> personTable = pipeline.read(
        From.avroTableFile(keyValuePath, Avros.tableOf(Avros.specifics(Person.class), Avros.ints())));

    org.apache.crunch.Pair<Person, Integer> firstEntry = Iterables.getFirst(personTable.materialize(), null);

    assertEquals("a", firstEntry.first().getName().toString());
    assertEquals(Integer.valueOf(1), firstEntry.second());

    pipeline.done();

  }

  @Test
  public void testInputFromMapRedKeyValueFile_Specific() throws IOException {
    Path keyValuePath = produceMapRedOutputFile();

    Pipeline pipeline = new MRPipeline(AvroKeyValueIT.class, tempDir.getDefaultConfiguration());
    PTable<Person, Integer> personTable = pipeline.read(
        From.avroTableFile(keyValuePath, Avros.keyValueTableOf(Avros.specifics(Person.class), Avros.ints())));

    org.apache.crunch.Pair<Person, Integer> firstEntry = Iterables.getFirst(personTable.materialize(), null);

    assertEquals("a", firstEntry.first().getName().toString());
    assertEquals(Integer.valueOf(1), firstEntry.second());

    // Verify that deep copying on this PType works as well
    PTableType<Person, Integer> tableType = Avros.keyValueTableOf(Avros.specifics(Person.class), Avros.ints());
    tableType.initialize(tempDir.getDefaultConfiguration());
    org.apache.crunch.Pair<Person, Integer> detachedPair = tableType.getDetachedValue(firstEntry);
    assertEquals(firstEntry, detachedPair);

    pipeline.done();
  }

  @Test
  public void testInputFromMapRedKeyValueFile_Reflect() throws IOException {
    Path keyValuePath = produceMapRedOutputFile();

    Pipeline pipeline = new MRPipeline(AvroKeyValueIT.class, tempDir.getDefaultConfiguration());
    PTable<ReflectedPerson, Integer> personTable = pipeline.read(
        From.avroTableFile(keyValuePath, Avros.keyValueTableOf(Avros.reflects(ReflectedPerson.class), Avros.ints())));

    org.apache.crunch.Pair<ReflectedPerson, Integer> firstEntry = Iterables.getFirst(personTable.materialize(), null);

    assertEquals("a", firstEntry.first().getName().toString());
    assertEquals(Integer.valueOf(1), firstEntry.second());

    // Verify that deep copying on this PType works as well
    PTableType<ReflectedPerson, Integer> tableType =
        Avros.keyValueTableOf(Avros.reflects(ReflectedPerson.class), Avros.ints());
    tableType.initialize(tempDir.getDefaultConfiguration());
    org.apache.crunch.Pair<ReflectedPerson, Integer> detachedPair = tableType.getDetachedValue(firstEntry);
    assertEquals(firstEntry, detachedPair);

    pipeline.done();
  }

  /**
   * Produces an Avro file using the org.apache.avro.mapred.* API.
   */
  private Path produceMapRedOutputFile() throws IOException {

    JobConf conf = new JobConf(tempDir.getDefaultConfiguration(), AvroKeyValueIT.class);

    org.apache.avro.mapred.AvroJob.setOutputSchema(
        conf,
        Pair.getPairSchema(Person.SCHEMA$, Schema.create(Schema.Type.INT)));


    conf.setMapperClass(MapRedPersonMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);



    Path outputPath = new Path(tempDir.getFileName("mapreduce_output"));
    org.apache.hadoop.mapred.FileInputFormat.setInputPaths(conf, tempDir.copyResourcePath("letters.txt"));
    org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, outputPath);

    RunningJob runningJob = JobClient.runJob(conf);
    runningJob.waitForCompletion();

    return outputPath;

  }

  /**
   * Produces an Avro file using the org.apache.avro.mapreduce.* API.
   */
  private Path produceMapReduceOutputFile() throws IOException, ClassNotFoundException, InterruptedException {


    Job job = new Job(tempDir.getDefaultConfiguration());
    job.setJarByClass(AvroKeyValueIT.class);
    job.setJobName("Color Count");

    Path outputPath = new Path(tempDir.getFileName("mapreduce_output"));

    FileInputFormat.setInputPaths(job, tempDir.copyResourcePath("letters.txt"));
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MapReducePersonMapper.class);
    job.setNumReduceTasks(0);
    AvroJob.setOutputKeySchema(job, Person.SCHEMA$);
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);

    boolean success = job.waitForCompletion(true);

    if (!success) {
      throw new RuntimeException("Job failed");
    }

    return outputPath;
  }

  public static class MapReducePersonMapper extends
      Mapper<LongWritable, Text, AvroKey<Person>, AvroValue<Integer>> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
      Person person = Person.newBuilder()
          .setName(value.toString())
          .setAge(value.toString().length())
          .setSiblingnames(ImmutableList.<CharSequence>of())
          .build();
      context.write(
          new AvroKey<Person>(person),
          new AvroValue<Integer>(1));

    }
  }

  public static class MapRedPersonMapper implements org.apache.hadoop.mapred.Mapper<LongWritable, Text, AvroWrapper<Pair<Person,Integer>>, NullWritable> {
    @Override
    public void map(LongWritable key, Text value, OutputCollector<AvroWrapper<Pair<Person,Integer>>, NullWritable> outputCollector, Reporter reporter) throws IOException {
      Person person = Person.newBuilder()
          .setName(value.toString())
          .setAge(value.toString().length())
          .setSiblingnames(ImmutableList.<CharSequence>of())
          .build();
      outputCollector.collect(
          new AvroWrapper<Pair<Person, Integer>>(new Pair<Person, Integer>(person, 1)),
          NullWritable.get());
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public void configure(JobConf entries) {
    }
  }

}
TOP

Related Classes of org.apache.crunch.io.avro.AvroKeyValueIT$MapRedPersonMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.