Package org.apache.avro.mapreduce

Source Code of org.apache.avro.mapreduce.TestAvroKeyValueRecordWriter$R1

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.  See the License for the specific language governing
* permissions and limitations under the License.
*/

package org.apache.avro.mapreduce;

import static org.easymock.EasyMock.createMock;
import static org.easymock.EasyMock.replay;
import static org.easymock.EasyMock.verify;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.hadoop.io.AvroDatumConverter;
import org.apache.avro.hadoop.io.AvroDatumConverterFactory;
import org.apache.avro.hadoop.io.AvroKeyValue;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.FsInput;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.junit.Test;

public class TestAvroKeyValueRecordWriter {
  @Test
  public void testWriteRecords() throws IOException {
    Job job = new Job();
    AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);

    replay(context);

    AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration());
    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);
    AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null);
    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter
        = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass());
    CodecFactory compressionCodec = CodecFactory.nullCodec();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    // Use a writer to generate a Avro container file in memory.
    // Write two records: <'apple', TextStats('apple')> and <'banana', TextStats('banana')>.
    AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer
        = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter,
            new ReflectData(), compressionCodec, outputStream);
    TextStats appleStats = new TextStats();
    appleStats.name = "apple";
    writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats));
    TextStats bananaStats = new TextStats();
    bananaStats.name = "banana";
    writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats));
    writer.close(context);

    verify(context);

    ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
    Schema readerSchema = AvroKeyValue.getSchema(
        Schema.create(Schema.Type.STRING), TextStats.SCHEMA$);
    DatumReader<GenericRecord> datumReader
        = new SpecificDatumReader<GenericRecord>(readerSchema);
    DataFileStream<GenericRecord> avroFileReader
        = new DataFileStream<GenericRecord>(inputStream, datumReader);

    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> firstRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("apple", firstRecord.getKey().toString());
    assertEquals("apple", firstRecord.getValue().name.toString());

    // Verify that the second record was written;
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> secondRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(secondRecord.get());
    assertEquals("banana", secondRecord.getKey().toString());
    assertEquals("banana", secondRecord.getValue().name.toString());

    // That's all, folks.
    assertFalse(avroFileReader.hasNext());
    avroFileReader.close();
  }

  public static class R1 {
    String attribute;
  }
  @Test public void testUsingReflection() throws Exception {
    Job job = new Job();
    Schema schema = ReflectData.get().getSchema(R1.class);
    AvroJob.setOutputValueSchema(job, schema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    replay(context);

    R1 record = new R1();
    record.attribute = "test";
    AvroValue<R1> avroValue = new AvroValue<R1>(record);

    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    AvroDatumConverterFactory factory =
      new AvroDatumConverterFactory(job.getConfiguration());

    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);

    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<R1>, R1> valueConverter =
      factory.create((Class<AvroValue<R1>>) avroValue.getClass());

    AvroKeyValueRecordWriter<Text, AvroValue<R1>> writer =
      new AvroKeyValueRecordWriter<Text, AvroValue<R1>>(
        keyConverter, valueConverter, new ReflectData(),
        CodecFactory.nullCodec(), outputStream);

    writer.write(new Text("reflectionData"), avroValue);
    writer.close(context);

    verify(context);

    ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
    Schema readerSchema = AvroKeyValue.getSchema(
      Schema.create(Schema.Type.STRING), schema);
    DatumReader<GenericRecord> datumReader =
      new ReflectDatumReader<GenericRecord>(readerSchema);
    DataFileStream<GenericRecord> avroFileReader =
      new DataFileStream<GenericRecord>(inputStream, datumReader);

    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());

    // Verify that the record holds the same data that we've written
    AvroKeyValue<CharSequence, R1> firstRecord =
      new AvroKeyValue<CharSequence, R1>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("reflectionData", firstRecord.getKey().toString());
    assertEquals(record.attribute, firstRecord.getValue().attribute);
  }
  @Test
  public void testSyncableWriteRecords() throws IOException {
    Job job = new Job();
    AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);

    replay(context);

    AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration());
    AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class);
    AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null);
    @SuppressWarnings("unchecked")
    AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter
        = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass());
    CodecFactory compressionCodec = CodecFactory.nullCodec();
    FileOutputStream outputStream = new FileOutputStream(new File("target/temp.avro"));

    // Write a marker followed by each record: <'apple', TextStats('apple')> and <'banana', TextStats('banana')>.
    AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer
        = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter,
            new ReflectData(), compressionCodec, outputStream);
    TextStats appleStats = new TextStats();
    appleStats.name = "apple";
    long pointOne = writer.sync();
    writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats));
    TextStats bananaStats = new TextStats();
    bananaStats.name = "banana";
    long pointTwo = writer.sync();
    writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats));
    writer.close(context);

    verify(context);

  Configuration conf = new Configuration();
  conf.set("fs.default.name", "file:///");
  Path avroFile = new Path("target/temp.avro");
  DataFileReader<GenericData.Record> avroFileReader = new DataFileReader<GenericData.Record>(new FsInput(avroFile,
      conf), new SpecificDatumReader<GenericData.Record>());
   
 
  avroFileReader.seek(pointTwo);
    // Verify that the second record was written;
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> secondRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(secondRecord.get());
    assertEquals("banana", secondRecord.getKey().toString());
    assertEquals("banana", secondRecord.getValue().name.toString());

   
  avroFileReader.seek(pointOne);
    // Verify that the first record was written.
    assertTrue(avroFileReader.hasNext());
    AvroKeyValue<CharSequence, TextStats> firstRecord
        = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next());
    assertNotNull(firstRecord.get());
    assertEquals("apple", firstRecord.getKey().toString());
    assertEquals("apple", firstRecord.getValue().name.toString());


    // That's all, folks.
    avroFileReader.close();
 
}
TOP

Related Classes of org.apache.avro.mapreduce.TestAvroKeyValueRecordWriter$R1

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.