Source Code of com.squareup.cascading2.serialization.TestProtobufSerialization
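
The test class below exercises the protobuf Hadoop serialization in three ways: a bare Serializer/Deserializer round trip over an in-memory byte stream, a Cascading flow in which a Person message travels through a GroupBy as the grouped value, and a flow in which the message itself is the GroupBy key. Both flow-based tests register ProtobufSerialization by appending its class name to the io.serializations property.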

package com.squareup.cascading2.serialization;

import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.Insert;
import cascading.pipe.Each;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import com.google.protobuf.Message;
import com.squareup.cascading2.generated.Example;
import com.squareup.cascading2.scheme.ProtobufScheme;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serializer;
import org.apache.hadoop.mapred.JobConf;
import org.junit.Ignore;

public class TestProtobufSerialization extends TestCase {

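  /**
   * Serializes two Person messages to an in-memory byte stream and reads them back
   * through the bare Serializer/Deserializer pair, outside of any Hadoop job or
   * Cascading flow.
   */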
  public void testBareRoundtrip() throws Exception {
    ProtobufSerialization serde = new ProtobufSerialization();
    Serializer<Example.Person> ser = serde.getSerializer(Example.Person.class);
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    ser.open(byteArrayOutputStream);
    ser.serialize(Example.Person.newBuilder().setName("bryan").setId(1).build());
    ser.serialize(Example.Person.newBuilder().setName("lucas").setId(2).build());
    ser.close();

    Deserializer<Example.Person> de = serde.getDeserializer(Example.Person.class);
    de.open(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));
    Example.Person person = de.deserialize(null);
    assertEquals("bryan", person.getName());
    assertEquals(1, person.getId());

    person = de.deserialize(null);
    assertEquals("lucas", person.getName());
    assertEquals(2, person.getId());
  }

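  /**
   * Writes two Person tuples through a ProtobufScheme-backed Hfs tap, then runs a flow
   * that inserts a constant "key" field and groups on it, so each Person message travels
   * through the shuffle as a GroupBy value. Verifies the sink contains the same tuples.
   */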
  public void testAsGroupByValue() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    Pipe injectedPipe = new Each(inPipe, Fields.NONE, new Insert(new Fields("key"), 7), new Fields("key", "value"));
    Pipe groupByPipe = new GroupBy(injectedPipe, new Fields("key"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      put("io.serializations", new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

    assertEquals(expectedTuples, tuples);
  }

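  /**
   * Same round trip as above, but groups directly on the "value" field, so the Person
   * message itself serves as the GroupBy key.
   */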
  public void testAsGroupByKey() throws Exception {
    FileSystem.get(new Configuration()).delete(new Path("/tmp/input"), true);
    FileSystem.get(new Configuration()).delete(new Path("/tmp/output"), true);

    Tap t = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/input");
    TupleEntryCollector tec = t.openForWrite(new HadoopFlowProcess(new JobConf()));

    HashSet<Tuple> expectedTuples = new HashSet<Tuple>(){{
      add(new Tuple(Example.Person.newBuilder().setName("bryan").setId(1).build()));
      add(new Tuple(Example.Person.newBuilder().setName("lucas").setId(2).build()));
    }};

    for (Tuple tuple : expectedTuples) {
      tec.add(tuple);
    }

    tec.close();

    Pipe inPipe = new Pipe("input");
    Pipe groupByPipe = new GroupBy(inPipe, new Fields("value"));

    Hfs sink = new Hfs(new ProtobufScheme("value", Example.Person.class), "/tmp/output");
    Map<Object, Object> properties = new HashMap<Object, Object>(){{
      put("io.serializations",
          new JobConf().get("io.serializations") + "," + ProtobufSerialization.class.getName());
    }};
    new HadoopFlowConnector(properties).connect(t, sink, groupByPipe).complete();

    TupleEntryIterator tei = sink.openForRead(new HadoopFlowProcess(new JobConf()));
    Set<Tuple> tuples = new HashSet<Tuple>();
    while (tei.hasNext()) {
      tuples.add(tei.next().getTupleCopy());
    }

    assertEquals(expectedTuples, tuples);
  }
}
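
For reference, the ProtobufSerialization class under test is not shown on this page. The sketch below is not the project's actual implementation; it is a minimal illustration, with a hypothetical class name, of how a Hadoop Serialization for protobuf messages could be written, assuming length-delimited encoding so that several messages can share one stream, which is the behavior testBareRoundtrip relies on.

import com.google.protobuf.Message;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Method;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.Serialization;
import org.apache.hadoop.io.serializer.Serializer;

// Hypothetical sketch only; the name and structure are assumptions, not the real class.
public class DelimitedProtobufSerialization<T extends Message> implements Serialization<T> {

  @Override
  public boolean accept(Class<?> c) {
    // Hadoop asks each Serialization registered in io.serializations until one accepts.
    return Message.class.isAssignableFrom(c);
  }

  @Override
  public Serializer<T> getSerializer(Class<T> c) {
    return new Serializer<T>() {
      private OutputStream out;

      @Override public void open(OutputStream out) { this.out = out; }

      @Override public void serialize(T message) throws IOException {
        // writeDelimitedTo length-prefixes each message, so several messages can be
        // appended to the same stream and read back one at a time.
        message.writeDelimitedTo(out);
      }

      @Override public void close() throws IOException { out.close(); }
    };
  }

  @Override
  public Deserializer<T> getDeserializer(final Class<T> c) {
    return new Deserializer<T>() {
      private InputStream in;

      @Override public void open(InputStream in) { this.in = in; }

      @Override public T deserialize(T ignored) throws IOException {
        try {
          // Generated protobuf classes expose a static parseDelimitedFrom(InputStream).
          Method parse = c.getMethod("parseDelimitedFrom", InputStream.class);
          return c.cast(parse.invoke(null, in));
        } catch (Exception e) {
          throw new IOException("could not parse " + c.getName(), e);
        }
      }

      @Override public void close() throws IOException { in.close(); }
    };
  }
}

Whatever the real implementation looks like, it is activated the same way the tests do it: by appending the class name to the io.serializations property of the job configuration.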