Source Code of com.datasalt.pangool.tuplemr.serialization.TestSimpleTupleDeserializer

package com.datasalt.pangool.tuplemr.serialization;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.junit.Test;

import com.datasalt.pangool.io.Fields;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Mutator;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.serialization.HadoopSerialization;

public class TestSimpleTupleDeserializer {

  @Test
  public void test() throws IOException {
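    // Serialize tuples with a narrow write schema and deserialize them into a
    // wider target schema: fields "d" and "e" are never written and have no
    // default values, so they should come back as null.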
    Schema schema = new Schema("schema", Fields.parse("a:string, b:int?, c:double"));
    Schema targetSchema = new Schema("target", Fields.parse("a:string, b:int?, c:double, d:long?, e:boolean?"));
   
    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);
    tuple.set("c", 5d);
   
    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
   
    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    // Something important: if we read a file that doesn't contain a field just
    // after a file that does contain it, the field should be cleared even when
    // no default value was provided.
    schema = new Schema("schema", Fields.parse("a:string, c:double"));
    tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("c", 5d);
   
    bos = new ByteArrayOutputStream();
    ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ser.open(bos);
   
    for(int i = 0; i < 10; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
    bos.close();   
    bis = new ByteArrayInputStream(bos.toByteArray());   
    des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    for(int i = 0; i < 10; i++) {
      des.deserialize(targetTuple);
    }
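    // "b" was present in the previous stream but not in this one and has no
    // default value, so the deserializer must have cleared it.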
   
    assertEquals("foo", targetTuple.getString("a"));
    assertNull(targetTuple.get("b"));
    assertEquals(5d, targetTuple.get("c"));
    assertNull(targetTuple.get("d"));
    assertNull(targetTuple.get("e"));
   
    bis.close();
  }
 
  @Test
  public void testDefaultValuesPerformance() throws IOException {
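    // "c" and "d" are nullable fields with default values. They are not present
    // in the written data, so the deserializer has to fill in the defaults for
    // every record; Mutator.superSetOf builds a schema containing all the fields
    // of "schema" plus these two extra ones.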
    Field cField = Field.create("c", Type.DOUBLE, true, 100d);
    Field dField = Field.create("d", Type.LONG, true, 1000L);
   
    Schema schema = new Schema("schema", Fields.parse("a:string, b:int?"));
    Schema targetSchema = Mutator.superSetOf(schema, cField, dField);
   
    Configuration conf = new Configuration();
    HadoopSerialization hadoopSerDe = new HadoopSerialization(conf);

    ITuple tuple = new Tuple(schema);
    tuple.set("a", "foo");
    tuple.set("b", 10);
   
    SimpleTupleSerializer ser = new SimpleTupleSerializer(schema, hadoopSerDe, conf);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    ser.open(bos);
   
    for(int i = 0; i < 100000; i++) {
      ser.serialize(tuple);
    }
   
    ser.close();
   
    bos.close();
    ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
   
    SimpleTupleDeserializer des = new SimpleTupleDeserializer(schema, targetSchema, hadoopSerDe, conf);
    des.open(bis);
   
    ITuple targetTuple = new Tuple(targetSchema);
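    // Rough performance check: time the deserialization of 100,000 tuples, each
    // of which needs the defaults for "c" and "d" applied.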
    long start = System.currentTimeMillis();
    for(int i = 0; i < 100000; i++) {
      des.deserialize(targetTuple);
    }
    long end = System.currentTimeMillis();
    System.out.println("Deserialized 100000 tuples in " + (end - start) + " ms");
   
    assertEquals("foo", targetTuple.getString("a"));
    assertEquals(10, targetTuple.get("b"));
    assertEquals(100d, targetTuple.get("c"));
    assertEquals(1000L, targetTuple.get("d"));
  }
}