// Package: com.datasalt.pangool.tuplemr.serialization
//
// Source code of com.datasalt.pangool.tuplemr.serialization.TupleDeserializer (and nested class CachedTuples)

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.tuplemr.serialization;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.util.ReflectionUtils;

import com.datasalt.pangool.io.DatumWrapper;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.FieldDeserializer;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.io.Utf8;
import com.datasalt.pangool.serialization.HadoopSerialization;
import com.datasalt.pangool.tuplemr.SerializationInfo;
import com.datasalt.pangool.tuplemr.TupleMRConfig;
import com.datasalt.pangool.utils.Buffer;


/**
 * Hadoop {@link Deserializer} that reconstructs Pangool {@link ITuple}s (wrapped in a
 * {@link DatumWrapper}) from a binary stream.
 * <p>
 * Tuples are serialized as a "common" part (fields shared by every intermediate schema)
 * optionally followed by a schema id and a "specific" part when the job has two or more
 * intermediate schemas. This class aggressively reuses tuple instances (see
 * {@link CachedTuples}) and internal buffers to avoid per-record allocation.
 * <p>
 * NOTE(review): instances are stateful and reuse returned tuples — not thread-safe, and
 * callers must not hold on to a returned tuple across {@code deserialize} calls.
 */
public class TupleDeserializer implements Deserializer<DatumWrapper<ITuple>> {

  /**
   * Holder for the reusable tuple instances: one tuple for the common schema, and one
   * specific-schema tuple plus one result (intermediate-schema) tuple per source.
   */
  private static class CachedTuples {
    private ITuple commonTuple;
    private List<ITuple> specificTuples=new ArrayList<ITuple>();
    private List<ITuple> resultTuples=new ArrayList<ITuple>();
  }
 
  private final TupleMRConfig tupleMRConf;
  private final Configuration conf;
  private final SerializationInfo serInfo;
  // Stream being read; assigned in open(). close() assumes open() was called first.
  private  DataInputStream in;
  private final HadoopSerialization ser;
  // True when the job uses rollup: deserialize() then double-buffers tuples via swapInstances().
  private final boolean isRollup;
  // True when there are >= 2 intermediate schemas, which adds a schema id + specific part per record.
  private final boolean multipleSources;

  // Scratch buffer reused when deserializing OBJECT fields through HadoopSerialization.
  private final Buffer tmpInputBuffer = new Buffer();
  // Two CachedTuples sets held in a DatumWrapper so rollup can swap between them (see constructor).
  private DatumWrapper<CachedTuples> cachedTuples = new DatumWrapper<CachedTuples>();

  /**
   * @param ser           serialization facade used for OBJECT fields without a custom deserializer
   * @param tupleMRConfig job configuration; provides the SerializationInfo (schemas, index translations)
   * @param conf          Hadoop configuration, used to instantiate OBJECT field values reflectively
   */
  public TupleDeserializer(HadoopSerialization ser, TupleMRConfig tupleMRConfig, Configuration conf) {
    this.tupleMRConf = tupleMRConfig;
    this.conf = conf;
    this.serInfo = tupleMRConf.getSerializationInfo();
    this.ser = ser;
    this.isRollup = tupleMRConf.getRollupFrom() != null && !tupleMRConf.getRollupFrom().isEmpty();
    this.multipleSources = tupleMRConf.getNumIntermediateSchemas() >= 2;
    // Populate BOTH slots of the DatumWrapper with independent CachedTuples instances,
    // so that swapInstances() during rollup always yields a fully initialized set.
    this.cachedTuples.datum(createCachedTuples(tupleMRConf));
    this.cachedTuples.swapInstances(); //do rollup
    this.cachedTuples.datum(createCachedTuples(tupleMRConf));
   
  }
 
  /**
   * Builds one complete set of reusable tuples: the common-schema tuple, one result tuple
   * per intermediate schema, and (only with multiple sources) one tuple per specific schema.
   */
  private static CachedTuples createCachedTuples(TupleMRConfig config){
    SerializationInfo serInfo = config.getSerializationInfo();
    boolean multipleSources = config.getNumIntermediateSchemas() >= 2;
    CachedTuples r = new CachedTuples();
    r.commonTuple = new Tuple(serInfo.getCommonSchema());
    for (Schema sourceSchema : config.getIntermediateSchemas()){
      r.resultTuples.add(new Tuple(sourceSchema));
    }
   
    if (multipleSources){
      for(Schema specificSchema : serInfo.getSpecificSchemas()){
        r.specificTuples.add(new Tuple(specificSchema));
      }
    }
    return r;
  }
 
  /** Binds the input stream, wrapping it in a DataInputStream only if needed. */
  @Override
  public void open(InputStream in) throws IOException {
    if (in instanceof DataInputStream){
      this.in = (DataInputStream)in;
    } else {
      this.in = new DataInputStream(in);
    }
  }
 
  /**
   * Reads the next tuple from the stream.
   * <p>
   * With rollup enabled, both the caller's wrapper and the internal cache swap their
   * current/previous instances first, so the previously returned tuple stays intact
   * while the next record is deserialized into the other instance set.
   *
   * @param t wrapper to reuse; a new one is created if {@code null}
   * @return {@code t} with its datum set to the deserialized tuple (a cached, reused instance)
   */
  @Override
  public DatumWrapper<ITuple> deserialize(DatumWrapper<ITuple> t) throws IOException {
    if(t == null) {
      t = new DatumWrapper<ITuple>();
    }
    if(isRollup) {
      t.swapInstances();
      this.cachedTuples.swapInstances();
    }

    ITuple tuple = (multipleSources) ?
        deserializeMultipleSources() : deserializeOneSource(t.datum());
    t.datum(tuple);
   
    return t;
  }
 
  /**
   * Wire format for multiple sources: common fields, then a vint schema id, then the
   * specific fields for that schema. Common + specific are merged into the cached
   * result tuple for that schema id.
   */
  private ITuple deserializeMultipleSources() throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple =tuples.commonTuple;
   
    readFields(commonTuple,in,serInfo.getCommonSchemaDeserializers());
    int schemaId = WritableUtils.readVInt(in);
    ITuple specificTuple = tuples.specificTuples.get(schemaId);
    readFields(specificTuple,in,serInfo.getSpecificSchemaDeserializers().get(schemaId));
    ITuple result = tuples.resultTuples.get(schemaId);
    mixIntermediateIntoResult(commonTuple,specificTuple,result,schemaId);
    return result;
  }
 
  /**
   * Copies fields from the common and specific tuples into {@code result}, using the
   * per-schema index translation tables (source index i -> destination index in result).
   */
  private void mixIntermediateIntoResult(ITuple commonTuple,ITuple specificTuple,ITuple result,int schemaId){
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(schemaId);
    for (int i =0 ; i < commonTranslation.length ; i++){
      int destPos = commonTranslation[i];
      result.set(destPos,commonTuple.get(i));
    }
   
    int[] specificTranslation = serInfo.getSpecificSchemaIndexTranslation(schemaId);
    for (int i =0 ; i < specificTranslation.length ; i++){
      int destPos = specificTranslation[i];
      result.set(destPos,specificTuple.get(i));
    }
  }
 
  /**
   * Single-source path: only the common part is on the wire (no schema id, no specific
   * part). Fields are copied into {@code reuse}, or into the cached result tuple for
   * schema 0 when {@code reuse} is null.
   */
  private ITuple deserializeOneSource(ITuple reuse) throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;
    readFields(commonTuple,in,serInfo.getCommonSchemaDeserializers());
    if (reuse == null){
      reuse = tuples.resultTuples.get(0);
    }
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(0); //just one common schema
    for (int i =0 ; i < commonTranslation.length ; i++){
      int destPos = commonTranslation[i];
      reuse.set(destPos,commonTuple.get(i));
    }
    return reuse;
  }

  /**
   * Reads every field of {@code tuple}'s schema from {@code input}, in schema order,
   * dispatching on the field type. INT/LONG use Hadoop vint/vlong encoding; OBJECT
   * fields use the matching entry of {@code customDeserializers} when non-null.
   *
   * @throws IOException on stream errors or an unsupported field type
   */
  public void readFields(ITuple tuple, DataInputStream input,FieldDeserializer[] customDeserializers)
      throws IOException {
    Schema schema = tuple.getSchema();
    for(int index = 0; index < schema.getFields().size(); index++) {
      FieldDeserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);
      switch(field.getType()){
      case INT:  tuple.set(index,WritableUtils.readVInt(input)); break;
      case LONG: tuple.set(index,WritableUtils.readVLong(input)); break;
      case DOUBLE: tuple.set(index,input.readDouble()); break;
      case FLOAT: tuple.set(index,input.readFloat()); break;
      case STRING: readUtf8(input,tuple,index); break;
      case BOOLEAN:
        byte b = input.readByte();
        tuple.set(index,(b != 0));
        break;
      case ENUM: readEnum(input,tuple,field.getObjectClass(),index); break;
      case BYTES: readBytes(input,tuple,index); break;
      case OBJECT: readCustomObject(input,tuple,field.getObjectClass(),index,customDeser); break;
      default:
        throw new IOException("Not supported type:" + field.getType());
      }
    }
  }
 
  /** Reads a STRING field, reusing the Utf8 instance already stored in the tuple when present. */
  protected void readUtf8(DataInputStream input,ITuple tuple,int index) throws IOException {
    //this method is safe because tuple is internal, the tuple is not the final one
    Utf8 t = (Utf8)tuple.get(index);
    if (t == null){
      t = new Utf8();
      tuple.set(index,t);
    }
    t.readFields(input);
  }
 
  /**
   * Reads an OBJECT field. Wire format: vint byte-length, then the payload.
   * With a custom FieldDeserializer the payload is read directly from the stream
   * (NOTE(review): the vint length is then not used to bound the read — the custom
   * deserializer is trusted to consume exactly its bytes). Otherwise the payload is
   * copied into {@code tmpInputBuffer} and decoded via {@link HadoopSerialization},
   * reusing (or reflectively creating) the instance already in the tuple.
   *
   * @throws IOException if the encoded length is negative or on stream errors
   */
  protected void readCustomObject(DataInputStream input,ITuple tuple,Class<?> expectedType,int index,
      FieldDeserializer customDeser)
      throws IOException{
    int size = WritableUtils.readVInt(input);
    if(size >=0) {
      Object object = tuple.get(index);
      if (customDeser != null){
        customDeser.open(input );
        object = customDeser.deserialize(object);
        customDeser.close();
        tuple.set(index,object);
      } else {
        if(object == null) {
          tuple.set(index, ReflectionUtils.newInstance(expectedType, conf));
        }
        tmpInputBuffer.setSize(size);
        input.readFully(tmpInputBuffer.getBytes(), 0, size);
        Object ob = ser.deser(tuple.get(index), tmpInputBuffer.getBytes(), 0, size);
        tuple.set(index, ob);
      }
     
     
    } else {
      throw new IOException("Error deserializing, custom object serialized with negative length : " + size);
    }
  }
 
  /**
   * Reads a BYTES field (vint length + raw bytes) into a ByteBuffer, reusing the
   * tuple's existing buffer when its capacity suffices; otherwise a new buffer is
   * allocated and stored in the tuple. The buffer's limit is set to the read length.
   */
  public void readBytes(DataInputStream input,ITuple tuple,int index) throws IOException {
    int length = WritableUtils.readVInt(input);
    ByteBuffer old = (ByteBuffer)tuple.get(index);
    ByteBuffer result;
    if (old != null && length <= old.capacity()) {
      result = old;
      result.clear();
    } else {
      result = ByteBuffer.allocate(length);
      tuple.set(index,result);
    }
    input.readFully(result.array(),result.position(),length);
    result.limit(length);
  }
 
 
  /**
   * Reads an ENUM field stored as its vint ordinal and maps it back to the enum constant.
   *
   * @throws IOException if the ordinal is out of range for {@code fieldType}
   */
  protected void readEnum(DataInputStream input,ITuple tuple,Class<?> fieldType,int index) throws IOException{
    int ordinal = WritableUtils.readVInt(input);
    try {
      Object[] enums = fieldType.getEnumConstants();
      tuple.set(index,enums[ordinal]);
    } catch(ArrayIndexOutOfBoundsException e) {
      throw new IOException("Ordinal index out of bounds for " + fieldType + " ordinal="+ordinal);
    }
  }
 

  /** Closes the underlying stream. NOTE(review): throws NPE if open() was never called. */
  @Override
  public void close() throws IOException {
    in.close();
  }
}
// ---------------------------------------------------------------------------
// Scraped-page footer (not part of the original source file):
// Related Classes of com.datasalt.pangool.tuplemr.serialization.TupleDeserializer$CachedTuples
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark
// of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
// ---------------------------------------------------------------------------