Package com.datasalt.pangool.tuplemr.serialization

Source Code of com.datasalt.pangool.tuplemr.serialization.TupleDeserializer$CachedTuples

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.tuplemr.serialization;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.serializer.Deserializer;

import com.datasalt.pangool.io.DatumWrapper;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Tuple;
import com.datasalt.pangool.serialization.HadoopSerialization;
import com.datasalt.pangool.tuplemr.SerializationInfo;
import com.datasalt.pangool.tuplemr.TupleMRConfig;

public class TupleDeserializer implements Deserializer<DatumWrapper<ITuple>> {

  private static class CachedTuples {
    private ITuple commonTuple;
    private List<ITuple> specificTuples = new ArrayList<ITuple>();
    private List<ITuple> resultTuples = new ArrayList<ITuple>();
  }

  private final TupleMRConfig tupleMRConf;
  private final SerializationInfo serInfo;
  private final boolean isRollup;
  private final boolean multipleSources;
  private DatumWrapper<CachedTuples> cachedTuples = new DatumWrapper<CachedTuples>();

  private SimpleTupleDeserializer simpleTupleDeSer;

  public TupleDeserializer(HadoopSerialization ser, TupleMRConfig tupleMRConfig, Configuration conf) {
    simpleTupleDeSer = new SimpleTupleDeserializer(ser, conf);
    this.tupleMRConf = tupleMRConfig;
    this.serInfo = tupleMRConf.getSerializationInfo();
    this.isRollup = tupleMRConf.getRollupFrom() != null && !tupleMRConf.getRollupFrom().isEmpty();
    this.multipleSources = tupleMRConf.getNumIntermediateSchemas() >= 2;
    this.cachedTuples.datum(createCachedTuples(tupleMRConf));
    this.cachedTuples.swapInstances(); // do rollup
    this.cachedTuples.datum(createCachedTuples(tupleMRConf));
  }

  private static CachedTuples createCachedTuples(TupleMRConfig config) {
    SerializationInfo serInfo = config.getSerializationInfo();
    boolean multipleSources = config.getNumIntermediateSchemas() >= 2;
    CachedTuples r = new CachedTuples();
    r.commonTuple = new Tuple(serInfo.getCommonSchema());
    for(Schema sourceSchema : config.getIntermediateSchemas()) {
      r.resultTuples.add(new Tuple(sourceSchema));
    }

    if(multipleSources) {
      for(Schema specificSchema : serInfo.getSpecificSchemas()) {
        r.specificTuples.add(new Tuple(specificSchema));
      }
    }
    return r;
  }

  @Override
  public void open(InputStream in) throws IOException {
    simpleTupleDeSer.open(in);
  }

  @Override
  public DatumWrapper<ITuple> deserialize(DatumWrapper<ITuple> t) throws IOException {
    if(t == null) {
      t = new DatumWrapper<ITuple>();
    }
    if(isRollup) {
      t.swapInstances();
      this.cachedTuples.swapInstances();
    }

    ITuple tuple = (multipleSources) ? deserializeMultipleSources() : deserializeOneSource(t.datum());
    t.datum(tuple);

    return t;
  }

  private ITuple deserializeMultipleSources() throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;

    simpleTupleDeSer.readFields(commonTuple, commonTuple.getSchema(), serInfo.getCommonSchemaDeserializers());
    int schemaId = WritableUtils.readVInt(simpleTupleDeSer.getInput());
    ITuple specificTuple = tuples.specificTuples.get(schemaId);
    simpleTupleDeSer.readFields(specificTuple, specificTuple.getSchema(), serInfo.getSpecificSchemaDeserializers().get(schemaId));
    ITuple result = tuples.resultTuples.get(schemaId);
    mixIntermediateIntoResult(commonTuple, specificTuple, result, schemaId);
    return result;
  }

  private void mixIntermediateIntoResult(ITuple commonTuple, ITuple specificTuple, ITuple result, int schemaId) {
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(schemaId);
    for(int i = 0; i < commonTranslation.length; i++) {
      int destPos = commonTranslation[i];
      result.set(destPos, commonTuple.get(i));
    }

    int[] specificTranslation = serInfo.getSpecificSchemaIndexTranslation(schemaId);
    for(int i = 0; i < specificTranslation.length; i++) {
      int destPos = specificTranslation[i];
      result.set(destPos, specificTuple.get(i));
    }
  }

  private ITuple deserializeOneSource(ITuple reuse) throws IOException {
    CachedTuples tuples = cachedTuples.datum();
    ITuple commonTuple = tuples.commonTuple;
    simpleTupleDeSer.readFields(commonTuple, commonTuple.getSchema(), serInfo.getCommonSchemaDeserializers());
    if(reuse == null) {
      reuse = tuples.resultTuples.get(0);
    }
    int[] commonTranslation = serInfo.getCommonSchemaIndexTranslation(0); // just one common schema
    for(int i = 0; i < commonTranslation.length; i++) {
      int destPos = commonTranslation[i];
      reuse.set(destPos, commonTuple.get(i));
    }
    return reuse;
  }

  @Override
  public void close() throws IOException {
    simpleTupleDeSer.close();
  }
}
TOP

Related Classes of com.datasalt.pangool.tuplemr.serialization.TupleDeserializer$CachedTuples

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.