// Package: com.datasalt.pangool.utils
//
// Source code of com.datasalt.pangool.utils.AvroUtils

/**
* Copyright [2012] [Datasalt Systems S.L.]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasalt.pangool.utils;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.mapred.AvroSerialization;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.util.ReflectionUtils;

import com.datasalt.pangool.PangoolRuntimeException;
import com.datasalt.pangool.io.ITuple;
import com.datasalt.pangool.io.Schema;
import com.datasalt.pangool.io.Schema.Field;
import com.datasalt.pangool.io.Schema.Field.Type;
import com.datasalt.pangool.serialization.HadoopSerialization;

public class AvroUtils {

  public static void addAvroSerialization(Configuration conf) {
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if(!serializations.contains(AvroSerialization.class.getName())) {
      serializations.add(AvroSerialization.class.getName());
      conf.setStrings("io.serializations", serializations.toArray(new String[0]));
    }
  }

  /**
   * Converts from one Avro schema to one Pangool schema for de-serializing it
   */
  public static Schema toPangoolSchema(org.apache.avro.Schema avroSchema) {
    List<Field> fields = new ArrayList<Field>();
    for(org.apache.avro.Schema.Field field : avroSchema.getFields()) {
      org.apache.avro.Schema.Type type = field.schema().getType();
      switch(type){
      case INT: fields.add(Field.create(field.name(),Type.INT)); break;
      case LONG: fields.add(Field.create(field.name(),Type.LONG)); break;
      case FLOAT: fields.add(Field.create(field.name(),Type.FLOAT)); break;
      case DOUBLE: fields.add(Field.create(field.name(),Type.DOUBLE)); break;
      case BOOLEAN: fields.add(Field.create(field.name(),Type.BOOLEAN)); break;
      case STRING: fields.add(Field.create(field.name(),Type.STRING)); break;
      case BYTES:
        String clazz = avroSchema.getProp(field.name());
        try{
        fields.add(Field.createObject(field.name(), Class.forName(clazz)));
        } catch(ClassNotFoundException e){
          throw new PangoolRuntimeException(e);
        }
        break;
      case ENUM:
        clazz = avroSchema.getProp(field.name());
        try{
          fields.add(Field.createEnum(field.name(),Class.forName(clazz)));
          } catch(ClassNotFoundException e){
            throw new PangoolRuntimeException(e);
          }
        break;
      default:
        throw new PangoolRuntimeException("Avro type:" + type + " can't be converted to Pangool Schema type");
      }
    }
    Schema schema = new Schema(avroSchema.getFullName(), fields);
    return schema;
  }

  /**
   * Converts from one Pangool schema to one Avro schema for serializing it
   */
  public static org.apache.avro.Schema toAvroSchema(Schema pangoolSchema) {
    List<org.apache.avro.Schema.Field> avroFields = new ArrayList<org.apache.avro.Schema.Field>();
    Map<String, String> complexTypesMetadata = new HashMap<String, String>();
    for(Field field : pangoolSchema.getFields()) {
      org.apache.avro.Schema.Type avroFieldType = null;
      switch(field.getType()){
        case INT: avroFieldType = org.apache.avro.Schema.Type.INT; break;
        case FLOAT: avroFieldType = org.apache.avro.Schema.Type.FLOAT; break;
        case DOUBLE: avroFieldType = org.apache.avro.Schema.Type.DOUBLE; break;
        case LONG: avroFieldType = org.apache.avro.Schema.Type.LONG; break;
        case BOOLEAN: avroFieldType = org.apache.avro.Schema.Type.BOOLEAN; break;
        case STRING: avroFieldType = org.apache.avro.Schema.Type.STRING; break;
        case OBJECT:
          avroFieldType = org.apache.avro.Schema.Type.BYTES;
          complexTypesMetadata.put(field.getName(), field.getObjectClass().getName());
        break;
        case ENUM:
          avroFieldType = org.apache.avro.Schema.Type.ENUM;
          complexTypesMetadata.put(field.getName(),field.getObjectClass().getName());
          break;
        default:
          throw new PangoolRuntimeException("Not avro conversion from Pangool Schema type:" + field.getType());
       
      }
      org.apache.avro.Schema avroFieldSchema;
      if (avroFieldType == org.apache.avro.Schema.Type.ENUM){
        Object[] enumValues = field.getObjectClass().getEnumConstants();
        List<String> values=new ArrayList<String>();
        for (Object e : enumValues){
          values.add(e.toString());
        }
        avroFieldSchema = org.apache.avro.Schema.createEnum(field.getName(),null,null, values);
      } else {
        //simple types
        avroFieldSchema = org.apache.avro.Schema.create(avroFieldType);
      }
      avroFields.add(new org.apache.avro.Schema.Field(field.getName(),avroFieldSchema, null, null));
    }

    org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord(pangoolSchema.getName(), null, null, false);
    for(Map.Entry<String, String> props : complexTypesMetadata.entrySet()) {
      avroSchema.addProp(props.getKey(), props.getValue());
    }
    avroSchema.setFields(avroFields);
    return avroSchema;
  }

  /**
   * Moves data between a Tuple and an Avro Record
   */
  public static void toRecord(ITuple tuple, Record record,
      DataOutputBuffer tmpOutputBuffer, HadoopSerialization ser) throws IOException {
    Schema pangoolSchema = tuple.getSchema();
    for(int i = 0; i < pangoolSchema.getFields().size(); i++) {
      Object obj = tuple.get(i);
      Field field = pangoolSchema.getField(i);
      switch(field.getType()){
      case INT:
      case LONG:
      case FLOAT:
      case BOOLEAN:
      case DOUBLE:
        record.put(i, obj); //optimistic
        break;
      case OBJECT:
        tmpOutputBuffer.reset();
        ser.ser(obj, tmpOutputBuffer);
        ByteBuffer buffer = ByteBuffer.wrap(tmpOutputBuffer.getData(), 0, tmpOutputBuffer.getLength());
        record.put(i, buffer);
        break;
      case ENUM:
        record.put(i,obj.toString());
        break;
      case STRING:
        record.put(i,new Utf8(obj.toString())); //could be directly String ?
        break;
      default:
          throw
          new IOException("Not correspondence to Avro type from Pangool type " + field.getType());
      }
    }
  }

  /**
   * Moves data between a Record and a Tuple
   */
  public static void toTuple(Record record, ITuple tuple, Configuration conf,
      HadoopSerialization ser) throws IOException {
    for(org.apache.avro.Schema.Field field : record.getSchema().getFields()) {
      int pos = field.pos();
      Object obj = record.get(pos);
      switch(field.schema().getType()){
      case INT:
      case LONG:
      case BOOLEAN:
      case FLOAT:
      case DOUBLE:
        tuple.set(pos,obj); //very optimistic
        break;
      case STRING:
        if (!(tuple.get(pos) instanceof Utf8)){
          tuple.set(pos,new com.datasalt.pangool.io.Utf8());
        }
        com.datasalt.pangool.io.Utf8 utf8=(com.datasalt.pangool.io.Utf8)tuple.get(pos);
        if (obj instanceof String){
          utf8.set((String)obj);
        } else if (obj instanceof Utf8){
          Utf8 avroUtf8 = (Utf8)obj;
          utf8.set(avroUtf8.getBytes(),0,avroUtf8.getByteLength());
        } else {
          throw new IOException("Not supported avro field " +
              org.apache.avro.Schema.Type.STRING + " with instance " + obj.getClass().getName());
        }
        break;
      case ENUM:
        String clazzName = record.getSchema().getProp(field.name());
        try{
          Class clazz = Class.forName(clazzName);
          Enum e = Enum.valueOf(clazz,obj.toString());
          tuple.set(pos,e);
        } catch(ClassNotFoundException e){
          throw new IOException(e);
        }
        break;
      case BYTES:
        clazzName = record.getSchema().getProp(field.name());
        try{
          Class clazz = Class.forName(clazzName);
          if(tuple.get(pos) == null || tuple.get(pos).getClass() != clazz) {
            tuple.set(pos, ReflectionUtils.newInstance(clazz, conf));
          }
          ByteBuffer byteBuffer = (ByteBuffer) obj;
          byte[] bytes = byteBuffer.array();
          ser.deser(tuple.get(pos), bytes, 0, bytes.length);
        } catch(ClassNotFoundException e){
          throw new IOException(e);
        }
        break;
      default:
        throw new IOException("Not supported avro type : " + field.schema().getType());
      }
    }
  }
}
// TOP
//
// Related Classes of com.datasalt.pangool.utils.AvroUtils
//
// TOP
// Copyright (c) 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.