Package com.cloudera.cdk.data.hbase.avro

Source Code of com.cloudera.cdk.data.hbase.avro.AvroKeySerDe

/**
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.data.hbase.avro;

import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.PartitionKey;
import com.cloudera.cdk.data.PartitionStrategy;
import com.cloudera.cdk.data.hbase.avro.io.MemcmpDecoder;
import com.cloudera.cdk.data.hbase.avro.io.MemcmpEncoder;
import com.cloudera.cdk.data.hbase.impl.KeySerDe;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.Encoder;

/**
* Avro implementation of the KeySerDe interface. This will serialize Keys and
* PartialKeys to a special ordered memcmp-able avro encoding.
*
* @param <K>
*          The StorageKey type.
*/
public class AvroKeySerDe implements KeySerDe {

  private final Schema schema;
  private final Schema[] partialSchemas;
  private final PartitionStrategy partitionStrategy;

  public AvroKeySerDe(Schema schema, PartitionStrategy partitionStrategy) {
    this.schema = schema;
    int fieldSize = schema.getFields().size();
    partialSchemas = new Schema[fieldSize];
    for (int i = 0; i < fieldSize; i++) {
      if (i == (fieldSize - 1)) {
        break;
      }
      List<Field> partialFieldList = new ArrayList<Field>();
      for (Field field : schema.getFields().subList(0, i + 1)) {
        partialFieldList.add(AvroUtils.cloneField(field));
      }
      partialSchemas[i] = Schema.createRecord(partialFieldList);
    }
    this.partitionStrategy = partitionStrategy;
  }

  @Override
  public byte[] serialize(PartitionKey key) {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    Encoder encoder = new MemcmpEncoder(outputStream);

    Schema schemaToUse;
    if (key.getLength() == schema.getFields().size()) {
      schemaToUse = schema;
    } else {
      schemaToUse = partialSchemas[key.getLength() - 1];
    }
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(
        schemaToUse);
    GenericRecord record = new GenericData.Record(schemaToUse);
    for (int i = 0; i < key.getLength(); i++) {
      Object keyPart = key.get(i);
      if (keyPart == null) {
        // keyPart is null, let's make sure we check that the key can support a
        // null value so we can throw a friendly exception if it can't.
        Schema fieldSchema = schemaToUse.getFields().get(i).schema();
        if (fieldSchema.getType() != Schema.Type.NULL
            && fieldSchema.getType() != Schema.Type.UNION) {
          throw new DatasetException(
              "Null key field only supported in null type or union type that has a null type.");
        } else if (fieldSchema.getType() == Schema.Type.UNION) {
          boolean foundNullInUnion = false;
          for (Schema unionSchema : fieldSchema.getTypes()) {
            if (unionSchema.getType() == Schema.Type.NULL) {
              foundNullInUnion = true;
            }
          }
          if (!foundNullInUnion) {
            throw new DatasetException(
                "Null key field only supported in union type that has a null type.");
          }
        }
      }
      record.put(i, keyPart);
    }
    AvroUtils.writeAvroEntity(record, encoder, datumWriter);
    return outputStream.toByteArray();
  }

  @Override
  public PartitionKey deserialize(byte[] keyBytes) {
    ByteArrayInputStream inputStream = new ByteArrayInputStream(keyBytes);
    Decoder decoder = new MemcmpDecoder(inputStream);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(
        schema);
    GenericRecord genericRecord = AvroUtils
        .readAvroEntity(decoder, datumReader);

    Object[] keyParts = new Object[genericRecord.getSchema().getFields().size()];
    for (int i = 0; i < genericRecord.getSchema().getFields().size(); i++) {
      keyParts[i] = genericRecord.get(i);
    }
    return partitionStrategy.partitionKey(keyParts);
  }

  @Override
  public byte[] serialize(Object... keyPartValues) {
    return serialize(partitionStrategy.partitionKey(keyPartValues));
  }
}
TOP

Related Classes of com.cloudera.cdk.data.hbase.avro.AvroKeySerDe

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.