Package org.apache.avro.generic

Source Code of org.apache.avro.generic.GenericDatumReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.generic;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.nio.ByteBuffer;

import org.codehaus.jackson.JsonNode;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.util.Utf8;

/** {@link DatumReader} for generic Java objects. */
public class GenericDatumReader<D> implements DatumReader<D> {
  // Schema handed to the decoding routines as "actual"; assigned by setSchema().
  private Schema actual;
  // Schema handed to the decoding routines as "expected"; when null, read()
  // falls back to using "actual" for both roles.
  private Schema expected;

  public GenericDatumReader() {}

  public GenericDatumReader(Schema actual) {
    setSchema(actual);
  }

  public GenericDatumReader(Schema actual, Schema expected) {
    this(actual);
    this.expected = expected;
  }

  public void setSchema(Schema actual) { this.actual = actual; }

  @SuppressWarnings("unchecked")
  public D read(D reuse, Decoder in) throws IOException {
    return (D) read(reuse, actual, expected != null ? expected : actual, in);
  }
 
  /** Called to read data.*/
  protected Object read(Object old, Schema actual,
                        Schema expected, Decoder in) throws IOException {
    if (actual.getType() == Type.UNION)           // resolve unions
      actual = actual.getTypes().get((int)in.readIndex());
    if (expected.getType() == Type.UNION)
      expected = resolveExpected(actual, expected);
    switch (actual.getType()) {
    case RECORD:  return readRecord(old, actual, expected, in);
    case ENUM:    return readEnum(actual, expected, in);
    case ARRAY:   return readArray(old, actual, expected, in);
    case MAP:     return readMap(old, actual, expected, in);
    case FIXED:   return readFixed(old, actual, expected, in);
    case STRING:  return readString(old, in);
    case BYTES:   return readBytes(old, in);
    case INT:     return in.readInt();
    case LONG:    return in.readLong();
    case FLOAT:   return in.readFloat();
    case DOUBLE:  return in.readDouble();
    case BOOLEAN: return in.readBoolean();
    case NULL:    in.readNull(); return null;
    default: throw new AvroRuntimeException("Unknown type: "+actual);
    }
  }

  private Schema resolveExpected(Schema actual, Schema expected) {
    // first scan for exact match
    for (Schema branch : expected.getTypes())
      if (branch.getType() == actual.getType())
        switch (branch.getType()) {
        case RECORD:
        case ENUM:
        case FIXED:
          String name = branch.getName();
          if (name == null || name.equals(actual.getName()))
            return branch;
          break;
        default:
          return branch;
        }
    // then scan match via numeric promotion
    for (Schema branch : expected.getTypes())
      switch (actual.getType()) {
      case INT:
        switch (branch.getType()) {
        case LONG: case FLOAT: case DOUBLE:
          return branch;
        }
        break;
      case LONG:
        switch (branch.getType()) {
        case FLOAT: case DOUBLE:
          return branch;
        }
        break;
      case FLOAT:
        switch (branch.getType()) {
        case DOUBLE:
          return branch;
        }
        break;
      }
    throw new AvroTypeException("Expected "+expected+", found "+actual);
  }

  /** Called to read a record instance. May be overridden for alternate record
   * representations.*/
  protected Object readRecord(Object old, Schema actual, Schema expected,
                              Decoder in) throws IOException {
    /* TODO: We may want to compute the expected and actual mapping and cache
     * the mapping (keyed by <actual, expected>). */
    String recordName = expected.getName();
    if (recordName != null && !recordName.equals(actual.getName()))
      throw new AvroTypeException("Expected "+expected+", found "+actual);
    Map<String, Field> expectedFields = expected.getFields();
    // all fields not in expected should be removed by newRecord.
    Object record = newRecord(old, expected);
    int size = 0;
    for (Map.Entry<String, Field> entry : actual.getFields().entrySet()) {
      String fieldName = entry.getKey();
      Field actualField = entry.getValue();
      Field expectedField =
          expected == actual ? actualField : expectedFields.get(entry.getKey());
      if (expectedField == null) {
        skip(actualField.schema(), in);
        continue;
      }
      int fieldPosition = expectedField.pos();
      Object oldDatum =
          (old != null) ? getField(record, fieldName, fieldPosition) : null;
      addField(record, fieldName, fieldPosition,
               read(oldDatum,actualField.schema(),expectedField.schema(), in));
      size++;
    }
    if (expectedFields.size() > size) {           // not all fields set
      Set<String> actualFields = actual.getFields().keySet();
      for (Map.Entry<String, Field> entry : expectedFields.entrySet()) {
        String fieldName = entry.getKey();
        if (!actualFields.contains(fieldName)) {  // an unset field
          Field f = entry.getValue();
          JsonNode json = f.defaultValue();
          if (json != null)                       // has default
            addField(record, fieldName, f.pos()// add default
                     defaultFieldValue(old, f.schema(), json));
          else if (old != null)                   // remove stale value
            removeField(record, fieldName, entry.getValue().pos());
        }
      }
    }
    return record;
  }

  /** Called by the default implementation of {@link #readRecord} to add a
   * record fields value to a record instance.  The default implementation is
   * for {@link GenericRecord}.*/
  protected void addField(Object record, String name, int position, Object o) {
    ((GenericRecord) record).put(name, o);
  }
 
  /** Called by the default implementation of {@link #readRecord} to retrieve a
   * record field value from a reused instance.  The default implementation is
   * for {@link GenericRecord}.*/
  protected Object getField(Object record, String name, int position) {
    return ((GenericRecord) record).get(name);
  }

  /** Called by the default implementation of {@link #readRecord} to remove a
   * record field value from a reused instance.  The default implementation is
   * for {@link GenericRecord}.*/
  protected void removeField(Object record, String field, int position) {
    ((GenericRecord) record).remove(field);
  }
 
  /** Called by the default implementation of {@link #readRecord} to construct
      a default value for a field. */
  protected Object defaultFieldValue(Object old, Schema schema, JsonNode json)
    throws IOException {
    switch (schema.getType()) {
    case RECORD:
      Object record = newRecord(old, schema);
      for (Map.Entry<String, Field> entry : schema.getFields().entrySet()) {
        String name = entry.getKey();
        Field f = entry.getValue();
        JsonNode v = json.get(name);
        if (v == null) v = f.defaultValue();
        if (v != null) {
          Object o = old != null ? getField(old, name, f.pos()) : null;
          addField(record, name, f.pos(), defaultFieldValue(o, f.schema(), v));
        } else if (old != null) {
          removeField(record, name, f.pos());
        }
      }
      return record;
    case ENUM:
      return createEnum(json.getTextValue(), schema);
    case ARRAY:
      Object array = newArray(old, json.size(), schema);
      Schema element = schema.getElementType();
      for (JsonNode node : json)
        addToArray(array, defaultFieldValue(peekArray(array), element, node));
      return array;
    case MAP:
      Object map = newMap(old, json.size());
      Schema value = schema.getValueType();
      for (Iterator<String> i = json.getFieldNames(); i.hasNext();) {
        String key = i.next();
        addToMap(map, new Utf8(key),
                 defaultFieldValue(null, value, json.get(key)));
      }
      return map;
    case UNION:   return defaultFieldValue(old, schema.getTypes().get(0), json);
    case FIXED:   return createFixed(old,json.getTextValue().getBytes("ISO-8859-1"),schema);
    case STRING:  return createString(json.getTextValue());
    case BYTES:  return createBytes(json.getTextValue().getBytes("ISO-8859-1"));
    case INT:     return json.getIntValue();
    case LONG:    return json.getLongValue();
    case FLOAT:   return (float)json.getDoubleValue();
    case DOUBLE:  return json.getDoubleValue();
    case BOOLEAN: return json.getBooleanValue();
    case NULL:    return null;
    default: throw new AvroRuntimeException("Unknown type: "+actual);
    }
  }

  /** Called to read an enum value. May be overridden for alternate enum
   * representations.  By default, returns the symbol as a String. */
  protected Object readEnum(Schema actual, Schema expected, Decoder in)
    throws IOException {
    String name = expected.getName();
    if (name != null && !name.equals(actual.getName()))
      throw new AvroTypeException("Expected "+expected+", found "+actual);
    return createEnum(actual.getEnumSymbols().get(in.readEnum()), expected);
  }

  /** Called to create an enum value. May be overridden for alternate enum
   * representations.  By default, returns the symbol as a String. */
  protected Object createEnum(String symbol, Schema schema) { return symbol; }

  /** Called to read an array instance.  May be overridden for alternate array
   * representations.*/
  protected Object readArray(Object old, Schema actual, Schema expected,
                             Decoder in) throws IOException {
    Schema actualType = actual.getElementType();
    Schema expectedType = expected.getElementType();
    long l = in.readArrayStart();
    if (l > 0) {
      Object array = newArray(old, (int) l, expected);
      do {
        for (long i = 0; i < l; i++) {
          addToArray(array, read(peekArray(array), actualType, expectedType, in))
        }
      } while ((l = in.arrayNext()) > 0);
     
      return array;
    } else {
      return newArray(old, 0, expected);
    }
  }

  /** Called by the default implementation of {@link #readArray} to retrieve a
   * value from a reused instance.  The default implementation is for {@link
   * GenericArray}.*/
  @SuppressWarnings("unchecked")
  protected Object peekArray(Object array) {
    return ((GenericArray) array).peek();
  }

  /** Called by the default implementation of {@link #readArray} to add a value.
   * The default implementation is for {@link GenericArray}.*/
  @SuppressWarnings("unchecked")
  protected void addToArray(Object array, Object e) {
    ((GenericArray) array).add(e);
  }
 
  /** Called to read a map instance.  May be overridden for alternate map
   * representations.*/
  protected Object readMap(Object old, Schema actual, Schema expected,
                           Decoder in) throws IOException {
    Schema aValue = actual.getValueType();
    Schema eValue = expected.getValueType();
    long l = in.readMapStart();
    Object map = newMap(old, (int) l);
    if (l > 0) {
      do {
        for (int i = 0; i < l; i++) {
          addToMap(map,
              readString(null, in),
              read(null, aValue, eValue, in));
        }
      } while ((l = in.mapNext()) > 0);
    }
    return map;
  }

  /** Called by the default implementation of {@link #readMap} to add a
   * key/value pair.  The default implementation is for {@link Map}.*/
  @SuppressWarnings("unchecked")
  protected void addToMap(Object map, Object key, Object value) {
    ((Map) map).put(key, value);
  }
 
  /** Called to read a fixed value. May be overridden for alternate fixed
   * representations.  By default, returns {@link GenericFixed}. */
  protected Object readFixed(Object old, Schema actual, Schema expected,
                             Decoder in)
    throws IOException {
    if (!actual.equals(expected))
      throw new AvroTypeException("Expected "+expected+", found "+actual);
    GenericFixed fixed = (GenericFixed)createFixed(old, expected);
    in.readFixed(fixed.bytes(), 0, actual.getFixedSize());
    return fixed;
  }

  /** Called to create an fixed value. May be overridden for alternate fixed
   * representations.  By default, returns {@link GenericFixed}. */
  protected Object createFixed(Object old, Schema schema) {
    if ((old instanceof GenericFixed)
        && ((GenericFixed)old).bytes().length == schema.getFixedSize())
      return old;
    return new GenericData.Fixed(schema);
  }

  /** Called to create an fixed value. May be overridden for alternate fixed
   * representations.  By default, returns {@link GenericFixed}. */
  protected Object createFixed(Object old, byte[] bytes, Schema schema) {
    GenericFixed fixed = (GenericFixed)createFixed(old, schema);
    System.arraycopy(bytes, 0, fixed.bytes(), 0, schema.getFixedSize());
    return fixed;
  }
  /**
   * Called to create new record instances. Subclasses may override to use a
   * different record implementation. The returned instance must conform to the
   * schema provided. If the old object contains fields not present in the
   * schema, they should either be removed from the old object, or it should
   * create a new instance that conforms to the schema. By default, this returns
   * a {@link GenericData.Record}.
   */
  protected Object newRecord(Object old, Schema schema) {
    if (old instanceof GenericRecord) {
      GenericRecord record = (GenericRecord)old;
      if (record.getSchema() == schema)
        return record;
    }
    return new GenericData.Record(schema);
  }

  /** Called to create new array instances.  Subclasses may override to use a
   * different array implementation.  By default, this returns a {@link
   * GenericData.Array}.*/
  @SuppressWarnings("unchecked")
  protected Object newArray(Object old, int size, Schema schema) {
    if (old instanceof GenericArray) {
      ((GenericArray) old).clear();
      return old;
    } else return new GenericData.Array(size, schema);
  }

  /** Called to create new array instances.  Subclasses may override to use a
   * different map implementation.  By default, this returns a {@link
   * HashMap}.*/
  @SuppressWarnings("unchecked")
  protected Object newMap(Object old, int size) {
    if (old instanceof Map) {
      ((Map) old).clear();
      return old;
    } else return new HashMap<Object, Object>(size);
  }

  /** Called to read strings.  Subclasses may override to use a different
   * string representation.  By default, this calls {@link
   * Decoder#readString(Utf8)}.*/
  protected Object readString(Object old, Decoder in) throws IOException {
    return in.readString((Utf8)old);
  }

  /** Called to create a string from a default value.  Subclasses may override
   * to use a different string representation.  By default, this calls {@link
   * Utf8#Utf8(String)}.*/
  protected Object createString(String value) { return new Utf8(value); }

  /** Called to read byte arrays.  Subclasses may override to use a different
   * byte array representation.  By default, this calls {@link
   * Decoder#readBytes(ByteBuffer)}.*/
  protected Object readBytes(Object old, Decoder in) throws IOException {
    return in.readBytes((ByteBuffer)old);
  }

  /** Called to create byte arrays from default values.  Subclasses may
   * override to use a different byte array representation.  By default, this
   * calls {@link ByteBuffer#wrap(byte[])}.*/
  protected Object createBytes(byte[] value) { return ByteBuffer.wrap(value); }

  /** Skip an instance of a schema. */
  public static void skip(Schema schema, Decoder in) throws IOException {
    switch (schema.getType()) {
    case RECORD:
      for (Map.Entry<String, Schema> entry : schema.getFieldSchemas())
        skip(entry.getValue(), in);
      break;
    case ENUM:
      in.readInt();
      break;
    case ARRAY:
      Schema elementType = schema.getElementType();
      for (long l = in.skipArray(); l > 0; l = in.skipArray()) {
        for (long i = 0; i < l; i++) {
          skip(elementType, in);
        }
      }
      break;
    case MAP:
      Schema value = schema.getValueType();
      for (long l = in.skipMap(); l > 0; l = in.skipMap()) {
        for (long i = 0; i < l; i++) {
          in.skipString();
          skip(value, in);
        }
      }
      break;
    case UNION:
      skip(schema.getTypes().get((int)in.readIndex()), in);
      break;
    case FIXED:
      in.skipFixed(schema.getFixedSize());
      break;
    case STRING:
      in.skipString();
      break;
    case BYTES:
      in.skipBytes();
      break;
    case INT:     in.readInt();           break;
    case LONG:    in.readLong();          break;
    case FLOAT:   in.readFloat();         break;
    case DOUBLE:  in.readDouble();        break;
    case BOOLEAN: in.readBoolean();       break;
    case NULL:                            break;
    default: throw new RuntimeException("Unknown type: "+schema);
    }
  }

}
TOP

Related Classes of org.apache.avro.generic.GenericDatumReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.