Package org.apache.pig.piggybank.storage.avro

Source Code of org.apache.pig.piggybank.storage.avro.PigAvroDatumReader

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package org.apache.pig.piggybank.storage.avro;

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.ResolvingDecoder;
import org.apache.avro.util.Utf8;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

/**
* An avro GenericDatumReader which reads in avro data and
* converts them to pig data: tuples, bags, etc.
*
*/
public class PigAvroDatumReader extends GenericDatumReader<Object> {

    /**
     * Construct where the writer's and reader's schemas are the same.
     */
    public PigAvroDatumReader(Schema schema) {
        super(schema);
    }

    /**
     * Construct given writer's and reader's schema.
     */
    public PigAvroDatumReader(Schema writer, Schema reader) throws IOException {
        super(writer, reader);
    }

    /**
     * Called to read a record instance. Overridden to read a pig tuple.
     */
    @Override
    protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException {

        // find out the order in which we will receive fields from the ResolvingDecoder
        Field[] readOrderedFields = in.readFieldOrder();

        /* create an empty tuple */
        Tuple tuple = TupleFactory.getInstance().newTuple(readOrderedFields.length);

        /* read fields and put in output order in tuple
         * The ResolvingDecoder figures out the writer schema to reader schema mapping for us
         */
        for (Field f : readOrderedFields) {
            tuple.set(f.pos(), read(old, f.schema(), in));
        }

        return tuple;
    }

    /**
     * Called to read a map instance. Overridden to read a pig map.
     */
    protected Object readMap(Object old, Schema expected, ResolvingDecoder in) throws IOException {
        Schema eValue = expected.getValueType();
        long l = in.readMapStart();
        Object map = newMap(old, (int) l);
        if (l > 0) {
            do {
                for (int i = 0; i < l; i++) {
                    addToMap(map, readString(null, AvroStorageUtils.StringSchema, in),
                                      read(null, eValue, in));
                }
            } while ((l = in.mapNext()) > 0);
        }
        return map;
    }

    /**
     * Called to create an enum value. Overridden to create a pig string.
     */
    @Override
    protected Object createEnum(String symbol, Schema schema) {
        return symbol;
    }

    /**
     * Called by the default implementation of {@link #readArray} to retrieve a
     * value from a reused instance.
     */
    @Override
    protected Object peekArray(Object array) {
        return null;
    }

    /**
     * Called by the default implementation of {@link #readArray} to add a
     * value. Overridden to append to pig bag.
     */
    @Override
    protected void addToArray(Object array, long pos, Object e) {
        if (e instanceof Tuple) {
            ((DataBag) array).add((Tuple) e);
        } else {
            Tuple t = new DefaultTuple();
            t.append(e);
            ((DataBag) array).add(t);
        }
    }

    /**
     * Called to read a fixed value. Overridden to read a pig byte array.
     */
    @Override
    protected Object readFixed(Object old, Schema expected, Decoder in) throws IOException {
        GenericFixed fixed = (GenericFixed) super.readFixed(old, expected, in);
        DataByteArray byteArray = new DataByteArray(fixed.bytes());
        return byteArray;
     }

    /**
     * Called to create new record instances. Overridden to return a new tuple.
     */
    @Override
    protected Object newRecord(Object old, Schema schema) {
        return TupleFactory.getInstance().newTuple();
    }

    /**
     * Called to create new array instances. Overridden to return a new bag.
     */
    @Override
    protected Object newArray(Object old, int size, Schema schema) {
        return BagFactory.getInstance().newDefaultBag();
    }

    /**
     * Called to read strings. Overridden to return a pig string.
     */
    @Override
    protected Object readString(Object old, Schema expected, Decoder in) throws IOException {
        return super.readString(old, expected, in).toString();
    }

    /**
     * Called to read byte arrays. Overridden to return a pig byte array.
     */
    @Override
    protected Object readBytes(Object old, Decoder in) throws IOException {
        ByteBuffer buf = (ByteBuffer) super.readBytes(old, in);
        DataByteArray byteArray = new DataByteArray(buf.array());
        return byteArray;
    }

}
TOP

Related Classes of org.apache.pig.piggybank.storage.avro.PigAvroDatumReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.