Package com.inadco.hbl.math.pig

Source Code of com.inadco.hbl.math.pig.ExpAvgCombine

package com.inadco.hbl.math.pig;

import java.io.IOException;
import java.lang.reflect.Type;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

import com.inadco.hbl.math.aggregators.IrregularSamplingSummarizer;

/**
* This function accepts arguments -- bag of byte arrays -- representing
* {@link IrregularSamplingSummarizer}'s serialized state and combines them to
* produce one {@link IrregularSamplingSummarizer} representing unified history.
* The summarizers are thought to have been accumulated over separate,
* <b>disjoint</b> subsets of the history.
* <P>
*
*
* @author dmitriy
*
*/

public class ExpAvgCombine extends EvalFunc<DataByteArray> {

    private final DataInputBuffer              m_dib = new DataInputBuffer();
    private final DataOutputBuffer             m_dob = new DataOutputBuffer();
    private final IrregularSamplingSummarizer  m_summarizer1;
    private final IrregularSamplingSummarizer  m_summarizer2;
   

   

    /**
     * Serializer's init string. however, it doesn't
     * require anything but type here since this function
     * technically never creates initialized instances,
     * only buffers ("bare" instances).
     *
     * @param initStr
     */
    public ExpAvgCombine(String initStr ) {
        super();
        m_summarizer1=PigSummarizerHelper.createSummarizer(initStr, true);
        m_summarizer2=PigSummarizerHelper.createSummarizer(initStr, true);
       
    }

    @Override
    public DataByteArray exec(Tuple input) throws IOException {
        IrregularSamplingSummarizer result = null;
        for (Object obj : input.getAll()) {
            if ( obj == null ) continue; // one of arguments is null, ignore
           
            byte[] bytes = DataType.toBytes(obj);
            m_dib.reset(bytes, bytes.length);
            IrregularSamplingSummarizer sum =
                PigSummarizerHelper.bytes2ser(result == null ? m_summarizer1 : m_summarizer2, m_dib);
            if (result == null)
                result = sum;
            else
                result.combine(sum);
        }
        DataOutputBuffer dob = PigSummarizerHelper.ser2bytes(result, m_dob);
        return dob == null ? null : new DataByteArray(dob.getData(), 0, dob.getLength());
    }

    @Override
    public Schema outputSchema(Schema input) {
        for (FieldSchema fs : input.getFields())
            if (fs.type != DataType.BYTEARRAY)
                throw new IllegalArgumentException("Invalid input schema for ExpAvgCombine function.");

        return super.outputSchema(input);
    }

    @Override
    public Type getReturnType() {
        return DataByteArray.class;
    }

}
TOP

Related Classes of com.inadco.hbl.math.pig.ExpAvgCombine

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.