Package com.inadco.hbl.math.pig

Source Code of com.inadco.hbl.math.pig.PigSummarizerHelper

package com.inadco.hbl.math.pig;

import java.io.IOException;
import java.util.Locale;
import java.util.Properties;

import org.apache.commons.lang.Validate;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;

import com.inadco.hbl.math.aggregators.IrregularSamplingSummarizer;
import com.inadco.hbl.math.aggregators.OnlineCannyAvgSummarizer;
import com.inadco.hbl.math.aggregators.OnlineCannyRateSummarizer;
import com.inadco.hbl.math.aggregators.OnlineExpAvgSummarizer;
import com.inadco.hbl.math.aggregators.OnlineExpBiasedBinomialSummarizer;
import com.inadco.hbl.math.aggregators.OnlineExpRateSummarizer;

/**
* Helper to parse/bootstrap stuff
*
* @author dmitriy
*
*/

public final class PigSummarizerHelper {

    private PigSummarizerHelper() {
    }

    /**
     * Parse init string passed to summarizer functions.
     * <P>
     *
     * The init string is a name=value pairs, space/tab separated.
     *
     * @param str
     *            init string
     * @return properties parsed out as sting values only.
     */
    public static Properties parseInitString(String str) {

        Properties result = new Properties();
        for (String nvstr : str.split("[ |\\t]+")) {
            String[] nv = nvstr.split("=");
            Validate.isTrue(nv.length == 2, "configuration properties should be name=value pairs");
            Validate.isTrue(!result.contains(nv[0]), "duplicate parameter passed in");
            result.put(nv[0], nv[1]);
        }

        return result;

    }

    public static IrregularSamplingSummarizer createSummarizer(String initString, boolean bareInstance) {

        Properties props = parseInitString(initString);
        String type = props.getProperty("type");
        Validate.notNull(type, "type parameter is required");
        type = type.toUpperCase();

        SupportedIrregularsEnum typeEnum;
        try {
            typeEnum = SupportedIrregularsEnum.valueOf(type);
        } catch (IllegalArgumentException exc) {
            throw new IllegalArgumentException(String.format("Summarizer type \"%s\" is not supported.", type));
        }

        switch (typeEnum) {
        case EXP_AVG:
            return createExpAvgSum(props, bareInstance);
        case EXP_BIASED_BINOMIAL:
            return createExpBinomialSum(props, bareInstance);
        case EXP_RATE:
            return createExpRateSum(props, bareInstance);
        case CANNY_AVG:
            return createCannyAvgSum(props, bareInstance);
        case CANNY_RATE:
            return createCannyRateSum(props, bareInstance);

        }
        return null;
    }

    public static enum SupportedIrregularsEnum {
        EXP_AVG, EXP_BIASED_BINOMIAL, EXP_RATE, CANNY_AVG, CANNY_RATE
    }

    static OnlineExpAvgSummarizer createExpAvgSum(Properties props, boolean bareInstance) {

        if (bareInstance)
            return new OnlineExpAvgSummarizer();

        double dt;
        double m = OnlineExpAvgSummarizer.DEFAULT_HISTORY_MARGIN;

        if (props.containsKey("m"))
            m = Double.parseDouble(props.getProperty("m"));

        Validate.isTrue(props.containsKey("dt"), "dt parameter is required for exp avg summarizer");
        dt = Double.parseDouble(props.getProperty("dt"));

        return new OnlineExpAvgSummarizer(dt, m);

    }

    static OnlineCannyAvgSummarizer createCannyAvgSum(Properties props, boolean bareInstance) {
        if (bareInstance)
            return new OnlineCannyAvgSummarizer();

        double dt;
        double m = OnlineCannyAvgSummarizer.DEFAULT_MARGIN;
        double k = OnlineCannyAvgSummarizer.DEFAULT_K;

        if (props.containsKey("m"))
            m = Double.parseDouble(props.getProperty("m"));
        if (props.containsKey("k"))
            k = Double.parseDouble(props.getProperty("k"));

        Validate.isTrue(props.containsKey("dt"), "dt parameter is required for exp avg summarizer");
        dt = Double.parseDouble(props.getProperty("dt"));

        return new OnlineCannyAvgSummarizer(dt, m, k);

    }

    static OnlineCannyRateSummarizer createCannyRateSum(Properties props, boolean bareInstance) {
        double dt;
        double m = OnlineCannyAvgSummarizer.DEFAULT_MARGIN;
        double k = OnlineCannyAvgSummarizer.DEFAULT_K;

        if (props.containsKey("m"))
            m = Double.parseDouble(props.getProperty("m"));
        if (props.containsKey("k"))
            k = Double.parseDouble(props.getProperty("k"));

        Validate.isTrue(props.containsKey("dt"), "dt parameter is required for exp avg summarizer");
        dt = Double.parseDouble(props.getProperty("dt"));

        return new OnlineCannyRateSummarizer(dt, m, k);

    }

    static OnlineExpBiasedBinomialSummarizer createExpBinomialSum(Properties props,
                                                                                   boolean bareInstance) {

        if (bareInstance)
            return new OnlineExpBiasedBinomialSummarizer();

        double dt;
        double m = OnlineExpBiasedBinomialSummarizer.DEFAULT_HISTORY_MARGIN;
        double p0;
        double epsilon = OnlineExpBiasedBinomialSummarizer.DEFAULT_EPSILON;

        if (props.containsKey("m"))
            m = Double.parseDouble(props.getProperty("m"));
        if (props.containsKey("epsilon"))
            epsilon = Double.parseDouble(props.getProperty("epsilon"));

        Validate.isTrue(props.containsKey("dt"), "dt parameter is required for exp binomial summarizer");
        dt = Double.parseDouble(props.getProperty("dt"));

        Validate.isTrue(props.containsKey("p0"), "p0 parameter is required for exp binomial summarizer");
        p0 = Double.parseDouble(props.getProperty("p0"));

        return new OnlineExpBiasedBinomialSummarizer(p0, epsilon, dt, m);
    }

    static OnlineExpRateSummarizer createExpRateSum(Properties props, boolean bareInstance) {

        if (bareInstance)
            return new OnlineExpRateSummarizer();

        double dt;
        double m = OnlineExpRateSummarizer.DEFAULT_HISTORY_MARGIN;

        if (props.containsKey("m"))
            m = Double.parseDouble(props.getProperty("m"));

        Validate.isTrue(props.containsKey("dt"), "dt parameter is required for exp rate summarizer");
        dt = Double.parseDouble(props.getProperty("dt"));

        return new OnlineExpRateSummarizer(dt, m);
    }

    static <T extends Writable> DataOutputBuffer ser2bytes(T summarizer, DataOutputBuffer dob) throws IOException {
        if (summarizer == null)
            return null;
        dob.reset();
        summarizer.write(dob);
        dob.close();
        return dob;
    }

    /**
     *
     * @param holderInstance
     * @param dib
     *            input
     * @return
     * @throws IOException
     */
    static IrregularSamplingSummarizer bytes2ser(IrregularSamplingSummarizer holderInstance, DataInputBuffer dib)
        throws IOException {
        if (dib == null)
            return null;
        holderInstance.readFields(dib);
        return holderInstance;
    }

    /**
     * combines bunch of serialized summarizer states in bag parameter onto
     * existing summarizer (if any)
     *
     * @param bag
     * @param dib
     *            temporary buffer used in deserializing bagged summarizers
     * @param summarizerInitStr
     *            init string for new summarizers
     * @param summarizer
     *            pre-existing sum, if any
     * @param buffer
     *            temp buffer to deserialize bagged summarizers
     * @return combined summarizer (summarizer or new summarizer if
     *         summarizer==null)
     * @throws IOException
     */
    static IrregularSamplingSummarizer combine(Tuple bag,
                                               DataInputBuffer dib,
                                               String summarizerInitStr,
                                               IrregularSamplingSummarizer summarizer,
                                               IrregularSamplingSummarizer buffer) throws IOException {

        DataBag db = DataType.toBag(bag.get(0));
        for (Tuple t : db) {
            byte[] dba = DataType.toBytes(t.get(0));
            dib.reset(dba, dba.length);
            IrregularSamplingSummarizer sum = PigSummarizerHelper.bytes2ser(buffer, dib);
            if (summarizer == null) {
                summarizer = PigSummarizerHelper.createSummarizer(summarizerInitStr, true);
                summarizer.assign(sum);
            } else
                summarizer.combine(sum);
        }
        return summarizer;

    }

}
TOP

Related Classes of com.inadco.hbl.math.pig.PigSummarizerHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.