Package org.apache.pig.test

Source Code of org.apache.pig.test.TestBuiltin

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;

import java.io.File;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import junit.framework.TestCase;

import org.junit.Test;

import org.apache.pig.Algebraic;
import org.apache.pig.FilterFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.LoadFunc;
import org.apache.pig.PigServer;
import org.apache.pig.ExecType;
import org.apache.pig.EvalFunc;
import org.apache.pig.StoreFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.builtin.*;
import org.apache.pig.data.*;
import org.apache.pig.data.DefaultAbstractBag.BagDelimiterTuple;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
import org.apache.pig.impl.PigContext;

public class TestBuiltin extends TestCase {
   
    private String initString = "mapreduce";
    //private String initString = "local";
    MiniCluster cluster = MiniCluster.buildCluster();

    TupleFactory tupleFactory = DefaultTupleFactory.getInstance();
    BagFactory bagFactory = DefaultBagFactory.getInstance();
   
    // some inputs
    private static Integer[] intInput = { 3, 1, 2, 4, 5, 7, null, 6, 8, 9, 10 };
    private static Long[] intAsLong = { 3L, 1L, 2L, 4L, 5L, 7L, null, 6L, 8L, 9L, 10L };
   
    private static Long[] longInput = { 145769183483345L, null, 4345639849L, 3435543121L, 2L, 5L, 9L, 7L, 8L, 6L, 10L };
   
    private static Float[] floatInput = { 10.4f, 2.35f, 3.099f, null, 4.08495f, 5.350f, 6.78f, 7.0f, 8.0f, 9.0f, 0.09f };
    private static Double[] floatAsDouble = { 10.4, 2.35, 3.099, null, 4.08495, 5.350, 6.78, 7.0, 8.0, 9.0, 0.09 };
   
    private static Double[] doubleInput = { 5.5673910, 121.0, 3.0, 0.000000834593, 1.0, 6.0, 7.0, 8.0, 9.0, 10.0, null };
   
    private static String[] ba = { "7", "2", "3", null, "4", "5", "6", "1", "8", "9", "10"};
    private static Double[] baAsDouble = { 7.0, 2.0, 3.0, null, 4.0, 5.0, 6.0, 1.0, 8.0, 9.0, 10.0};
   
    private static String[] stringInput = {"unit", "test", null, "input", "string"};
    private static DataByteArray[] ByteArrayInput = Util.toDataByteArrays(ba);

    // The HashMaps below are used to set up the appropriate EvalFunc,
    // the allowed input and expected output for the different aggregate functions
    // which have different implementations for different input types
    // This way rather than quickly exploding the test cases (one per input type
    // per aggregate), all cases for a given aggregate stage are handled
    // in one test case in a loop
   
    // A mapping between name of Aggregate function to its corresponding EvalFunc object
    private static HashMap<String, EvalFunc<?>> evalFuncMap = new HashMap<String, EvalFunc<?>>();
   
    // A mapping between a type name (example: "Integer") and a tuple containing
    // a bag of inputs of that type
    private static HashMap<String, Tuple> inputMap = new HashMap<String, Tuple>();
   
    // A mapping between name of Aggregate function and the input type of its
    // argument
    private static HashMap<String, String> allowedInput = new HashMap<String, String>();
   
    // A mapping between name of Aggregate function and the output value (based on the
    // inputs above)
    private static HashMap<String, Object> expectedMap = new HashMap<String, Object>();
   
    String[] stages = {"Initial", "Intermediate", "Final"};
   
    String[][] aggs = {
            {"SUM", "IntSum", "LongSum", "FloatSum", "DoubleSum"},
            {"AVG", "IntAvg", "LongAvg", "FloatAvg", "DoubleAvg"},
            {"MIN", "IntMin", "LongMin", "FloatMin", "DoubleMin", "StringMin"},
            {"MAX", "IntMax", "LongMax", "FloatMax", "DoubleMax", "StringMax"},
            {"COUNT"},
            };
   
    String[] inputTypeAsString = {"ByteArray", "Integer", "Long", "Float", "Double", "String" };
   
    @Override
    public void setUp() {
      
        // First set up data structs for "base" SUM, MIN and MAX and AVG.
        // The allowed input and expected output data structs for
        // the "Intermediate" and "Final" stages can be based on the
        // "base" case - the allowed inputs for Initial stage can be based
        // on the "base" case.  In the test cases, the
        // output of Initial is sent to Intermediate, so we don't
        // explicitly test the output of Initial and hence do not
        // need to set up expectedMap.
       
        // first set up EvalFuncMap and expectedMap
        setupEvalFuncMap();
       
        expectedMap.put("SUM", new Double(55));
        expectedMap.put("DoubleSum", new Double(170.567391834593));
        expectedMap.put("IntSum", new Long(55));
        expectedMap.put("LongSum", new Long(145776964666362L));
        expectedMap.put("FloatSum", new Double(56.15395));

        expectedMap.put("AVG", new Double(5.5));
        expectedMap.put("DoubleAvg", new Double(17.0567391834593));
        expectedMap.put("LongAvg", new Double(14577696466636.2));
        expectedMap.put("IntAvg", new Double(5.5));
        expectedMap.put("FloatAvg", new Double(5.615394958853722));
       
        expectedMap.put("MIN", new Double(1));
        expectedMap.put("IntMin", new Integer(1));
        expectedMap.put("LongMin", new Long(2));
        expectedMap.put("FloatMin", new Float(0.09f));
        expectedMap.put("DoubleMin", new Double(0.000000834593));
        expectedMap.put("StringMin", "input");
       
        expectedMap.put("MAX", new Double(10));
        expectedMap.put("IntMax", new Integer(10));
        expectedMap.put("LongMax", new Long(145769183483345L));
        expectedMap.put("FloatMax", new Float(10.4f));
        expectedMap.put("DoubleMax", new Double(121.0));
        expectedMap.put("StringMax", "unit");
       
        expectedMap.put("COUNT", new Long(10));

        // set up allowedInput
        for (String[] aggGroups : aggs) {
            int i = 0;
            for(String agg: aggGroups) {
                allowedInput.put(agg, inputTypeAsString[i++]);
            }
        }
       
        // The idea here is that we can reuse the same input
        // and expected output of the algebraic functions
        // for their Intermediate and Final Stages. For the
        // Initial stage we can reuse the input of the algebraic
        // function.
       
        for (String[] aggGroups : aggs) {
            for(String agg: aggGroups) {
                for (String stage : stages) {
                    if(stage.equals("Initial")) {
                        // For the Initial function, the input should match the input
                        // for the aggregate function itself. In the test cases, the
                        // output of Initial is sent to Intermediate, so we don't
                        // explicitly test the output of Initial and hence do not
                        // need to set up expectedMap.
                        allowedInput.put(agg + stage, allowedInput.get(agg));
                    } else {
                        // For IntSumIntermediate and IntSumFinal and
                        // FloatSumIntermediate and FloatSumFinal, the input is expected
                        // be of types Long and Double respectively (Initial version
                        // of these functions is supposed to convert the Int to Long
                        // and Float to Double respectively) - Likewise for SUMIntermediate
                        // and SumFinal the input is expected to be Double - The Initial
                        // version is supposed to convert byteArrays to Double
                        if((agg).equals("IntSum") || (agg).equals("IntAvg")) {
                            allowedInput.put(agg + stage, "IntegerAsLong");
                        } else if ((agg).equals("FloatSum") || agg.equals("FloatAvg")) {
                            allowedInput.put(agg + stage, "FloatAsDouble");
                        }else if ((agg).equals("MIN") || agg.equals("MAX") || 
                                (agg.equals("SUM")) || agg.equals("AVG")) {
                            // For MIN and MAX the Intermediate and Final functions
                            // expect input to be Doubles (Initial is supposed to
                            // convert the ByteArray to Double)
                            allowedInput.put(agg + stage, "ByteArrayAsDouble");
                        } else {
                            // In all other cases, the input and expected output
                            // for "Intermediate" and "Final" stages should match the input
                            // and expected output for the aggregate function itself
                            allowedInput.put(agg + stage, allowedInput.get(agg));
                           
                        }
                        // For Average, we set up expectedMap only for the "Final" stage
                        // For other aggs, set up expected Map for both "Intermediate"
                        // and "Final"
                        if(! agg.matches("(?i)avg") || stage.equals("Final")) {
                            expectedMap.put(agg + stage, expectedMap.get(agg));
                        }
                    }
                }
            }
        }
       
        // For Avg, the expected output (for the sum part) for Intermediate are the
        // same as SUM - so handled a little differently accordingly
        expectedMap.put("AVGIntermediate", expectedMap.get("SUM"));
        expectedMap.put("DoubleAvgIntermediate", expectedMap.get("DoubleSum"));
        expectedMap.put("LongAvgIntermediate", expectedMap.get("LongSum"));
        expectedMap.put("IntAvgIntermediate", expectedMap.get("IntSum"));
        expectedMap.put("FloatAvgIntermediate", expectedMap.get("FloatSum"));
       
        // set up input hash
        try{
            inputMap.put("Integer", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), intInput));
            inputMap.put("IntegerAsLong", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), intAsLong));
            inputMap.put("Long", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), longInput));
            inputMap.put("Float", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), floatInput));
            inputMap.put("FloatAsDouble", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), floatAsDouble));
            inputMap.put("Double", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), doubleInput));
            inputMap.put("ByteArray", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), ByteArrayInput));
            inputMap.put("ByteArrayAsDouble", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), baAsDouble));
            inputMap.put("String", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), stringInput));
           
        }catch(ExecException e) {
            e.printStackTrace();
        }
    }
     
    /**
     *
     */
    private void setupEvalFuncMap() {

        for (String[] aggGroup : aggs) {
            for (String agg : aggGroup) {
                // doing this as a two step process because PigContext.instantiateFuncFromSpec("SUM.Intermediate")
                // fails with class resolution error.
                EvalFunc<?> func = (EvalFunc<?>)PigContext.instantiateFuncFromSpec(agg);
                evalFuncMap.put(agg, func);
                evalFuncMap.put(agg + "Initial", (EvalFunc<?>)PigContext.instantiateFuncFromSpec(((Algebraic)func).getInitial()));
                evalFuncMap.put(agg + "Intermediate", (EvalFunc<?>)PigContext.instantiateFuncFromSpec(((Algebraic)func).getIntermed()));
                evalFuncMap.put(agg + "Final", (EvalFunc<?>)PigContext.instantiateFuncFromSpec(((Algebraic)func).getFinal()));
            }
        }
    }

    /**
     * Test the case where the combiner is not called - so initial is called
     * and then final is called
     * @throws Exception
     */
    @Test
    public void testAggNoCombine() throws Exception {
       
        for (String[] aggGroup : aggs) {
            String[] aggFinalTypes = null; // will contains AVGFinal, DoubleAvgFinal etc
            String[] aggInitialTypes = null; // will contains AVGInitial, DoubleAvgInitial etc
           
            for (String stage: stages) {
                String[] aggTypesArray = null;
                if(stage.equals("Initial")) {
                    aggInitialTypes = new String[aggGroup.length];
                    aggTypesArray = aggInitialTypes;
                } else  if(stage.equals("Final")) {
                    aggFinalTypes = new String[aggGroup.length];
                    aggTypesArray = aggFinalTypes;
                } else { // Intermediate
                    continue;
                }
                for (int i = 0; i < aggTypesArray.length; i++) {
                    aggTypesArray[i] = aggGroup[i] + stage;
                }
            }

            for(int k = 0; k < aggFinalTypes.length; k++) {
                EvalFunc<?> avgInitial = evalFuncMap.get(aggInitialTypes[k]);
                Tuple tup = inputMap.get(getInputType(aggInitialTypes[k]));
               
                // To test this case, first AVGInitial is called for each input
                // value and output of it is put into a bag. The bag containing
                // all AVGInitial output is provided as input to AVGFinal
               
                // The tuple we got above has a bag with input
                // values. Lets call AVGInitial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag  finalInputBg = bagFactory.newDefaultBag();
                for (Tuple tuple : bg) {
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    initialInputBg.add(tuple);
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                    finalInputBg.add((Tuple)avgInitial.exec(initialInputTuple));
                }
   
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                String msg = "[Testing " + aggGroup[k] + " on input type: " + getInputType(aggFinalTypes[k]);
                System.err.println(msg + " for no combiner case]");
                Object output = aggFinal.exec(finalInputTuple);
                msg += " ( (output) " + output + " == " + getExpected(aggFinalTypes[k]) + " (expected) )]";
                // for doubles, precisions can be a problem - so check
                // if the type is double for expected result and check
                // within some precision
                if(getExpected(aggFinalTypes[k]) instanceof Double) {
                    assertEquals(msg, (Double)getExpected(aggFinalTypes[k]), (Double)output, 0.00001);
                } else {
                    assertEquals(msg, getExpected(aggFinalTypes[k]), output);
                }
            }   
        }
    }
  
    /**
     * Test the case where the combiner is called once - so initial is called
     * and then Intermediate and then final is called
     * @throws Exception
     */
    @Test
    public void testAggSingleCombine() throws Exception {
       
        for (String[] aggGroup : aggs) {
            String[] aggFinalTypes = null; // will contains AVGFinal, DoubleAvgFinal etc
            String[] aggInitialTypes = null; // will contains AVGInitial, DoubleAvgInitial etc
            String[] aggIntermediateTypes = null; // will contains AVGIntermediate, DoubleAvgIntermediate etc
            for (String stage: stages) {
                String[] aggTypesArray = null;
                if(stage.equals("Initial")) {
                    aggInitialTypes = new String[aggGroup.length];
                    aggTypesArray = aggInitialTypes;
                } else if (stage.equals("Intermediate")) {
                    aggIntermediateTypes = new String[aggGroup.length];
                    aggTypesArray = aggIntermediateTypes;
                } else  {// final
                    aggFinalTypes = new String[aggGroup.length];
                    aggTypesArray = aggFinalTypes;
                }

                for (int i = 0; i < aggTypesArray.length; i++) {
                    aggTypesArray[i] = aggGroup[i] + stage;
                }
            }
            for(int k = 0; k < aggFinalTypes.length; k++) {
                EvalFunc<?> aggInitial = evalFuncMap.get(aggInitialTypes[k]);
                Tuple tup = inputMap.get(getInputType(aggInitialTypes[k]));
                // To test this case, first <Agg>Initial is called for each input
                // value. The output from <Agg>Initial for the first half of inputs is
                // put into one bag and the next half into another. Then these two
                // bags are provided as inputs to two separate calls of <Agg>Intermediate.
                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                // The tuple we got above has a bag with input
                // values. Lets call <Agg>Initial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag  intermediateInputBg1 = bagFactory.newDefaultBag();
                DataBag  intermediateInputBg2 = bagFactory.newDefaultBag();
                int i = 0;
                for (Tuple tuple : bg) {
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    initialInputBg.add(tuple);
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                    if(i < bg.size()/2) {
                        intermediateInputBg1.add((Tuple)aggInitial.exec(initialInputTuple));
                    } else {
                        intermediateInputBg2.add((Tuple)aggInitial.exec(initialInputTuple));
                    }
                    i++;
                }

                EvalFunc<?> avgIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag finalInputBg = bagFactory.newDefaultBag();
                Tuple intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg1);
                finalInputBg.add((Tuple)avgIntermediate.exec(intermediateInputTuple));
                intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg2);
                finalInputBg.add((Tuple)avgIntermediate.exec(intermediateInputTuple));
               
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                String msg = "[Testing " + aggGroup[k] + " on input type: " + getInputType(aggFinalTypes[k]);
                System.err.println(msg + " for single combiner case]");
                Object output = aggFinal.exec(finalInputTuple);
                msg += " ( (output) " + output + " == " + getExpected(aggFinalTypes[k]) + " (expected) )]";
                // for doubles, precisions can be a problem - so check
                // if the type is double for expected result and check
                // within some precision
                if(getExpected(aggFinalTypes[k]) instanceof Double) {
                    assertEquals(msg, (Double)getExpected(aggFinalTypes[k]), (Double)output, 0.00001);
                } else {
                    assertEquals(msg, getExpected(aggFinalTypes[k]), output);
                }
            }   
        }
   
    }
        
   
    /**
     * Test the case where the combiner is called more than once - so initial is called
     * and then Intermediate called couple of times and then final is called
     * @throws Exception
     */
    @Test
    public void testAggMultipleCombine() throws Exception {
       
        for (String[] aggGroup : aggs) {
            String[] aggFinalTypes = null; // will contains AVGFinal, DoubleAvgFinal etc
            String[] aggInitialTypes = null; // will contains AVGInitial, DoubleAvgInitial etc
            String[] aggIntermediateTypes = null; // will contains AVGIntermediate, DoubleAvgIntermediate etc
            for (String stage: stages) {
                String[] aggTypesArray = null;
                if(stage.equals("Initial")) {
                    aggInitialTypes = new String[aggGroup.length];
                    aggTypesArray = aggInitialTypes;
                } else if (stage.equals("Intermediate")) {
                    aggIntermediateTypes = new String[aggGroup.length];
                    aggTypesArray = aggIntermediateTypes;
                } else  {// final
                    aggFinalTypes = new String[aggGroup.length];
                    aggTypesArray = aggFinalTypes;
                }

                for (int i = 0; i < aggTypesArray.length; i++) {
                    aggTypesArray[i] = aggGroup[i] + stage;
                }
            }
            for(int k = 0; k < aggFinalTypes.length; k++) {
                EvalFunc<?> aggInitial = evalFuncMap.get(aggInitialTypes[k]);
                Tuple tup = inputMap.get(getInputType(aggInitialTypes[k]));
                // To test this case, first <Agg>Initial is called for each input
                // value. The output from <Agg>Initial for quarter of values from
                // the inputs is put into one bag. Then 4 calls are made to Intermediate
                // with each bag going to one call. This simulates the call in the map-combine
                // boundary. The outputs from the first two calls to Intermediate above are
                // put into a bag and the output from the next two calls put into another bag.
                // These two bags are provided as inputs to two separate calls of <Agg>Intermediate.
                // This simulates the call in the combine-reduce boundary.
                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                // The tuple we got above has a bag with input
                // values. Lets call <Agg>Initial with each value:
                DataBag bg = (DataBag) tup.get(0);
                DataBag[]  mapIntermediateInputBgs = new DataBag[4];
                for (int i = 0; i < mapIntermediateInputBgs.length; i++) {
                    mapIntermediateInputBgs[i] = bagFactory.newDefaultBag();
                }
                Iterator<Tuple> it = bg.iterator();
                for(int i = 0; i < 4; i++) {
                    for(int j = 0; j < bg.size()/4; j++) {
                        DataBag initialInputBg = bagFactory.newDefaultBag();
                        initialInputBg.add(it.next());
                        Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                        mapIntermediateInputBgs[i].add((Tuple)aggInitial.exec(initialInputTuple));
                    }
                    if(i == 3) {
                        // if the last quarter has more elements process them
                        while(it.hasNext()) {
                            DataBag initialInputBg = bagFactory.newDefaultBag();
                            initialInputBg.add(it.next());
                            Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                            mapIntermediateInputBgs[i].add((Tuple)aggInitial.exec(initialInputTuple));
                        }
                    }
                }

                EvalFunc<?> aggIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag[] reduceIntermediateInputBgs = new DataBag[2];
                for (int i = 0; i < reduceIntermediateInputBgs.length; i++) {
                    reduceIntermediateInputBgs[i] = bagFactory.newDefaultBag();                   
                }

                // simulate call to combine after map
                for(int i = 0; i < 4; i++) {
                    Tuple intermediateInputTuple = tupleFactory.newTuple(mapIntermediateInputBgs[i]);
                    if(i < 2) {
                        reduceIntermediateInputBgs[0].add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                    } else {
                        reduceIntermediateInputBgs[1].add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                    }
                }
              
                DataBag finalInputBag = bagFactory.newDefaultBag();
                // simulate call to combine before reduce
                for(int i = 0; i < 2; i++) {
                    Tuple intermediateInputTuple = tupleFactory.newTuple(reduceIntermediateInputBgs[i]);
                    finalInputBag.add((Tuple)aggIntermediate.exec(intermediateInputTuple));
                }
               
                // simulate call to final (in reduce)
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBag);
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                String msg = "[Testing " + aggGroup[k] + " on input type: " + getInputType(aggFinalTypes[k]);
                System.err.println(msg + " for multiple combiner case]");
                Object output = aggFinal.exec(finalInputTuple);
                msg += " ( (output) " + output + " == " + getExpected(aggFinalTypes[k]) + " (expected) )]";
                // for doubles, precisions can be a problem - so check
                // if the type is double for expected result and check
                // within some precision
                if(getExpected(aggFinalTypes[k]) instanceof Double) {
                    assertEquals(msg, (Double)getExpected(aggFinalTypes[k]), (Double)output, 0.00001);
                } else {
                    assertEquals(msg, getExpected(aggFinalTypes[k]), output);
                }
            }   
        }
   
    }
   
    /**
     * Test the case where an empty bag is given as input to
     * the Initial function and the output is fed to Intermediate
     * function whose output is fed to the Final function
     * @throws Exception
     */
    @Test
    public void testAggEmptyBagWithCombiner() throws Exception {
       
        for (String[] aggGroup : aggs) {
            String[] aggFinalTypes = null; // will contains AVGFinal, DoubleAvgFinal etc
            String[] aggInitialTypes = null; // will contains AVGInitial, DoubleAvgInitial etc
            String[] aggIntermediateTypes = null; // will contains AVGIntermediate, DoubleAvgIntermediate etc
            for (String stage: stages) {
                String[] aggTypesArray = null;
                if(stage.equals("Initial")) {
                    aggInitialTypes = new String[aggGroup.length];
                    aggTypesArray = aggInitialTypes;
                } else if (stage.equals("Intermediate")) {
                    aggIntermediateTypes = new String[aggGroup.length];
                    aggTypesArray = aggIntermediateTypes;
                } else  {// final
                    aggFinalTypes = new String[aggGroup.length];
                    aggTypesArray = aggFinalTypes;
                }

                for (int i = 0; i < aggTypesArray.length; i++) {
                    aggTypesArray[i] = aggGroup[i] + stage;
                }
            }
            for(int k = 0; k < aggFinalTypes.length; k++) {
                EvalFunc<?> aggInitial = evalFuncMap.get(aggInitialTypes[k]);
                // To test this case, first <Agg>Initial is called with an empty bag
                // as input. This is done in two ierations of 5 calls.
                // The output from <Agg>Initial for the first half of inputs is
                // put into one bag and the next half into another. Then these two
                // bags are provided as inputs to two separate calls of <Agg>Intermediate.
                // The outputs from the two calls to <Agg>Intermediate are put into a bag
                // and sent as input to <Agg>Final
               
                DataBag  intermediateInputBg1 = bagFactory.newDefaultBag();
                DataBag  intermediateInputBg2 = bagFactory.newDefaultBag();
                Tuple outputTuple = null;
                for(int i = 0; i < 10; i++) {
                    // create empty bag input to be provided as input
                    // argument to the "Initial" function
                    DataBag initialInputBg = bagFactory.newDefaultBag();
                    Tuple initialInputTuple = tupleFactory.newTuple(initialInputBg);
                   
                    if(i < 5) {
                        outputTuple = (Tuple)aggInitial.exec(initialInputTuple);
                        // check that output is null for all aggs except COUNT
                        // COUNT will give an output of 0 for empty bag input
                        checkZeroOrNull(aggInitial, outputTuple.get(0));
                        intermediateInputBg1.add(outputTuple);
                    } else {
                        outputTuple = (Tuple)aggInitial.exec(initialInputTuple);
                        // check that output is null for all aggs except COUNT
                        // COUNT will give an output of 0 for empty bag input
                        checkZeroOrNull(aggInitial, outputTuple.get(0));
                        intermediateInputBg2.add(outputTuple);
                    }
                }

                EvalFunc<?> aggIntermediate = evalFuncMap.get(aggIntermediateTypes[k]);
                DataBag finalInputBg = bagFactory.newDefaultBag();
                Tuple intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg1);
                outputTuple = (Tuple)aggIntermediate.exec(intermediateInputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(aggIntermediate, outputTuple.get(0));
                finalInputBg.add(outputTuple);
                intermediateInputTuple = tupleFactory.newTuple(intermediateInputBg2);
                outputTuple = (Tuple)aggIntermediate.exec(intermediateInputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(aggIntermediate, outputTuple.get(0));
                finalInputBg.add(outputTuple);
               
                Tuple finalInputTuple = tupleFactory.newTuple(finalInputBg);
               
                EvalFunc<?> aggFinal = evalFuncMap.get(aggFinalTypes[k]);
                Object output = aggFinal.exec(finalInputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(aggFinal, output);
            }   
        }
   
    }

    /**
     * Test the case where an empty bag is given as input to the non
     * combiner version of aggregate functions
     * @throws Exception if there are issues executing the aggregate function
     */
    @Test
    public void testAggEmptyBag() throws Exception {
       
        for (String[] aggGroup : aggs) {
           
            for(int k = 0; k < aggGroup.length; k++) {
                EvalFunc<?> agg = evalFuncMap.get(aggGroup[k]);

                // call agg with empty bag as input
                DataBag inputBag = bagFactory.newDefaultBag();
                Tuple inputTuple = tupleFactory.newTuple(inputBag);
               
                Object output = agg.exec(inputTuple);
                // check that output is null for all aggs except COUNT
                // COUNT will give an output of 0 for empty bag input
                checkZeroOrNull(agg, output);
            }   
        }
   
    }

    private void checkZeroOrNull(EvalFunc<?> func, Object output) {
        if(func.getClass().getName().contains("COUNT")) {
            assertEquals(new Long(0), output);
        } else {
            assertEquals(null, output);
        }
    }


    // Builtin MATH Functions
    // =======================
    @Test
    public void testAVG() throws Exception {
        String[] avgTypes = {"AVG", "DoubleAvg", "LongAvg", "IntAvg", "FloatAvg"};
        for(int k = 0; k < avgTypes.length; k++) {
            EvalFunc<?> avg = evalFuncMap.get(avgTypes[k]);
            Tuple tup = inputMap.get(getInputType(avgTypes[k]));
            Object output = avg.exec(tup);
            String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
                         output + " == " + getExpected(avgTypes[k]) + " (expected) )]";
            assertEquals(msg, (Double)output, (Double)getExpected(avgTypes[k]), 0.00001);
           
        }
    }

    @Test
    public void testAVGIntermediate() throws Exception {
        String[] avgTypes = {"AVGIntermediate", "DoubleAvgIntermediate", "LongAvgIntermediate", "IntAvgIntermediate", "FloatAvgIntermediate"};
        for(int k = 0; k < avgTypes.length; k++) {
            EvalFunc<?> avg = evalFuncMap.get(avgTypes[k]);
            String inputType = getInputType(avgTypes[k]);
            Tuple tup = inputMap.get(inputType);
            // The tuple we got above has a bag with input
            // values. Input to the Intermediate.exec() however comes
            // from the map which would put each value and a count of
            // 1 in a tuple and send it down. So lets create a bag with
            // tuples that have two fields - the value and a count 1.
            DataBag bag = (DataBag) tup.get(0);
            DataBag  bg = bagFactory.newDefaultBag();
            for (Tuple t: bag) {
                Tuple newTuple = tupleFactory.newTuple(2);
                newTuple.set(0, t.get(0));
                newTuple.set(1, new Long(1));
                bg.add(newTuple);               
            }
            Tuple intermediateInput = tupleFactory.newTuple();
            intermediateInput.append(bg);
           
            Object output = avg.exec(intermediateInput);
           
            if(inputType == "Long" || inputType == "Integer" || inputType == "IntegerAsLong") {
                Long l = (Long)((Tuple)output).get(0);
                String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
                              l + " == " + getExpected(avgTypes[k]) + " (expected) )]";
                assertEquals(msg, (Long)getExpected(avgTypes[k]), l);
            } else {
                Double f1 = (Double)((Tuple)output).get(0);
                String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
                               f1 + " == " + getExpected(avgTypes[k]) + " (expected) )]";
                assertEquals(msg, (Double)getExpected(avgTypes[k]), f1, 0.00001);
            }
            Long f2 = (Long)((Tuple)output).get(1);
            assertEquals("[Testing " + avgTypes[k] + " on input type: "+
                inputType+"]Expected count to be 11", 11, f2.longValue());
        }
    }
   
    @Test
    public void testAVGFinal() throws Exception {
        String[] avgTypes = {"AVGFinal", "DoubleAvgFinal", "LongAvgFinal", "IntAvgFinal", "FloatAvgFinal"};
        String[] avgIntermediateTypes = {"AVGIntermediate", "DoubleAvgIntermediate", "LongAvgIntermediate", "IntAvgIntermediate", "FloatAvgIntermediate"};
        for(int k = 0; k < avgTypes.length; k++) {
            EvalFunc<?> avg = evalFuncMap.get(avgTypes[k]);
            Tuple tup = inputMap.get(getInputType(avgTypes[k]));
           
            // To test AVGFinal, AVGIntermediate should first be called and
            // the output of AVGIntermediate should be supplied as input to
            // AVGFinal. To simulate this, we will call Intermediate twice
            // on the above tuple and collect the outputs and pass it to
            // Final.
           
            // get the right "Intermediate" EvalFunc
            EvalFunc<?> avgIntermediate = evalFuncMap.get(avgIntermediateTypes[k]);
            // The tuple we got above has a bag with input
            // values. Input to the Intermediate.exec() however comes
            // from the map which would put each value and a count of
            // 1 in a tuple and send it down. So lets create a bag with
            // tuples that have two fields - the value and a count 1.
            // The input has 10 values - lets put the first five of them
            // in the input to the first call of AVGIntermediate and the
            // remaining five in the second call.
            DataBag bg = (DataBag) tup.get(0);
            DataBag  bg1 = bagFactory.newDefaultBag();
            DataBag  bg2 = bagFactory.newDefaultBag();
            int i = 0;
            for (Tuple t: bg) {
                Tuple newTuple = tupleFactory.newTuple(2);
                newTuple.set(0, t.get(0));
                if ( t.get(0) == null)
                    newTuple.set(1, new Long(0));
                else
                    newTuple.set(1, new Long(1));
                if(i < 5) {
                    bg1.add(newTuple);
                } else {
                    bg2.add(newTuple);
                }
                i++;
            }
            Tuple intermediateInput1 = tupleFactory.newTuple();
            intermediateInput1.append(bg1);
            Object output1 = avgIntermediate.exec(intermediateInput1);
            Tuple intermediateInput2 = tupleFactory.newTuple();
            intermediateInput2.append(bg2);
            Object output2 = avgIntermediate.exec(intermediateInput2);
           
            DataBag bag = Util.createBag(new Tuple[]{(Tuple)output1, (Tuple)output2});
           
            Tuple finalTuple = TupleFactory.getInstance().newTuple(1);
            finalTuple.set(0, bag);
            Object output = avg.exec(finalTuple);
            String msg = "[Testing " + avgTypes[k] + " on input type: " + getInputType(avgTypes[k]) + " ( (output) " +
            output + " == " + getExpected(avgTypes[k]) + " (expected) )]";
            assertEquals(msg, (Double)getExpected(avgTypes[k]), (Double)output, 0.00001);
        }   
    }

    @Test
    public void testCOUNT() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, null };
        long expected = input.length - 1;

        EvalFunc<Long> count = new COUNT();
        Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
        Long output = count.exec(tup);

        assertTrue(output == expected);
    }

    @Test
    public void testCOUNTIntermed() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
       
        DataBag intermediateInputBag = bagFactory.newDefaultBag();
        // call initial and then Intermed
        for (Integer i : input) {
            Tuple t = tupleFactory.newTuple(i);
            DataBag b = bagFactory.newDefaultBag();
            b.add(t);
            Tuple initialInput = tupleFactory.newTuple(b);
            EvalFunc<?> initial = new COUNT.Initial();
            intermediateInputBag.add((Tuple)initial.exec(initialInput));
        }

        EvalFunc<Tuple> countIntermed = new COUNT.Intermediate();
        Tuple intermediateInput = tupleFactory.newTuple(intermediateInputBag);
        Tuple output = countIntermed.exec(intermediateInput);

        Long f1 = DataType.toLong(output.get(0));
        assertEquals("Expected count to be 10", 10, f1.longValue());
    }

    @Test
    public void testCOUNTFinal() throws Exception {
        long input[] = { 23, 38, 39 };
        Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);

        EvalFunc<Long> count = new COUNT.Final();
        Long output = count.exec(tup);

        assertEquals("Expected count to be 100", 100, output.longValue());
    }

    @Test
    public void testCOUNT_STAR() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, null };
        long expected = input.length;

        EvalFunc<Long> count = new COUNT_STAR();
        Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);
        Long output = count.exec(tup);

        assertTrue(output == expected);
    }

    @Test
    public void testCOUNT_STARIntermed() throws Exception {
        Integer input[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
       
        DataBag intermediateInputBag = bagFactory.newDefaultBag();
        // call initial and then Intermed
        for (Integer i : input) {
            Tuple t = tupleFactory.newTuple(i);
            DataBag b = bagFactory.newDefaultBag();
            b.add(t);
            Tuple initialInput = tupleFactory.newTuple(b);
            EvalFunc<?> initial = new COUNT_STAR.Initial();
            intermediateInputBag.add((Tuple)initial.exec(initialInput));
        }

        EvalFunc<Tuple> countIntermed = new COUNT_STAR.Intermediate();
        Tuple intermediateInput = tupleFactory.newTuple(intermediateInputBag);
        Tuple output = countIntermed.exec(intermediateInput);

        Long f1 = DataType.toLong(output.get(0));
        assertEquals("Expected count to be 10", 10, f1.longValue());
    }

    @Test
    public void testCOUNT_STARFinal() throws Exception {
        long input[] = { 23, 38, 39 };
        Tuple tup = Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), input);

        EvalFunc<Long> count = new COUNT_STAR.Final();
        Long output = count.exec(tup);

        assertEquals("Expected count to be 100", 100, output.longValue());
    }

    @Test
    public void testSUM() throws Exception {
        String[] sumTypes = {"SUM", "DoubleSum", "LongSum", "IntSum", "FloatSum"};
        for(int k = 0; k < sumTypes.length; k++) {
            EvalFunc<?> sum = evalFuncMap.get(sumTypes[k]);
            String inputType = getInputType(sumTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = sum.exec(tup);

            String msg = "[Testing " + sumTypes[k] + " on input type: " + getInputType(sumTypes[k]) + " ( (output) " +
                          output + " == " + getExpected(sumTypes[k]) + " (expected) )]";
           
            if(inputType == "Integer" || inputType == "Long") {
                assertEquals(msg, (Long)output, (Long)getExpected(sumTypes[k]), 0.00001);
            } else {
                assertEquals(msg, (Double)output, (Double)getExpected(sumTypes[k]), 0.00001);
            }
        }
    }

    @Test
    public void testSUMIntermed() throws Exception {
        String[] sumTypes = {"SUMIntermediate", "DoubleSumIntermediate", "LongSumIntermediate", "IntSumIntermediate", "FloatSumIntermediate"};
        for(int k = 0; k < sumTypes.length; k++) {
            EvalFunc<?> sum = evalFuncMap.get(sumTypes[k]);
            String inputType = getInputType(sumTypes[k]);
           
            Tuple tup = inputMap.get(inputType);
            Object output = sum.exec(tup);
           
            String msg = "[Testing " + sumTypes[k] + " on input type: " + getInputType(sumTypes[k]) + " ( (output) " +
                            ((Tuple)output).get(0) + " == " + getExpected(sumTypes[k]) + " (expected) )]";
            if(inputType.equals("Integer") || inputType.equals("Long") || inputType.equals("IntegerAsLong")) {
              assertEquals(msg, (Long) ((Tuple)output).get(0), (Long)getExpected(sumTypes[k]), 0.00001);
            } else {
              assertEquals(msg, (Double) ((Tuple)output).get(0), (Double)getExpected(sumTypes[k]), 0.00001);
            }
        }
    }

    @Test
    public void testSUMFinal() throws Exception {
        String[] sumTypes = {"SUMFinal", "DoubleSumFinal", "LongSumFinal", "IntSumFinal", "FloatSumFinal"};
        for(int k = 0; k < sumTypes.length; k++) {
            EvalFunc<?> sum = evalFuncMap.get(sumTypes[k]);
            String inputType = getInputType(sumTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = sum.exec(tup);
           
            String msg = "[Testing " + sumTypes[k] + " on input type: " + getInputType(sumTypes[k]) + " ( (output) " +
            output + " == " + getExpected(sumTypes[k]) + " (expected) )]";

            if(inputType.equals("Integer") || inputType.equals("Long") || inputType.equals("IntegerAsLong")) {
              assertEquals(msg, (Long)output, (Long)getExpected(sumTypes[k]), 0.00001);
            } else {
              assertEquals(msg, (Double)output, (Double)getExpected(sumTypes[k]), 0.00001);
            }
        }
    }

    @Test
    public void testMIN() throws Exception {
        String[] minTypes = {"MIN", "LongMin", "IntMin", "FloatMin"};
        for(int k = 0; k < minTypes.length; k++) {
            EvalFunc<?> min = evalFuncMap.get(minTypes[k]);
            String inputType = getInputType(minTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = min.exec(tup);

            String msg = "[Testing " + minTypes[k] + " on input type: " + getInputType(minTypes[k]) + " ( (output) " +
                           output + " == " + getExpected(minTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)output, (Double)getExpected(minTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)output, (Long)getExpected(minTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)output, (Integer)getExpected(minTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)output, (Double)getExpected(minTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)output, (Float)getExpected(minTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)output, (String)getExpected(minTypes[k]));
            }
        }
    }


    @Test
    public void testMINIntermediate() throws Exception {
       
        String[] minTypes = {"MINIntermediate", "LongMinIntermediate", "IntMinIntermediate", "FloatMinIntermediate"};
        for(int k = 0; k < minTypes.length; k++) {
            EvalFunc<?> min = evalFuncMap.get(minTypes[k]);
            String inputType = getInputType(minTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = min.exec(tup);

            String msg = "[Testing " + minTypes[k] + " on input type: " + getInputType(minTypes[k]) + " ( (output) " +
                           ((Tuple)output).get(0) + " == " + getExpected(minTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)((Tuple)output).get(0), (Double)getExpected(minTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)((Tuple)output).get(0), (Long)getExpected(minTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)((Tuple)output).get(0), (Integer)getExpected(minTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)((Tuple)output).get(0), (Double)getExpected(minTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)((Tuple)output).get(0), (Float)getExpected(minTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)((Tuple)output).get(0), (String)getExpected(minTypes[k]));
            }
        }
    }

    @Test
    public void testMINFinal() throws Exception {
        String[] minTypes = {"MINFinal", "LongMinFinal", "IntMinFinal", "FloatMinFinal"};
        for(int k = 0; k < minTypes.length; k++) {
            EvalFunc<?> min = evalFuncMap.get(minTypes[k]);
            String inputType = getInputType(minTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = min.exec(tup);

            String msg = "[Testing " + minTypes[k] + " on input type: " + getInputType(minTypes[k]) + " ( (output) " +
                           output + " == " + getExpected(minTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)output, (Double)getExpected(minTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)output, (Long)getExpected(minTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)output, (Integer)getExpected(minTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)output, (Double)getExpected(minTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)output, (Float)getExpected(minTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)output, (String)getExpected(minTypes[k]));
            }
        }
    }

    @Test
    public void testMAX() throws Exception {
       
        String[] maxTypes = {"MAX", "LongMax", "IntMax", "FloatMax"};
        for(int k = 0; k < maxTypes.length; k++) {
            EvalFunc<?> max = evalFuncMap.get(maxTypes[k]);
            String inputType = getInputType(maxTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = max.exec(tup);

            String msg = "[Testing " + maxTypes[k] + " on input type: " + getInputType(maxTypes[k]) + " ( (output) " +
                           output + " == " + getExpected(maxTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)output, (Double)getExpected(maxTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)output, (Long)getExpected(maxTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)output, (Integer)getExpected(maxTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)output, (Double)getExpected(maxTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)output, (Float)getExpected(maxTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)output, (String)getExpected(maxTypes[k]));
            }
        }
    }


    @Test
    public void testMAXIntermed() throws Exception {
       
        String[] maxTypes = {"MAXIntermediate", "LongMaxIntermediate", "IntMaxIntermediate", "FloatMaxIntermediate"};
        for(int k = 0; k < maxTypes.length; k++) {
            EvalFunc<?> max = evalFuncMap.get(maxTypes[k]);
            String inputType = getInputType(maxTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = max.exec(tup);

            String msg = "[Testing " + maxTypes[k] + " on input type: " + getInputType(maxTypes[k]) + " ( (output) " +
                           ((Tuple)output).get(0) + " == " + getExpected(maxTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)((Tuple)output).get(0), (Double)getExpected(maxTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)((Tuple)output).get(0), (Long)getExpected(maxTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)((Tuple)output).get(0), (Integer)getExpected(maxTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)((Tuple)output).get(0), (Double)getExpected(maxTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)((Tuple)output).get(0), (Float)getExpected(maxTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)((Tuple)output).get(0), (String)getExpected(maxTypes[k]));
            }
        }
    }

    @Test
    public void testMAXFinal() throws Exception {
       
        String[] maxTypes = {"MAXFinal", "LongMaxFinal", "IntMaxFinal", "FloatMaxFinal"};
        for(int k = 0; k < maxTypes.length; k++) {
            EvalFunc<?> max = evalFuncMap.get(maxTypes[k]);
            String inputType = getInputType(maxTypes[k]);
            Tuple tup = inputMap.get(inputType);
            Object output = max.exec(tup);

            String msg = "[Testing " + maxTypes[k] + " on input type: " + getInputType(maxTypes[k]) + " ( (output) " +
                           output + " == " + getExpected(maxTypes[k]) + " (expected) )]";

            if(inputType == "ByteArray") {
              assertEquals(msg, (Double)output, (Double)getExpected(maxTypes[k]));
            } else if(inputType == "Long") {
                assertEquals(msg, (Long)output, (Long)getExpected(maxTypes[k]));
            } else if(inputType == "Integer") {
                assertEquals(msg, (Integer)output, (Integer)getExpected(maxTypes[k]));
            } else if (inputType == "Double") {
                assertEquals(msg, (Double)output, (Double)getExpected(maxTypes[k]));
            } else if (inputType == "Float") {
                assertEquals(msg, (Float)output, (Float)getExpected(maxTypes[k]));
            } else if (inputType == "String") {
                assertEquals(msg, (String)output, (String)getExpected(maxTypes[k]));
            }
        }

    }
   
   
    @Test
    public void testDistinct() throws Exception {
   
        Integer[] inp = new Integer[] { 1, 2 , 3, 1 ,4, 5, 3};
        DataBag inputBag = Util.createBagOfOneColumn(inp);
        EvalFunc<Tuple> initial = new Distinct.Initial();
        DataBag intermedInputBg1 = bagFactory.newDefaultBag();
        DataBag intermedInputBg2 = bagFactory.newDefaultBag();
        int i = 0;
        for (Tuple t : inputBag) {
            Tuple initialOutput = initial.exec(tupleFactory.newTuple(t));
            if(i < inp.length/2 ) {
                intermedInputBg1.add(initialOutput);
            } else {
                intermedInputBg2.add(initialOutput);
            }
            i++;
        }
       
        EvalFunc<Tuple> intermed = new Distinct.Intermediate();
       
        DataBag finalInputBg = bagFactory.newDefaultBag();
        finalInputBg.add(intermed.exec(tupleFactory.newTuple(intermedInputBg1)));
        finalInputBg.add(intermed.exec(tupleFactory.newTuple(intermedInputBg2)));
        EvalFunc<DataBag> fin = new Distinct.Final();
        DataBag result = fin.exec(tupleFactory.newTuple(finalInputBg));
       
        Integer[] exp = new Integer[] { 1, 2, 3, 4, 5};
        DataBag expectedBag = Util.createBagOfOneColumn(exp);
        assertEquals(expectedBag, result);
       
    }   

    @Test
    public void testDistinctProgressNonAlgebraic() throws Exception {

        //This test is for the exec method in Distinct which is not
        //called currently.

        int inputSize = 2002;
        Integer[] inp = new Integer[inputSize];
        for(int i = 0; i < inputSize; i+=2) {
            inp[i] = i/2;
            inp[i+1] = i/2;
        }

        DataBag inputBag = Util.createBagOfOneColumn(inp);
        EvalFunc<DataBag> distinct = new Distinct();
        DataBag result = distinct.exec(tupleFactory.newTuple(inputBag));
       
        Integer[] exp = new Integer[inputSize/2];
        for(int j = 0; j < inputSize/2; ++j) {
            exp[j] = j;
        }

        DataBag expectedBag = Util.createBagOfOneColumn(exp);
        assertEquals(expectedBag, result);
       
    }
   
    @Test
    public void testCONCAT() throws Exception {
       
        // DataByteArray concat
        byte[] a = {1,2,3};
        byte[] b = {4,5,6};
        byte[] expected = {1,2,3,4,5,6};
        DataByteArray dbaExpected = new DataByteArray(expected);
       
        DataByteArray dbaA = new DataByteArray(a);
        DataByteArray dbaB = new DataByteArray(b);
        EvalFunc<DataByteArray> concat = new CONCAT();
       
        Tuple t = TupleFactory.getInstance().newTuple(2);
        t.set(0, dbaA);
        t.set(1, dbaB);
        DataByteArray result = concat.exec(t);
        String msg = "[Testing CONCAT on input type: bytearray]";
        assertTrue(msg, result.equals(dbaExpected));
       
        // String concat
        String s1 = "unit ";
        String s2 = "test";
        String exp = "unit test";
        EvalFunc<String> sConcat = new StringConcat();
        Tuple ts = TupleFactory.getInstance().newTuple(2);
        ts.set(0, s1);
        ts.set(1, s2);
        String res = sConcat.exec(ts);
        msg = "[Testing StringConcat on input type: String]";
        assertTrue(msg, res.equals(exp));
       
    }

    @Test
    public void testSIZE() throws Exception {
       
        // DataByteArray size
        byte[] a = {1,2,3};
        DataByteArray dba = new DataByteArray(a);
        Long expected = new Long(3);
        Tuple t = TupleFactory.getInstance().newTuple(1);
        t.set(0, dba);
        EvalFunc<Long> size = new SIZE();       
        String msg = "[Testing SIZE on input type: bytearray]";
        assertTrue(msg, expected.equals(size.exec(t)));
       
        // String size
        String s = "Unit test case";
        expected = new Long(14);
        t.set(0, s);
        size = new StringSize();
        msg = "[Testing StringSize on input type: String]";
        assertTrue(msg, expected.equals(size.exec(t)));
       
        // Map size
        String[] mapContents = new String[]{"key1", "value1", "key2", "value2"};
        Map<String, Object> map = Util.createMap(mapContents);
        expected = new Long(2);
        t.set(0, map);
        size = new MapSize();
        msg = "[Testing MapSize on input type: Map]";
        assertTrue(msg, expected.equals(size.exec(t)));
       
        // Bag size
        Tuple t1 = Util.createTuple(new String[]{"a", "b", "c"});
        Tuple t2 = Util.createTuple(new String[]{"d", "e", "f"});
        Tuple t3 = Util.createTuple(new String[]{"g", "h", "i"});
        Tuple t4 = Util.createTuple(new String[]{"j", "k", "l"});
        DataBag b = Util.createBag(new Tuple[]{t1, t2, t3, t4});
        expected = new Long(4);
        t.set(0, b);
        size = new BagSize();
        msg = "[Testing BagSize on input type: Bag]";
        assertTrue(msg, expected.equals(size.exec(t)));
       
       
        // Tuple size
        expected = new Long(3);
        size = new TupleSize();
        msg = "[Testing TupleSize on input type: Tuple]";
        assertTrue(msg, expected.equals(size.exec(t1)));
       
        // Test for ARITY function.
        // It is depricated but we still need to make sure it works
        ARITY arrity = new ARITY();
        msg = "[Testing ARRITY on input type: Tuple]";
        assertTrue(msg, expected.equals(new Long(arrity.exec(t1))));
    }

    // Builtin APPLY Functions
    // ========================

   


    // Builtin LOAD Functions
    // =======================
    @Test
    public void testLFPig() throws Exception {
        String input1 = "this:is:delimited:by:a:colon\n";
        int arity1 = 6;

        LoadFunc p1 = new PigStorage(":");
        FakeFSInputStream ffis1 = new FakeFSInputStream(input1.getBytes());
        p1.bindTo(null, new BufferedPositionedInputStream(ffis1), 0, input1.getBytes().length);
        Tuple f1 = p1.getNext();
        assertTrue(f1.size() == arity1);

        LoadFunc p15 = new PigStorage();
        StringBuilder sb = new StringBuilder();
        int LOOP_COUNT = 100;
        for (int i = 0; i < LOOP_COUNT; i++) {
            for (int j = 0; j < LOOP_COUNT; j++) {
                sb.append(i + "\t" + i + "\t" + j % 2 + "\n");
            }
        }
        byte bytes[] = sb.toString().getBytes();
        FakeFSInputStream ffis15 = new FakeFSInputStream(bytes);
        p15.bindTo(null, new BufferedPositionedInputStream(ffis15), 0, bytes.length);
        int count = 0;
        while (true) {
            Tuple f15 = p15.getNext();
            if (f15 == null)
                break;
            count++;
            assertEquals(3, f15.size());
        }
        assertEquals(LOOP_COUNT * LOOP_COUNT, count);

        String input2 = ":this:has:a:leading:colon\n";
        int arity2 = 6;

        LoadFunc p2 = new PigStorage(":");
        FakeFSInputStream ffis2 = new FakeFSInputStream(input2.getBytes());
        p2.bindTo(null, new BufferedPositionedInputStream(ffis2), 0, input2.getBytes().length);
        Tuple f2 = p2.getNext();
        assertTrue(f2.size() == arity2);

        String input3 = "this:has:a:trailing:colon:\n";
        int arity3 = 6;

        LoadFunc p3 = new PigStorage(":");
        FakeFSInputStream ffis3 = new FakeFSInputStream(input3.getBytes());
        p3.bindTo(null, new BufferedPositionedInputStream(ffis3), 0, input1.getBytes().length);
        Tuple f3 = p3.getNext();
        assertTrue(f3.size() == arity3);
    }

    /*
    @Test
    public void testLFBin() throws Exception {

        BagFactory.init(new File("/tmp"));
       
       
        Tuple t1 = new Tuple(4);
        DataAtom a = new DataAtom("a");
        DataAtom b = new DataAtom("b");
        Tuple t2 = new Tuple(1);
        t2.setField(0,a);
        Tuple t3 = new Tuple(1);
        t3.setField(0, b);
        DataBag bag = BagFactory.getInstance().getNewBigBag();
        bag.add(t2);
        bag.add(t3);
        Tuple t4 = new Tuple(2);
        t4.setField(0, t2);
        t4.setField(1, t3);
       
        t1.setField(0, a);
        t1.setField(1, t2);
        t1.setField(2, bag);
        t1.setField(3, t4);
       
        Tuple t5 = new Tuple(4);
        DataAtom c = new DataAtom("the quick brown fox");
        DataAtom d = new DataAtom("jumps over the lazy dog");
        Tuple t6 = new Tuple(1);
        t6.setField(0,c);
        Tuple t7 = new Tuple(1);
        t7.setField(0, d);
        DataBag bag2 = BagFactory.getInstance().getNewBigBag();   
        for(int i = 0; i < 10; i ++) {
            bag2.add(t6);
            bag2.add(t7);
        }
        Tuple t8 = new Tuple(2);
        t8.setField(0, t6);
        t8.setField(1, t7);
       
        t5.setField(0, c);
        t5.setField(1, t6);
        t5.setField(2, bag2);
        t5.setField(3, t8);
       
       
        OutputStream os = new FileOutputStream("/tmp/bintest.bin");
        StoreFunc s = new BinStorage();
        s.bindTo(os);
        s.putNext(t1);
        s.putNext(t5);
        s.finish();
       
        LoadFunc l = new BinStorage();
        InputStream is = FileLocalizer.open("/tmp/bintest.bin", new PigContext(ExecType.LOCAL));
        l.bindTo("/tmp/bintest.bin", new BufferedPositionedInputStream(is), 0, Long.MAX_VALUE);
        Tuple r1 = l.getNext();
        Tuple r2 = l.getNext();
       
        assertTrue(r1.equals(t1));
        assertTrue(r2.equals(t5));
    }
    */

   
    @Test
    public void testLFText() throws Exception {
        String input1 = "This is some text.\nWith a newline in it.\n";
        String expected1 = "This is some text.";
        String expected2 = "With a newline in it.";
        FakeFSInputStream ffis1 = new FakeFSInputStream(input1.getBytes());
        LoadFunc text1 = new TextLoader();
        text1.bindTo(null, new BufferedPositionedInputStream(ffis1), 0, input1.getBytes().length);
        Tuple f1 = text1.getNext();
        Tuple f2 = text1.getNext();
        assertTrue(expected1.equals(f1.get(0).toString()) &&
            expected2.equals(f2.get(0).toString()));

        String input2 = "";
        FakeFSInputStream ffis2 = new FakeFSInputStream(input2.getBytes());
        LoadFunc text2 = new TextLoader();
        text2.bindTo(null, new BufferedPositionedInputStream(ffis2), 0, input2.getBytes().length);
        Tuple f3 = text2.getNext();
        assertTrue(f3 == null);
    }

    @Test
    public void testSFPig() throws Exception {
        byte[] buf = new byte[1024];
        FakeFSOutputStream os = new FakeFSOutputStream(buf);
        StoreFunc sfunc = new PigStorage("\t");
        sfunc.bindTo(os);

        DataByteArray[] input = { new DataByteArray("amy"),
            new DataByteArray("bob"), new DataByteArray("charlene"),
            new DataByteArray("david"), new DataByteArray("erin"),
            new DataByteArray("frank") };
        Tuple f1 = Util.loadTuple(TupleFactory.getInstance().newTuple(input.length), input);

        sfunc.putNext(f1);
        sfunc.finish();
       
        FakeFSInputStream is = new FakeFSInputStream(buf);
        LoadFunc lfunc = new PigStorage();
        lfunc.bindTo(null, new BufferedPositionedInputStream(is), 0, buf.length);
        Tuple f2 = lfunc.getNext();
       
        assertTrue(f1.equals(f2));       
    }
   
    @Test
    public void testTOKENIZE() throws Exception {
        TupleFactory tf = TupleFactory.getInstance();
        Tuple t1 = tf.newTuple(1);
        t1.set(0, "123 456\"789");
        Tuple t2 = tf.newTuple(1);
        t2.set(0, null);
        Tuple t3 = tf.newTuple(0);
       
        TOKENIZE f = new TOKENIZE();
        DataBag b = f.exec(t1);
        assertTrue(b.size()==3);
        Iterator<Tuple> i = b.iterator();
        Tuple rt = i.next();
        assertTrue(rt.get(0).equals("123"));
        rt = i.next();
        assertTrue(rt.get(0).equals("456"));
        rt = i.next();
        assertTrue(rt.get(0).equals("789"));
       
        b = f.exec(t2);
        assertTrue(b==null);
       
        b = f.exec(t3);
        assertTrue(b==null);
    }

    @Test
    public void testDIFF() throws Exception {
        // Test it in the case with two bags.
        BagFactory bf = BagFactory.getInstance();
        TupleFactory tf = TupleFactory.getInstance();

        DataBag b1 = bf.newDefaultBag();
        DataBag b2 = bf.newDefaultBag();
        for (int i = 0; i < 10; i++) b1.add(tf.newTuple(new Integer(i)));
        for (int i = 0; i < 10; i += 2) b2.add(tf.newTuple(new Integer(i)));
        Tuple t = tf.newTuple(2);
        t.set(0, b1);
        t.set(1, b2);
        DIFF d = new DIFF();
        DataBag result = d.exec(t);

        assertEquals(5, result.size());
        Iterator<Tuple> i = result.iterator();
        int[] values = new int[5];
        for (int j = 0; j < 5; j++) values[j] = (Integer)i.next().get(0);
        Arrays.sort(values);
        for (int j = 1; j < 10; j += 2) assertEquals(j, values[j/2]);

        // Test it in the case of two objects that are equals
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(1));
        result = d.exec(t);
        assertEquals(0, result.size());

        // Test it in the case of two objects that are not equal
        t = tf.newTuple(2);
        t.set(0, new Integer(1));
        t.set(1, new Integer(2));
        result = d.exec(t);
        assertEquals(2, result.size());
    }
   
    private static String getInputType(String typeFor) {
        return allowedInput.get(typeFor);
    }

    /**
     * @param expectedFor functionName for which expected result is sought
     * @return Object appropriate expected result
     */
    private Object getExpected(String expectedFor) {
        return expectedMap.get(expectedFor);
    }



}
TOP

Related Classes of org.apache.pig.test.TestBuiltin

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.