Package org.apache.pig.test

Source Code of org.apache.pig.test.TestMRCompiler

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.Random;

import org.apache.pig.ComparisonFunc;
import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.builtin.AVG;
import org.apache.pig.builtin.COUNT;
import org.apache.pig.builtin.SUM;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.builtin.GFCross;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompilerException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PlanPrinter;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.*;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserComparisonFunc;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POUserFunc;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.test.TestPOSort.WeirdComparator;
import org.apache.pig.test.utils.GenPhyOp;
import org.apache.pig.test.utils.LogicalPlanTester;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
* Test cases to test the MRCompiler.
* VERY IMPORTANT NOTE: The tests here compare results with a
* "golden" set of outputs. In each testcase here, the operators
* generated have a random operator key which uses Java's Random
* class. So if there is a code change which changes the number of
* operators created in a plan, then not only will the "golden" file
* for that test case need to be changed, but also for the tests
* that follow it since the operator keys that will be generated through
* Random will be different.
*/
public class TestMRCompiler extends junit.framework.TestCase {
    static MiniCluster cluster = MiniCluster.buildCluster();
   
    static PigContext pc;
    static PigContext pcMR;

    static final int MAX_SIZE = 100000;

    static final long SEED = 1013;
   
    static Random r;
    static{
        pc = new PigContext(ExecType.LOCAL, new Properties());
        pcMR = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
        try {
            pc.connect();
        } catch (ExecException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        r = new Random(SEED);
    }
   
    LogicalPlanTester planTester = new LogicalPlanTester(pc) ;
    LogicalPlanTester planTesterMR = new LogicalPlanTester(pcMR) ;

    // if for some reason, the golden files need
    // to be regenerated, set this to true - THIS
    // WILL OVERWRITE THE GOLDEN FILES - So use this
    // with caution and only for the testcases you need
    // and are sure of
    private boolean generate = false;

    @Override
    @Before
    public void setUp() throws ExecException {
        GenPhyOp.setR(r);
       
        GenPhyOp.setPc(pc);
        NodeIdGenerator.getGenerator().reset("");
    }

    @Override
    @After
    public void tearDown() throws Exception {
    }
   
    @Test
    public void testRun1() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan part1 = new PhysicalPlan();
        POLoad lC = GenPhyOp.topLoadOp();
        POFilter fC = GenPhyOp.topFilterOp();
        fC.setRequestedParallelism(20);
        POLocalRearrange lrC = GenPhyOp.topLocalRearrangeOp();
        lrC.setRequestedParallelism(10);
        POGlobalRearrange grC = GenPhyOp.topGlobalRearrangeOp();
        POPackage pkC = GenPhyOp.topPackageOp();
        part1.add(lC);
        part1.add(fC);
        part1.connect(lC, fC);
        part1.add(lrC);
        part1.connect(fC, lrC);
        part1.add(grC);
        part1.connect(lrC, grC);
        part1.add(pkC);
        part1.connect(grC, pkC);

        POPackage pkD = GenPhyOp.topPackageOp();
        pkD.setRequestedParallelism(20);
        POLocalRearrange lrD = GenPhyOp.topLocalRearrangeOp();
        lrD.setRequestedParallelism(30);
        POGlobalRearrange grD = GenPhyOp.topGlobalRearrangeOp();
        POLoad lD = GenPhyOp.topLoadOp();
        part1.add(lD);
        part1.add(lrD);
        part1.connect(lD, lrD);

        part1.add(grD);
        part1.connect(lrD, grD);
        part1.add(pkD);
        part1.connect(grD, pkD);

        POPackage pkCD = GenPhyOp.topPackageOp();
        POLocalRearrange lrCD1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lrCD2 = GenPhyOp.topLocalRearrangeOp();
        POGlobalRearrange grCD = GenPhyOp.topGlobalRearrangeOp();
        part1.add(lrCD1);
        part1.add(lrCD2);
        part1.connect(pkC, lrCD1);
        part1.connect(pkD, lrCD2);
        part1.add(grCD);
        part1.connect(lrCD1, grCD);
        part1.connect(lrCD2, grCD);
        part1.add(pkCD);
        part1.connect(grCD, pkCD);

        POLoad lA = GenPhyOp.topLoadOp();
        POLoad lB = GenPhyOp.topLoadOp();

        // POLoad lC = lA;
        POFilter fA = GenPhyOp.topFilterOp();

        POLocalRearrange lrA = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lrB = GenPhyOp.topLocalRearrangeOp();

        POGlobalRearrange grAB = GenPhyOp.topGlobalRearrangeOp();

        POPackage pkAB = GenPhyOp.topPackageOp();

        POFilter fAB = GenPhyOp.topFilterOp();
        POUnion unABC = GenPhyOp.topUnionOp();

        php.add(lA);
        php.add(lB);

        php.add(fA);

        php.connect(lA, fA);

        php.add(lrA);
        php.add(lrB);

        php.connect(fA, lrA);
        php.connect(lB, lrB);

        php.add(grAB);
        php.connect(lrA, grAB);
        php.connect(lrB, grAB);

        php.add(pkAB);
        php.connect(grAB, pkAB);

        php.add(fAB);
        php.connect(pkAB, fAB);

        php.merge(part1);

        List<PhysicalOperator> leaves = new ArrayList<PhysicalOperator>();
        for (PhysicalOperator phyOp : php.getLeaves()) {
            leaves.add(phyOp);
        }

        php.add(unABC);
        for (PhysicalOperator physicalOperator : leaves) {
            php.connect(physicalOperator, unABC);
        }

        POStore st = GenPhyOp.topStoreOp();

        php.add(st);
        php.connect(unABC, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC10.gld");
    }

    public void testRun2() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan part1 = new PhysicalPlan();
        POLoad lC = GenPhyOp.topLoadOp();
        POFilter fC = GenPhyOp.topFilterOp();
        POLocalRearrange lrC = GenPhyOp.topLocalRearrangeOp();
        POGlobalRearrange grC = GenPhyOp.topGlobalRearrangeOp();
        POPackage pkC = GenPhyOp.topPackageOp();
        part1.add(lC);
        part1.add(fC);
        part1.connect(lC, fC);
        part1.add(lrC);
        part1.connect(fC, lrC);
        part1.add(grC);
        part1.connect(lrC, grC);
        part1.add(pkC);
        part1.connect(grC, pkC);

        POPackage pkD = GenPhyOp.topPackageOp();
        POLocalRearrange lrD = GenPhyOp.topLocalRearrangeOp();
        POGlobalRearrange grD = GenPhyOp.topGlobalRearrangeOp();
        POLoad lD = GenPhyOp.topLoadOp();
        part1.add(lD);
        part1.add(lrD);
        part1.connect(lD, lrD);

        part1.add(grD);
        part1.connect(lrD, grD);
        part1.add(pkD);
        part1.connect(grD, pkD);
        part1.connect(pkD, grC);

        POLoad lA = GenPhyOp.topLoadOp();
        POLoad lB = GenPhyOp.topLoadOp();

        // POLoad lC = lA;
        POFilter fA = GenPhyOp.topFilterOp();

        POLocalRearrange lrA = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lrB = GenPhyOp.topLocalRearrangeOp();

        POGlobalRearrange grAB = GenPhyOp.topGlobalRearrangeOp();

        POPackage pkAB = GenPhyOp.topPackageOp();

        POFilter fAB = GenPhyOp.topFilterOp();
        POUnion unABC = GenPhyOp.topUnionOp();

        php.add(lA);
        php.add(lB);

        php.add(fA);

        php.connect(lA, fA);

        php.add(lrA);
        php.add(lrB);

        php.connect(fA, lrA);
        php.connect(lB, lrB);

        php.add(grAB);
        php.connect(lrA, grAB);
        php.connect(lrB, grAB);

        php.add(pkAB);
        php.connect(grAB, pkAB);

        php.add(fAB);
        php.connect(pkAB, fAB);

        php.merge(part1);

        List<PhysicalOperator> leaves = new ArrayList<PhysicalOperator>();
        for (PhysicalOperator phyOp : php.getLeaves()) {
            leaves.add(phyOp);
        }

        php.add(unABC);
        for (PhysicalOperator physicalOperator : leaves) {
            php.connect(physicalOperator, unABC);
        }

        POStore st = GenPhyOp.topStoreOp();

        php.add(st);
        php.connect(unABC, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC11.gld");
    }

    public void testSpl1() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        POLoad lA = GenPhyOp.topLoadOp();
        POSplit spl = GenPhyOp.topSplitOp();
        php.add(lA);
        php.add(spl);
        php.connect(lA, spl);

        POFilter fl1 = GenPhyOp.topFilterOp();
        POFilter fl2 = GenPhyOp.topFilterOp();
        php.add(fl1);
        php.add(fl2);
        php.connect(spl, fl1);
        php.connect(spl, fl2);

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
        php.add(lr1);
        php.add(lr2);
        php.connect(fl1, lr1);
        php.connect(fl2, lr2);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.add(gr);
        php.connect(lr1, gr);
        php.connect(lr2, gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.add(pk);
        php.connect(gr, pk);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);
        php.connect(pk, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC12.gld");

    }

    public void testSpl2() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        POLoad lA = GenPhyOp.topLoadOp();
        POSplit spl = GenPhyOp.topSplitOp();
        php.add(lA);
        php.add(spl);
        php.connect(lA, spl);

        POFilter fl1 = GenPhyOp.topFilterOp();
        POFilter fl2 = GenPhyOp.topFilterOp();
        php.add(fl1);
        php.add(fl2);
        php.connect(spl, fl1);
        php.connect(spl, fl2);

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
        php.add(lr1);
        php.add(lr2);
        php.connect(fl1, lr1);
        php.connect(fl2, lr2);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.addAsLeaf(pk);

        POSplit sp2 = GenPhyOp.topSplitOp();
        php.addAsLeaf(sp2);

        POFilter fl3 = GenPhyOp.topFilterOp();
        POFilter fl4 = GenPhyOp.topFilterOp();
        php.add(fl3);
        php.add(fl4);
        php.connect(sp2, fl3);
        php.connect(sp2, fl4);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC13.gld");

    }

    public void testSpl3() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        POLoad lA = GenPhyOp.topLoadOp();
        POSplit spl = GenPhyOp.topSplitOp();
        php.add(lA);
        php.add(spl);
        php.connect(lA, spl);

        POFilter fl1 = GenPhyOp.topFilterOp();
        fl1.setRequestedParallelism(10);
        POFilter fl2 = GenPhyOp.topFilterOp();
        fl2.setRequestedParallelism(20);
        php.add(fl1);
        php.add(fl2);
        php.connect(spl, fl1);
        php.connect(spl, fl2);

        POSplit sp11 = GenPhyOp.topSplitOp();
        POSplit sp21 = GenPhyOp.topSplitOp();
        php.add(sp11);
        php.add(sp21);
        php.connect(fl1, sp11);
        php.connect(fl2, sp21);

        POFilter fl11 = GenPhyOp.topFilterOp();
        fl11.setRequestedParallelism(10);
        POFilter fl21 = GenPhyOp.topFilterOp();
        fl21.setRequestedParallelism(20);
        POFilter fl22 = GenPhyOp.topFilterOp();
        fl22.setRequestedParallelism(30);
        php.add(fl11);
        php.add(fl21);
        php.add(fl22);
        php.connect(sp11, fl11);
        php.connect(sp21, fl21);
        php.connect(sp21, fl22);

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        lr1.setRequestedParallelism(40);
        POLocalRearrange lr21 = GenPhyOp.topLocalRearrangeOp();
        lr21.setRequestedParallelism(15);
        POLocalRearrange lr22 = GenPhyOp.topLocalRearrangeOp();
        lr22.setRequestedParallelism(35);
        php.add(lr1);
        php.add(lr21);
        php.add(lr22);
        php.connect(fl11, lr1);
        php.connect(fl21, lr21);
        php.connect(fl22, lr22);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        POPackage pk = GenPhyOp.topPackageOp();
        pk.setRequestedParallelism(25);
        php.addAsLeaf(pk);

        POSplit sp2 = GenPhyOp.topSplitOp();
        php.addAsLeaf(sp2);

        POFilter fl3 = GenPhyOp.topFilterOp();
        fl3.setRequestedParallelism(100);
        POFilter fl4 = GenPhyOp.topFilterOp();
        fl4.setRequestedParallelism(80);
        php.add(fl3);
        php.add(fl4);
        php.connect(sp2, fl3);
        php.connect(sp2, fl4);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC14.gld");

    }

     // Tests Single input case for both blocking and non-blocking
     // with both map and reduce phases
    public void testSim1() throws Exception {
        PhysicalPlan php = new PhysicalPlan();
        POLoad ld = GenPhyOp.topLoadOp();
        php.add(ld);
        PhysicalPlan grpChain1 = GenPhyOp.grpChain();
        php.merge(grpChain1);

        php.connect(ld, grpChain1.getRoots().get(0));

        PhysicalOperator leaf = php.getLeaves().get(0);

        PhysicalPlan grpChain2 = GenPhyOp.grpChain();
        php.merge(grpChain2);

        php.connect(leaf, grpChain2.getRoots().get(0));

        leaf = php.getLeaves().get(0);
        POFilter fl = GenPhyOp.topFilterOp();
        php.add(fl);

        php.connect(leaf, fl);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);

        php.connect(fl, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC1.gld");

    }

    public void testSim2() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);

        php.connect(un, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC2.gld");

    }

    public void testSim3() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();

        php.merge(ldFil1);
        php.connect(ldFil1.getLeaves().get(0), un);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);

        php.connect(un, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC3.gld");
    }

    public void intTestSim4() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();

        php.merge(ldFil1);
        php.connect(ldFil1.getLeaves().get(0), un);

        php.merge(ldFil2);
        php.connect(ldFil2.getLeaves().get(0), un);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);

        php.connect(un, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC4.gld");
    }

    public void testSim5() throws Exception {
        PhysicalPlan php = new PhysicalPlan();
        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();
        php.merge(ldFil1);
        php.merge(ldFil2);

        POUnion un = GenPhyOp.topUnionOp();
        php.addAsLeaf(un);

        POStore st = GenPhyOp.topStoreOp();
        php.add(st);

        php.connect(un, st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC5.gld");
    }

    public void testSim6() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();

        ldGrpChain1.addAsLeaf(lr1);
        ldGrpChain2.addAsLeaf(lr2);

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.addAsLeaf(pk);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC6.gld");

    }

    public void testSim7() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();

        ldGrpChain1.addAsLeaf(lr1);
        ldGrpChain2.addAsLeaf(lr2);

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();

        php.merge(ldFil1);
        php.connect(ldFil1.getLeaves().get(0), gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.addAsLeaf(pk);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC7.gld");
    }

    public void testSim8() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
        PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

        POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
        POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();

        ldGrpChain1.addAsLeaf(lr1);
        ldGrpChain2.addAsLeaf(lr2);

        php.merge(ldGrpChain1);
        php.merge(ldGrpChain2);

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();

        php.merge(ldFil1);
        php.connect(ldFil1.getLeaves().get(0), gr);

        php.merge(ldFil2);
        php.connect(ldFil2.getLeaves().get(0), gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.addAsLeaf(pk);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC8.gld");
    }

    public void testSim9() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
        php.addAsLeaf(gr);

        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();

        php.merge(ldFil1);
        php.connect(ldFil1.getLeaves().get(0), gr);

        php.merge(ldFil2);
        php.connect(ldFil2.getLeaves().get(0), gr);

        POPackage pk = GenPhyOp.topPackageOp();
        php.addAsLeaf(pk);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC9.gld");
    }
   
    public void testSortUDF1() throws Exception {
        PhysicalPlan php = new PhysicalPlan();
        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        php.merge(ldFil1);
       
        // set up order by *
        String funcName = WeirdComparator.class.getName();
        POUserComparisonFunc comparator = new POUserComparisonFunc(
                new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcName));
        POSort sort = new POSort(new OperatorKey("", r.nextLong()), -1, ldFil1.getLeaves(),
                null, new ArrayList<Boolean>(), comparator);
        sort.setRequestedParallelism(20);
        PhysicalPlan nesSortPlan = new PhysicalPlan();
        POProject topPrj = new POProject(new OperatorKey("", r.nextLong()));
        topPrj.setColumn(1);
        topPrj.setOverloaded(true);
        topPrj.setResultType(DataType.TUPLE);
        nesSortPlan.add(topPrj);
       
        POProject prjStar2 = new POProject(new OperatorKey("", r.nextLong()));
        prjStar2.setResultType(DataType.TUPLE);
        prjStar2.setStar(true);
        nesSortPlan.add(prjStar2);
       
        nesSortPlan.connect(topPrj, prjStar2);
        List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();
        nesSortPlanLst.add(nesSortPlan);
       
        sort.setSortPlans(nesSortPlanLst);
       
        php.add(sort);
        php.connect(ldFil1.getLeaves().get(0), sort);
        // have a foreach which takes the sort output
        // and send it two two udfs
        List<String> udfs = new ArrayList<String>();
        udfs.add(COUNT.class.getName());
        udfs.add(SUM.class.getName());
        POForEach fe3 = GenPhyOp.topForEachOPWithUDF(udfs);
        php.add(fe3);
        php.connect(sort, fe3);
       
        // add a group above the foreach
        PhysicalPlan grpChain1 = GenPhyOp.grpChain();
        php.merge(grpChain1);
        php.connect(fe3,grpChain1.getRoots().get(0));
       
       
        udfs.clear();
        udfs.add(AVG.class.getName());
        POForEach fe4 = GenPhyOp.topForEachOPWithUDF(udfs);
        php.addAsLeaf(fe4);
       
        PhysicalPlan grpChain2 = GenPhyOp.grpChain();
        php.merge(grpChain2);
        php.connect(fe4,grpChain2.getRoots().get(0));

        udfs.clear();
        udfs.add(GFCross.class.getName());
        POForEach fe5 = GenPhyOp.topForEachOPWithUDF(udfs);
        php.addAsLeaf(fe5);
       
        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC15.gld");
    }
   
    public void testDistinct1() throws Exception {
        PhysicalPlan php = new PhysicalPlan();
        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        php.merge(ldFil1);
       
        PODistinct op = new PODistinct(new OperatorKey("", r.nextLong()),
                -1, null);
       
        php.addAsLeaf(op);
       
        PhysicalPlan grpChain1 = GenPhyOp.grpChain();
        php.merge(grpChain1);
        php.connect(op,grpChain1.getRoots().get(0));
       
        PODistinct op1 = new PODistinct(new OperatorKey("", r.nextLong()),
                -1, null);
       
        php.addAsLeaf(op1);
        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC16.gld");
    }
   
   
    public void testLimit() throws Exception {
        PhysicalPlan php = new PhysicalPlan();

        POLoad lC = GenPhyOp.topLoadOp();
        php.add(lC);

        POLimit op = new POLimit(new OperatorKey("", r.nextLong()),
                -1, null);

        php.add(op);
        php.connect(lC, op);

        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC17.gld");
    }
   
    @Test
    public void testMRCompilerErr() throws Exception {
      planTester.buildPlan("a = load 'input';");
      LogicalPlan lp = planTester.buildPlan("b = filter a by $0 > 5;");
     
      PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
      pp.remove(pp.getRoots().get(0));
      try {
        Util.buildMRPlan(new PhysicalPlan(), pc);
        fail("Expected failure.");
      } catch (MRCompilerException mrce) {
        assertTrue(mrce.getErrorCode() == 2053);
      }
    }

    @Test
    public void testMRCompilerErr1() throws Exception {    
        PhysicalPlan pp = new PhysicalPlan();
        PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
        pp.merge(ldFil1);
       
        POSplit op = GenPhyOp.topSplitOp();
        pp.addAsLeaf(op);

      try {
        Util.buildMRPlan(pp, pc);
        fail("Expected failure.");
      } catch (MRCompilerException mrce) {
        assertTrue(mrce.getErrorCode() == 2025);
      }
    }

    /**
     * Test to ensure that the order by without parallel followed by a limit, i.e., top k
     * always produces the correct number of map reduce jobs. In the testcase below since
     * we are running the unit test locally, we will get reduce parallelism as 1. So we will
     * NOT introduce the extra MR job to do a final limit
     */
    @Test
    public void testNumReducersInLimit() throws Exception {
      planTester.buildPlan("a = load 'input';");
      planTester.buildPlan("b = order a by $0;");
      planTester.buildPlan("c = limit b 10;");
      LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");
     
      PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
      MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
      MapReduceOper mrOper = mrPlan.getRoots().get(0);
      int count = 1;
     
      while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
      }       
      assertTrue(count == 3);
    }
   
    /**
     * Test to ensure that the order by with parallel followed by a limit, i.e., top k
     * always produces the correct number of map reduce jobs
     */
    @Test
    public void testNumReducersInLimitWithParallel() throws Exception {
      planTesterMR.buildPlan("a = load 'input';");
      planTesterMR.buildPlan("b = order a by $0 parallel 2;");
      planTesterMR.buildPlan("c = limit b 10;");
      LogicalPlan lp = planTesterMR.buildPlan("store c into '/tmp';");
     
      PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
      MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
      MapReduceOper mrOper = mrPlan.getRoots().get(0);
      int count = 1;
     
      while(mrPlan.getSuccessors(mrOper) != null) {
        mrOper = mrPlan.getSuccessors(mrOper).get(0);
        ++count;
      }       
      assertTrue(count == 4);
    }

    @Test
    public void testUDFInJoin() throws Exception {
        planTester.buildPlan("a = load 'input1' using BinStorage();");
        planTester.buildPlan("b = load 'input2';");
        planTester.buildPlan("c = join a by $0, b by $0;");
        LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");
       
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
        MapReduceOper mrOper = mrPlan.getRoots().get(0);
       
        assertTrue(mrOper.UDFs.size()==2);
        assertTrue(mrOper.UDFs.size()==2);
        assertTrue(mrOper.UDFs.contains("BinStorage"));
        assertTrue(mrOper.UDFs.contains("org.apache.pig.builtin.PigStorage"));
    }

    @Test
    public void testMergeJoin() throws Exception{

        //generate = true;
        planTester.buildPlan("a = load '/tmp/input1';");
        planTester.buildPlan("b = load '/tmp/input2';");
        planTester.buildPlan("c = join a by $0, b by $0 using \"merge\";");
        LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");
       
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        run(pp, "test/org/apache/pig/test/data/GoldenFiles/MRC18.gld");
    }
   
    public static class WeirdComparator extends ComparisonFunc {

        @Override
        public int compare(Tuple t1, Tuple t2) {
            // TODO Auto-generated method stub
            int result = 0;
            try {
                int i1 = (Integer) t1.get(1);
                int i2 = (Integer) t2.get(1);
                result = (i1 - 50) * (i1 - 50) - (i2 - 50) * (i2 - 50);
            } catch (ExecException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            return result;
        }

    }
   
    @Test
    public void testMergeJoinWithIndexableLoadFunc() throws Exception{

        //generate = true;
        planTester.buildPlan("a = load '/tmp/input1';");
        planTester.buildPlan("b = load '/tmp/input2' using " +
            TestMergeJoin.DummyIndexableLoader.class.getName() + ";");
        planTester.buildPlan("c = join a by $0, b by $0 using \"merge\";");
        LogicalPlan lp = planTester.buildPlan("store c into '/tmp';");
       
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mp = Util.buildMRPlan(pp, pc);
        assertEquals("Checking number of MR Jobs for merge join with " +
            "IndexableLoadFunc:", 1, mp.size());
       
    }
   
    @Test
    public void testCastFuncShipped() throws Exception{
       
        planTester.buildPlan("a = load '/tmp/input1' using " + PigStorageNoDefCtor.class.getName() +
                "('\t') as (a0, a1, a2);");
        planTester.buildPlan("b = group a by a0;");
        planTester.buildPlan("c = foreach b generate flatten(a);");
        planTester.buildPlan("d = order c by a0;");
        planTester.buildPlan("e = foreach d generate a1+a2;");
        LogicalPlan lp = planTester.buildPlan("store e into '/tmp';");
        planTester.typeCheckPlan(lp);
       
        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
        MROperPlan mp = Util.buildMRPlan(pp, pc);
        MapReduceOper op = mp.getLeaves().get(0);
        assertTrue(op.UDFs.contains(new FuncSpec(PigStorageNoDefCtor.class.getName())+"('\t')"));
    }
   
    /**
     * Test that POSortedDistinct gets printed as POSortedDistinct
     * @throws Exception
     */
    @Test
    public void testSortedDistinctInForeach() throws Exception {
        PhysicalPlan php = new PhysicalPlan();
        PhysicalPlan grpChain1 = GenPhyOp.loadedGrpChain();
        php.merge(grpChain1);
       
        List<PhysicalPlan> inputs = new LinkedList<PhysicalPlan>();
        PhysicalPlan inplan = new PhysicalPlan();
        PODistinct op1 = new POSortedDistinct(new OperatorKey("", r.nextLong()),
                -1, null);
        inplan.addAsLeaf(op1);
        inputs.add(inplan);
        List<Boolean> toFlattens = new ArrayList<Boolean>();
        toFlattens.add(false);
        POForEach pofe = new POForEach(new OperatorKey("", r.nextLong()), 1,
                inputs, toFlattens);

        php.addAsLeaf(pofe);
        POStore st = GenPhyOp.topStoreOp();
        php.addAsLeaf(st);
        run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC19.gld");
    }
   
    private void run(PhysicalPlan pp, String expectedFile) throws Exception {
        String compiledPlan, goldenPlan = null;
        int MAX_SIZE = 100000;
        MRCompiler comp = new MRCompiler(pp, pc);
        comp.compile();

        MROperPlan mrp = comp.getMRPlan();
        PlanPrinter ppp = new PlanPrinter(mrp);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ppp.print(baos);
        compiledPlan = baos.toString();

        if(generate ){
            FileOutputStream fos = new FileOutputStream(expectedFile);
            fos.write(baos.toByteArray());
            return;
        }
        FileInputStream fis = new FileInputStream(expectedFile);
        byte[] b = new byte[MAX_SIZE];
        int len = fis.read(b);
        goldenPlan = new String(b, 0, len);
        if (goldenPlan.charAt(len-1) == '\n')
            goldenPlan = goldenPlan.substring(0, len-1);

        pp.explain(System.out);
        System.out.println();
        System.out.println("<<<" + compiledPlan + ">>>");
        System.out.println("-------------");
        System.out.println("Golden");
        System.out.println("<<<" + goldenPlan + ">>>");
        System.out.println("-------------");
        assertEquals(goldenPlan, compiledPlan);
    }


   

}
TOP

Related Classes of org.apache.pig.test.TestMRCompiler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.