Package org.apache.pig.test

Source Code of org.apache.pig.test.TestNewPlanColumnPrune$MyPlanOptimizer

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License" + you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.impl.PigContext;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
import org.apache.pig.newplan.logical.rules.AddForEach;
import org.apache.pig.newplan.logical.rules.ColumnMapKeyPrune;
import org.apache.pig.newplan.logical.rules.MapKeysPruneHelper;
import org.apache.pig.newplan.optimizer.PlanOptimizer;
import org.apache.pig.newplan.optimizer.Rule;
import org.junit.Test;

public class TestNewPlanColumnPrune {
    LogicalPlan plan = null;
    PigContext pc = new PigContext(ExecType.LOCAL, new Properties());

    private LogicalPlan buildPlan(String query) throws Exception{
        PigServer pigServer = new PigServer( pc );
        return Util.buildLp(pigServer, query);
    }

    @Test
    public void testNoPrune() throws Exception  {
        // no foreach
        String query = "a = load 'd.txt' as (id, v1, v2);" +
        "b = filter a by v1==NULL;" +
        "store b into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);

        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query  = "a = load 'd.txt' as (id, v1, v2);" +
        "b = filter a by v1==NULL;"  +
        "store b into 'empty';";
        LogicalPlan expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));

        // no schema
        query = "a = load 'd.txt';" +
        "b = foreach a generate $0, $1;" +
        "store b into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a = load 'd.txt';"+
        "b = foreach a generate $0, $1;"+
        "store b into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));
    }

    @Test
    public void testPrune() throws Exception  {
        // only foreach
        String query = "a = load 'd.txt' as (id, v1, v2);" +
        "b = foreach a generate id;"+
        "store b into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);

        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a = load 'd.txt' as (id);" +
        "b = foreach a generate id;"+
        "store b into 'empty';";
        LogicalPlan expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));

        // with filter
        query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);"+
        "b = filter a by v1 != NULL AND (v2+v3)<100;"+
        "c = foreach b generate id;"+
        "store c into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a = load 'd.txt' as (id, v1, v3, v2);" +
        "b = filter a by v1 != NULL AND (v2+v3)<100;" +
        "c = foreach b generate id;" +
        "store c into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

        // with 2 foreach
        query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v2, v5, v4;" +
        "c = foreach b generate v5, v4;" +
        "store c into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a = load 'd.txt' as (v5, v4);" +
        "b = foreach a generate v5, v4;" +
        "c = foreach b generate v5, v4;" +
        "store c into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

        // with 2 foreach
        query = "a = load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate id, v1, v5, v3, v4;" +
        "c = foreach b generate v5, v4;" +
        "store c into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a = load 'd.txt' as (v5, v4);" +
        "b = foreach a generate v5, v4;" +
        "c = foreach b generate v5, v4;" +
        "store c into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

        // with 2 foreach and filter in between
        query = "a =load 'd.txt' as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v2, v5, v4;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5, v4;" +
        "store d into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (v5, v4, v2);" +
        "b = foreach a generate v2, v5, v4;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5, v4;" +
        "store d into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

        // with 2 foreach after join
        query = "a =load 'd.txt' as (id, v1, v2, v3);" +
        "b = load 'c.txt' as (id, v4, v5, v6);" +
        "c = join a by id, b by id;" +
        "d = foreach c generate a::id, v5, v3, v4;" +
        "store d into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, v3);" +
        "b = load 'c.txt' as (id, v4, v5);" +
        "c = join a by id, b by id;" +
        "d = foreach c generate a::id, v5, v3, v4;" +
        "store d into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

        // with BinStorage, insert foreach after load
        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "c = filter a by v2 != NULL;" +
        "d = foreach c generate v5, v4;" +
        "store d into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v5, v4, v2;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5, v4;" +
        "store d into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

       // with BinStorage, not to insert foreach after load if there is already one
        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v5, v4, v2;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5;" +
        "store d into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v5, v2;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5;" +
        "store d into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));

       // with BinStorage, not to insert foreach after load if there is already one
        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v5, v4, v2, 10;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5;" +
        "store d into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);" +
        "b = foreach a generate v5, v2, 10;" +
        "c = filter b by v2 != NULL;" +
        "d = foreach c generate v5;" +
        "store d into 'empty';";
        expected = buildPlan(query);
        assertTrue(expected.isEqual(newLogicalPlan));
    }

    @Test
    @SuppressWarnings("unchecked")
    public void testPruneWithMapKey() throws Exception {
         // only foreach
        String query = "a =load 'd.txt' as (id, v1, m:map[]);" +
        "b = foreach a generate id, m#'path';" +
        "store b into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);

        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, m:map[]);" +
        "b = foreach a generate id, m#'path';" +
        "store b into 'empty';";
        LogicalPlan expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));

        LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
        Map<Integer,Set<String>> annotation =
                (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
        assertEquals(1, annotation.size());
        Set<String> s = new HashSet<String>();
        s.add("path");
        assertEquals(annotation.get(2), s);

        // foreach with join
        query = "a =load 'd.txt' as (id, v1, m:map[]);" +
        "b = load 'd.txt' as (id, v1, m:map[]);" +
        "c = join a by id, b by id;" +
        "d = filter c by a::m#'path' != NULL;" +
        "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
        "store e into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, m:map[]);" +
        "b = load 'd.txt' as (id, m:map[]);" +
        "c = join a by id, b by id;" +
        "d = filter c by a::m#'path' != NULL;" +
        "e = foreach d generate a::id, b::id, b::m#'path', a::m;" +
        "store e into 'empty';";
        expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));

        List<Operator> ll = newLogicalPlan.getSources();
        assertEquals(2, ll.size());
        LOLoad loada = null;
        LOLoad loadb = null;
        for(Operator opp: ll) {
            if (((LogicalRelationalOperator)opp).getAlias().equals("a")) {
                loada = (LOLoad)opp;
                continue;
            }

            if (((LogicalRelationalOperator)opp).getAlias().equals("b")) {
                loadb = (LOLoad)opp;
                continue;
            }
        }

        annotation =
                (Map<Integer, Set<String>>) loada.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
        assertNull(annotation);

        annotation =
            (Map<Integer, Set<String>>) loadb.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
        assertEquals(1, annotation.size());

        s = new HashSet<String>();
        s.add("path");
        assertEquals(annotation.get(2), s);
    }

    @Test
    public void testPruneWithBag() throws Exception  {
        // filter above foreach
        String query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
        "b = filter a by id>10;" +
        "c = foreach b generate id, FLATTEN(v);" +
        "d = foreach c generate id, v::s2;" +
        "store d into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);

        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});" +
        "b = filter a by id>10;" +
        "c = foreach b generate id, FLATTEN(v);" +
        "d = foreach c generate id, v::s2;" +
        "store d into 'empty';";
        LogicalPlan expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));
    }

    @Test
    public void testAddForeach() throws Exception  {
        // filter above foreach
        String query = "a =load 'd.txt' as (id, v1, v2);" +
        "b = filter a by v1>10;" +
        "c = foreach b generate id;" +
        "store c into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);

        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, v1);" +
        "b = filter a by v1>10;" +
        "c = foreach b generate id;" +
        "store c into 'empty';";
        LogicalPlan expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));

        // join with foreach
        query = "a =load 'd.txt' as (id, v1, v2);" +
        "b = load 'd.txt' as (id, v1, v2);" +
        "c = join a by id, b by id;" +
        "d = filter c by a::v1>b::v1;" +
        "e = foreach d generate a::id;" +
        "store e into 'empty';";
        newLogicalPlan = buildPlan(query);

        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();

        query = "a =load 'd.txt' as (id, v1);" +
        "b = load 'd.txt' as (id, v1);" +
        "c = join a by id, b by id;" +
        "d = foreach c generate a::id, a::v1, b::v1;" +
        "e = filter d by a::v1>b::v1;" +
        "f = foreach e generate a::id;" +
        "store f into 'empty';";
        expected = buildPlan(query);

        assertTrue(expected.isEqual(newLogicalPlan));
    }

    @Test
    public void testPruneSubTreeForEach() throws Exception  {
        String query = "a =load 'd.txt' as (id, v1);" +
            "b = group a by id;" +
            "c = foreach b { d = a.v1; " +
                          " e = distinct d; " +
                          " generate group, e; };" +
            "f = foreach c generate group ;" +
            "store f into 'empty';";
        LogicalPlan newLogicalPlan = buildPlan(query);
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        try {
            optimizer.optimize();
        } catch (Exception e) {
            //PIG-2968 throws ConcurrentModificationException
            e.printStackTrace();
            fail("Unexpected Exception: " + e);
        }
    }

    @Test
    public void testDistinct() throws Exception {
        //Test for bug where distinct wasn't being pruned properly causing union
        //to fail to get a schema since the distinct relation had an incompatible schema
        //with the other relation being unioned.
       
        String testQuery =
                "a = load 'd.txt' as (id, v1, v2);" +
                "b = load 'd.txt' as (id, v1, v2);" +
                "c = distinct a;" +
                "d = union b, c;" +
                "e = foreach d generate id, v1;" +
                "store e into 'empty';";
       
        //Generate optimized plan.
        LogicalPlan newLogicalPlan = buildPlan(testQuery);
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        Iterator<Operator> iter = newLogicalPlan.getOperators();
        while (iter.hasNext()) {
            Operator o = iter.next();
            LogicalRelationalOperator lro = (LogicalRelationalOperator)o;
            if (lro == null || lro.getAlias() == null) continue;
            if (lro.getAlias().equals("d")) {
                assertNotNull(lro.getSchema());
            }
        }
    }

    public class MyPlanOptimizer extends LogicalPlanOptimizer {

        protected MyPlanOptimizer(OperatorPlan p,  int iterations) {
            super(p, iterations, null);
        }

        protected List<Set<Rule>> buildRuleSets() {
            List<Set<Rule>> ls = new ArrayList<Set<Rule>>();

            Rule r = new ColumnMapKeyPrune("ColumnMapKeyPrune");
            Set<Rule> s = new HashSet<Rule>();
            s.add(r);
            ls.add(s);

            r = new AddForEach("AddForEach");
            s = new HashSet<Rule>();
            s.add(r);
            ls.add(s);

            return ls;
        }
    }
}
TOP

Related Classes of org.apache.pig.test.TestNewPlanColumnPrune$MyPlanOptimizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.