Package org.apache.pig.test

Source Code of org.apache.pig.test.TestNewPlanLogicalOptimizer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;

import java.io.IOException;
import java.util.HashSet;

import org.apache.pig.FuncSpec;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.newplan.logical.expression.AndExpression;
import org.apache.pig.newplan.logical.expression.ConstantExpression;
import org.apache.pig.newplan.logical.expression.EqualExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.expression.ProjectExpression;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer;
import org.apache.pig.newplan.logical.optimizer.SchemaResetter;
import org.apache.pig.newplan.logical.relational.LOFilter;
import org.apache.pig.newplan.logical.relational.LOForEach;
import org.apache.pig.newplan.logical.relational.LOJoin;
import org.apache.pig.newplan.logical.relational.LOLoad;
import org.apache.pig.newplan.logical.relational.LogicalPlan;
import org.apache.pig.newplan.logical.relational.LogicalSchema;
import org.apache.pig.newplan.logical.relational.LOJoin.JOINTYPE;
import org.apache.pig.newplan.logical.relational.LogicalSchema.LogicalFieldSchema;
import org.junit.Test;

import junit.framework.TestCase;

/**
* Test end to end logical optimizations.
*/
public class TestNewPlanLogicalOptimizer extends TestCase {
   
    @Test
    public void testFilterPushDown() throws IOException {
        // A logical plan for:
        // A = load 'bla' as (x, y);
        // B = load 'morebla' as (a, b);
        // C = join A on x, B on a;
        // D = filter C by x = a and x = 0 and b = 1 and y = b;
        // store D into 'whatever';
       
        // A = load
        LogicalPlan lp = new LogicalPlan();
        {
          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, lp, null);
          A.setAlias("A");
          lp.add(A);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, lp, null);
          B.setAlias("B");
          lp.add(B);
         
          // C = join
          LogicalSchema cschema = new LogicalSchema();
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          MultiMap<Integer, LogicalExpressionPlan> mm =
                new MultiMap<Integer, LogicalExpressionPlan>();
          LogicalExpressionPlan aprojplan = new LogicalExpressionPlan();
          LOJoin C = new LOJoin(lp, mm, JOINTYPE.HASH, new boolean[] {true, true});
          new ProjectExpression(aprojplan, 0, 0, C);
          LogicalExpressionPlan bprojplan = new LogicalExpressionPlan();
          new ProjectExpression(bprojplan, 1, 0, C);
          mm.put(0, aprojplan);
          mm.put(1, bprojplan);
         
          C.setAlias("C");
          lp.add(C);
          lp.connect(A, C);
          lp.connect(B, C);
       
          // D = filter
          LogicalExpressionPlan filterPlan = new LogicalExpressionPlan();
          LOFilter D = new LOFilter(lp, filterPlan);
          ProjectExpression fx = new ProjectExpression(filterPlan, 0, 0, D);
          ConstantExpression fc0 = new ConstantExpression(filterPlan, new Integer(0));
          EqualExpression eq1 = new EqualExpression(filterPlan, fx, fc0);
          ProjectExpression fanotherx = new ProjectExpression(filterPlan, 0, 0, D);
          ProjectExpression fa = new ProjectExpression(filterPlan, 0, 2, D);
          EqualExpression eq2 = new EqualExpression(filterPlan, fanotherx, fa);
          AndExpression and1 = new AndExpression(filterPlan, eq1, eq2);
          ProjectExpression fb = new ProjectExpression(filterPlan, 0, 3, D);
          ConstantExpression fc1 = new ConstantExpression(filterPlan, new Integer(1));
          EqualExpression eq3 = new EqualExpression(filterPlan, fb, fc1);
          AndExpression and2 = new AndExpression(filterPlan, and1, eq3);
          ProjectExpression fanotherb = new ProjectExpression(filterPlan, 0, 3, D);
          ProjectExpression fy = new ProjectExpression(filterPlan, 0, 1, D);
          EqualExpression eq4 = new EqualExpression(filterPlan, fy, fanotherb);
          new AndExpression(filterPlan, and2, eq4);
         
          D.setAlias("D");
          // Connect D to B, since the transform has happened.
          lp.add(D);
          lp.connect(C, D);
        }
       
        System.out.println(lp);
        LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer(lp, 500, null);
        optimizer.optimize();
       
        LogicalPlan expected = new LogicalPlan();
        {
            // A = load
          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, expected, null);
          expected.add(A);
         
          // DA = filter
          LogicalExpressionPlan DAfilterPlan = new LogicalExpressionPlan();
          LOFilter DA = new LOFilter(expected, DAfilterPlan);
          ProjectExpression fx = new ProjectExpression(DAfilterPlan, 0, 0, DA);
          fx.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ConstantExpression fc0 = new ConstantExpression(DAfilterPlan, new Integer(0));
          new EqualExpression(DAfilterPlan, fx, fc0);
         
          DA.neverUseForRealSetSchema(aschema);
          expected.add(DA);
          expected.connect(A, DA);
         
          // A = foreach
            LOForEach foreachA = org.apache.pig.newplan.logical.Util.addForEachAfter(expected, DA, 0, new HashSet<Integer>());
            foreachA.setAlias("A");
            foreachA.neverUseForRealSetSchema(aschema);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, expected, null);
          expected.add(B);
         
          // DB = filter
          LogicalExpressionPlan DBfilterPlan = new LogicalExpressionPlan();
            LOFilter DB = new LOFilter(expected, DBfilterPlan);
          ProjectExpression fb = new ProjectExpression(DBfilterPlan, 0, 1, DB);
          fb.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ConstantExpression fc1 = new ConstantExpression(DBfilterPlan, new Integer(1));
          new EqualExpression(DBfilterPlan, fb, fc1);

          DB.neverUseForRealSetSchema(bschema);
          expected.add(DB);
          expected.connect(B, DB);
         
            // B = foreach
            LOForEach foreachB = org.apache.pig.newplan.logical.Util.addForEachAfter(expected, DB, 0, new HashSet<Integer>());
            foreachB.setAlias("B");
            foreachB.neverUseForRealSetSchema(bschema);
           
          // C = join
          LogicalSchema cschema = new LogicalSchema();
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "D::x", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "D::y", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "null::a", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "null::b", null, DataType.BYTEARRAY));
          cschema.getField(0).uid = 1;
          cschema.getField(1).uid = 2;
          cschema.getField(2).uid = 3;
          cschema.getField(3).uid = 4;
          LogicalExpressionPlan aprojplan = new LogicalExpressionPlan();
            MultiMap<Integer, LogicalExpressionPlan> mm =
                new MultiMap<Integer, LogicalExpressionPlan>();
            LOJoin C = new LOJoin(expected, mm, JOINTYPE.HASH, new boolean[] {true, true});

          ProjectExpression x = new ProjectExpression(aprojplan, 0, 0, C);
          x.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          LogicalExpressionPlan bprojplan = new LogicalExpressionPlan();
          ProjectExpression y = new ProjectExpression(bprojplan, 1, 0, C);
          y.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          mm.put(0, aprojplan);
          mm.put(1, bprojplan);
          C.neverUseForRealSetSchema(cschema);
          expected.add(C);
          expected.connect(foreachA, C);
          expected.connect(foreachB, C);
         
          // D = filter
          LogicalExpressionPlan filterPlan = new LogicalExpressionPlan();
            LOFilter D = new LOFilter(expected, filterPlan);
          ProjectExpression fanotherx = new ProjectExpression(filterPlan, 0, 0, D);
          fanotherx.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ProjectExpression fa = new ProjectExpression(filterPlan, 0, 2, D);
          fa.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq2 = new EqualExpression(filterPlan, fanotherx, fa);
          ProjectExpression fanotherb = new ProjectExpression(filterPlan, 0, 3, D);
          fanotherb.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ProjectExpression fy = new ProjectExpression(filterPlan, 0, 1, D);
          fy.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq4 = new EqualExpression(filterPlan, fy, fanotherb);
          new AndExpression(filterPlan, eq2, eq4);
         
          D.neverUseForRealSetSchema(cschema);
          expected.add(D);
          expected.connect(C, D);
        }
       
        SchemaResetter schemaResetter = new SchemaResetter(lp);
        schemaResetter.visit();
       
        schemaResetter = new SchemaResetter(expected);
        schemaResetter.visit();
       
       
        assertTrue( lp.isEqual(expected) );
        // assertEquals(lp, expected);
    }

}
TOP

Related Classes of org.apache.pig.test.TestNewPlanLogicalOptimizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.