Package org.apache.pig.newplan.logical.relational

Examples of org.apache.pig.newplan.logical.relational.LogicalPlan


        // no foreach
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = filter a by v1==NULL;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = filter a by v1==NULL;");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // no schema
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt';");
        lpt.buildPlan("b = foreach a generate $0, $1;");
        plan = lpt.buildPlan("store b into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt';");
        lpt.buildPlan("b = foreach a generate $0, $1;");
        plan = lpt.buildPlan("store b into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
    }
View Full Code Here


        // only foreach
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = foreach a generate id;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id);");
        lpt.buildPlan("b = foreach a generate id;");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with filter
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = filter a by v1 != NULL AND (v2+v3)<100;");
        lpt.buildPlan("c = foreach b generate id;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v3, v2);");
        lpt.buildPlan("b = filter a by v1 != NULL AND (v2+v3)<100;");
        lpt.buildPlan("c = foreach b generate id;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (v5, v4);");
        lpt.buildPlan("b = foreach a generate v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate id, v1, v5, v3, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (v5, v4);");
        lpt.buildPlan("b = foreach a generate v5, v4;");
        lpt.buildPlan("c = foreach b generate v5, v4;");
        plan = lpt.buildPlan("store c into 'empty';");
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach and filter in between
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (v5, v4, v2);");
        lpt.buildPlan("b = foreach a generate v2, v5, v4;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with 2 foreach after join
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2, v3);");
        lpt.buildPlan("b = load 'c.txt' as (id, v4, v5, v6);");
        lpt.buildPlan("c = join a by id, b by id;");      
        lpt.buildPlan("d = foreach c generate a::id, v5, v3, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v3);");
        lpt.buildPlan("b = load 'c.txt' as (id, v4, v5);");
        lpt.buildPlan("c = join a by id, b by id;")
        lpt.buildPlan("d = foreach c generate a::id, v5, v3, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // with BinStorage, insert foreach after load
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");       
        lpt.buildPlan("c = filter a by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v4, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5, v4;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
       // with BinStorage, not to insert foreach after load if there is already one
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");   
        lpt.buildPlan("b = foreach a generate v5, v4, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v2;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
       
       // with BinStorage, not to insert foreach after load if there is already one
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");   
        lpt.buildPlan("b = foreach a generate v5, v4, v2, 10;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' using BinStorage() as (id, v1, v5, v3, v4, v2);");
        lpt.buildPlan("b = foreach a generate v5, v2, 10;");
        lpt.buildPlan("c = filter b by v2 != NULL;");
        lpt.buildPlan("d = foreach c generate v5;");
        plan = lpt.buildPlan("store d into 'empty';")
        expected = migratePlan(plan);
        assertTrue(expected.isEqual(newLogicalPlan));
    }
View Full Code Here

         // only foreach
        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("b = foreach a generate id, m#'path';");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("b = foreach a generate id, m#'path';");       
        plan = lpt.buildPlan("store b into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        LOLoad op = (LOLoad)newLogicalPlan.getSources().get(0);
        Map<Integer,Set<String>> annotation =
                (Map<Integer, Set<String>>) op.getAnnotation(MapKeysPruneHelper.REQUIRED_MAPKEYS);
        assertEquals(annotation.size(), 1);
        Set<String> s = new HashSet<String>();
        s.add("path");
        assertEquals(annotation.get(2), s);
       
        // foreach with join
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1, m:map[]);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = filter c by a::m#'path' != NULL;");
        lpt.buildPlan("e = foreach d generate a::id, b::id, b::m#'path', a::m;");       
        plan = lpt.buildPlan("store e into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("b = load 'd.txt' as (id, m:map[]);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = filter c by a::m#'path' != NULL;");
        lpt.buildPlan("e = foreach d generate a::id, b::id, b::m#'path', a::m;");       
        plan = lpt.buildPlan("store e into 'empty';")
        expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        List<Operator> ll = newLogicalPlan.getSources();
        assertEquals(ll.size(), 2);
        LOLoad loada = null;
        LOLoad loadb = null;
View Full Code Here

        lpt.buildPlan("a = load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});");
        lpt.buildPlan("b = filter a by id>10;");
        lpt.buildPlan("c = foreach b generate id, FLATTEN(v);");   
        lpt.buildPlan("d = foreach c generate id, v::s2;");   
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store d into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v:bag{t:(s1,s2,s3)});");
        lpt.buildPlan("b = filter a by id>10;");
        lpt.buildPlan("c = foreach b generate id, FLATTEN(v);");   
        lpt.buildPlan("d = foreach c generate id, v::s2;");   
        plan = lpt.buildPlan("store d into 'empty';");
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
    }
View Full Code Here

        }                     
       
        // Loader does not support column pruning, insert foreach     
        if (columnPrune) {
            if (response==null || !response.getRequiredFieldResponse()) {
                LogicalPlan p = (LogicalPlan)load.getPlan();                       
                Operator next = p.getSuccessors(load).get(0);
                // if there is already a LOForEach after load, we don't need to
                // add another LOForEach
                if (next instanceof LOForEach) {
                    return;
                }
               
                LOForEach foreach = new LOForEach(load.getPlan());
               
                // add foreach to the base plan                      
                p.add(foreach);
                              
                p.insertBetween(load, foreach, next);
               
                LogicalPlan innerPlan = new LogicalPlan();
                foreach.setInnerPlan(innerPlan);
               
                // build foreach inner plan
                List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();             
                LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[requiredFields.getFields().size()]);
                innerPlan.add(gen);
               
                for (int i=0; i<requiredFields.getFields().size(); i++) {
                    LoadPushDown.RequiredField rf = requiredFields.getFields().get(i);
                    LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, rf.getIndex());                   
                    innerPlan.add(innerLoad);         
                    innerPlan.connect(innerLoad, gen);
                   
                    LogicalExpressionPlan exp = new LogicalExpressionPlan();
                    ProjectExpression prj = new ProjectExpression(exp, i, -1, gen);
                    exp.add(prj);
                    exps.add(exp);
View Full Code Here

        LogicalPlanTester lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = filter a by v1>10;");
        lpt.buildPlan("c = foreach b generate id;");       
        org.apache.pig.impl.logicalLayer.LogicalPlan plan = lpt.buildPlan("store c into 'empty';")
        LogicalPlan newLogicalPlan = migratePlan(plan);
              
        PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1);");
        lpt.buildPlan("b = filter a by v1>10;");
        lpt.buildPlan("c = foreach b generate id;");     
        plan = lpt.buildPlan("store c into 'empty';")
        LogicalPlan expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
       
        // join with foreach
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1, v2);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = filter c by a::v1>b::v1;");
        lpt.buildPlan("e = foreach d generate a::id;");       
        plan = lpt.buildPlan("store e into 'empty';")
        newLogicalPlan = migratePlan(plan);
              
        optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
        optimizer.optimize();
       
        lpt = new LogicalPlanTester(pc);
        lpt.buildPlan("a = load 'd.txt' as (id, v1);");
        lpt.buildPlan("b = load 'd.txt' as (id, v1);");
        lpt.buildPlan("c = join a by id, b by id;");
        lpt.buildPlan("d = foreach c generate a::id, a::v1, b::v1;");       
        lpt.buildPlan("e = filter d by a::v1>b::v1;")
        lpt.buildPlan("f = foreach e generate a::id;");       
        plan = lpt.buildPlan("store f into 'empty';")
        expected = migratePlan(plan);
       
        assertTrue(expected.isEqual(newLogicalPlan));
    }
View Full Code Here

       
        // get column numbers from input uids
        Set<Long> inputUids = (Set<Long>)foreach.getAnnotation(ColumnPruneHelper.INPUTUIDS);
       
        // Get all top level projects
        LogicalPlan innerPlan = foreach.getInnerPlan();
        List<LOInnerLoad> innerLoads= new ArrayList<LOInnerLoad>();
        List<Operator> sources = innerPlan.getSources();
        for (Operator s : sources) {
            if (s instanceof LOInnerLoad)
                innerLoads.add((LOInnerLoad)s);
        }
       
        // If project of the innerLoad is not in INPUTUIDS, remove this innerLoad
        Set<LOInnerLoad> innerLoadsToRemove = new HashSet<LOInnerLoad>();
        for (LOInnerLoad innerLoad: innerLoads) {
            ProjectExpression project = innerLoad.getProjection();
            if (project.isProjectStar()) {
                LogicalSchema.LogicalFieldSchema tupleFS = project.getFieldSchema();
                // Check the first component of the star projection
                long uid = tupleFS.schema.getField(0).uid;
                if (!inputUids.contains(uid))
                    innerLoadsToRemove.add(innerLoad);
            }
            else {
                if (!inputUids.contains(project.getFieldSchema().uid))
                    innerLoadsToRemove.add(innerLoad);
            }
        }
       
        // Find the logical operator immediate precede LOGenerate which should be removed (the whole branch)
        Set<LogicalRelationalOperator> branchHeadToRemove = new HashSet<LogicalRelationalOperator>();
        for (LOInnerLoad innerLoad : innerLoadsToRemove) {
            Operator op = innerLoad;
            while (!(innerPlan.getSuccessors(op).get(0) instanceof LOGenerate)) {
                op = innerPlan.getSuccessors(op).get(0);
            }
            branchHeadToRemove.add((LogicalRelationalOperator)op);
        }
       
        // Find the expression plan to remove
        LOGenerate gen = (LOGenerate)innerPlan.getSinks().get(0);
        List<LogicalExpressionPlan> genPlansToRemove = new ArrayList<LogicalExpressionPlan>();
       
        List<LogicalExpressionPlan> genPlans = gen.getOutputPlans();
        for (int i=0;i<genPlans.size();i++) {
            LogicalExpressionPlan expPlan = genPlans.get(i);
            List<Operator> expSources = expPlan.getSinks();
           
            for (Operator expSrc : expSources) {
                if (expSrc instanceof ProjectExpression) {
                    LogicalRelationalOperator reference = ((ProjectExpression)expSrc).findReferent();
                    if (branchHeadToRemove.contains(reference)) {
                        genPlansToRemove.add(expPlan);
                    }
                }
            }
        }
       
        // Build the temporary structure based on genPlansToRemove, which include:
        // * flattenList
        // * outputPlanSchemas
        // * uidOnlySchemas
        // * inputsRemoved
        //     We first construct inputsNeeded, and inputsRemoved = (all inputs) - inputsNeeded.
        //     We cannot figure out inputsRemoved directly since the inputs may be used by other output plan.
        //     We can only get inputsRemoved after visiting all output plans.
        List<Boolean> flattenList = new ArrayList<Boolean>();
        Set<Integer> inputsNeeded = new HashSet<Integer>();
        Set<Integer> inputsRemoved = new HashSet<Integer>();
        List<LogicalSchema> outputPlanSchemas = new ArrayList<LogicalSchema>();
        List<LogicalSchema> uidOnlySchemas = new ArrayList<LogicalSchema>();
        List<LogicalSchema> userDefinedSchemas = null;
       
        if (gen.getUserDefinedSchema()!=null)
            userDefinedSchemas = new ArrayList<LogicalSchema>();
       
        for (int i=0;i<genPlans.size();i++) {
            LogicalExpressionPlan genPlan = genPlans.get(i);
            if (!genPlansToRemove.contains(genPlan)) {
                flattenList.add(gen.getFlattenFlags()[i]);
                outputPlanSchemas.add(gen.getOutputPlanSchemas().get(i));
                uidOnlySchemas.add(gen.getUidOnlySchemas().get(i));
                if (gen.getUserDefinedSchema()!=null) {
                    userDefinedSchemas.add(gen.getUserDefinedSchema().get(i));
                }
                List<Operator> sinks = genPlan.getSinks();
                for(Operator s: sinks) {
                    if (s instanceof ProjectExpression) {
                        inputsNeeded.add(((ProjectExpression)s).getInputNum());
                    }
                }
            }
        }
       
        List<Operator> preds = innerPlan.getPredecessors(gen);
       
        if (preds!=null) {  // otherwise, all gen plan are based on constant, no need to adjust
            for (int i=0;i<preds.size();i++) {
                if (!inputsNeeded.contains(i))
                    inputsRemoved.add(i);
View Full Code Here

        }
    }
   
    // remove all the operators starting from an operator
    private void removeSubTree(LogicalRelationalOperator op) throws FrontendException {
        LogicalPlan p = (LogicalPlan)op.getPlan();
        List<Operator> ll = p.getPredecessors(op);
        if (ll != null) {
            for(Operator pred: ll) {
                removeSubTree((LogicalRelationalOperator)pred);
            }
        }
               
        if (p.getSuccessors(op) != null) {
            Operator[] succs = p.getSuccessors(op).toArray(new Operator[0]);           
            for(Operator s: succs) {
                p.disconnect(op, s);
            }
        }
       
        p.remove(op);
    }
View Full Code Here

    }

    @Override
    protected OperatorPlan buildPattern() {
        // the pattern that this rule looks for is load
        LogicalPlan plan = new LogicalPlan();
        LogicalRelationalOperator op;
        if (getOperatorClassName().equals(LOLoad.class.getName()))
            op = new LOLoad(null, null, plan, null);
        else // LOStream
            op = new LOStream(plan, null, null, null);
        plan.add(op);
        return plan;
    }
View Full Code Here

            LogicalRelationalOperator op = (LogicalRelationalOperator)matched.getSources().get(0);
            LogicalSchema s = op.getSchema();
            // For every field, build a logical plan.  If the field has a type
            // other than byte array, then the plan will be cast(project).  Else
            // it will just be project.
            LogicalPlan innerPlan = new LogicalPlan();
           
            LOForEach foreach = new LOForEach(currentPlan);
            foreach.setInnerPlan(innerPlan);
            foreach.setAlias(op.getAlias());
           
            // Insert the foreach into the plan and patch up the plan.
            Operator next = currentPlan.getSuccessors(op).get(0);
            currentPlan.insertBetween(op, foreach, next);
           
            List<LogicalExpressionPlan> exps = new ArrayList<LogicalExpressionPlan>();
            LOGenerate gen = new LOGenerate(innerPlan, exps, new boolean[s.size()]);
            innerPlan.add(gen);
           
            // if we are inserting casts in a load and if the loader
            // implements determineSchema(), insert casts only where necessary
            // Note that in this case, the data coming out of the loader is not
            // a BYTEARRAY but is whatever determineSchema() says it is.
            LogicalSchema determinedSchema = null;
            if(LOLoad.class.getName().equals(getOperatorClassName())) {
                determinedSchema = ((LOLoad)op).getDeterminedSchema();
            }
            else {
                determinedSchema = new LogicalSchema();
                for (int i=0;i<s.size();i++) {
                    determinedSchema.addField(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
                }
            }
            for (int i = 0; i < s.size(); i++) {
                LogicalSchema.LogicalFieldSchema fs = s.getField(i);
               
                LOInnerLoad innerLoad = new LOInnerLoad(innerPlan, foreach, i);
                innerPlan.add(innerLoad);         
                innerPlan.connect(innerLoad, gen);
               
                LogicalExpressionPlan exp = new LogicalExpressionPlan();
               
                ProjectExpression prj = new ProjectExpression(exp, i, 0, gen);
                exp.add(prj);
               
                if (fs.type != DataType.BYTEARRAY && (determinedSchema == null || (!fs.isEqual(determinedSchema.getField(i))))) {
                    // Either no schema was determined by loader OR the type
                    // from the "determinedSchema" is different
                    // from the type specified - so we need to cast
                    CastExpression cast = new CastExpression(exp, prj, new LogicalSchema.LogicalFieldSchema(fs));
                    exp.add(cast);
                    FuncSpec loadFuncSpec = null;
                    if(op instanceof LOLoad) {
                        loadFuncSpec = ((LOLoad)op).getFileSpec().getFuncSpec();
                    } else if (op instanceof LOStream) {
                        StreamingCommand command = ((LOStream)op).getStreamingCommand();
                        HandleSpec streamOutputSpec = command.getOutputSpec();
                        loadFuncSpec = new FuncSpec(streamOutputSpec.getSpec());
                    } else {
                        String msg = "TypeCastInserter invoked with an invalid operator class name: " + innerPlan.getClass().getSimpleName();
                        throw new FrontendException(msg, 2242);
                    }
                    cast.setFuncSpec(loadFuncSpec);
                }
                exps.add(exp);
View Full Code Here

TOP

Related Classes of org.apache.pig.newplan.logical.relational.LogicalPlan

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.