Package org.apache.pig.impl.logicalLayer

Examples of org.apache.pig.impl.logicalLayer.LogicalOperator


        }

        // merge all the inner plan outputs so we know what type
        // our group column should be
        for(int i=0;i < cg.getInputs().size(); i++) {
            LogicalOperator input = cg.getInputs().get(i) ;
            List<LogicalPlan> innerPlans
                        = new ArrayList<LogicalPlan>(cg.getGroupByPlans().get(input)) ;

            for(int j=0;j < innerPlans.size(); j++) {
                byte innerType = innerPlans.get(j).getSingleLeafPlanOutputType() ;
View Full Code Here


                    throw new TypeCheckerException(msg, errCode, PigException.INPUT) ;
                }

                List<LogicalOperator> rootList = plan.getRoots() ;
                for(int j=0; j<rootList.size(); j++) {
                    LogicalOperator innerRoot = rootList.get(j) ;
                    // TODO: Support MAP dereference
                    if (innerRoot instanceof LOProject) {
                        resolveLOProjectType((LOProject) innerRoot) ;
                    }
                    else if (innerRoot instanceof LOConst || innerRoot instanceof LOUserFunc) {
                        // nothing to resolve here: LOConst always has
                        // the right type information (LOUserFunc roots
                        // are likewise accepted without further resolution)
                    }
                    else {
                        int errCode = 2064;
                        String msg = "Unsupported root type in "
                            +"LOForEach: " + innerRoot.getClass().getSimpleName();
                        throw new TypeCheckerException(msg, errCode, PigException.BUG) ;
                    }
                }

                checkInnerPlan(f.getAlias(), plan) ;
View Full Code Here

    }

    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {      
        try {
            LogicalOperator op = getOperator(nodes);
            Schema s = op.getSchema();
            if (s == null) return false;
   
            boolean sawOne = false;
            List<Schema.FieldSchema> fss = s.getFields();
            List<Byte> types = new ArrayList<Byte>(s.size());
View Full Code Here

            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
       
        LogicalOperator lo = nodes.get(0);
        if(LOLoad.class.getName().equals(operatorClassName)) {
            if (lo == null || !(lo instanceof LOLoad)) {
                int errCode = 2005;
                String msg = "Expected " + LOLoad.class.getSimpleName()
                        + ", got "
                        + (lo == null ? lo : lo.getClass().getSimpleName());
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else if(LOStream.class.getName().equals(operatorClassName)){
            if (lo == null || !(lo instanceof LOStream)) {
                int errCode = 2005;
                String msg = "Expected " + LOStream.class.getSimpleName()
                        + ", got "
                        + (lo == null ? lo : lo.getClass().getSimpleName());
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else {
View Full Code Here

    }

    @Override
    public void transform(List<LogicalOperator> nodes) throws OptimizerException {
        try {
            LogicalOperator lo = getOperator(nodes);
            Schema s = lo.getSchema();
            String scope = lo.getOperatorKey().scope;
            // For every field, build a logical plan.  If the field has a type
            // other than byte array, then the plan will be cast(project).  Else
            // it will just be project.
            ArrayList<LogicalPlan> genPlans = new ArrayList<LogicalPlan>(s.size());
            ArrayList<Boolean> flattens = new ArrayList<Boolean>(s.size());
            Map<String, Byte> typeChanges = new HashMap<String, Byte>();
            // if we are inserting casts in a load and if the loader
            // implements determineSchema(), insert casts only where necessary
            // Note that in this case, the data coming out of the loader is not
            // a BYTEARRAY but is whatever determineSchema() says it is.
            Schema determinedSchema = null;
            if(LOLoad.class.getName().equals(operatorClassName)) {
                determinedSchema = ((LOLoad)lo).getDeterminedSchema();
            }
            for (int i = 0; i < s.size(); i++) {
                LogicalPlan p = new LogicalPlan();
                genPlans.add(p);
                flattens.add(false);
                List<Integer> toProject = new ArrayList<Integer>(1);
                toProject.add(i);
                LOProject proj = new LOProject(p, OperatorKey.genOpKey(scope),
                    lo, toProject);
                p.add(proj);
                Schema.FieldSchema fs = s.getField(i);
                if (fs.type != DataType.BYTEARRAY) {
                    if(determinedSchema == null || (fs.type != determinedSchema.getField(i).type)) {
                            // Either no schema was determined by loader OR the type
                            // from the "determinedSchema" is different
                            // from the type specified - so we need to cast
                            LOCast cast = new LOCast(p,
                                        OperatorKey.genOpKey(scope), fs.type);
                            cast.setFieldSchema(fs);
                            p.add(cast);
                            p.connect(proj, cast);
                           
                            cast.setFieldSchema(fs.clone());
                            FuncSpec loadFuncSpec = null;
                            if(lo instanceof LOLoad) {
                                loadFuncSpec = ((LOLoad)lo).getInputFile().getFuncSpec();
                            } else if (lo instanceof LOStream) {
                                StreamingCommand command = ((LOStream)lo).getStreamingCommand();
                                HandleSpec streamOutputSpec = command.getOutputSpec();
                                loadFuncSpec = new FuncSpec(streamOutputSpec.getSpec());
                            } else {
                                int errCode = 2006;
                                String msg = "TypeCastInserter invoked with an invalid operator class name: " + lo.getClass().getSimpleName();
                                throw new OptimizerException(msg, errCode, PigException.BUG);
                            }
                            cast.setLoadFuncSpec(loadFuncSpec);
                            typeChanges.put(fs.canonicalName, fs.type);
                            if(determinedSchema == null) {
                                // Reset the loads field schema to byte array so that it
                                // will reflect reality.
                                fs.type = DataType.BYTEARRAY;
                            } else {
                                // Reset the type to what determinedSchema says it is
                                fs.type = determinedSchema.getField(i).type;
                            }
                        }
                }
            }

            // Build a foreach to insert after the load, giving it a cast for each
            // position that has a type other than byte array.
            LOForEach foreach = new LOForEach(mPlan,
                OperatorKey.genOpKey(scope), genPlans, flattens);
            foreach.setAlias(lo.getAlias());
            // Insert the foreach into the plan and patch up the plan.
            insertAfter(lo, foreach, null);

            rebuildSchemas();
View Full Code Here

            // return false
            if (successors == null || successors.size() == 0 || successors.size() > 1) {
                return false;
            }

            LogicalOperator successor = successors.get(0);

            List<LogicalOperator> peers = (mPlan.getPredecessors(successor) == null ? null
                    : new ArrayList<LogicalOperator>(mPlan.getPredecessors(successor)));
           
            // check if any of the foreach's peers is a foreach flatten
            // if so then this rule does not apply
            if (peers != null){
                for(LogicalOperator peer: peers) {
                    if(!peer.equals(foreach)) {
                        if(peer instanceof LOForEach) {
                            LOForEach peerForeach = (LOForEach)peer;
                            if(peerForeach.hasFlatten().first) {
                                return false;
                            }
                        }
                    }
                }
            }
           
            IndexHelper<LogicalOperator> indexHelper = new IndexHelper<LogicalOperator>(peers);
            Integer foreachPosition = indexHelper.getIndex(foreach);
           
            // Check if flattened fields is required by successor, if so, don't optimize
            List<RequiredFields> requiredFieldsList = ((RelationalOperator)successor).getRequiredFields();
            RequiredFields requiredFields = requiredFieldsList.get(foreachPosition.intValue());
           
            MultiMap<Integer, Column> foreachMappedFields = foreachProjectionMap.getMappedFields();
           
            if (requiredFields.getFields()!=null) {
                for (Pair<Integer, Integer> pair : requiredFields.getFields()) {
                    Collection<Column> columns = foreachMappedFields.get(pair.second);
                    if (columns!=null) {
                        for (Column column : columns) {
                            Pair<Integer, Integer> foreachInputColumn = column.getInputColumn();
                            if (foreach.isInputFlattened(foreachInputColumn.second))
                                return false;
                        }
                    }
                }
            }
           
            // the foreach with flatten can be swapped with an order by,
            // as the order by will then have fewer records to sort
            // also the sort does not alter the records that are processed
           
            // the foreach with flatten can be pushed down a cross or a join
            // for the same reason. In this case the foreach has to be first
            // unflattened and then a new foreach has to be inserted after
            // the cross or join. In both cross and join the actual columns
            // from the foreach are not altered but positions might be changed
           
            // in the case of union the column is transformed and as a result
            // the foreach flatten cannot be pushed down
           
            // for distinct the output before flattening and the output
            // after flattening might be different. For example, consider
            // {(1), (1)}. Distinct of this bag is still {(1), (1)}.
            // distinct(flatten({(1), (1)})) is (1). However,
            // flatten(distinct({(1), (1)})) is (1), (1)
           
            // in both cases correctness is not affected
            if(successor instanceof LOSort) {
                LOSort sort = (LOSort) successor;
                RequiredFields sortRequiredField = sort.getRequiredFields().get(0);
               
                if(sortRequiredField.getNeedAllFields()) {
                    return false;
                }
               
                List<Pair<Integer, Integer>> sortInputs = sortRequiredField.getFields();
                Set<Integer> requiredInputs = new HashSet<Integer>();
                for(Pair<Integer, Integer> pair: sortInputs) {
                    requiredInputs.add(pair.second);
                }
               
                requiredInputs.retainAll(flattenedColumnSet);
                // the intersection of the sort's required inputs
                // and the flattened columns in the foreach should
                // be empty, i.e., the size of required inputs == 0
                if(requiredInputs.size() != 0) {
                    return false;
                }
               
                mSwap = true;
                return true;
            } else if (successor instanceof LOCross
                    || successor instanceof LOJoin) {
               
                List<LogicalOperator> children = mPlan.getSuccessors(successor);
               
                if(children == null || children.size() > 1) {
                    return false;
                }
               
                ProjectionMap succProjectionMap = successor.getProjectionMap();
               
                if(succProjectionMap == null) {
                    return false;
                }
               
View Full Code Here

            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        LogicalOperator lo = nodes.get(0);
        if (lo == null || !(lo instanceof LOForEach)) {
            // we should never be called with any other operator class name
            int errCode = 2005;
            String msg = "Expected " + LOForEach.class.getSimpleName()
                    + ", got "
                    + (lo == null ? lo : lo.getClass().getSimpleName());
            throw new OptimizerException(msg, errCode, PigException.INPUT);
        } else {
            return lo;
        }
View Full Code Here

    @Override
    public void transform(List<LogicalOperator> nodes)
            throws OptimizerException {
        try {
            LOForEach foreach = (LOForEach) getOperator(nodes);
            LogicalOperator successor = mPlan.getSuccessors(foreach).get(0);
            if (mSwap) {
                mPlan.swap(successor, foreach);
            } else if (mInsertBetween) {
                // mark the flattened columns as not flattened in the foreach
                // create a new foreach operator that projects each column of the
                // successor. Mark the remapped flattened columns as flattened
                // in the new foreach operator
               
                if(mFlattenedColumnReMap == null) {
                    int errCode = 2153;
                    String msg = "Internal error. The mapping for the flattened columns is empty";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
                }
               
                // set flatten to false for all columns in the mapping
               
                ArrayList<Boolean> flattenList = (ArrayList<Boolean>)foreach.getFlatten();               
                for(Integer key: mFlattenedColumnReMap.keySet()) {
                    flattenList.set(key, false);
                }
               
                // rebuild schemas of the foreach and the successor after the foreach modification
                foreach.regenerateSchema();
                successor.regenerateSchema();
               
                Schema successorSchema = successor.getSchema();
               
                if(successorSchema == null) {
                    int errCode = 2154;
                    String msg = "Internal error. Schema of successor cannot be null for pushing down foreach with flatten.";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
View Full Code Here

    @Override
    public Schema getSchema() throws FrontendException{
        if (!mIsSchemaComputed) {
            // get our parent's schema
            try {
                LogicalOperator input = mPlan.getPredecessors(this).get(0);
                if (null == input) {
                    int errCode = 1006;
                    String msg = "Could not find operator in plan";
                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                }
                if (input.getSchema()!=null) {
                    mSchema = Schema.copyAndLink(input.getSchema(), input);
                }
                else
                    mSchema = null;
                mIsSchemaComputed = true;
            } catch (FrontendException fe) {
View Full Code Here

     * @see org.apache.pig.impl.plan.Operator#rewire(org.apache.pig.impl.plan.Operator, org.apache.pig.impl.plan.Operator)
     */
    @Override
    public void rewire(Operator<LOVisitor> oldPred, int oldPredIndex, Operator<LOVisitor> newPred, boolean useOldPred) throws PlanException {
        super.rewire(oldPred, oldPredIndex, newPred, useOldPred);
        LogicalOperator previous = (LogicalOperator) oldPred;
        LogicalOperator current = (LogicalOperator) newPred;
        try {
            ProjectFixerUpper projectFixer = new ProjectFixerUpper(
                    mCondPlan, previous, oldPredIndex, current, useOldPred, this);
            projectFixer.visit();
        } catch (VisitorException ve) {
View Full Code Here

TOP

Related Classes of org.apache.pig.impl.logicalLayer.LogicalOperator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.