Package org.apache.pig.impl.logicalLayer.schema

Examples of org.apache.pig.impl.logicalLayer.schema.Schema


    }
   
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
        Schema s = new Schema();
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.INTEGER));
        funcList.add(new FuncSpec(this.getClass().getName(), s));
        return funcList;
    }
View Full Code Here


        }
    }

    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.LONG));
    }
View Full Code Here

  for (int i = 0; i < cols.size(); i++) {
      fieldSchemaList.add(new FieldSchema(cols.get(i), HiveRCSchemaUtil
        .findPigDataType(types.get(i))));
  }

  pigSchema = new ResourceSchema(new Schema(fieldSchemaList));

  props = new Properties();

  // setting table schema properties for ColumnarSerDe
  // these properties are never changed by the columns to read filter,
View Full Code Here

             * compute the multimaps <group_column_number, alias> and <group_column_number, operator>
             * and <alias, expression_operator>
             * Also set the lookup table for each alias to false
             */

            Schema groupBySchema = null;
            List<Schema.FieldSchema> groupByFss = new ArrayList<Schema.FieldSchema>();
            Map<String, Boolean> aliasLookup = new HashMap<String, Boolean>();
            MultiMap<String, ExpressionOperator> aliasExop = new MultiMap<String, ExpressionOperator>();
            MultiMap<Integer, String> positionAlias = new MultiMap<Integer, String>();
            MultiMap<Integer, ExpressionOperator> positionOperators = new MultiMap<Integer, ExpressionOperator>();
           
            for (LogicalOperator op : inputs) {
                int position = 0;
                for(LogicalPlan plan: mGroupByPlans.get(op)) {
                    for(LogicalOperator eOp: plan.getLeaves()) {
                        Schema.FieldSchema fs = ((ExpressionOperator)eOp).getFieldSchema();
                        if (null != fs) {
                            String alias = fs.alias;
                            if(null != alias) {
                                aliasLookup.put(alias, false);
                                aliasExop.put(alias, (ExpressionOperator)eOp);                           
                                positionAlias.put(position, alias);
                            }
                            //store the operators for each position in the group
                        } else {
                            log.warn("Field Schema of an expression operator cannot be null");
                        }
                        positionOperators.put(position, (ExpressionOperator)eOp);
                    }
                    ++position;
                }
            }
           
            /*
             * Now that the multi maps and the look up table are computed, do the following:
             * for each column in the group, in order, check whether the alias is already used.
             * If the alias is already used, check for the next unused alias.
             * If none of the aliases can be used then the alias of that column is null
             * If an alias is found usable, then use that alias and the schema of the expression operator
             * corresponding to that position. Note that the first operator for that position is
             * picked. The type checker will ensure that the correct schema is merged
             */
            int arity = mGroupByPlans.get(inputs.get(0)).size();
            for (int i = 0; i < arity; ++i) {
                Schema.FieldSchema groupByFs;
                Collection<String> cAliases = positionAlias.get(i);
                if(null != cAliases) {
                    Object[] aliases = cAliases.toArray();
                    for(int j = 0; j < aliases.length; ++j) {
                        String alias = (String) aliases[j];
                        if(null != alias) {
                            //Collection<ExpressionOperator> cEops = aliasExop.get(alias);
                            Collection<ExpressionOperator> cEops = positionOperators.get(i);
                            if(null != cEops) {
                                ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
                                if(null != eOp) {
                                    if(!aliasLookup.get(alias)) {
                                        Schema.FieldSchema fs = eOp.getFieldSchema();
                                        if(null != fs) {
                                            groupByFs = new Schema.FieldSchema(alias, fs.schema, fs.type);
                                            groupByFss.add(groupByFs);
                                            aliasLookup.put(alias, true);
                                        } else {
                                            groupByFs = new Schema.FieldSchema(alias, null, DataType.BYTEARRAY);
                                            groupByFss.add(groupByFs);
                                        }
                                        setFieldSchemaParent(groupByFs, positionOperators, i);
                                        break;
                                    } else {
                                        if((j + 1) < aliases.length) {
                                            continue;
                                        } else {
                                            //we have seen this alias before
                                            //just add the schema of the expression operator with the null alias
                                            Schema.FieldSchema fs = eOp.getFieldSchema();
                                            if(null != fs) {
                                                groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
                                                groupByFss.add(groupByFs);
                                                for(ExpressionOperator op: cEops) {
                                                    Schema.FieldSchema opFs = op.getFieldSchema();
                                                    if(null != opFs) {
                                                        groupByFs.setParent(opFs.canonicalName, eOp);
                                                    } else {
                                                        groupByFs.setParent(null, eOp);
                                                    }
                                                }
                                            } else {
                                                groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
                                                groupByFss.add(groupByFs);
                                            }
                                            setFieldSchemaParent(groupByFs, positionOperators, i);
                                            break;
                                        }
                                    }
                                } else {
                                    //should not be here
                                    log.debug("Cannot be here: we cannot have a collection of null expression operators");
                                }
                            } else {
                                //should not be here
                                log.debug("Cannot be here: we should have an expression operator at each position");
                            }
                        } else {
                            //should not be here
                            log.debug("Cannot be here: we cannot have a collection of null aliases ");
                        }
                    }
                } else {
                    //We do not have any alias for this position in the group by columns
                    //We have positions $1, $2, etc.
                    Collection<ExpressionOperator> cEops = positionOperators.get(i);
                    if(null != cEops) {
                        ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
                        if(null != eOp) {
                            Schema.FieldSchema fs = eOp.getFieldSchema();
                            if(null != fs) {
                                groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
                                groupByFss.add(groupByFs);
                            } else {
                                groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
                                groupByFss.add(groupByFs);
                            }
                        } else {
                            groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
                            groupByFss.add(groupByFs);
                        }
                    } else {
                        groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
                        groupByFss.add(groupByFs);
                    }
                    setFieldSchemaParent(groupByFs, positionOperators, i);
                }
            }           

            groupBySchema = new Schema(groupByFss);

            if(1 == arity) {
                byte groupByType = getAtomicGroupByType();
                Schema groupSchema = groupByFss.get(0).schema;
                Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", groupSchema, groupByType);
                setFieldSchemaParent(groupByFs, positionOperators, 0);
                fss.add(groupByFs);
            } else {
                Schema mergedGroupSchema = getTupleGroupBySchema();
                if(mergedGroupSchema.size() != groupBySchema.size()) {
                    mSchema = null;
                    mIsSchemaComputed = false;
                    int errCode = 2000;
                    String msg = "Internal error. Mismatch in group by arities. Expected: " + mergedGroupSchema + ". Found: " + groupBySchema;
                    throw new FrontendException(msg, errCode, PigException.BUG, false, null);
                } else {
                    for(int i = 0; i < mergedGroupSchema.size(); ++i) {
                        Schema.FieldSchema mergedFs = mergedGroupSchema.getField(i);
                        Schema.FieldSchema groupFs = groupBySchema.getField(i);
                        mergedFs.alias = groupFs.alias;
                        mergedGroupSchema.addAlias(mergedFs.alias, mergedFs);
                    }
                }
               
                Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", mergedGroupSchema);
                fss.add(groupByFs);
                for(int i = 0; i < arity; ++i) {
                    setFieldSchemaParent(groupByFs, positionOperators, i);
                }
            }
            for (LogicalOperator op : inputs) {
                try {
                    Schema.FieldSchema bagFs = new Schema.FieldSchema(op.getAlias(),
                            op.getSchema(), DataType.BAG);
                    fss.add(bagFs);
                    setFieldSchemaParent(bagFs, op);
                } catch (FrontendException ioe) {
                    mIsSchemaComputed = false;
                    mSchema = null;
                    throw ioe;
                }
            }
            mIsSchemaComputed = true;
            mSchema = new Schema(fss);
            mType = DataType.BAG;//mType is from the super class
        }
        return mSchema;
    }
View Full Code Here

                throw new FrontendException(msg, errCode, PigException.INPUT, false, null);               
            }

        }

        return new Schema(fsList) ;
    }
View Full Code Here

            }
        }
    }

    private void setFieldSchemaParent(Schema.FieldSchema fs, LogicalOperator op) throws FrontendException {
        Schema s = op.getSchema();
        if(null != s) {
            for(Schema.FieldSchema inputFs: s.getFields()) {
                if(null != inputFs) {
                    fs.setParent(inputFs.canonicalName, op);
                } else {
                    fs.setParent(null, op);
                }
View Full Code Here

    public ProjectionMap getProjectionMap() {
       
        if(mIsProjectionMapComputed) return mProjectionMap;
        mIsProjectionMapComputed = true;
       
        Schema outputSchema;
       
        try {
            outputSchema = getSchema();
        } catch (FrontendException fee) {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        if(outputSchema == null) {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>)mPlan.getPredecessors(this);
        if(predecessors == null) {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        //the column with the alias 'group' can be mapped in several ways
        //1. group A by $0;
        //Here the mapping is 0 -> (0, 0)
        //2. group A by ($0, $1);
        //Here there is no direct mapping and 'group' is an added column
        //3. cogroup A by $0, B by $0;
        //Here the mapping is 0 -> ((0, 0), (1, 0))
        //4. cogroup A by ($0, $1), B by ($0, $1);
        //Here there is no direct mapping and 'group' is an added column
        //For anything other than a simple project 'group' is an added column
       
        MultiMap<LogicalOperator, LogicalPlan> groupByPlans = getGroupByPlans();
       
        boolean groupByAdded = false;
        MultiMap<Integer, ProjectionMap.Column> mapFields = new MultiMap<Integer, ProjectionMap.Column>();
        List<Pair<Integer, Integer>> removedFields = new ArrayList<Pair<Integer, Integer>>();
       
        for(int inputNum = 0; (inputNum < predecessors.size()) && (!groupByAdded); ++inputNum) {
            LogicalOperator predecessor = predecessors.get(inputNum);

            List<LogicalPlan> predecessorPlans = (ArrayList<LogicalPlan>) groupByPlans.get(predecessor);

            int inputColumn = -1;
            for(LogicalPlan predecessorPlan: predecessorPlans) {               
                List<LogicalOperator> leaves = predecessorPlan.getLeaves();
                if(leaves == null || leaves.size() > 1) {
                    groupByAdded = true;
                    break;
                }
               
                if(leaves.get(0) instanceof LOProject) {
                    //find out if this project is a chain of projects
                    Pair<LOProject, LOCast> pair = LogicalPlan.chainOfProjects(predecessorPlan);
                    if (pair != null) {
                        LOProject topProject = pair.first;
                        if (topProject != null) {
                            inputColumn = topProject.getCol();
                            LOCast cast = pair.second;
                            if (cast != null) {
                                mapFields.put(0,
                                        new ProjectionMap.Column(
                                                new Pair<Integer, Integer>(inputNum, inputColumn), true, cast.getType()
                                        )
                                );
                            } else {
                                mapFields.put(0, new ProjectionMap.Column(new Pair<Integer, Integer>(inputNum, inputColumn)));
                            }
                        }
                    }
                } else {
                    groupByAdded = true;
                }               
            }
           
            Schema inputSchema;           
            try {
                inputSchema = predecessor.getSchema();
            } catch (FrontendException fee) {
                mProjectionMap = null;
                return mProjectionMap;
            }
           
            if(inputSchema != null) {
                for(int column = 0; column < inputSchema.size(); ++column) {
                    if(!groupByAdded && inputColumn != column) {
                        removedFields.add(new Pair<Integer, Integer>(inputNum, column));
                    }
                }
            }
View Full Code Here

        }
    }

    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.FLOAT));
    }
View Full Code Here

            for(LogicalOperator op: fifo) {
                if(!(op instanceof LOProject) && nestedAlias.equalsIgnoreCase(op.mAlias)) {
                    found = true;
                    // Expression operators do not have any schema
                    if(op instanceof RelationalOperator) {
                        Schema nestedSc = op.getSchema();
                        if(nestedSc == null) {
                            System.out.println("Schema for "+ alias+ "::" + nestedAlias + " unknown.");
                        } else {
                            System.out.println(alias+ "::" + nestedAlias + ": " + nestedSc.toString());
                        }
                        return nestedSc;
                    }
                    else {
                        int errCode = 1113;
View Full Code Here

        }
       
        if(mUserDefinedSchema != null) {
            forEachClone.mUserDefinedSchema = new ArrayList<Schema>();
            for (Iterator<Schema> it = mUserDefinedSchema.iterator(); it.hasNext();) {
                Schema s = it.next();
                forEachClone.mUserDefinedSchema.add(s != null ? s.clone() : null);
            }
        }
        return forEachClone;
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.impl.logicalLayer.schema.Schema

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.