Package org.apache.pig.impl.logicalLayer.schema

Examples of org.apache.pig.impl.logicalLayer.schema.Schema
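The snippets below are taken from the Pig sources and show how the front end builds and inspects Schema objects. As a minimal, self-contained sketch (not part of the snippets below), a Schema is an ordered list of Schema.FieldSchema entries, each pairing an optional alias with a DataType:

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class SchemaSketch {
    public static void main(String[] args) throws FrontendException {
        // Build a schema equivalent to: (name: chararray, age: int)
        Schema s = new Schema();
        s.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
        s.add(new Schema.FieldSchema("age", DataType.INT));

        // Fields can be looked up by position or by alias.
        System.out.println(s.getField(0).alias);                    // name
        System.out.println(s.getField("age").type == DataType.INT); // true
        System.out.println(s);                                      // prints the schema
    }
}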


    /**
     * @see org.apache.pig.EvalFunc#getArgToFuncMapping()
     */
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
        Schema s = new Schema();
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        funcList.add(new FuncSpec(this.getClass().getName(), s));
        return funcList;
    }
View Full Code Here
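For context, getArgToFuncMapping lets a UDF advertise which argument schemas it accepts so the front end can bind the matching implementation. Below is a hypothetical EvalFunc built around the same three-chararray argument schema as the snippet above; the class name and exec logic are invented for illustration and are not part of the original code:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

// Hypothetical UDF: REPLACE_ALL(source, regex, replacement)
public class ReplaceAll extends EvalFunc<String> {

    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() != 3) {
            return null;
        }
        String source = (String) input.get(0);
        String regex = (String) input.get(1);
        String replacement = (String) input.get(2);
        return source == null ? null : source.replaceAll(regex, replacement);
    }

    // Same pattern as the snippet above: advertise one FuncSpec whose
    // argument schema is three unnamed chararray fields.
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
        Schema s = new Schema();
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        funcList.add(new FuncSpec(this.getClass().getName(), s));
        return funcList;
    }
}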


        }
    }

    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }
View Full Code Here

        }
    }

    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.LONG));
    }
View Full Code Here
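The two outputSchema snippets above declare a single unnamed chararray or long result field. As a rough, self-contained sketch of a complete UDF using that pattern, assuming an invented class name and exec body (only the outputSchema override mirrors the snippet above):

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;

// Hypothetical UDF: returns the number of tuples in the bag passed as argument 0.
public class BagSize extends EvalFunc<Long> {

    @Override
    public Long exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0 || input.get(0) == null) {
            return null;
        }
        DataBag bag = (DataBag) input.get(0);
        return bag.size();
    }

    // As in the snippets above: the output is a single, unnamed field,
    // here of type long.
    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.LONG));
    }
}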

               
                // rebuild schemas of the foreach and the successor after the foreach modification
                foreach.regenerateSchema();
                successor.regenerateSchema();
               
                Schema successorSchema = successor.getSchema();
               
                if(successorSchema == null) {
                    int errCode = 2154;
                    String msg = "Internal error. Schema of successor cannot be null for pushing down foreach with flatten.";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
                }
               
                flattenList = new ArrayList<Boolean>();
               
                ArrayList<LogicalPlan> foreachInnerPlans = new ArrayList<LogicalPlan>();
               
                for(int i = 0; i < successorSchema.size(); ++i) {
                    LogicalPlan innerPlan = new LogicalPlan();
                    LOProject project = new LOProject(innerPlan, OperatorKey
                            .genOpKey(foreach.getOperatorKey().scope),
                            successor, i);
                    innerPlan.add(project);
View Full Code Here

                }
               
                try {
                    planFs = ((ExpressionOperator)op).getFieldSchema();
                    log.debug("planFs: " + planFs);
                    Schema userDefinedSchema = null;
                    if(null != mUserDefinedSchema) {
                        userDefinedSchema = mUserDefinedSchema.get(planCtr);
                    }
                    if(null != planFs) {
                        String outerCanonicalAlias = op.getAlias();
                        if(null == outerCanonicalAlias) {
                            outerCanonicalAlias = planFs.alias;
                        }
                        log.debug("Outer canonical alias: " + outerCanonicalAlias);
                        if(mFlatten.get(planCtr)) {
                            //need to extract the children and create the aliases
                            //assumption here is that flatten is only for one column
                            //i.e., flatten(A), flatten(A.x) and NOT
                            //flatten(B.(x,y,z))
                            Schema s = planFs.schema;
                            if(null != s && s.isTwoLevelAccessRequired()) {
                                // this is the case where the schema is that of
                                // a bag which has just one tuple fieldschema which
                                // in turn has a list of fieldschemas. The schema
                                // after flattening would consist of the fieldSchemas
                                // present in the tuple

                                // check that indeed we only have one field schema
                                // which is that of a tuple
                                if(s.getFields().size() != 1) {
                                    int errCode = 1008;
                                    String msg = "Expected a bag schema with a single " +
                                            "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                                            " but got a bag schema with multiple elements.";
                                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                                }
                                Schema.FieldSchema tupleFS = s.getField(0);
                                if(tupleFS.type != DataType.TUPLE) {
                                    int errCode = 1009;
                                    String msg = "Expected a bag schema with a single " +
                                            "element of type "+ DataType.findTypeName(DataType.TUPLE) +
                                            " but got an element of type " +
                                            DataType.findTypeName(tupleFS.type);
                                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                                }
                                s = tupleFS.schema;
                            }
                            if(null != s && s.size() != 0) {
                                for(int i = 0; i < s.size(); ++i) {
                                    Schema.FieldSchema fs;
                                    fs = Schema.FieldSchema.copyAndLink(s.getField(i), op);
                                    log.debug("fs: " + fs);
                                    if(null != userDefinedSchema) {
                                        Schema.FieldSchema userDefinedFieldSchema;
                                        try {
                                            if(i < userDefinedSchema.size()) {
                                                userDefinedFieldSchema = userDefinedSchema.getField(i);
                                                fs = fs.mergePrefixFieldSchema(userDefinedFieldSchema);
                                            }
                                        } catch (SchemaMergeException sme) {
                                            int errCode = 1016;
                                            String msg = "Problems in merging user defined schema";
                                            throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
                                        }
                                        outerCanonicalAlias = null;
                                    }
                                    String innerCanonicalAlias = fs.alias;
                                    Schema.FieldSchema newFs;
                                    if((null != outerCanonicalAlias) && (null != innerCanonicalAlias)) {
                                        String disambiguatorAlias = outerCanonicalAlias + "::" + innerCanonicalAlias;
                                        newFs = new Schema.FieldSchema(disambiguatorAlias, fs.schema, fs.type);
                                        newFs.setParent(s.getField(i).canonicalName, op);
                                        fss.add(newFs);
                                        mSchemaPlanMapping.add(plan);
                                        updateAliasCount(aliases, disambiguatorAlias);
                                        //it's fine if there are duplicates
                                        //we just need to record if it's due to
                                        //flattening
                                    } else {
                                        newFs = new Schema.FieldSchema(fs);
                                        newFs.setParent(s.getField(i).canonicalName, op);
                                        fss.add(newFs);
                                        mSchemaPlanMapping.add(plan);
                                    }
                                    updateAliasCount(aliases, innerCanonicalAlias);
                                    flattenAlias.put(newFs, innerCanonicalAlias);
                                    inverseFlattenAlias.put(innerCanonicalAlias, true);
                                }
                            } else {
                                Schema.FieldSchema newFs;
                                if(null != userDefinedSchema) {
                                    if(!DataType.isSchemaType(planFs.type)) {
                                        if(userDefinedSchema.size() > 1) {
                                            int errCode = 1017;
                                            String msg = "Schema mismatch. A basic type on flattening cannot have more than one column. User defined schema: " + userDefinedSchema;
                                            throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                                        }
                                        newFs = new Schema.FieldSchema(null, planFs.type);
                                        try {
                                            newFs = newFs.mergePrefixFieldSchema(userDefinedSchema.getField(0));
                                        } catch (SchemaMergeException sme) {
                                            int errCode = 1016;
                                            String msg = "Problems in merging user defined schema";
                                            throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
                                        }
                                        updateAliasCount(aliases, newFs.alias);
                                        fss.add(newFs);
                                        mSchemaPlanMapping.add(plan);
                                        newFs.setParent(planFs.canonicalName, op);
                                    } else {
                                        for(Schema.FieldSchema ufs: userDefinedSchema.getFields()) {
                                            Schema.FieldSchema.setFieldSchemaDefaultType(ufs, DataType.BYTEARRAY);
                                            newFs = new Schema.FieldSchema(ufs);
                                            fss.add(newFs);
                                            mSchemaPlanMapping.add(plan);
                                            newFs.setParent(null, op);
                                            updateAliasCount(aliases, ufs.alias);
                                        }
                                    }
                                } else {
                                    if(!DataType.isSchemaType(planFs.type)) {
                                        newFs = new Schema.FieldSchema(planFs.alias, planFs.type);
                                    } else {
                                        newFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
                                    }
                                    fss.add(newFs);
                                    mSchemaPlanMapping.add(plan);
                                    newFs.setParent(planFs.canonicalName, op);
                                }
                            }
                        } else {
                            //just populate the schema with the field schema of the expression operator
                            //check if the user has defined a schema for the operator; compare the schema
                            //with that of the expression operator field schema and then add it to the list
                            Schema.FieldSchema newFs = Schema.FieldSchema.copyAndLink(planFs, op);
                            if(null != userDefinedSchema) {
                                try {
                                    newFs = newFs.mergePrefixFieldSchema(userDefinedSchema.getField(0));
                                    updateAliasCount(aliases, newFs.alias);
                                } catch (SchemaMergeException sme) {
                                    int errCode = 1016;
                                    String msg = "Problems in merging user defined schema";
                                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null, sme);
                                }
                            }
                            newFs.setParent(planFs.canonicalName, op);
                            fss.add(newFs);
                            mSchemaPlanMapping.add(plan);
                        }
                    } else {
                        //did not get a valid list of field schemas
                        String outerCanonicalAlias = null;
                        if(null != userDefinedSchema) {
                            Schema.FieldSchema userDefinedFieldSchema = new Schema.FieldSchema(userDefinedSchema.getField(0));
                            fss.add(userDefinedFieldSchema);
                            mSchemaPlanMapping.add(plan);
                            userDefinedFieldSchema.setParent(null, op);
                            updateAliasCount(aliases, userDefinedFieldSchema.alias);
                        } else {
                            mSchema = null;
                            mIsSchemaComputed = true;
                            return mSchema;
                        }
                    }
                } catch (FrontendException fee) {
                    mSchema = null;
                    mIsSchemaComputed = false;
                    throw fee;
                }
            }
            //check for duplicate column names and throw an error if there are duplicates
            //ensure that flatten gets rid of duplicate column names when the checks are
            //being done
            log.debug(" flattenAlias: " + flattenAlias);
            log.debug(" inverseFlattenAlias: " + inverseFlattenAlias);
            log.debug(" aliases: " + aliases);
            log.debug(" fss.size: " + fss.size());
            boolean duplicates = false;
            Map<String, Integer> duplicateAliases = new HashMap<String, Integer>();
            for(Map.Entry<String, Integer> e: aliases.entrySet()) {
                Integer count = e.getValue();
                if(count > 1) { //not checking for null here as counts are initialized to 1
                    Boolean inFlatten = false;
                    log.debug("inFlatten: " + inFlatten + " inverseFlattenAlias: " + inverseFlattenAlias);
                    inFlatten = inverseFlattenAlias.get(e.getKey());
                    log.debug("inFlatten: " + inFlatten + " inverseFlattenAlias: " + inverseFlattenAlias);
                    if((null == inFlatten) || (!inFlatten)) {
                        duplicates = true;
                        duplicateAliases.put(e.getKey(), count);
                    }
                }
            }
            if(duplicates) {
                String errMessage;
                StringBuffer sb = new StringBuffer("Found duplicates in schema. ");
                if(duplicateAliases.size() > 0) {
                    Set<Map.Entry<String, Integer>> es = duplicateAliases.entrySet();
                    Iterator<Map.Entry<String, Integer>> iter = es.iterator();
                    Map.Entry<String, Integer> e = iter.next();
                    sb.append(e.getKey());
                    sb.append(": ");
                    sb.append(e.getValue());
                    sb.append(" columns");
                    while(iter.hasNext()) {
                        e = iter.next();
                        sb.append(", ");
                        sb.append(e.getKey());
                        sb.append(": ");
                        sb.append(e.getValue());
                        sb.append(" columns");
                    }
                }
                sb.append(". Please alias the columns with unique names.");
                errMessage = sb.toString();
                log.debug(errMessage);
                int errCode = 1007;
                throw new FrontendException(errMessage, errCode, PigException.INPUT, false, null);
            }
            mSchema = new Schema(fss);
            //add the aliases that are unique after flattening
            for(int i = 0; i < mSchema.getFields().size(); i++) {
                Schema.FieldSchema fs = mSchema.getFields().get(i);
                String alias = flattenAlias.get(fs);
                Integer count = aliases.get(alias);
View Full Code Here
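The flatten branch above expects a bag schema wrapping exactly one tuple field and, when both an outer and an inner alias are known, emits disambiguated aliases of the form outer::inner. The following hypothetical sketch (not taken from the source above) builds that kind of schema and notes in comments the aliases the code would produce:

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class FlattenAliasSketch {
    public static void main(String[] args) throws FrontendException {
        // Inner tuple schema: (x: int, y: chararray)
        Schema tupleSchema = new Schema();
        tupleSchema.add(new Schema.FieldSchema("x", DataType.INT));
        tupleSchema.add(new Schema.FieldSchema("y", DataType.CHARARRAY));

        // Bag schema holding exactly one tuple field, the shape the
        // two-level-access branch above checks for.
        Schema bagSchema = new Schema(
                new Schema.FieldSchema("t", tupleSchema, DataType.TUPLE));
        Schema.FieldSchema bagField =
                new Schema.FieldSchema("A", bagSchema, DataType.BAG);

        // Flattening a field with outer alias "A" and inner aliases "x", "y"
        // would yield fields aliased "A::x" and "A::y"; with no outer alias,
        // the bare inner aliases "x" and "y" would be kept instead.
        System.out.println(bagField);
    }
}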

        LogicalPlanValidationExecutor validator =
            new LogicalPlanValidationExecutor(mPlan, pigContext, isBeforeOptimizer);
        validator.validate(mPlan, collector);
        List<LogicalOperator> preds = mPlan.getPredecessors(loUnion);

        //create the merged schema
        Schema mergedSchema ;
        try {
            mergedSchema = loUnion.getSchema();
        } catch (FrontendException e) {
            String msg = "Error creating merged schemas for union-onschema operator : "
                + e.getMessage();
            throw new UnionOnSchemaSetException(msg, 1116, PigException.INPUT, e);
        }


        //create a user defined schema list for use in LOForeach
        // using merged schema
        ArrayList<Schema> mergedSchemaList = new ArrayList<Schema>();
        for(Schema.FieldSchema fs : mergedSchema.getFields()){
            // Use NULL datatype because the type will be set by the TypeChecking
            // visitors
            mergedSchemaList.add(
                    new Schema(new Schema.FieldSchema(fs.alias, DataType.NULL))
            );
        }

        // add a foreach for inputs that don't match mergedSchema, projecting
        // null for columns that don't exist in the input
        for(LogicalOperator lop : preds)                
        {                    
            try {
                if(! lop.getSchema().equals(mergedSchema))
                {
                    //the mergedSchema is different from this operators schema
                    // so add a foreach to project columns appropriately
                    int mergeSchSz = mergedSchema.size();
                    ArrayList<LogicalPlan> generatePlans =
                        new ArrayList<LogicalPlan>(mergeSchSz);
                    ArrayList<Boolean> flattenList =
                        new ArrayList<Boolean>(mergeSchSz);

                    String scope = loUnion.getOperatorKey().getScope();
                    for(Schema.FieldSchema fs : mergedSchema.getFields()) {
                        LogicalPlan projectPlan = new LogicalPlan();
                        Schema inpSchema = lop.getSchema();
                        flattenList.add(Boolean.FALSE);

                        int inpPos = inpSchema.getPositionSubName(fs.alias);

                        LogicalOperator columnProj = null;
                        boolean isCastNeeded = false;
                        if(inpPos == -1){  
                            //the column is not present in schema of this input,
                            // so project null
                            columnProj =
                                new LOConst(mPlan, getNextId(scope), null);
                            // cast is necessary if the type in schema is
                            // not a BYTEARRAY
                            if(fs.type != DataType.BYTEARRAY){
                                isCastNeeded = true;
                            }
                        }else {
                            //project the column from input
                            columnProj =
                                new LOProject(projectPlan,
                                        new OperatorKey(
                                                scope,
                                                NodeIdGenerator.getGenerator().getNextNodeId(scope)
                                        ),
                                        lop, inpPos
                                );

                            //cast is needed if types are different.   
                            //compatibility of types has already been checked
                            //during creation of mergedSchema
                            Schema.FieldSchema inpFs = inpSchema.getFieldSubNameMatch(fs.alias);
                            if(inpFs.type != fs.type)
                                isCastNeeded = true;
                        }
                        projectPlan.add(columnProj);
View Full Code Here
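The rewrite above projects a null constant for any column an input lacks and adds a cast whenever the input's type differs from the merged type. This standalone sketch (the schemas and class name are invented for illustration) runs the same getPositionSubName/getFieldSubNameMatch checks on plain Schema objects:

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class UnionOnSchemaSketch {
    public static void main(String[] args) throws FrontendException {
        // Schema of one union input: (id: int, name: chararray)
        Schema input = new Schema();
        input.add(new Schema.FieldSchema("id", DataType.INT));
        input.add(new Schema.FieldSchema("name", DataType.CHARARRAY));

        // Merged schema with a wider id type and an extra column.
        Schema merged = new Schema();
        merged.add(new Schema.FieldSchema("id", DataType.LONG));
        merged.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
        merged.add(new Schema.FieldSchema("age", DataType.INT));

        for (Schema.FieldSchema fs : merged.getFields()) {
            int inpPos = input.getPositionSubName(fs.alias);
            if (inpPos == -1) {
                // Missing column: project a null constant; a cast is needed
                // unless the merged type is bytearray.
                System.out.println(fs.alias + " -> project null, cast: "
                        + (fs.type != DataType.BYTEARRAY));
            } else {
                // Present column: cast only if the input type differs.
                Schema.FieldSchema inpFs = input.getFieldSubNameMatch(fs.alias);
                System.out.println(fs.alias + " -> project column " + inpPos
                        + ", cast: " + (inpFs.type != fs.type));
            }
        }
    }
}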

    /**
     * @throws FrontendException
     *
     */
    private void setupColNameMaps() throws FrontendException {
        Schema loadFuncSchema = loLoad.getDeterminedSchema();
        Schema loLoadSchema = loLoad.getSchema();
        for(int i = 0; i < loadFuncSchema.size(); i++) {
            colNameMap.put(loadFuncSchema.getField(i).alias,
                    (i < loLoadSchema.size() ? loLoadSchema.getField(i).alias :
                        loadFuncSchema.getField(i).alias));
           
            reverseColNameMap.put((i < loLoadSchema.size() ? loLoadSchema.getField(i).alias :
                        loadFuncSchema.getField(i).alias),
                        loadFuncSchema.getField(i).alias);
        }
    }
View Full Code Here

    public static Schema translateSchema(LogicalSchema schema) {      
        if (schema == null) {
            return null;
        }
       
        Schema s2 = new Schema();
        List<LogicalSchema.LogicalFieldSchema> ll = schema.getFields();
        for (LogicalSchema.LogicalFieldSchema f: ll) {
            Schema.FieldSchema f2 = null;
            try {
                f2 = new Schema.FieldSchema(f.alias, translateSchema(f.schema), f.type);
                f2.canonicalName = ((Long)f.uid).toString();
                s2.add(f2);
            } catch (FrontendException e) {
                // A field whose schema cannot be translated is silently skipped.
            }
        }
       
        s2.setTwoLevelAccessRequired(schema.isTwoLevelAccessRequired());
       
        return s2;
    }
View Full Code Here

    public ProjectionMap getProjectionMap() {
       
        if(mIsProjectionMapComputed) return mProjectionMap;
        mIsProjectionMapComputed = true;
       
        Schema outputSchema;
       
        try {
            outputSchema = getSchema();
        } catch (FrontendException fee) {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        if(outputSchema == null) {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        Schema inputSchema = null;       
       
        List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>)mPlan.getPredecessors(this);
        if(predecessors != null) {
            try {
                inputSchema = predecessors.get(0).getSchema();
            } catch (FrontendException fee) {
                mProjectionMap = null;
                return mProjectionMap;
            }
        } else {
            mProjectionMap = null;
            return mProjectionMap;
        }
       
        List<Integer> addedFields = new ArrayList<Integer>();
        List<Pair<Integer, Integer>> removedFields = new ArrayList<Pair<Integer, Integer>>();
       
        for(int i = 0; i < outputSchema.size(); ++i) {
            //add all the elements of the output schema to the added fields
            addedFields.add(i);
        }
       
        if(inputSchema != null) {
            //add all the elements of the input schema to the removed fields
            for(int i = 0; i < inputSchema.size(); ++i) {
                removedFields.add(new Pair<Integer, Integer>(0, i));
            }
        }
        mProjectionMap = new ProjectionMap(null, (removedFields.size() == 0? null: removedFields), addedFields);
        return mProjectionMap;
View Full Code Here

       
        return s2;
    }
   
    public static Schema.FieldSchema translateFieldSchema(LogicalSchema.LogicalFieldSchema fs) {     
        Schema newSchema = null;
        if (fs.schema!=null) {
            newSchema = translateSchema(fs.schema);
        }
       
        Schema.FieldSchema newFs = null;
View Full Code Here


