Package org.apache.pig.ResourceSchema

Examples of org.apache.pig.ResourceSchema.ResourceFieldSchema


        // get default marshallers and validators
        List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(2)));

        // will become the bag of tuples
        ResourceFieldSchema bagFieldSchema = new ResourceFieldSchema();
        bagFieldSchema.setName("columns");
        bagFieldSchema.setType(DataType.BAG);
        ResourceSchema bagSchema = new ResourceSchema();

        List<ResourceFieldSchema> tupleFields = new ArrayList<ResourceFieldSchema>();

        // default comparator/validator
        ResourceSchema innerTupleSchema = new ResourceSchema();
        ResourceFieldSchema tupleField = new ResourceFieldSchema();
        tupleField.setType(DataType.TUPLE);
        tupleField.setSchema(innerTupleSchema);

        ResourceFieldSchema colSchema = new ResourceFieldSchema();
        colSchema.setName("name");
        colSchema.setType(getPigType(marshallers.get(0)));
        tupleFields.add(colSchema);

        ResourceFieldSchema valSchema = new ResourceFieldSchema();
        AbstractType validator = marshallers.get(1);
        valSchema.setName("value");
        valSchema.setType(getPigType(validator));
        tupleFields.add(valSchema);

        // defined validators/indexes
        for (ColumnDef cdef : cfDef.column_metadata)
        {
            colSchema = new ResourceFieldSchema();
            colSchema.setName(new String(cdef.getName()));
            colSchema.setType(getPigType(marshallers.get(0)));
            tupleFields.add(colSchema);

            valSchema = new ResourceFieldSchema();
            validator = validators.get(cdef.getName());
            if (validator == null)
                validator = marshallers.get(1);
            valSchema.setName("value");
            valSchema.setType(getPigType(validator));
            tupleFields.add(valSchema);
        }
        innerTupleSchema.setFields(tupleFields.toArray(new ResourceFieldSchema[tupleFields.size()]));

        // a bag can contain only one tuple, but that tuple can contain anything
View Full Code Here


  public static ResourceSchema getResourceSchema(HowlSchema howlSchema) throws IOException {

    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
    for (HowlFieldSchema hfs : howlSchema.getFields()){
      ResourceFieldSchema rfSchema;
      rfSchema = getResourceSchemaFromFieldSchema(hfs);
      rfSchemaList.add(rfSchema);
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
View Full Code Here

  }

  private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HowlFieldSchema hfs)
      throws IOException {
    ResourceFieldSchema rfSchema;
    // if we are dealing with a bag or tuple column - need to worry about subschema
    if(hfs.getType() == Type.STRUCT) {
        rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(getTupleSubSchema(hfs));
    } else if(hfs.getType() == Type.ARRAY) {
        rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(getBagSubSchema(hfs));
    } else {
      rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(null); // no munging inner-schemas
    }
View Full Code Here

    // in either case the element type of the array is represented in a
    // tuple field schema in the bag's field schema - the second case (struct)
    // more naturally translates to the tuple - in the first case (array<Type>)
    // we simulate the tuple by putting the single field in a tuple
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HowlFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if(arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else {
      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield")
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema))
        .setSchema(null); // the element type is not a tuple - so no subschema
      bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    }
View Full Code Here

        ExpressionOperator from = (ExpressionOperator) logToPhyMap.get(op
                .getExpression());
        physOp.setResultType(op.getType());
        try {
            if (op.getType()==DataType.BAG || op.getType()==DataType.TUPLE) {
                physOp.setFieldSchema(new ResourceFieldSchema(op.getFieldSchema()));
            }
        } catch (FrontendException e) {
            int errCode = 2216;
            String msg = "Cannot get field schema for "+op;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
View Full Code Here

    private BagFactory bagFactory = DefaultBagFactory.getInstance();
    PigStorage ps = new PigStorage();
   

    ResourceFieldSchema getTupleFieldSchema() throws IOException {
        ResourceFieldSchema stringfs = new ResourceFieldSchema();
        stringfs.setType(DataType.CHARARRAY);
        ResourceFieldSchema intfs = new ResourceFieldSchema();
        intfs.setType(DataType.INTEGER);
       
        ResourceSchema tupleSchema = new ResourceSchema();
        tupleSchema.setFields(new ResourceFieldSchema[]{intfs, stringfs});
        ResourceFieldSchema tuplefs = new ResourceFieldSchema();
        tuplefs.setSchema(tupleSchema);
        tuplefs.setType(DataType.TUPLE);
       
        return tuplefs;
    }
View Full Code Here

       
        return tuplefs;
    }
   
    public ResourceFieldSchema getBagFieldSchema() throws IOException{
        ResourceFieldSchema tuplefs = getTupleFieldSchema();
       
        ResourceSchema outBagSchema = new ResourceSchema();
        outBagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
        ResourceFieldSchema outBagfs = new ResourceFieldSchema();
        outBagfs.setSchema(outBagSchema);
        outBagfs.setType(DataType.BAG);
       
        return outBagfs;
    }
View Full Code Here

       
        return outBagfs;
    }
   
    ResourceFieldSchema getLongFieldSchema() {
        ResourceFieldSchema longfs = new ResourceFieldSchema();
        longfs.setType(DataType.LONG);
        return longfs;
    }
View Full Code Here

        // get default marshallers and validators
        List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(2)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(0)));
        bagvalSchema.setType(getPigType(marshallers.get(1)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        // defined validators/indexes
        for (ColumnDef cdef : cfDef.column_metadata)
        {
            // make a new tuple for each col/val pair
            ResourceSchema innerTupleSchema = new ResourceSchema();
            ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
            innerTupleField.setType(DataType.TUPLE);
            innerTupleField.setSchema(innerTupleSchema);
            innerTupleField.setName(new String(cdef.getName()));

            ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
            idxColSchema.setName("name");
            idxColSchema.setType(getPigType(marshallers.get(0)));

            ResourceFieldSchema valSchema = new ResourceFieldSchema();
            AbstractType validator = validators.get(cdef.name);
            if (validator == null)
                validator = marshallers.get(1);
            valSchema.setName("value");
            valSchema.setType(getPigType(validator));

            innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
            allSchemaFields.add(innerTupleField);
        }
        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(1);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

        BufferedReader br = new BufferedReader(new FileReader(outputFileName));
        for(String line=br.readLine();line!=null;line=br.readLine()){
            String[] flds = line.split(":",-1);
            Tuple t = new DefaultTuple();
           
            ResourceFieldSchema bagfs = GenRandomData.getSmallTupDataBagFieldSchema();
            ResourceFieldSchema tuplefs = GenRandomData.getSmallTupleFieldSchema();
           
            t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema.ResourceFieldSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.