Package org.apache.pig.ResourceSchema

Examples of org.apache.pig.ResourceSchema.ResourceFieldSchema


    @Test
    public void testGetBagSubSchema() throws Exception {

        // Define the expected schema.
        ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
        bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
            .setDescription("The tuple in the bag").setType(DataType.TUPLE);

        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] =
            new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);

        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
        ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);

        // Get the actual converted schema.
View Full Code Here


        System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple");
        UDFContext.getUDFContext().setClientSystemProps();

        // Define the expected schema.
        ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
        bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t")
            .setDescription("The tuple in the bag").setType(DataType.TUPLE);

        ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
        innerTupleFieldSchemas[0] =
            new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY);

        bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
        ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);

        // Get the actual converted schema.
View Full Code Here

    public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {

        List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
        for (HCatFieldSchema hfs : hcatSchema.getFields()) {
            ResourceFieldSchema rfSchema;
            rfSchema = getResourceSchemaFromFieldSchema(hfs);
            rfSchemaList.add(rfSchema);
        }
        ResourceSchema rSchema = new ResourceSchema();
        rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
View Full Code Here

    }

    private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs)
        throws IOException {
        ResourceFieldSchema rfSchema;
        // if we are dealing with a bag or tuple column - need to worry about subschema
        if (hfs.getType() == Type.STRUCT) {
            rfSchema = new ResourceFieldSchema()
                .setName(hfs.getName())
                .setDescription(hfs.getComment())
                .setType(getPigType(hfs))
                .setSchema(getTupleSubSchema(hfs));
        } else if (hfs.getType() == Type.ARRAY) {
            rfSchema = new ResourceFieldSchema()
                .setName(hfs.getName())
                .setDescription(hfs.getComment())
                .setType(getPigType(hfs))
                .setSchema(getBagSubSchema(hfs));
        } else {
            rfSchema = new ResourceFieldSchema()
                .setName(hfs.getName())
                .setDescription(hfs.getComment())
                .setType(getPigType(hfs))
                .setSchema(null); // no munging inner-schemas
        }
View Full Code Here

            innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
                .replaceAll("FIELDNAME", hfs.getName());
        }

        ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
        bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
            .setDescription("The tuple in the bag")
            .setType(DataType.TUPLE);
        HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
        if (arrayElementFieldSchema.getType() == Type.STRUCT) {
            bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
        } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
            ResourceSchema s = new ResourceSchema();
            List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
            s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
            bagSubFieldSchemas[0].setSchema(s);
        } else {
            ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
            innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
                .setDescription("The inner field in the tuple in the bag")
                .setType(getPigType(arrayElementFieldSchema))
                .setSchema(null); // the element type is not a tuple - so no subschema
            bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
        }
View Full Code Here

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();

        for (ColumnDef cdef : cfDef.column_metadata)
        {
            ResourceFieldSchema valSchema = new ResourceFieldSchema();
            AbstractType validator = validators.get(cdef.name);
            if (validator == null)
                validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
            valSchema.setName(new String(cdef.getName()));
            valSchema.setType(getPigType(validator));
            allSchemaFields.add(valSchema);
        }

        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

        // get default marshallers and validators
        Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));
        bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        if (!widerows && (cfInfo.compactCqlTable || !cfInfo.cql3Table))
        {
            // defined validators/indexes
            for (ColumnDef cdef : cfDef.column_metadata)
            {
                // make a new tuple for each col/val pair
                ResourceSchema innerTupleSchema = new ResourceSchema();
                ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
                innerTupleField.setType(DataType.TUPLE);
                innerTupleField.setSchema(innerTupleSchema);
                innerTupleField.setName(new String(cdef.getName()));

                ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
                idxColSchema.setName("name");
                idxColSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));

                ResourceFieldSchema valSchema = new ResourceFieldSchema();
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                valSchema.setName("value");
                valSchema.setType(getPigType(validator));

                innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
                allSchemaFields.add(innerTupleField);
            }  
        }

        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

     * @throws IOException
     */
    static public void checkQueryOutputsAfterSortRecursive(Iterator<Tuple> actualResultsIt,
            String[] expectedResArray, LogicalSchema schema) throws IOException {
        LogicalFieldSchema fs = new LogicalFieldSchema("tuple", schema, DataType.TUPLE);
        ResourceFieldSchema rfs = new ResourceFieldSchema(fs);

        LoadCaster caster = new Utf8StorageConverter();
        List<Tuple> actualResList = new ArrayList<Tuple>();
        while(actualResultsIt.hasNext()){
            actualResList.add(actualResultsIt.next());
View Full Code Here

    /** wrap a pig schema as tuple */
    public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
        ResourceSchema listSchema = new ResourceSchema();
        listSchema.setFields(new ResourceFieldSchema[] { subFieldSchema });

        ResourceFieldSchema tupleWrapper = new ResourceFieldSchema();
        tupleWrapper.setType(DataType.TUPLE);
        tupleWrapper.setName(PIG_TUPLE_WRAPPER);
        tupleWrapper.setSchema(listSchema);

        return tupleWrapper;
    }
View Full Code Here

     * @param fieldName
     * @return ResourceSchema
     */
    public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
        ResourceFieldSchema[] fieldSchemas = schema.getFields();
        ResourceFieldSchema sourceTagSchema = new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
        ResourceFieldSchema[] fieldSchemasWithSourceTag = new ResourceFieldSchema[fieldSchemas.length + 1];
        fieldSchemasWithSourceTag[0] = sourceTagSchema;
        for(int j = 0; j < fieldSchemas.length; j++) {
            fieldSchemasWithSourceTag[j + 1] = fieldSchemas[j];
        }
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema.ResourceFieldSchema

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.