Package org.apache.pig.ResourceSchema

Examples of org.apache.pig.ResourceSchema.ResourceFieldSchema


    }
   
    @Test
    public  void testBytesToBag() throws IOException
    {
        ResourceFieldSchema fs = GenRandomData.getFullTupTextDataBagFieldSchema();
       
        for (int i = 0; i < MAX; i++) {
            DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100);
            DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
            assertTrue(TestHelper.bagEquals(b, convertedBag));
View Full Code Here


    }
   
    @Test
    public void testBytesToBagWithConversion() throws IOException {
        DataBag b = GenRandomData.genFloatDataBag(r,5,100);
        ResourceFieldSchema fs = GenRandomData.getFloatDataBagFieldSchema(5);
        DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs);
       
        Iterator<Tuple> iter1 = b.iterator();
        Iterator<Tuple> iter2 = convertedBag.iterator();
        for (int i=0;i<100;i++) {
View Full Code Here

   
    @Test
    public void testBytesToTupleWithConversion() throws IOException {
        for (int i=0;i<100;i++) {
            Tuple t = GenRandomData.genMixedTupleToConvert(r);
            ResourceFieldSchema fs = GenRandomData.getMixedTupleToConvertFieldSchema();
            Tuple convertedTuple = ps.getLoadCaster().bytesToTuple(t.toString().getBytes(), fs);
           
            assertTrue(convertedTuple.get(0) instanceof String);
            assertTrue(convertedTuple.get(0).equals(((Integer)t.get(0)).toString()));           
           
View Full Code Here

    }
   
    public void testBytesToComplexTypeMisc() throws IOException, ParseException {
        String s = "(a,b";
        Schema schema = Utils.getSchemaFromString("t:tuple(a:chararray, b:chararray)");
        ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
        Tuple t = ps.getLoadCaster().bytesToTuple(s.getBytes(), rfs);
        assertTrue(t==null);
       
        s = "{(a,b}";
        schema = Utils.getSchemaFromString("b:bag{t:tuple(a:chararray, b:chararray)}");
View Full Code Here

        // get default marshallers and validators
        Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));
        bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        if (!widerows && (cfInfo.compactCqlTable || !cfInfo.cql3Table))
        {
            // defined validators/indexes
            for (ColumnDef cdef : cfDef.column_metadata)
            {
                // make a new tuple for each col/val pair
                ResourceSchema innerTupleSchema = new ResourceSchema();
                ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
                innerTupleField.setType(DataType.TUPLE);
                innerTupleField.setSchema(innerTupleSchema);
                innerTupleField.setName(new String(cdef.getName()));

                ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
                idxColSchema.setName("name");
                idxColSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));

                ResourceFieldSchema valSchema = new ResourceFieldSchema();
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                valSchema.setName("value");
                valSchema.setType(getPigType(validator));

                innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
                allSchemaFields.add(innerTupleField);
            }  
        }

        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

    public ResourceFieldSchema getSchema() {
        return schemaField;
    }

    public void setSchema(ResourceSchema schema) {
        schemaField = new ResourceFieldSchema();
        schemaField.setType(DataType.TUPLE);
        try {
            schemaField.setSchema(schema);
        } catch (IOException ex) {
            throw new EsHadoopIllegalStateException(String.format("Cannot use schema [%s]", schema), ex);
View Full Code Here

        // expect PigTuple holding a Tuple with only one field - chararray or bytearray
        Assert.isTrue(from instanceof PigTuple,
                String.format("Unexpected object type, expecting [%s], given [%s]", PigTuple.class, from.getClass()));

        PigTuple pt = (PigTuple) from;
        ResourceFieldSchema schema = pt.getSchema();

        // unwrap the tuple
        ResourceSchema tupleSchema = schema.getSchema();

        // empty tuple shortcut
        if (tupleSchema == null) {
            // write empty doc
            to.bytes("{}");
View Full Code Here

        // get default marshallers and validators
        Map<MarshallerType, AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(MarshallerType.KEY_VALIDATOR)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));
        bagvalSchema.setType(getPigType(marshallers.get(MarshallerType.DEFAULT_VALIDATOR)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        // defined validators/indexes
        for (ColumnDef cdef : cfDef.column_metadata)
        {
            // make a new tuple for each col/val pair
            ResourceSchema innerTupleSchema = new ResourceSchema();
            ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
            innerTupleField.setType(DataType.TUPLE);
            innerTupleField.setSchema(innerTupleSchema);
            innerTupleField.setName(new String(cdef.getName()));

            ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
            idxColSchema.setName("name");
            idxColSchema.setType(getPigType(marshallers.get(MarshallerType.COMPARATOR)));

            ResourceFieldSchema valSchema = new ResourceFieldSchema();
            AbstractType validator = validators.get(cdef.name);
            if (validator == null)
                validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
            valSchema.setName("value");
            valSchema.setType(getPigType(validator));

            innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
            allSchemaFields.add(innerTupleField);
        }
        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(MarshallerType.DEFAULT_VALIDATOR);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

  public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {

    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema hfs : hcatSchema.getFields()){
      ResourceFieldSchema rfSchema;
      rfSchema = getResourceSchemaFromFieldSchema(hfs);
      rfSchemaList.add(rfSchema);
    }
    ResourceSchema rSchema = new ResourceSchema();
    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
View Full Code Here

  }

  private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs)
      throws IOException {
    ResourceFieldSchema rfSchema;
    // if we are dealing with a bag or tuple column - need to worry about subschema
    if(hfs.getType() == Type.STRUCT) {
        rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(getTupleSubSchema(hfs));
    } else if(hfs.getType() == Type.ARRAY) {
        rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(getBagSubSchema(hfs));
    } else {
      rfSchema = new ResourceFieldSchema()
          .setName(hfs.getName())
          .setDescription(hfs.getComment())
          .setType(getPigType( hfs))
          .setSchema(null); // no munging inner-schemas
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema.ResourceFieldSchema

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.