Package org.apache.pig.ResourceSchema

Examples of org.apache.pig.ResourceSchema.ResourceFieldSchema


     * @throws IOException
     */
    @Test(expected=FrontendException.class)
    public void testToPigSchemaWithInvalidSchema2() throws IOException {
        ResourceFieldSchema[] level0 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("fld0").setType(DataType.CHARARRAY)
        };
       
        ResourceSchema rSchema0 = new ResourceSchema()
            .setFields(level0);
       
        ResourceFieldSchema[] level2 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("t2").setType(DataType.BAG).setSchema(rSchema0)
        };
        
    }
View Full Code Here


        BufferedReader br = new BufferedReader(new FileReader(outputFileName));
        for(String line=br.readLine();line!=null;line=br.readLine()){
            String[] flds = line.split(":",-1);
            Tuple t = new DefaultTuple();
           
            ResourceFieldSchema bagfs = GenRandomData.getSmallTupDataBagFieldSchema();
            ResourceFieldSchema tuplefs = GenRandomData.getSmallTupleFieldSchema();
           
            t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
View Full Code Here

            System.err.println("Complex data: ");
            System.err.println(line);
            String[] flds = line.split(":",-1);
            Tuple t = new DefaultTuple();
           
            ResourceFieldSchema stringfs = new ResourceFieldSchema();
            stringfs.setType(DataType.CHARARRAY);
            ResourceFieldSchema intfs = new ResourceFieldSchema();
            intfs.setType(DataType.INTEGER);
           
            ResourceSchema tupleSchema = new ResourceSchema();
            tupleSchema.setFields(new ResourceFieldSchema[]{stringfs, intfs});
            ResourceFieldSchema tuplefs = new ResourceFieldSchema();
            tuplefs.setSchema(tupleSchema);
            tuplefs.setType(DataType.TUPLE);
           
            ResourceSchema bagSchema = new ResourceSchema();
            bagSchema.setFields(new ResourceFieldSchema[]{tuplefs});
            ResourceFieldSchema bagfs = new ResourceFieldSchema();
            bagfs.setSchema(bagSchema);
            bagfs.setType(DataType.BAG);
           
            t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
View Full Code Here

        for( int i = 0; i < fieldCount; i++ ) {
            org.apache.hadoop.zebra.schema.Schema.ColumnSchema cSchema = tSchema.getColumn( i );
            if( cSchema != null )
                rFields[i] = convertToResourceFieldSchema( cSchema );
            else
                rFields[i] = new ResourceFieldSchema();
        }
        rSchema.setFields( rFields );
        return rSchema;
    }
View Full Code Here

        return rSchema;
    }

    private static ResourceFieldSchema convertToResourceFieldSchema(
            ColumnSchema cSchema) throws IOException {
        ResourceFieldSchema field = new ResourceFieldSchema();

        if( cSchema.getType() ==ColumnType.ANY && cSchema.getName().isEmpty() ) { // For anonymous column
            field.setName( null );
            field.setType( DataType.UNKNOWN );
            field.setSchema( null );
        } else {
            field.setName( cSchema.getName() );
            field.setType( cSchema.getType().pigDataType() );
            if( cSchema.getType() == ColumnType.MAP ) {
              // Pig doesn't want any schema for a map field.
                field.setSchema( null );
            } else {
              org.apache.hadoop.zebra.schema.Schema fs = cSchema.getSchema();
              ResourceSchema rs = convertToResourceSchema( fs  );
              if( cSchema.getType() == ColumnType.COLLECTION ) {
                int count = fs.getNumColumns();
                if( count > 1 || ( count == 1 && fs.getColumn( 0 ).getType() != ColumnType.RECORD ) ) {
                  // Pig requires a record (tuple) as the schema for a BAG field.
                  ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
                  fieldSchema.setSchema( rs );
                  fieldSchema.setType( ColumnType.RECORD.pigDataType() );
                  rs = new ResourceSchema();
                  rs.setFields( new ResourceFieldSchema[] { fieldSchema } );
                }
              }
                field.setSchema( rs );
View Full Code Here

      if (!descending) {
        ResourceSchema.ResourceFieldSchema[] fields = schema.getFields();
        int[] index = schema.getSortKeys();
     
        for( int i = 0; i< index.length; i++ ) {
          ResourceFieldSchema field = fields[index[i]];
          String name = field.getName();
          if( name == null )
              throw new IOException("Zebra does not support column positional reference yet");
          if( !org.apache.pig.data.DataType.isAtomic( field.getType() ) )
              throw new IOException( "Field [" + name + "] is not of simple type as required for a sort column now." );
          if( i > 0 )
              sortColumnNames.append( "," );
          sortColumnNames.append( name );
        }
View Full Code Here

     */
    @Test
    public void testToPigSchemaWithTwoLevelAccess() throws IOException {
        ResourceFieldSchema[] level0 =
            new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("fld0").setType(DataType.CHARARRAY),
                new ResourceFieldSchema()
                    .setName("fld1").setType(DataType.DOUBLE),
                new ResourceFieldSchema()
                    .setName("fld2").setType(DataType.INTEGER)
        };
              
        ResourceSchema rSchema0 = new ResourceSchema()
            .setFields(level0);
       
        ResourceFieldSchema[] level1 =
            new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("t1").setType(DataType.TUPLE)
                    .setSchema(rSchema0)
        };
       
        ResourceSchema rSchema1 = new ResourceSchema()
            .setFields(level1);
       
        ResourceFieldSchema[] level2 =
            new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("t2").setType(DataType.BAG)
                    .setSchema(rSchema1)
        };
       
        ResourceSchema origSchema = new ResourceSchema()
View Full Code Here

     * @throws IOException
     */
    @Test(expected=FrontendException.class)
    public void testToPigSchemaWithInvalidSchema() throws IOException {
        ResourceFieldSchema[] level0 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("fld0").setType(DataType.CHARARRAY),
                new ResourceFieldSchema()
                    .setName("fld1").setType(DataType.DOUBLE),       
                new ResourceFieldSchema()
                    .setName("fld2").setType(DataType.INTEGER)
        };
       
        ResourceSchema rSchema0 = new ResourceSchema()
            .setFields(level0);
       
        ResourceFieldSchema[] level2 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("t2").setType(DataType.BAG).setSchema(rSchema0)
        };
    }
View Full Code Here

     * @throws IOException
     */
    @Test(expected=FrontendException.class)
    public void testToPigSchemaWithInvalidSchema2() throws IOException {
        ResourceFieldSchema[] level0 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("fld0").setType(DataType.CHARARRAY)
        };
       
        ResourceSchema rSchema0 = new ResourceSchema()
            .setFields(level0);
       
        ResourceFieldSchema[] level2 = new ResourceFieldSchema[] {
                new ResourceFieldSchema()
                    .setName("t2").setType(DataType.BAG).setSchema(rSchema0)
        };
        
    }
View Full Code Here

        // get default marshallers and validators
        List<AbstractType> marshallers = getDefaultMarshallers(cfDef);
        Map<ByteBuffer,AbstractType> validators = getValidatorMap(cfDef);

        // add key
        ResourceFieldSchema keyFieldSchema = new ResourceFieldSchema();
        keyFieldSchema.setName("key");
        keyFieldSchema.setType(getPigType(marshallers.get(2)));

        ResourceSchema bagSchema = new ResourceSchema();
        ResourceFieldSchema bagField = new ResourceFieldSchema();
        bagField.setType(DataType.BAG);
        bagField.setName("columns");
        // inside the bag, place one tuple with the default comparator/validator schema
        ResourceSchema bagTupleSchema = new ResourceSchema();
        ResourceFieldSchema bagTupleField = new ResourceFieldSchema();
        bagTupleField.setType(DataType.TUPLE);
        ResourceFieldSchema bagcolSchema = new ResourceFieldSchema();
        ResourceFieldSchema bagvalSchema = new ResourceFieldSchema();
        bagcolSchema.setName("name");
        bagvalSchema.setName("value");
        bagcolSchema.setType(getPigType(marshallers.get(0)));
        bagvalSchema.setType(getPigType(marshallers.get(1)));
        bagTupleSchema.setFields(new ResourceFieldSchema[] { bagcolSchema, bagvalSchema });
        bagTupleField.setSchema(bagTupleSchema);
        bagSchema.setFields(new ResourceFieldSchema[] { bagTupleField });
        bagField.setSchema(bagSchema);

        // will contain all fields for this schema
        List<ResourceFieldSchema> allSchemaFields = new ArrayList<ResourceFieldSchema>();
        // add the key first, then the indexed columns, and finally the bag
        allSchemaFields.add(keyFieldSchema);

        // defined validators/indexes
        for (ColumnDef cdef : cfDef.column_metadata)
        {
            // make a new tuple for each col/val pair
            ResourceSchema innerTupleSchema = new ResourceSchema();
            ResourceFieldSchema innerTupleField = new ResourceFieldSchema();
            innerTupleField.setType(DataType.TUPLE);
            innerTupleField.setSchema(innerTupleSchema);
            innerTupleField.setName(new String(cdef.getName()));

            ResourceFieldSchema idxColSchema = new ResourceFieldSchema();
            idxColSchema.setName("name");
            idxColSchema.setType(getPigType(marshallers.get(0)));

            ResourceFieldSchema valSchema = new ResourceFieldSchema();
            AbstractType validator = validators.get(cdef.name);
            if (validator == null)
                validator = marshallers.get(1);
            valSchema.setName("value");
            valSchema.setType(getPigType(validator));

            innerTupleSchema.setFields(new ResourceFieldSchema[] { idxColSchema, valSchema });
            allSchemaFields.add(innerTupleField);
        }
        // bag at the end for unknown columns
        allSchemaFields.add(bagField);

        // add top-level index elements if needed
        if (usePartitionFilter)
        {
            for (ColumnDef cdef : getIndexes())
            {
                ResourceFieldSchema idxSchema = new ResourceFieldSchema();
                idxSchema.setName("index_" + new String(cdef.getName()));
                AbstractType validator = validators.get(cdef.name);
                if (validator == null)
                    validator = marshallers.get(1);
                idxSchema.setType(getPigType(validator));
                allSchemaFields.add(idxSchema);
            }
        }
        // top level schema contains everything
        schema.setFields(allSchemaFields.toArray(new ResourceFieldSchema[allSchemaFields.size()]));
View Full Code Here

TOP

Related Classes of org.apache.pig.ResourceSchema.ResourceFieldSchema

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.