Package org.apache.hadoop.zebra.pig

Source Code of org.apache.hadoop.zebra.pig.SchemaConverter

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.zebra.pig;

import java.io.IOException;

import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.ColumnType;
import org.apache.hadoop.zebra.schema.Schema.ColumnSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
import org.apache.pig.ResourceSchema;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

class SchemaConverter {
    public static ColumnType toTableType(byte ptype) {
        ColumnType ret;
        switch (ptype) {
        case DataType.INTEGER:
            ret = ColumnType.INT;
            break;
        case DataType.LONG:
            ret = ColumnType.LONG;
            break;
        case DataType.FLOAT:
            ret = ColumnType.FLOAT;
            break;
        case DataType.DOUBLE:
            ret = ColumnType.DOUBLE;
            break;
        case DataType.BOOLEAN:
            ret = ColumnType.BOOL;
            break;
        case DataType.DATETIME:
            ret = ColumnType.DATETIME;
            break;
        case DataType.BAG:
            ret = ColumnType.COLLECTION;
            break;
        case DataType.MAP:
            ret = ColumnType.MAP;
            break;
        case DataType.TUPLE:
            ret = ColumnType.RECORD;
            break;
        case DataType.CHARARRAY:
            ret = ColumnType.STRING;
            break;
        case DataType.BYTEARRAY:
            ret = ColumnType.BYTES;
            break;
        default:
            ret = null;
        break;
        }
        return ret;
    }

    public static Schema toPigSchema(
            org.apache.hadoop.zebra.schema.Schema tschema)
    throws FrontendException {
        Schema ret = new Schema();
        for (String col : tschema.getColumns()) {
            org.apache.hadoop.zebra.schema.Schema.ColumnSchema columnSchema =
                tschema.getColumn(col);
            if (columnSchema != null) {
                ColumnType ct = columnSchema.getType();
                if (ct == org.apache.hadoop.zebra.schema.ColumnType.RECORD ||
                        ct == org.apache.hadoop.zebra.schema.ColumnType.COLLECTION)
                    ret.add(new FieldSchema(col, toPigSchema(columnSchema.getSchema()), ct.pigDataType()));
                else
                    ret.add(new FieldSchema(col, ct.pigDataType()));
            } else {
                ret.add(new FieldSchema(null, null));
            }
        }
        return ret;
    }

    public static org.apache.hadoop.zebra.schema.Schema fromPigSchema(
            Schema pschema) throws FrontendException, ParseException {
        org.apache.hadoop.zebra.schema.Schema tschema = new org.apache.hadoop.zebra.schema.Schema();
        Schema.FieldSchema columnSchema;
        for (int i = 0; i < pschema.size(); i++) {
            columnSchema = pschema.getField(i);
            if (columnSchema != null) {
                if (DataType.isSchemaType(columnSchema.type))
                    tschema.add(new org.apache.hadoop.zebra.schema.Schema.ColumnSchema(columnSchema.alias,
                            fromPigSchema(columnSchema.schema), toTableType(columnSchema.type)));
                else if (columnSchema.type == DataType.MAP)
                    tschema.add(new org.apache.hadoop.zebra.schema.Schema.ColumnSchema(columnSchema.alias,
                            new org.apache.hadoop.zebra.schema.Schema(new org.apache.hadoop.zebra.schema.Schema.ColumnSchema(null,
                                    org.apache.hadoop.zebra.schema.ColumnType.BYTES)), toTableType(columnSchema.type)));
                else
                    tschema.add(new org.apache.hadoop.zebra.schema.Schema.ColumnSchema(columnSchema.alias, toTableType(columnSchema.type)));
            } else {
                tschema.add(new org.apache.hadoop.zebra.schema.Schema.ColumnSchema(null, ColumnType.ANY));
            }
        }
        return tschema;
    }

    public static org.apache.hadoop.zebra.schema.Schema convertFromResourceSchema(ResourceSchema rSchema)
    throws ParseException {
        if( rSchema == null )
            return null;

        org.apache.hadoop.zebra.schema.Schema schema = new org.apache.hadoop.zebra.schema.Schema();
        ResourceSchema.ResourceFieldSchema[] fields = rSchema.getFields();
        for( ResourceSchema.ResourceFieldSchema field : fields ) {
            String name = field.getName();
            ColumnType type = toTableType( field.getType() );
            org.apache.hadoop.zebra.schema.Schema cSchema = convertFromResourceSchema( field.getSchema() );
            if( type == ColumnType.MAP && cSchema == null ) {
                cSchema = new org.apache.hadoop.zebra.schema.Schema();
                cSchema.add( new org.apache.hadoop.zebra.schema.Schema.ColumnSchema( "", ColumnType.BYTES ) );
            }
            org.apache.hadoop.zebra.schema.Schema.ColumnSchema columnSchema =
                new org.apache.hadoop.zebra.schema.Schema.ColumnSchema( name, cSchema, type );
            schema.add( columnSchema );
        }

        return schema;
    }

    public static ResourceSchema convertToResourceSchema(org.apache.hadoop.zebra.schema.Schema tSchema)
    throws IOException {
        if( tSchema == null )
            return null;

        ResourceSchema rSchema = new ResourceSchema();
        int fieldCount = tSchema.getNumColumns();
        ResourceFieldSchema[] rFields = new ResourceFieldSchema[fieldCount];
        for( int i = 0; i < fieldCount; i++ ) {
            org.apache.hadoop.zebra.schema.Schema.ColumnSchema cSchema = tSchema.getColumn( i );
            if( cSchema != null )
                rFields[i] = convertToResourceFieldSchema( cSchema );
            else
                rFields[i] = new ResourceFieldSchema();
        }
        rSchema.setFields( rFields );
        return rSchema;
    }

    private static ResourceFieldSchema convertToResourceFieldSchema(
            ColumnSchema cSchema) throws IOException {
        ResourceFieldSchema field = new ResourceFieldSchema();

        if( cSchema.getType() ==ColumnType.ANY && cSchema.getName().isEmpty() ) { // For anonymous column
            field.setName( null );
            field.setTypeDataType.BYTEARRAY );
            field.setSchema( null );
        } else {
            field.setName( cSchema.getName() );
            field.setType( cSchema.getType().pigDataType() );
            if( cSchema.getType() == ColumnType.MAP ) {
              // Pig doesn't want any schema for a map field.
                field.setSchema( null );
            } else {
              org.apache.hadoop.zebra.schema.Schema fs = cSchema.getSchema();
              ResourceSchema rs = convertToResourceSchema( fs  );
              if( cSchema.getType() == ColumnType.COLLECTION ) {
                int count = fs.getNumColumns();
                if( count > 1 || ( count == 1 && fs.getColumn( 0 ).getType() != ColumnType.RECORD ) ) {
                  // Pig requires a record (tuple) as the schema for a BAG field.
                  ResourceFieldSchema fieldSchema = new ResourceFieldSchema();
                  fieldSchema.setSchema( rs );
                  fieldSchema.setType( ColumnType.RECORD.pigDataType() );
                  rs = new ResourceSchema();
                  rs.setFields( new ResourceFieldSchema[] { fieldSchema } );
                }
              }
                field.setSchema( rs );
            }
        }

        return field;
    }
 
}
TOP

Related Classes of org.apache.hadoop.zebra.pig.SchemaConverter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.