Package org.apache.pig.piggybank.storage

Source Code of org.apache.pig.piggybank.storage.PigStorageSchema

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.piggybank.storage;

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.Expression;
import org.apache.pig.LoadCaster;
import org.apache.pig.LoadMetadata;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreMetadata;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.util.CastUtils;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;

/**
*  This Load/Store Func reads/writes metafiles that allow the schema and
*  aliases to be determined at load time, saving one from having to manually
*  enter schemas for pig-generated datasets. 
*  It also creates a ".pig_headers" file that simply lists the delimited aliases.
*  This is intended to make export to tools that can read files with header
*  lines easier (just cat the header to your data).
*  Due to StoreFunc limitations, you can only write the metafiles in MapReduce
*  mode. You can read them in Local or MapReduce mode.
*/
public class PigStorageSchema extends PigStorage implements LoadMetadata, StoreMetadata {

    private ResourceSchema schema;
    LoadCaster caster;

    public PigStorageSchema() {
        super();
    }
   
    public PigStorageSchema(String delim) {
        super(delim);
    }
    
    @Override
    public Tuple getNext() throws IOException {
        Tuple tup = super.getNext();
        if (tup == null) return null;

        if ( caster == null) {
            caster = getLoadCaster();
        }
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                    new String[] {signature});
            String serializedSchema = p.getProperty(signature+".schema");
            if (serializedSchema == null) return tup;
            try {
                schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));
            } catch (ParseException e) {
                mLog.error("Unable to parse serialized schema " + serializedSchema, e);
            }
        }

        if (schema != null) {

            ResourceFieldSchema[] fieldSchemas = schema.getFields();
            int tupleIdx = 0;
            // If some fields have been projected out, the tuple
            // only contains required fields.
            // We walk the requiredColumns array to find required fields,
            // and cast those.
            for (int i = 0; i < fieldSchemas.length; i++) {
                if (mRequiredColumns == null || (mRequiredColumns.length>i && mRequiredColumns[i])) {
                    Object val = null;
                    if(tup.get(tupleIdx) != null){
                        byte[] bytes = ((DataByteArray) tup.get(tupleIdx)).get();
                        val = CastUtils.convertToType(caster, bytes,
                                fieldSchemas[i], fieldSchemas[i].getType());
                    }
                    tup.set(tupleIdx, val);
                    tupleIdx++;
                }
            }
        }
        return tup;
    }

    //------------------------------------------------------------------------
    // Implementation of LoadMetaData interface
   
    @Override
    public ResourceSchema getSchema(String location,
            Job job) throws IOException {
        schema = (new JsonMetadata()).getSchema(location, job);
        if (signature != null && schema != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                    new String[] {signature});
            p.setProperty(signature + ".schema", schema.toString());
    }
        return schema;
    }

    @Override
    public ResourceStatistics getStatistics(String location,
            Job job) throws IOException {       
        return null;
    }

    @Override
    public void setPartitionFilter(Expression partitionFilter)
            throws IOException {
    }
   
    @Override
    public String[] getPartitionKeys(String location, Job job)
            throws IOException {
        return null;
    }

    //------------------------------------------------------------------------
    // Implementation of StoreMetadata

    @Override
    public void storeSchema(ResourceSchema schema, String location,
            Job job) throws IOException {
        JsonMetadata metadataWriter = new JsonMetadata();
        byte fieldDel = '\t';
        byte recordDel = '\n';
        metadataWriter.setFieldDel(fieldDel);
        metadataWriter.setRecordDel(recordDel);
        metadataWriter.storeSchema(schema, location, job);              
    }

    @Override
    public void storeStatistics(ResourceStatistics stats, String location,
            Job job) throws IOException {
       
    }
}
TOP

Related Classes of org.apache.pig.piggybank.storage.PigStorageSchema

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.