Package org.apache.pig.impl.logicalLayer.optimizer

Source Code of org.apache.pig.impl.logicalLayer.optimizer.TypeCastInserter

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.impl.logicalLayer.optimizer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.LOCast;
import org.apache.pig.impl.logicalLayer.LOForEach;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LOProject;
import org.apache.pig.impl.logicalLayer.LOStream;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigException;
import org.apache.pig.impl.streaming.StreamingCommand;
import org.apache.pig.impl.streaming.StreamingCommand.HandleSpec;

/**
* A visitor to discover if any schema has been specified for a file being
* loaded.  If so, a projection will be injected into the plan to cast the
* data being loaded to the appropriate types.  The optimizer can then come
* along and move those casts as far down as possible, or in some cases remove
* them altogether.  This visitor does not handle finding the schemas for the
* file, that has already been done as part of parsing.
*
*/
public class TypeCastInserter extends LogicalTransformer {

    private String operatorClassName;

    public TypeCastInserter(LogicalPlan plan, String operatorClassName) {
        super(plan, new DepthFirstWalker<LogicalOperator, LogicalPlan>(plan));
        this.operatorClassName = operatorClassName;
    }

    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {      
        try {
            LogicalOperator op = getOperator(nodes);
            Schema s = op.getSchema();
            if (s == null) return false;
   
            boolean sawOne = false;
            List<Schema.FieldSchema> fss = s.getFields();
            List<Byte> types = new ArrayList<Byte>(s.size());
            Schema determinedSchema = null;
            if(LOLoad.class.getName().equals(operatorClassName)) {
                determinedSchema = ((LOLoad)op).getDeterminedSchema();
            }
            for (int i = 0; i < fss.size(); i++) {
                if (fss.get(i).type != DataType.BYTEARRAY) {
                    if(determinedSchema == null ||
                            (fss.get(i).type != determinedSchema.getField(i).type)) {
                            // Either no schema was determined by loader OR the type
                            // from the "determinedSchema" is different
                            // from the type specified - so we need to cast
                            sawOne = true;
                        }
                }
                types.add(fss.get(i).type);
            }

            // If all we've found are byte arrays, we don't need a projection.
            return sawOne;
        } catch(OptimizerException oe) {
            throw oe;
        } catch (Exception e) {
            int errCode = 2004;
            String msg = "Internal error while trying to check if type casts are needed";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }
   
    private LogicalOperator getOperator(List<LogicalOperator> nodes) throws FrontendException {
        if((nodes == null) || (nodes.size() <= 0)) {
            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }
       
        LogicalOperator lo = nodes.get(0);
        if(LOLoad.class.getName().equals(operatorClassName)) {
            if (lo == null || !(lo instanceof LOLoad)) {
                int errCode = 2005;
                String msg = "Expected " + LOLoad.class.getSimpleName() + ", got " + lo.getClass().getSimpleName();
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else if(LOStream.class.getName().equals(operatorClassName)){
            if (lo == null || !(lo instanceof LOStream)) {
                int errCode = 2005;
                String msg = "Expected " + LOStream.class.getSimpleName() + ", got " + lo.getClass().getSimpleName();
                throw new OptimizerException(msg, errCode, PigException.BUG);
            }
   
            return lo;
        } else {
            // we should never be called with any other operator class name
            int errCode = 1034;
            String msg = "TypeCastInserter invoked with an invalid operator class name:" + operatorClassName;
            throw new OptimizerException(msg, errCode, PigException.INPUT);
        }
  
    }

    @Override
    public void transform(List<LogicalOperator> nodes) throws OptimizerException {
        try {
            LogicalOperator lo = getOperator(nodes);
            Schema s = lo.getSchema();
            String scope = lo.getOperatorKey().scope;
            // For every field, build a logical plan.  If the field has a type
            // other than byte array, then the plan will be cast(project).  Else
            // it will just be project.
            ArrayList<LogicalPlan> genPlans = new ArrayList<LogicalPlan>(s.size());
            ArrayList<Boolean> flattens = new ArrayList<Boolean>(s.size());
            Map<String, Byte> typeChanges = new HashMap<String, Byte>();
            // if we are inserting casts in a load and if the loader
            // implements determineSchema(), insert casts only where necessary
            // Note that in this case, the data coming out of the loader is not
            // a BYTEARRAY but is whatever determineSchema() says it is.
            Schema determinedSchema = null;
            if(LOLoad.class.getName().equals(operatorClassName)) {
                determinedSchema = ((LOLoad)lo).getDeterminedSchema();
            }
            for (int i = 0; i < s.size(); i++) {
                LogicalPlan p = new LogicalPlan();
                genPlans.add(p);
                flattens.add(false);
                List<Integer> toProject = new ArrayList<Integer>(1);
                toProject.add(i);
                LOProject proj = new LOProject(p, OperatorKey.genOpKey(scope),
                    lo, toProject);
                p.add(proj);
                Schema.FieldSchema fs = s.getField(i);
                if (fs.type != DataType.BYTEARRAY) {
                    if(determinedSchema == null || (fs.type != determinedSchema.getField(i).type)) {
                            // Either no schema was determined by loader OR the type
                            // from the "determinedSchema" is different
                            // from the type specified - so we need to cast
                            LOCast cast = new LOCast(p,
                                        OperatorKey.genOpKey(scope), fs.type);
                            p.add(cast);
                            p.connect(proj, cast);
                           
                            cast.setFieldSchema(fs.clone());
                            FuncSpec loadFuncSpec = null;
                            if(lo instanceof LOLoad) {
                                loadFuncSpec = ((LOLoad)lo).getInputFile().getFuncSpec();
                            } else if (lo instanceof LOStream) {
                                StreamingCommand command = ((LOStream)lo).getStreamingCommand();
                                HandleSpec streamOutputSpec = command.getOutputSpec();
                                loadFuncSpec = new FuncSpec(streamOutputSpec.getSpec());
                            } else {
                                int errCode = 2006;
                                String msg = "TypeCastInserter invoked with an invalid operator class name: " + lo.getClass().getSimpleName();
                                throw new OptimizerException(msg, errCode, PigException.BUG);
                            }
                            cast.setLoadFuncSpec(loadFuncSpec);
                            typeChanges.put(fs.canonicalName, fs.type);
                            if(determinedSchema == null) {
                                // Reset the loads field schema to byte array so that it
                                // will reflect reality.
                                fs.type = DataType.BYTEARRAY;
                            } else {
                                // Reset the type to what determinedSchema says it is
                                fs.type = determinedSchema.getField(i).type;
                            }
                        }
                }
            }

            // Build a foreach to insert after the load, giving it a cast for each
            // position that has a type other than byte array.
            LOForEach foreach = new LOForEach(mPlan,
                OperatorKey.genOpKey(scope), genPlans, flattens);
            foreach.setAlias(lo.getAlias());
            // Insert the foreach into the plan and patch up the plan.
            insertAfter(lo, foreach, null);

            rebuildSchemas();

        } catch (OptimizerException oe) {
            throw oe;
        } catch (Exception e) {
            int errCode = 2007;
            String msg = "Unable to insert type casts into plan";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }
}

 
TOP

Related Classes of org.apache.pig.impl.logicalLayer.optimizer.TypeCastInserter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.