Package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer

Source Code of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.Launcher

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskReport;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecutionEngine;
import org.apache.pig.backend.hadoop.datastorage.HConfiguration;
import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine;
import org.apache.pig.impl.PigContext;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.LogUtils;

public abstract class Launcher {
    private static final Log log = LogFactory.getLog(Launcher.class);
   
    long totalHadoopTimeSpent;   
    String newLine = "\n";
    boolean pigException = false;
    boolean outOfMemory = false;
    final String OOM_ERR = "OutOfMemoryError";
   
    protected Launcher(){
        totalHadoopTimeSpent = 0;
        //handle the windows portion of \r
        if (System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")) {
            newLine = "\r\n";
        }
    }
    /**
     * Method to launch pig for hadoop either for a cluster's
     * job tracker or for a local job runner. THe only difference
     * between the two is the job client. Depending on the pig context
     * the job client will be initialize to one of the two.
     * Launchers for other frameworks can overide these methods.
     * Given an input PhysicalPlan, it compiles it
     * to get a MapReduce Plan. The MapReduce plan which
     * has multiple MapReduce operators each one of which
     * has to be run as a map reduce job with dependency
     * information stored in the plan. It compiles the
     * MROperPlan into a JobControl object. Each Map Reduce
     * operator is converted into a Job and added to the JobControl
     * object. Each Job also has a set of dependent Jobs that
     * are created using the MROperPlan.
     * The JobControl object is obtained from the JobControlCompiler
     * Then a new thread is spawned that submits these jobs
     * while respecting the dependency information.
     * The parent thread monitors the submitted jobs' progress and
     * after it is complete, stops the JobControl thread.
     * @param php
     * @param grpName
     * @param pc
     * @throws PlanException
     * @throws VisitorException
     * @throws IOException
     * @throws ExecException
     * @throws JobCreationException
     */
    public abstract boolean launchPig(PhysicalPlan php, String grpName, PigContext pc)
            throws PlanException, VisitorException, IOException, ExecException,
            JobCreationException, Exception;

    /**
     * Explain how a pig job will be executed on the underlying
     * infrastructure.
     * @param pp PhysicalPlan to explain
     * @param pc PigContext to use for configuration
     * @param ps PrintStream to write output on.
     * @throws VisitorException
     * @throws IOException
     */
    public abstract void explain(
            PhysicalPlan pp,
            PigContext pc,
            PrintStream ps) throws PlanException,
                                   VisitorException,
                                   IOException;
   
    protected boolean isComplete(double prog){
        return (int)(Math.ceil(prog)) == (int)1;
    }
   
    protected void getStats(Job job, JobClient jobClient, boolean errNotDbg, PigContext pigContext) throws Exception {
        JobID MRJobID = job.getAssignedJobID();
        String jobMessage = job.getMessage();
        if(MRJobID == null) {
          try {
                throw getExceptionFromString(jobMessage);
            } catch (Exception e) {
                //just get the first line in the message and log the rest
                String firstLine = getFirstLineFromMessage(jobMessage);

                LogUtils.writeLog(new Exception(jobMessage), pigContext.getProperties().getProperty("pig.logfile"),
                        log, false, null, false, false);
                int errCode = 2997;
                String msg = "Unable to recreate exception from backend error: " + firstLine;
                throw new ExecException(msg, errCode, PigException.BUG, e);
            }
        }
        try {
            TaskReport[] mapRep = jobClient.getMapTaskReports(MRJobID);
            getErrorMessages(mapRep, "map", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(mapRep);
            TaskReport[] redRep = jobClient.getReduceTaskReports(MRJobID);
            getErrorMessages(redRep, "reduce", errNotDbg, pigContext);
            totalHadoopTimeSpent += computeTimeSpent(mapRep);
        } catch (IOException e) {
            if(job.getState() == Job.SUCCESS) {
                // if the job succeeded, let the user know that
                // we were unable to get statistics
                log.warn("Unable to get job related diagnostics");
            } else {
                throw e;
            }
        }
    }
   
    protected long computeTimeSpent(TaskReport[] mapReports) {
        long timeSpent = 0;
        for (TaskReport r : mapReports) {
            timeSpent += (r.getFinishTime() - r.getStartTime());
        }
        return timeSpent;
    }
   
   
    protected void getErrorMessages(TaskReport reports[], String type, boolean errNotDbg, PigContext pigContext) throws Exception
    {       
      for (int i = 0; i < reports.length; i++) {
            String msgs[] = reports[i].getDiagnostics();
            ArrayList<Exception> exceptions = new ArrayList<Exception>();
            boolean jobFailed = false;
            float successfulProgress = 1.0f;
            if (msgs.length > 0) {
              //if the progress reported is not 1.0f then the map or reduce job failed
              //this comparison is in place till Hadoop 0.20 provides methods to query
              //job status             
              if(reports[i].getProgress() != successfulProgress) {
                jobFailed = true;
              }
                Set<String> errorMessageSet = new HashSet<String>();
                for (int j = 0; j < msgs.length; j++) {                 
                if(!errorMessageSet.contains(msgs[j])) {
                    errorMessageSet.add(msgs[j]);
                  if (errNotDbg) {
                    //errNotDbg is used only for failed jobs
                      //keep track of all the unique exceptions
                            try {
                                Exception e = getExceptionFromString(msgs[j]);
                                exceptions.add(e);
                            } catch (Exception e1) {
                                String firstLine = getFirstLineFromMessage(msgs[j]);                               
                                LogUtils.writeLog(new Exception(msgs[j]), pigContext.getProperties().getProperty("pig.logfile"),
                                        log, false, null, false, false);
                                int errCode = 2997;
                                String msg = "Unable to recreate exception from backed error: " + firstLine;
                                throw new ExecException(msg, errCode, PigException.BUG, e1);
                            }
                    } else {
                        log.debug("Error message from task (" + type + ") " +
                            reports[i].getTaskID() + msgs[j]);
                    }
                }
              }
            }
           
            //if its a failed job then check if there is more than one exception
            //more than one exception implies possibly different kinds of failures
            //log all the different failures and throw the exception corresponding
            //to the first failure
            if(jobFailed) {
                if(exceptions.size() > 1) {
                    for(int j = 0; j < exceptions.size(); ++j) {
                        String headerMessage = "Error message from task (" + type + ") " + reports[i].getTaskID();
                        LogUtils.writeLog(exceptions.get(j), pigContext.getProperties().getProperty("pig.logfile"), log, false, headerMessage, false, false);
                    }
                    throw exceptions.get(0);
                } else if(exceptions.size() == 1) {
                  throw exceptions.get(0);
                } else {
                  int errCode = 2115;
                  String msg = "Internal error. Expected to throw exception from the backend. Did not find any exception to throw.";
                  throw new ExecException(msg, errCode, PigException.BUG);
                }
            }
        }
    }
   
    /**
     * Compute the progress of the current job submitted
     * through the JobControl object jc to the JobClient jobClient
     * @param jc - The JobControl object that has been submitted
     * @param jobClient - The JobClient to which it has been submitted
     * @return The progress as a precentage in double format
     * @throws IOException
     */
    protected double calculateProgress(JobControl jc, JobClient jobClient) throws IOException{
        double prog = 0.0;
        prog += jc.getSuccessfulJobs().size();
       
        List runnJobs = jc.getRunningJobs();
        for (Object object : runnJobs) {
            Job j = (Job)object;
            prog += progressOfRunningJob(j, jobClient);
        }
        return prog;
    }
   
    /**
     * Returns the progress of a Job j which is part of a submitted
     * JobControl object. The progress is for this Job. So it has to
     * be scaled down by the num of jobs that are present in the
     * JobControl.
     * @param j - The Job for which progress is required
     * @param jobClient - the JobClient to which it has been submitted
     * @return Returns the percentage progress of this Job
     * @throws IOException
     */
    protected double progressOfRunningJob(Job j, JobClient jobClient) throws IOException{
        JobID mrJobID = j.getAssignedJobID();
        RunningJob rj = jobClient.getJob(mrJobID);
        if(rj==null && j.getState()==Job.SUCCESS)
            return 1;
        else if(rj==null)
            return 0;
        else{
            double mapProg = rj.mapProgress();
            double redProg = rj.reduceProgress();
            return (mapProg + redProg)/2;
        }
    }
    public long getTotalHadoopTimeSpent() {
        return totalHadoopTimeSpent;
    }


    /**
     *
     * @param stackTraceLine The string representation of {@link Throwable#printStackTrace() printStackTrace}
     * Handles internal PigException and its subclasses that override the {@link Throwable#toString() toString} method
     * @return An exception object whose string representation of printStackTrace is the input stackTrace
     * @throws Exception
     */
    Exception getExceptionFromString(String stackTrace) throws Exception{
        String[] lines = stackTrace.split(newLine);
        Throwable t = getExceptionFromStrings(lines, 0);
       
        if(!pigException) {
            int errCode = 6015;
            String msg = "During execution, encountered a Hadoop error.";
            ExecException ee = new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT, t);
            ee.setStackTrace(t.getStackTrace());
            return ee;
        } else {
            pigException = false;
            if(outOfMemory) {
                outOfMemory = false;
                int errCode = 6016;
                String msg = "Out of memory.";
                ExecException ee = new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT, t);
                ee.setStackTrace(t.getStackTrace());
                return ee;
            }
            return (Exception)t;
        }
    }

    /**
     *
     * @param stackTraceLine An array of strings that represent {@link Throwable#printStackTrace() printStackTrace}
     * output, split by newline
     * @return An exception object whose string representation of printStackTrace is the input stackTrace
     * @throws Exception
     */
    private Throwable getExceptionFromStrings(String[] stackTraceLines, int startingLineNum) throws Exception{
        /*
         * parse the array of string and throw the appropriate exception
         * first: from the line startingLineNum extract the exception name extract the message if any
         * fourth: create the appropriate exception and return it
         * An example of the stack trace:
    org.apache.pig.backend.executionengine.ExecException: ERROR 1075: Received a bytearray from the UDF. Cannot determine how to convert the bytearray to int.
        at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POCast.getNext(POCast.java:152)
        at org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.LessThanExpr.getNext(LessThanExpr.java:85)
        at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter.getNext(POFilter.java:148)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.runPipeline(PigMapBase.java:184)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapBase.map(PigMapBase.java:174)
        at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.map(PigMapOnly.java:65)
        at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:47)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:227)
        at org.apache.hadoop.mapred.TaskTracker$Child.main(TaskTracker.java:2207)
         */

        int prevStartingLineNum = startingLineNum;
       
        if(stackTraceLines.length > 0 && startingLineNum < (stackTraceLines.length - 1)) {
           
            //the regex for matching the exception class name; note the use of the $ for matching nested classes
            String exceptionNameDelimiter = "(\\w+(\\$\\w+)?\\.)+\\w+";
            Pattern exceptionNamePattern = Pattern.compile(exceptionNameDelimiter);
           
          //from the first line extract the exception name and the exception message
            Matcher exceptionNameMatcher = exceptionNamePattern.matcher(stackTraceLines[startingLineNum]);
            String exceptionName = null;
            String exceptionMessage = null;
            if (exceptionNameMatcher.find()) {
              exceptionName = exceptionNameMatcher.group();
              /*
               * note that the substring is from end + 2
               * the regex matcher ends at one position beyond the match
               * in this case it will end at colon (:)
               * the exception message will have a preceding space (after the colon (:))
               */
              if (exceptionName.contains(OOM_ERR)) {
                  outOfMemory = true;
              }
             
              if(stackTraceLines[startingLineNum].length() > exceptionNameMatcher.end()) {
                exceptionMessage = stackTraceLines[startingLineNum].substring(exceptionNameMatcher.end() + 2);
              }
             
              ++startingLineNum;
            }
         
            //the exceptionName should not be null
            if(exceptionName != null) {             

                ArrayList<StackTraceElement> stackTraceElements = new ArrayList<StackTraceElement>();
               
                //Create stack trace elements for the remaining lines
                String stackElementRegex = "\\s+at\\s+(\\w+(\\$\\w+)?\\.)+(\\<)?\\w+(\\>)?";
                Pattern stackElementPattern = Pattern.compile(stackElementRegex);
                String pigExceptionRegex = "org\\.apache\\.pig\\.";
                Pattern pigExceptionPattern = Pattern.compile(pigExceptionRegex);             
                String moreElementRegex = "\\s+\\.\\.\\.\\s+\\d+\\s+more";
                Pattern moreElementPattern = Pattern.compile(moreElementRegex);
               
               
                String pigPackageRegex = "org.apache.pig";
               
                int lineNum = startingLineNum;
                for(; lineNum < (stackTraceLines.length - 1); ++lineNum) {
                    Matcher stackElementMatcher = stackElementPattern.matcher(stackTraceLines[lineNum]);

                    if(stackElementMatcher.find()) {
                        StackTraceElement ste = getStackTraceElement(stackTraceLines[lineNum]);
                        stackTraceElements.add(ste);
                        String className = ste.getClassName();
                        Matcher pigExceptionMatcher = pigExceptionPattern.matcher(className);
                        if(pigExceptionMatcher.find()) {
                            pigException = true;
                        }                      
                    } else {
                        Matcher moreElementMatcher = moreElementPattern.matcher(stackTraceLines[lineNum]);
                        if(moreElementMatcher.find()) {
                            ++lineNum;
                        }
                        break;
                    }
                }
               
                startingLineNum = lineNum;              

              //create the appropriate exception; setup the stack trace and message
              Object object = PigContext.instantiateFuncFromSpec(exceptionName);
             
              if(object instanceof PigException) {
                //extract the error code and message the regex for matching the custom format of ERROR <ERROR CODE>:
                String errMessageRegex = "ERROR\\s+\\d+:";
                Pattern errMessagePattern = Pattern.compile(errMessageRegex);
                Matcher errMessageMatcher = errMessagePattern.matcher(exceptionMessage);
               
                if(errMessageMatcher.find()) {
                  String errMessageStub = errMessageMatcher.group();
                  /*
                   * extract the actual exception message sans the ERROR <ERROR CODE>:
                   * again note that the matcher ends at the space following the colon (:)
                   * the exception message appears after the space and hence the end + 1
                   */                 
                  exceptionMessage = exceptionMessage.substring(errMessageMatcher.end() + 1);
                   
                  //the regex to match the error code wich is a string of numerals
                  String errCodeRegex = "\\d+";
                    Pattern errCodePattern = Pattern.compile(errCodeRegex);
                    Matcher errCodeMatcher = errCodePattern.matcher(errMessageStub);
                   
                    String code = null;
                    if(errCodeMatcher.find()) {
                      code = errCodeMatcher.group()
                    }
                 
                    //could receive a number format exception here but it will be propagated up the stack                   
                    int errCode = Integer.parseInt(code);
                   
                    //create the exception with the message and then set the error code and error source
                    FuncSpec funcSpec = new FuncSpec(exceptionName, exceptionMessage);                       
                    object = PigContext.instantiateFuncFromSpec(funcSpec);
                    ((PigException)object).setErrorCode(errCode);
                    ((PigException)object).setErrorSource(PigException.determineErrorSource(errCode));
                } else { //else for if(errMessageMatcher.find())
                  /*
                   * did not find the error code which means that the PigException or its
                   * subclass is not returning the error code
                   * highly unlikely: should never be here
                   */
                  FuncSpec funcSpec = new FuncSpec(exceptionName, exceptionMessage);                       
                    object = PigContext.instantiateFuncFromSpec(funcSpec);
                    ((PigException)object).setErrorCode(2997);//generic error code
                    ((PigException)object).setErrorSource(PigException.BUG);                         
                }                       
              } else { //else for if(object instanceof PigException)
                //its not PigException; create the exception with the message
                object = PigContext.instantiateFuncFromSpec(exceptionName + "(" + exceptionMessage + ")");
              }
             
              StackTraceElement[] steArr = new StackTraceElement[stackTraceElements.size()];
              ((Throwable)object).setStackTrace((StackTraceElement[])(stackTraceElements.toArray(steArr)));
             
              if(startingLineNum < (stackTraceLines.length - 1)) {
                  Throwable e = getExceptionFromStrings(stackTraceLines, startingLineNum);
                  ((Throwable)object).initCause(e);
              }
             
              return (Throwable)object;
            } else { //else for if(exceptionName != null)
            int errCode = 2055;
            String msg = "Did not find exception name to create exception from string: " + stackTraceLines.toString();
            throw new ExecException(msg, errCode, PigException.BUG);
            }
        } else { //else for if(lines.length > 0)
        int errCode = 2056;
        String msg = "Cannot create exception from empty string.";
        throw new ExecException(msg, errCode, PigException.BUG);
        }
    }
   
    /**
     *
     * @param line the string representation of a stack trace returned by {@link Throwable#printStackTrace() printStackTrace}
     * @return the StackTraceElement object representing the stack trace
     * @throws Exception
     */
    public StackTraceElement getStackTraceElement(String line) throws Exception{
      /*     
       * the format of the line is something like:
       *               at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.map(PigMapOnly.java:65)
       * note the white space before the 'at'. Its not of much importance but noted for posterity.   
       */
      String[] items;
     
      /*
       * regex for matching the fully qualified method Name
       * note the use of the $ for matching nested classes
       * and the use of < and > for constructors
       */
      String qualifiedMethodNameRegex = "(\\w+(\\$\\w+)?\\.)+(<)?\\w+(>)?";
        Pattern qualifiedMethodNamePattern = Pattern.compile(qualifiedMethodNameRegex);
        Matcher contentMatcher = qualifiedMethodNamePattern.matcher(line);
       
        //org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.map(PigMapOnly.java:65)
      String content = null;
        if(contentMatcher.find()) {
          content = line.substring(contentMatcher.start());
        } else {
        int errCode = 2057;
        String msg = "Did not find fully qualified method name to reconstruct stack trace: " + line;
        throw new ExecException(msg, errCode, PigException.BUG);         
        }
       
        Matcher qualifiedMethodNameMatcher = qualifiedMethodNamePattern.matcher(content);
       
        //org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.map
        String qualifiedMethodName = null;
        //(PigMapOnly.java:65)
        String fileDetails = null;
       
        if(qualifiedMethodNameMatcher.find()) {
          qualifiedMethodName = qualifiedMethodNameMatcher.group();
          fileDetails = content.substring(qualifiedMethodNameMatcher.end() + 1);
        } else {
        int errCode = 2057;
        String msg = "Did not find fully qualified method name to reconstruct stack trace: " + line;
        throw new ExecException(msg, errCode, PigException.BUG);         
        }
     
        //From the fully qualified method name, extract the declaring class and method name
        items = qualifiedMethodName.split("\\.");
       
        //initialize the declaringClass (to org in most cases)
        String declaringClass = items[0];
        //the last member is always the method name
        String methodName = items[items.length - 1];
       
        //concatenate the names by adding the dot (.) between the members till the penultimate member
        for(int i = 1; i < items.length - 1; ++i) {
          declaringClass += ".";
          declaringClass += items[i];
        }
       
        //from the file details extract the file name and the line number
        //PigMapOnly.java:65
        fileDetails = fileDetails.substring(0, fileDetails.length() - 1);
        items = fileDetails.split(":");
        //PigMapOnly.java
        String fileName = null;
        int lineNumber = 0;
        if(items.length > 0) {
            fileName = items[0];
            lineNumber = Integer.parseInt(items[1]);
        }
        return new StackTraceElement(declaringClass, methodName, fileName, lineNumber);
    }
   
    private String getFirstLineFromMessage(String message) {
        String[] messages = message.split(newLine);
        if(messages.length > 0) {
            return messages[0];
        } else {
            return new String(message);
        }       
    }

}
TOP

Related Classes of org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.Launcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.