Package org.apache.pig.impl.streaming

Source Code of org.apache.pig.impl.streaming.ExecutableManager$ProcessErrorThread

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.streaming;

import static org.apache.pig.PigConfiguration.PIG_STREAMING_ENVIRONMENT;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.streaming.InputHandler.InputType;
import org.apache.pig.impl.streaming.OutputHandler.OutputType;
import org.apache.pig.impl.util.UDFContext;

/**
* {@link ExecutableManager} manages an external executable which processes data
* in a Pig query.
*
* The <code>ExecutableManager</code> is responsible for startup/teardown of
* the external process and also for managing it. It feeds input records to the
* executable via its <code>stdin</code>, collects the output records from
* the <code>stdout</code> and also diagnostic information from the
* <code>stderr</code>.
*/
public class ExecutableManager {
    private static final Log LOG = LogFactory.getLog(ExecutableManager.class);
    // Exit status that close() treats as a successful run of the process
    private static final int SUCCESS = 0;
    // Name of the environment variable that setupEnvironment() extends with
    // the working directory
    private static final String PATH = "PATH";
    // Shell that exec() uses to launch the command on non-Windows platforms
    private static final String BASH = "bash";
    // Shared end-of-stream marker sent to POStream once the process has
    // exited with status 0
    private static final Result EOS_RESULT = new Result(POStatus.STATUS_EOS, null);

    protected StreamingCommand command; // Streaming command to be run
    // Command arguments joined by configure(), each followed by one space
    String argvAsString; // Parsed commands

    Process process; // Handle to the process
    // Stays at -127 until process.waitFor() has returned a real exit status
    protected int exitCode = -127; // Exit code of the process

    protected DataOutputStream stdin; // stdin of the process
    // Started by run() when the input type is not ASYNCHRONOUS
    ProcessInputThread stdinThread; // thread to send input to process

    ProcessOutputThread stdoutThread; // thread to get process stdout
    InputStream stdout; // stdout of the process

    ProcessErrorThread stderrThread; // thread to get process stderr
    InputStream stderr; // stderr of the process

    // Input/Output handlers, created by configure()
    InputHandler inputHandler;
    OutputHandler outputHandler;

    // Statistics: the input counters are updated by ProcessInputThread, the
    // output counters by ProcessOutputThread
    protected long inputRecords = 0;
    protected long inputBytes = 0;
    protected long outputRecords = 0;
    protected long outputBytes = 0;

    // Most recent Throwable caught by the input, output or error thread;
    // read by close() and by the input loop before each write
    protected volatile Throwable outerrThreadsError;
    // Operator whose binary input/output queues the helper threads use
    private POStream poStream;
    // Started by run() when the input type is ASYNCHRONOUS
    private ProcessInputThread fileInputThread;

    /**
     * Create a new {@link ExecutableManager}.
     *
     * The instance is not usable until {@link #configure(POStream)} has been
     * called: the command and the input/output handlers are only assigned
     * there, and {@link #run()} dereferences the input handler.
     */
    public ExecutableManager() {
    }

    /**
     * Configure and initialize the {@link ExecutableManager}.
     *
     * @param stream POStream operator
     * @throws IOException
     * @throws ExecException
     */
    public void configure(POStream stream) throws IOException, ExecException {
        this.poStream = stream;
        this.command = stream.getCommand();
        String[] argv = this.command.getCommandArgs();
        argvAsString = "";
        for (String arg : argv) {
            argvAsString += arg;
            argvAsString += " ";
        }

        // Create the input/output handlers
        this.inputHandler = HandlerFactory.createInputHandler(command);
        this.outputHandler = HandlerFactory.createOutputHandler(command);
    }

    /**
     * Close and cleanup the {@link ExecutableManager}.
     * @throws IOException
     */
    public void close() throws IOException {
        // Close the InputHandler, which in some cases lets the process
        // terminate
        inputHandler.close(process);

        // Check if we need to start the process now ...
        if (inputHandler.getInputType() == InputType.ASYNCHRONOUS) {
            exec();
        }

        // Wait for the process to exit
        try {
            exitCode = process.waitFor();
        } catch (InterruptedException ie) {
            LOG.error("Unexpected exception while waiting for streaming binary to complete", ie);
            killProcess(process);
        }

        // Wait for stdout thread to complete
        try {
            if (stdoutThread != null) {
                stdoutThread.join(0);
            }
            stdoutThread = null;
        } catch (InterruptedException ie) {
            LOG.error("Unexpected exception while waiting for output thread for streaming binary to complete", ie);
            killProcess(process);
        }

        // Wait for stderr thread to complete
        try {
            if (stderrThread != null) {
                stderrThread.join(0);
            }
            stderrThread = null;
        } catch (InterruptedException ie) {
            LOG.error("Unexpected exception while waiting for input thread for streaming binary to complete", ie);
            killProcess(process);
        }

        LOG.debug("Process exited with: " + exitCode);
        if (exitCode != SUCCESS) {
            LOG.error(command + " failed with exit status: "
                    + exitCode);
        }

        if (outputHandler.getOutputType() == OutputType.ASYNCHRONOUS) {

            // Trigger the outputHandler
            outputHandler.bindTo("", null, 0, -1);

            // start thread to process output from executable's stdout
            stdoutThread = new ProcessOutputThread(outputHandler, poStream);
            stdoutThread.start();
        }

        // Check if there was a problem with the managed process
        if (outerrThreadsError != null) {
            LOG.error("Output/Error thread failed with: "
                    + outerrThreadsError);
        }

    }

    /**
     *  Helper function to close input and output streams
     *  to the process and kill it
     * @param process the process to be killed
     * @throws IOException
     */
    private void killProcess(Process process) throws IOException {
        if (process != null) {
            inputHandler.close(process);
            outputHandler.close();
            process.destroy();
        }
    }

    /**
     * Set up the run-time environment of the managed process.
     *
     * @param pb
     *            {@link ProcessBuilder} used to exec the process
     */
    protected void setupEnvironment(ProcessBuilder pb) {
        String separator = ":";
        Configuration conf = UDFContext.getUDFContext().getJobConf();
        Map<String, String> env = pb.environment();
        addJobConfToEnvironment(conf, env);

        // Add the current-working-directory to the $PATH
        File dir = pb.directory();
        String cwd = (dir != null) ? dir.getAbsolutePath() : System
                .getProperty("user.dir");

        String envPath = env.get(PATH);
        if (envPath == null) {
            envPath = cwd;
        } else {
            envPath = envPath + separator + cwd;
        }
        env.put(PATH, envPath);
    }

    void addJobConfToEnvironment(Configuration conf, Map<String, String> env) {
        String propsToSend = conf.get(PIG_STREAMING_ENVIRONMENT);
        LOG.debug("Properties to ship to streaming environment set in "+PIG_STREAMING_ENVIRONMENT+": " + propsToSend);
        if (propsToSend == null) {
            return;
        }

        for (String prop : propsToSend.split(",")) {
            String value = conf.get(prop);
            if (value == null) {
                LOG.warn("Property set in "+PIG_STREAMING_ENVIRONMENT+" not found in Configuration: " + prop);
                continue;
            }
            LOG.debug("Setting property in streaming environment: " + prop);
            envPut(env, prop, value);
        }
    }

    void envPut(Map<String, String> env, String name, String value) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Add  env entry:" + name + "=" + value);
        }
        env.put(name, value);
    }

    /**
     * Start execution of the external process.
     *
     * This takes care of setting up the environment of the process and also
     * starts ProcessErrorThread to process the <code>stderr</code> of
     * the managed process.
     *
     * @throws IOException
     */
    protected void exec() throws IOException {
        // Set the actual command to run with 'bash -c exec ...'
        List<String> cmdArgs = new ArrayList<String>();

        if (System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")) {
          cmdArgs.add("cmd");
          cmdArgs.add("/c");
          cmdArgs.add(argvAsString);
        } else {
          cmdArgs.add(BASH);
          cmdArgs.add("-c");
          StringBuffer sb = new StringBuffer();
          sb.append("exec ");
          sb.append(argvAsString);
          cmdArgs.add(sb.toString());
        }

        // Start the external process
        ProcessBuilder processBuilder = new ProcessBuilder(cmdArgs
                .toArray(new String[cmdArgs.size()]));
        setupEnvironment(processBuilder);
        process = processBuilder.start();
        LOG.debug("Started the process for command: " + command);

        // Pick up the process' stderr stream and start the thread to
        // process the stderr stream
        stderr = new DataInputStream(new BufferedInputStream(process
                .getErrorStream()));
        stderrThread = new ProcessErrorThread();
        stderrThread.start();

        // Check if we need to handle the process' stdout directly
        if (outputHandler.getOutputType() == OutputType.SYNCHRONOUS) {
            // Get hold of the stdout of the process
            stdout = new DataInputStream(new BufferedInputStream(process
                    .getInputStream()));

            // Bind the stdout to the OutputHandler
            outputHandler.bindTo("", new BufferedPositionedInputStream(stdout),
                    0, Long.MAX_VALUE);

            // start thread to process output from executable's stdout
            stdoutThread = new ProcessOutputThread(outputHandler, poStream);
            stdoutThread.start();
        }
    }

    /**
     * Start execution of the {@link ExecutableManager}.
     *
     * @throws IOException
     */
    public void run() throws IOException {
        // Check if we need to exec the process NOW ...
        if (inputHandler.getInputType() == InputType.ASYNCHRONOUS) {
            // start the thread to handle input. we pass the UDFContext to the
            // fileInputThread because when input type is asynchronous, the
            // exec() is called by fileInputThread, and it needs to access to
            // the UDFContext.
            fileInputThread = new ProcessInputThread(
                    inputHandler, poStream, UDFContext.getUDFContext());
            fileInputThread.start();

            // If Input type is ASYNCHRONOUS that means input to the
            // streaming binary is from a file - that means we cannot exec
            // the process till the input file is completely written. This
            // will be done in close() - so now we return
            return;
        }

        // Start the executable ...
        exec();
        // set up input to the executable
        stdin = new DataOutputStream(new BufferedOutputStream(process
                .getOutputStream()));
        inputHandler.bindTo(stdin);

        // Start the thread to send input to the executable's stdin
        stdinThread = new ProcessInputThread(inputHandler, poStream, null);
        stdinThread.start();
    }

    /**
     * The thread which consumes input from POStream's binaryInput queue
     * and feeds it to the the Process
     */
    class ProcessInputThread extends Thread {

        InputHandler inputHandler;
        private POStream poStream;
        private UDFContext udfContext;
        private BlockingQueue<Result> binaryInputQueue;

        ProcessInputThread(InputHandler inputHandler, POStream poStream, UDFContext udfContext) {
            setDaemon(true);
            this.inputHandler = inputHandler;
            this.poStream = poStream;
            // a copy of UDFContext passed from the ExecutableManager thread
            this.udfContext = udfContext;
            // the input queue from where this thread will read
            // input tuples
            this.binaryInputQueue = poStream.getBinaryInputQueue();
        }

        @Override
        public void run() {
            // If input type is asynchronous, set the udfContext of the current
            // thread to the copy of ExecutableManager thread's udfContext. This
            // is necessary because the exec() method is called by the current
            // thread (fileInputThread) instead of the ExecutableManager thread.
            if (inputHandler.getInputType() == InputType.ASYNCHRONOUS && udfContext != null) {
                UDFContext.setUdfContext(udfContext);
            }
            try {
                // Read tuples from the previous operator in the pipeline
                // and pass it to the executable
                while (true) {
                    Result inp = null;
                    inp = binaryInputQueue.take();
                    synchronized (poStream) {
                        // notify waiting producer
                        // the if check is to keep "findbugs"
                        // happy
                        if(inp != null)
                            poStream.notifyAll();
                    }
                    // We should receive an EOP only when *ALL* input
                    // for this process has already been sent and no
                    // more input is expected
                    if (inp != null && inp.returnStatus == POStatus.STATUS_EOP) {
                        // signal cleanup in ExecutableManager
                        close();
                        return;
                    }
                    if (inp != null && inp.returnStatus == POStatus.STATUS_OK) {
                        // Check if there was a problem with the managed process
                        if (outerrThreadsError != null) {
                            throw new IOException(
                                    "Output/Error thread failed with: "
                                            + outerrThreadsError);
                        }

                        // Pass the serialized tuple to the executable via the
                        // InputHandler
                        Tuple t = null;
                        try {
                            t = (Tuple) inp.result;
                            inputHandler.putNext(t);
                        } catch (IOException e) {
                            // if input type is synchronous then it could
                            // be related to the process terminating
                            if(inputHandler.getInputType() == InputType.SYNCHRONOUS) {
                                LOG.warn("Exception while trying to write to stream binary's input", e);
                                // could be because the process
                                // died OR closed the input stream
                                // we will only call close() here and not
                                // worry about deducing whether the process died
                                // normally or abnormally - if there was any real
                                // issue the ProcessOutputThread should see
                                // a non zero exit code from the process and send
                                // a POStatus.STATUS_ERR back - what if we got
                                // an IOException because there was only an issue with
                                // writing to input of the binary - hmm..hope that means
                                // the process died abnormally!!
                                close();
                                return;
                            } else {
                                // asynchronous case - then this is a real exception
                                LOG.error("Exception while trying to write to stream binary's input", e);
                                // send POStatus.STATUS_ERR to POStream to signal the error
                                // Generally the ProcessOutputThread would do this but now
                                // we should do it here since neither the process nor the
                                // ProcessOutputThread will ever be spawned
                                Result res = new Result(POStatus.STATUS_ERR,
                                        "Exception while trying to write to stream binary's input" + e.getMessage());
                                sendOutput(poStream.getBinaryOutputQueue(), res);
                                throw e;
                            }
                        }
                        inputBytes += t.getMemorySize();
                        inputRecords++;
                    }
                }
            } catch (Throwable t) {


                // Note that an error occurred
                outerrThreadsError = t;
                LOG.error(t);
                try {
                    killProcess(process);
                } catch (IOException ioe) {
                    LOG.warn(ioe);
                }
            }
        }
    }

    private void sendOutput(BlockingQueue<Result> binaryOutputQueue, Result res) {
        try {
            binaryOutputQueue.put(res);
        } catch (InterruptedException e) {
            LOG.error("Error while sending binary output to POStream", e);
        }
        synchronized (poStream) {
            // notify waiting consumer
            // the if is to satisfy "findbugs"
            if(res != null) {
                poStream.notifyAll();
            }
        }
    }

    /**
     * The thread which gets output from the streaming binary and puts it onto
     * the binary output Queue of POStream
     */
    class ProcessOutputThread extends Thread {

        OutputHandler outputHandler;
        private BlockingQueue<Result> binaryOutputQueue;

        ProcessOutputThread(OutputHandler outputHandler, POStream poStream) {
            setDaemon(true);
            this.outputHandler = outputHandler;
            // the output queue where this thread will put
            // output tuples for POStream
            this.binaryOutputQueue = poStream.getBinaryOutputQueue();
        }

        @Override
        public void run() {
            try {
                // Read tuples from the executable and send it to
                // Queue of POStream
                Tuple tuple = null;
                while ((tuple = outputHandler.getNext()) != null) {
                    processOutput(tuple);
                    outputBytes += tuple.getMemorySize();
                }
                // output from binary is done
                processOutput(null);
                outputHandler.close();
            } catch (Throwable t) {
                // Note that an error occurred
                outerrThreadsError = t;
                LOG.error("Caught Exception in OutputHandler of Streaming binary, " +
                        "sending error signal to pipeline", t);
                // send ERROR to POStream
                try {
                    Result res = new Result();
                    res.result = "Error reading output from Streaming binary:" +
                            "'" + argvAsString + "':" + t.getMessage();
                    res.returnStatus = POStatus.STATUS_ERR;
                    sendOutput(binaryOutputQueue, res);
                    killProcess(process);
                } catch (Exception e) {
                    LOG.error("Error while trying to signal Error status to pipeline", e);
                }
            }
        }

        void processOutput(Tuple t) {
            Result res = new Result();

            if (t != null) {
                // we have a valid tuple to pass back
                res.result = t;
                res.returnStatus = POStatus.STATUS_OK;
                outputRecords++;
            } else {
                // t == null means end of output from
                // binary - wait for the process to exit
                // and harvest exit code
                try {
                    exitCode = process.waitFor();
                } catch (InterruptedException ie) {
                    try {
                        killProcess(process);
                    } catch (IOException e) {
                        LOG.warn("Exception trying to kill process while processing null output " +
                                "from binary", e);

                    }
                    // signal error
                    String errMsg = "Failure while waiting for process (" + argvAsString + ")" +
                            ie.getMessage();
                    LOG.error(errMsg, ie);
                    res.result = errMsg;
                    res.returnStatus = POStatus.STATUS_ERR;
                    sendOutput(binaryOutputQueue, res);
                    return;
                }
                if(exitCode == 0) {
                    // signal EOS (End Of Stream output)
                    res = EOS_RESULT;
                } else {
                    // signal Error

                    String errMsg = "'" + argvAsString + "'" + " failed with exit status: "
                            + exitCode;
                    LOG.error(errMsg);
                    res.result = errMsg;
                    res.returnStatus = POStatus.STATUS_ERR;
                }
            }
            sendOutput(binaryOutputQueue, res);

        }
    }



    /**
     * Workhorse to process the stderr stream of the managed process.
     *
     * By default <code>ExecutableManager</code> just sends out the received
     * error message to the <code>stderr</code> of itself. ProcessErrorThread
     * calls this once per line read from the process, with a newline
     * appended to the line.
     *
     * @param error
     *            error message from the managed process.
     */
    protected void processError(String error) {
        // Just send it out to our stderr; print (not println) because the
        // caller has already appended the line terminator
        System.err.print(error);
    }

    class ProcessErrorThread extends Thread {

        public ProcessErrorThread() {
            setDaemon(true);
        }

        @Override
        public void run() {
            try {
                String error;
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(stderr));
                while ((error = reader.readLine()) != null) {
                    processError(error + "\n");
                }

                if (stderr != null) {
                    stderr.close();
                    LOG.debug("ProcessErrorThread done");
                }
            } catch (Throwable t) {
                // Note that an error occurred
                outerrThreadsError = t;

                LOG.error(t);
                try {
                    if (stderr != null) {
                        stderr.close();
                    }
                } catch (IOException ioe) {
                    LOG.warn(ioe);
                }
                throw new RuntimeException(t);
            }
        }
    }

}
TOP

Related Classes of org.apache.pig.impl.streaming.ExecutableManager$ProcessErrorThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.