Package com.cloudera.flume.watchdog

Source Code of com.cloudera.flume.watchdog.Watchdog

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.watchdog;

import java.io.IOException;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.SelectableChannel;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.util.InputStreamPipe;

/**
* This is a is a watchdog program that starts its arguments as a sub process
* and restarts it if the program exists with any error code other than 0.
* stderr and stdout are redirected from the child program to output to the
* watchdog. Sub-processes are not restricted to java programs.
*
* Example usage (Starts the watchdog on the LocalFlumeAgent).
*
* java c.c.f.watchdog.Watchdog java c.c.f.agent.FlumeLocalAgent
*
* Caveats: This program is not helpful for programs that hang -- it assumes
* that program fail-exit. It is also assumed that the watchdog, being a small
* and simple program, is robust and will not fail.
*
* Future versions may determine if it is hung by using the SIGAR library and
* some heuristics to see if the watched process is dead (possibility: no cpu
* usage, no io, no syscalls). These will be more of an art than a science.
*
*
*/
public class Watchdog {
  static final Logger LOG = LoggerFactory.getLogger(Watchdog.class);

  String[] args;

  // intilized by laucnh
  Selector selector;
  Runtime rt;
  Process proc;
  InputStreamPipe outPipe, errPipe, watchdogPipe;
  PrintStream procIn;
  SelectableChannel stdout, stderr, stdin;
  boolean interactive;

  public Watchdog(String[] args, boolean interactive) {
    this.args = args.clone();
    this.interactive = interactive;
  }

  public Watchdog(String[] args) {
    this(args, false);
  }

  /**
   * This starts the different pumper threads, and opens all the piping
   * connections.
   *
   * @throws IOException
   */
  void startup() throws IOException {
    selector = Selector.open();

    rt = Runtime.getRuntime();
    proc = rt.exec(args);
    outPipe = new InputStreamPipe(proc.getInputStream());
    errPipe = new InputStreamPipe(proc.getErrorStream());
    if (interactive) {
      watchdogPipe = new InputStreamPipe(System.in);
      procIn = new PrintStream(proc.getOutputStream());
    }

    stdout = outPipe.getChannel();
    stderr = errPipe.getChannel();
    if (interactive) {
      stdin = watchdogPipe.getChannel();
    }

    stdout.register(selector, SelectionKey.OP_READ);
    stderr.register(selector, SelectionKey.OP_READ);
    if (interactive) {
      stdin.register(selector, SelectionKey.OP_READ);
    }

    outPipe.start();
    errPipe.start();
    if (interactive) {
      watchdogPipe.start();
    }
    // if watchdog gets shutdown, make sure the subprocess is closed as well
    rt.addShutdownHook(new Thread() {
      public void run() {
        synchronized (Watchdog.this) {
          LOG.info("Watchdog shutdown hook");
          if (proc != null) {
            proc.destroy();
          }
        }
      }
    });
  }

  // This prevents eventual exhaustion by watch dog due to file handle
  // exhaustion and thread frame exhaustion.
  // # of open handles can be checked by doing an "lsof | grep <pid> | wc"
  void shutdown() throws IOException {
    if (proc == null)
      return;

    if (interactive) {
      watchdogPipe.shutdown();
      watchdogPipe = null;
    }

    outPipe.shutdown(); // +1 thread
    outPipe = null;
    errPipe.shutdown(); // +1 thread
    errPipe = null;

    if (interactive) {
      procIn.close();
      procIn = null;
    }
    proc.getOutputStream().close(); // +1 file handle
    proc.getInputStream().close(); // +1 file handle
    proc.getErrorStream().close(); // +1 file handle
    proc.destroy();
    proc = null;

    if (interactive) {
      stdin.close();
      stdin = null;
    }
    stdout.close(); // +1 file handle
    stdout = null;
    stderr.close(); // +0 but never used in test case
    stderr = null;
    selector.close(); // +5 handles!
    selector = null;
  }

  /**
   * This version uses NIO to do unix style select input output redirection of
   * the child process. If the i/o streams are closed this likely means our
   * process is exiting, so exit and wait for process death.
   *
   * This all happens in a single thread but each pipe has its own thread for
   * moving data between the inputstream and ouptutstream
   *
   */
  public int launchAgent() throws IOException, InterruptedException {
    startup();
    ByteBuffer buffer = ByteBuffer.allocate(32);

    while (true) {
      selector.select(2000);
      Iterator<SelectionKey> it = selector.selectedKeys().iterator();
      if (!it.hasNext()) {
        // nothing to do, loop again.
        continue;
      }

      SelectionKey key = it.next();
      it.remove();
      buffer.clear();

      ReadableByteChannel channel = (ReadableByteChannel) key.channel();
      int count = channel.read(buffer);

      if (count < 0) {
        // EOF
        channel.close();
        break;
      }

      buffer.flip();

      while (buffer.hasRemaining()) {
        if (key.channel() == stdout) {
          System.out.print((char) buffer.get());
        } else if (key.channel() == stderr) {
          System.err.print((char) buffer.get());
        } else if (key.channel() == stdin) {
          procIn.print((char) buffer.get());
          procIn.flush();
        }
      }
    }

    int retval = proc.waitFor();

    shutdown();
    LOG.info("Subprocess exited with value " + retval);
    return retval;
  }

  public void run(int maxTriesPerMin) {
    ArrayList<Date> times = new ArrayList<Date>();

    while (true) {

      Date now = new Date();

      if (times.size() > maxTriesPerMin) {
        // drop entries that are too old to count against us
        ArrayList<Date> newTimes = new ArrayList<Date>();
        for (Date t : times) {
          Calendar c = Calendar.getInstance();
          c.setTime(t);
          c.add(Calendar.MINUTE, 1);
          Date t_plus1 = c.getTime();
          if (t_plus1.getTime() - now.getTime() > 0) {
            newTimes.add(t);
          }
        }

        // if it still to many recent tries, get the oldest and wait until it
        // should expire.
        times = newTimes;
        if (newTimes.size() > maxTriesPerMin) {
          try {
            Calendar c = Calendar.getInstance();
            c.setTime(times.get(0));
            c.add(Calendar.MINUTE, 1);
            Date old_plus1 = c.getTime();

            long delta = old_plus1.getTime() - now.getTime();
            LOG.warn("too many attempts failed per minute -- waiting for "
                + (delta / 1000) + "s");
            Thread.sleep(delta);
          } catch (InterruptedException e) {
            e.printStackTrace();
          }
        }
      }

      Date d = now;
      LOG.info("Restarting process @ " + d);
      times.add(d);
      try {
        int ret = launchAgent();
        if (ret == 0) {
          LOG.info("Subprocess exited cleanly, closing watchdog");
          break;
        }

      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
  }

  @Deprecated
  // TODO (jon) move to com.cloudera.util.Watchdog. Except for one config var,
  // there are no dependencies on Flume.
  public static void main(String[] argv) {
    if (argv.length == 0) {
      System.out.println("need to specify watched command as arguments");
      System.exit(-1);
    }

    String[] args = argv;

    FlumeConfiguration conf = FlumeConfiguration.hardExitLoadConfig();
    int maxTriesPerMin = conf.getMaxRestartsPerMin();

    Watchdog watchdog = new Watchdog(args);
    watchdog.run(maxTriesPerMin);
  }
}
TOP

Related Classes of com.cloudera.flume.watchdog.Watchdog

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.