Package org.apache.hadoop.mapred

Source Code of org.apache.hadoop.mapred.CoronaJTFallbackCaller

package org.apache.hadoop.mapred;

import java.io.IOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;

import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.mapred.CoronaSessionInfo.InetSocketAddressWritable;

import com.sun.org.apache.commons.logging.Log;
import com.sun.org.apache.commons.logging.LogFactory;

/**
* Generic caller that tries to reconnect to new job tracker on error in RPC
* call. New job tracker address is obtained from
* @param <T> return type of called function
*/
@SuppressWarnings("deprecation")
public abstract class CoronaJTFallbackCaller<T> {
  /** Default wait time between queries to secondary tracker until remote JT is
   * completely restarted, in msec */
  private static final int SECONDARY_TRACKER_QUERIES_INTERVAL = 10000;
  /** Default timeout for connecting to secondary tracker address */
  private static final long SECONDARY_TRACKER_CONNECT_TIMEOUT = 30000;
  /** Max number of times we can ask for new JT address and get response to back
   * off after single crash */
  private static final int SECONDARY_TRACKER_MAX_BACKOFF = 600;
  /** Logger */
  private static final Log LOG = LogFactory
      .getLog(CoronaJTFallbackCaller.class);
  /** Max number the Fallback caller can connect the new address
   *  In some cases, when the Fallback caller get the new job tracker address
   *  and try to connect the new job tracker, it will find the new job tracker get
   *  lost again. So we need to call reconnectToNewJobTracker () recursively, This
   *  number limit max number we do recursion.
   * */
  private static final int CONNECT_MAX_NUMBER = 8;

  /**
   * Perform the call. Must be overridden by a sub-class.
   * @return The generic return value.
   * @throws IOException
   */
  protected abstract T call() throws IOException;

  /**
   * Prediticate determining if should try again after getting information, that
   * remote JT is during restarting process.
   * @param retryNum numbet of performed retry retry (zeroed after call failure)
   * @return true if should retry
   */
  protected boolean predRetry(int retryNum) {
    return retryNum <= SECONDARY_TRACKER_MAX_BACKOFF;
  }

  /**
   * Provides implementation of wait mechanism between quering secondary tracker
   * for new remote JT address.
   */
  protected void waitRetry() throws InterruptedException {
    synchronized (this) {
      this.wait(SECONDARY_TRACKER_QUERIES_INTERVAL);
    }
  }

  /**
   * Opens client with provided address
   * @param address
   * @throws IOException
   */
  protected abstract void connect(InetSocketAddress address) throws IOException;

  /**
   * Closes RPC client
   */
  protected abstract void shutdown();

  /**
   * Get current RPC address
   * @return current address of RPC clients destination
   */
  protected abstract InetSocketAddress getCurrentClientAddress();

  /**
   * Returns job configuration
   * @return job conf
   */
  protected abstract JobConf getConf();

  /**
   * Gets secondary tracker address
   * @return secondary fallback address
   */
  protected abstract InetSocketAddress getSecondaryTracker();
 
  /**
   * When IO Exception happened, call this function to handle it
   */
  protected abstract void handleIOException(IOException e) throws IOException;

  /**
   * Template function to make the call. Throws if can not fallback.
   * @return The generic return value.
   * @throws IOException
   */
  public final T makeCall() throws IOException {
    while (true) {
      try {
        return call();
      } catch (ConnectException e) {
        // We fall back only after ConnectException
        try {
          // Fall back to secondary tracker and reconnect to new JT
          reconnectToNewJobTracker(0);
        } catch (IOException f) {
          LOG.error("Fallback process failed with ", f);
          // Re-throw original exception
          throw e;
        }
      } catch (IOException e) {
        // the subclass of fallback caller should provide
        // logic here. We will retry in most cases
        handleIOException(e);
      }
    }
  }

  /**
   * Reconnects to new address obtained from secondary address via
   * InterCoronaTrackerProtocol
   * @throws IOException
   */
  private final void reconnectToNewJobTracker(int connectNum) throws IOException {
    if (connectNum >= CONNECT_MAX_NUMBER) {
      LOG.error("reconnectToNewJobTracker has reached its max number.");
      throw new IOException("reconnectToNewJobTracker has reached its max number.");
    }
   
    InetSocketAddress secondaryTracker = getSecondaryTracker();
    JobConf conf = getConf();
    InetSocketAddress oldAddress = getCurrentClientAddress();

    LOG.info("Falling back from " + oldAddress + " to secondary tracker at "
        + secondaryTracker + " with " + connectNum + " try");
    if (secondaryTracker == null)
      throw new IOException("Secondary address not provided.");

    shutdown();
    InterCoronaJobTrackerProtocol secondaryClient = RPC.waitForProxy(
        InterCoronaJobTrackerProtocol.class,
        InterCoronaJobTrackerProtocol.versionID, secondaryTracker, conf,
        SECONDARY_TRACKER_CONNECT_TIMEOUT);
    // Obtain new address
    InetSocketAddressWritable oldAddrWritable = new InetSocketAddressWritable(
        oldAddress);
    InetSocketAddressWritable newAddress = null;
    int retryNum = 0;
    do {
      newAddress = secondaryClient.getNewJobTrackerAddress(oldAddrWritable);
      try {
        waitRetry();
      } catch (InterruptedException e) {
        LOG.error("Fallback interrupted, taking next retry.");
      }
      ++retryNum;
    } while (newAddress == null && predRetry(retryNum));

    if (newAddress == null || newAddress.getAddress() == null)
      throw new IOException("Failed to obtain new job tracker address.");

    RPC.stopProxy(secondaryClient);
    try {
      connect(newAddress.getAddress());
      LOG.info("Fallback process successful: " + newAddress.getAddress());
    } catch (IOException e) {
      LOG.error("Fallback connect to " + newAddress.getAddress() + " failed for ", e);
      reconnectToNewJobTracker(++connectNum);
    }
  }

}
TOP

Related Classes of org.apache.hadoop.mapred.CoronaJTFallbackCaller

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.