/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.corona.InetAddress;
/**
* Sends actions to Corona Task Trackers.
* Several threads are used for sending the actions so that a single
* dead task tracker does not block all actions. To preserve the order of
* operations on a single task attempt (like sending KillTaskAction after
* LaunchTaskAction), all actions for an attempt are queued to the same
* thread, chosen as (task number + attempt number) % numThreads. We do not
* need to preserve the order between task-level actions and the
* KillJobAction, since once we send KillJobAction, the job tracker will
* shut down anyway. So any actions sent after that will fail.
*/
public class CoronaTaskLauncher {
/** Logger used by the launcher and by its worker threads. */
private static final Log LOG = LogFactory.getLog(CoronaTaskLauncher.class);
/**
* The workers that send actions to task trackers. The array is sized from
* the launcher-thread count read in the constructor, and an action is
* assigned to a worker by index (round-robin in killJob, by
* workerIdForTask for task-level actions).
*/
private final ActionSender[] workers;
/**
* The pool of worker threads that send actions to task trackers;
* workerThreads[i] runs workers[i].
*/
private final Thread[] workerThreads;
/**
* The Corona Job Tracker. Supplies the session information, the task
* tracker clients and the error handling used when sending actions.
*/
private final CoronaJobTracker coronaJT;
/**
* The expiry logic. Launch actions are registered with it just before
* they are sent to the tracker.
*/
private final ExpireTasks expireTasks;
/**
 * Constructor. Creates one worker per configured launcher thread and starts
 * each of them on its own daemon thread.
 * @param conf The configuration; the number of launcher threads is read
 *        from "mapred.corona.jobtracker.numtasklauncherthreads" (default 4).
 * @param coronaJT The Corona Job Tracker.
 * @param expireTasks The expiry logic.
 * @throws IllegalArgumentException if the configured number of launcher
 *         threads is not positive.
 */
CoronaTaskLauncher(
    Configuration conf,
    CoronaJobTracker coronaJT,
    ExpireTasks expireTasks) {
  this.coronaJT = coronaJT;
  this.expireTasks = expireTasks;
  int numLauncherThreads = conf.getInt(
      "mapred.corona.jobtracker.numtasklauncherthreads", 4);
  // Fail fast on a bad configuration. With zero workers every later enqueue
  // would fail with a modulo-by-zero or an out-of-bounds index instead.
  if (numLauncherThreads < 1) {
    throw new IllegalArgumentException(
        "mapred.corona.jobtracker.numtasklauncherthreads must be positive," +
        " but was " + numLauncherThreads);
  }
  workers = new ActionSender[numLauncherThreads];
  workerThreads = new Thread[numLauncherThreads];
  for (int i = 0; i < numLauncherThreads; i++) {
    workers[i] = new ActionSender(i);
    workerThreads[i] = new Thread(workers[i]);
    workerThreads[i].setName("Task Launcher Thread #" + i);
    // Daemon threads, so the workers never keep the JVM alive on their own.
    workerThreads[i].setDaemon(true);
    workerThreads[i].start();
  }
}
/**
 * Queue a kill-job action for every tracker in the given map.
 * A separate KillJobAction is created for each tracker, and the actions are
 * handed to the workers in round-robin order starting with worker 0.
 * @param jobId The job identifier.
 * @param allTrackers All trackers to send the kill to, keyed by tracker
 *        name and mapped to the tracker address.
 */
@SuppressWarnings("deprecation")
public void killJob(JobID jobId, Map<String, InetAddress> allTrackers) {
  int slot = 0;
  for (Map.Entry<String, InetAddress> tracker : allTrackers.entrySet()) {
    final String name = tracker.getKey();
    final InetAddress address = tracker.getValue();
    final String what = "KillJobAction " + jobId;
    workers[slot].enqueueAction(
        new ActionToSend(name, address, new KillJobAction(jobId), what));
    LOG.info("Queueing " + what + " to worker " + slot + " " +
        name + "(" + address.host + ":" + address.port + ")");
    // Advance to the next worker, wrapping around at the end of the array.
    slot++;
    if (slot == workers.length) {
      slot = 0;
    }
  }
}
/**
 * Queue a batch of kill-task actions for one tracker.
 * Each action goes to the worker selected by workerIdForTask for its task
 * attempt, so it lands on the same worker as other actions of that attempt.
 * @param trackerName The name of the tracker to send the kill actions to.
 * @param addr The address of the tracker to send the kill actions to.
 * @param killActions The kill actions to send.
 */
public void killTasks(
    String trackerName, InetAddress addr, List<KillTaskAction> killActions) {
  for (KillTaskAction kill : killActions) {
    final int slot = workerIdForTask(kill.getTaskID());
    final String what = "KillTaskAction " + kill.getTaskID();
    final String target =
        trackerName + "(" + addr.host + ":" + addr.port + ")";
    LOG.info("Queueing " + what + " to worker " + slot + " " + target);
    ActionToSend pending = new ActionToSend(trackerName, addr, kill, what);
    workers[slot].enqueueAction(pending);
  }
}
/**
 * Enqueue a commit task action.
 * The action goes to the worker selected by workerIdForTask for its task
 * attempt.
 * @param trackerName The name of the tracker to send the commit action to.
 * @param addr The address of the tracker to send the commit action to.
 * @param action The commit action to send.
 */
public void commitTask(
    String trackerName, InetAddress addr, CommitTaskAction action) {
  int workerId = workerIdForTask(action.getTaskID());
  // The description labels this action in every log line about it, so it
  // must name the real action type (it previously said "KillTaskAction").
  String description = "CommitTaskAction " + action.getTaskID();
  LOG.info("Queueing " + description + " to worker " + workerId + " " +
      trackerName + "(" + addr.host + ":" + addr.port + ")");
  workers[workerId].enqueueAction(new ActionToSend(
      trackerName, addr, action, description));
}
/**
 * Remove a launch action that is still queued for the given attempt.
 * Only the worker selected by workerIdForTask for this attempt is searched.
 * @param attempt The task attempt ID.
 * @return A boolean indicating if an enqueued action was removed.
 */
@SuppressWarnings("deprecation")
public boolean removeLaunchingTask(TaskAttemptID attempt) {
  return workers[workerIdForTask(attempt)].removeLaunchingTask(attempt);
}
/**
 * Queue a launch action for a task.
 * The action carries session information built from the job tracker's
 * session id and address, and goes to the worker selected by
 * workerIdForTask for the task's attempt ID.
 * @param task The task to launch.
 * @param trackerName The name of the tracker to send the task to.
 * @param addr The address of the tracker to send the task to.
 */
public void launchTask(Task task, String trackerName, InetAddress addr) {
  LaunchTaskAction launch = new LaunchTaskAction(
      task,
      new CoronaSessionInfo(
          coronaJT.getSessionId(), coronaJT.getJobTrackerAddress()));
  String what = "LaunchTaskAction " + launch.getTask().getTaskID();
  ActionToSend pending = new ActionToSend(trackerName, addr, launch, what);
  int slot = workerIdForTask(task.getTaskID());
  String target = trackerName + "(" + addr.host + ":" + addr.port + ")";
  LOG.info("Queueing " + what + " to worker " + slot + " " + target);
  workers[slot].enqueueAction(pending);
}
/**
* Represents an action to send to a task tracker.
*/
private class ActionToSend {
/** The host of the tracker. */
private final String trackerHost;
/** The name of the tracker. */
private final String trackerName;
/** The port of the tracker. */
private final int port;
/** The action to send. */
private final TaskTrackerAction ttAction;
/** Description for logging. */
private final String description;
/** Action creation time */
private final long ctime = System.currentTimeMillis();
/** Constructor
* @param trackerName The name of the tracker.
* @param addr The address of the tracker.
* @param action The action to send.
*/
private ActionToSend(String trackerName, InetAddress addr,
TaskTrackerAction action, String description) {
this.trackerName = trackerName;
this.trackerHost = addr.host;
this.port = addr.port;
this.ttAction = action;
this.description = description;
}
}
/**
* A worker that sends actions to trackers. All actions for a task are hashed
* to a single worker.
*/
private class ActionSender implements Runnable {
/**
* The queue of actions waiting to be sent by this worker. It also serves as
* the monitor: every access is made while synchronized on it, and the
* worker waits on it while it is empty.
*/
private final List<ActionToSend> workQueue = new LinkedList<ActionToSend>();
/** The worker identifier; used in this worker's startup log message. */
private final int id;
/**
 * Creates a worker with an initially empty queue.
 * @param id The identifier of the worker, reported in its startup log line.
 */
public ActionSender(int id) {
  this.id = id;
}
/**
 * Main loop of the worker: logs that it started, then sends batches of
 * queued actions forever. An interrupt does not end the loop; it is only
 * logged at debug level and the worker goes back to waiting for work.
 */
@Override
public void run() {
  LOG.info("Starting TaskLauncher thread#" + id);
  for (;;) {
    try {
      launchTasks();
    } catch (InterruptedException interrupted) {
      // Ignore, these are daemon threads.
      if (LOG.isDebugEnabled()) {
        LOG.debug("Got InterruptedException while launching a task",
            interrupted);
      }
    }
  }
}
/**
 * Sends a bunch of tasks at a time. This is called repeatedly.
 * Blocks until the queue is non-empty, takes everything that is queued
 * while holding the queue lock, and then sends the actions one by one
 * without holding the lock.
 *
 * @throws InterruptedException if the thread is interrupted while waiting
 *         for work.
 */
private void launchTasks() throws InterruptedException {
  final List<ActionToSend> batch;
  synchronized (workQueue) {
    while (workQueue.isEmpty()) {
      workQueue.wait();
    }
    batch = new ArrayList<ActionToSend>(workQueue);
    workQueue.clear();
  }
  for (ActionToSend pending : batch) {
    deliver(pending);
  }
}

/**
 * Sends a single action to its tracker.
 * An action for a tracker currently marked faulty is not sent; it is handed
 * straight to the job tracker's launch-error handling. An IOException while
 * sending resets the tracker client, records a connection error for the
 * tracker and goes through the same launch-error handling.
 * @param pending The action to send.
 */
private void deliver(ActionToSend pending) {
  final String tracker = pending.trackerName;
  final String host = pending.trackerHost;
  final int port = pending.port;
  final TaskTrackerAction ttAction = pending.ttAction;

  if (coronaJT.getTrackerStats().isFaulty(tracker)) {
    LOG.warn("Not sending " + pending.description + " to " +
        host + ":" + port + " since previous communication failed");
    coronaJT.processTaskLaunchError(ttAction);
    return;
  }

  // Fill in the job tracker information.
  ttAction.setExtensible(new CoronaSessionInfo(
      coronaJT.getSessionId(), coronaJT.getJobTrackerAddress()));

  try {
    // Start the timer on the task just before making the connection
    // and RPC. If there are any errors after this point, we will reuse
    // the error handling for expired launch tasks.
    if (ttAction instanceof LaunchTaskAction) {
      expireTasks.addNewTask(
          ((LaunchTaskAction) ttAction).getTask().getTaskID());
    }
    coronaJT.getTaskTrackerClient(host, port)
        .submitActions(new TaskTrackerAction[]{ttAction});
  } catch (IOException sendFailure) {
    LOG.error("Could not send " + pending.description +
        " to " + host + ":" + port, sendFailure);
    coronaJT.resetTaskTrackerClient(host, port);
    coronaJT.getTrackerStats().recordConnectionError(tracker);
    coronaJT.processTaskLaunchError(ttAction);
  }

  // Time from the creation of the action until it was processed.
  long elapsedMsec = System.currentTimeMillis() - pending.ctime;
  LOG.info("Processed " + pending.description + " for " +
      tracker + " " + elapsedMsec + " msec after its creation.");
}
/**
 * Remove the first queued launch action for the given attempt, if any.
 * Only actions still waiting in this worker's queue can be removed.
 * @param attempt The task attempt ID.
 * @return A boolean indicating if a pending launch was removed.
 */
@SuppressWarnings("deprecation")
boolean removeLaunchingTask(TaskAttemptID attempt) {
  synchronized (workQueue) {
    for (Iterator<ActionToSend> it = workQueue.iterator(); it.hasNext();) {
      TaskTrackerAction candidate = it.next().ttAction;
      if (!(candidate instanceof LaunchTaskAction)) {
        // Only launch actions can be withdrawn.
        continue;
      }
      LaunchTaskAction launch = (LaunchTaskAction) candidate;
      if (launch.getTask().getTaskID().equals(attempt)) {
        it.remove();
        return true;
      }
    }
  }
  return false;
}
/**
 * Append an action to this worker's queue and wake the worker if it is
 * waiting for work.
 * @param a The action.
 */
public void enqueueAction(ActionToSend a) {
  synchronized (workQueue) {
    workQueue.add(a);
    // Signal the thread waiting on the queue in launchTasks().
    workQueue.notify();
  }
}
} // Worker
/**
 * Pick the worker for a task attempt.
 * The index is (task number + attempt number) modulo the number of workers,
 * so every action of one attempt goes to the same thread, while different
 * attempts of the same task go to different threads. That helps when a
 * thread gets stuck: the next attempt of the task can go to another thread.
 * @param attemptID The task attempt.
 * @return The ID.
 */
@SuppressWarnings("deprecation")
private int workerIdForTask(TaskAttemptID attemptID) {
  return (attemptID.getTaskID().getId() + attemptID.getId())
      % workers.length;
}
}