Package org.apache.hadoop.mapreduce.v2.app

Source Code of org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler$PingChecker

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapreduce.v2.app;

import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.yarn.Clock;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.service.AbstractService;


/**
* This class keeps track of tasks that have already been launched. It
* determines if a task is alive and running or marks a task as dead if it does
* not hear from it for a long time.
*
*/
@SuppressWarnings({"unchecked", "rawtypes"})
public class TaskHeartbeatHandler extends AbstractService {

  private static final Log LOG = LogFactory.getLog(TaskHeartbeatHandler.class);

  //thread which runs periodically to see the last time since a heartbeat is
  //received from a task.
  private Thread lostTaskCheckerThread;
  private volatile boolean stopped;
  private int taskTimeOut = 5 * 60 * 1000;// 5 mins
  private int taskTimeOutCheckInterval = 30 * 1000; // 30 seconds.

  private final EventHandler eventHandler;
  private final Clock clock;

  private ConcurrentMap<TaskAttemptId, Long> runningAttempts;

  public TaskHeartbeatHandler(EventHandler eventHandler, Clock clock,
      int numThreads) {
    super("TaskHeartbeatHandler");
    this.eventHandler = eventHandler;
    this.clock = clock;
    runningAttempts =
      new ConcurrentHashMap<TaskAttemptId, Long>(16, 0.75f, numThreads);
  }

  @Override
  public void init(Configuration conf) {
    super.init(conf);
    taskTimeOut = conf.getInt(MRJobConfig.TASK_TIMEOUT, 5 * 60 * 1000);
    taskTimeOutCheckInterval =
        conf.getInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 30 * 1000);
  }

  @Override
  public void start() {
    lostTaskCheckerThread = new Thread(new PingChecker());
    lostTaskCheckerThread.setName("TaskHeartbeatHandler PingChecker");
    lostTaskCheckerThread.start();
    super.start();
  }

  @Override
  public void stop() {
    stopped = true;
    lostTaskCheckerThread.interrupt();
    super.stop();
  }

  public void receivedPing(TaskAttemptId attemptID) {
  //only put for the registered attempts
    //TODO throw an exception if the task isn't registered.
    runningAttempts.replace(attemptID, clock.getTime());
  }

  public void register(TaskAttemptId attemptID) {
    runningAttempts.put(attemptID, clock.getTime());
  }

  public void unregister(TaskAttemptId attemptID) {
    runningAttempts.remove(attemptID);
  }

  private class PingChecker implements Runnable {

    @Override
    public void run() {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        Iterator<Map.Entry<TaskAttemptId, Long>> iterator =
            runningAttempts.entrySet().iterator();

        // avoid calculating current time everytime in loop
        long currentTime = clock.getTime();

        while (iterator.hasNext()) {
          Map.Entry<TaskAttemptId, Long> entry = iterator.next();
          if (currentTime > entry.getValue() + taskTimeOut) {

            //In case the iterator isn't picking up the latest.
            // Extra lookup outside of the iterator - but only if the task
            // is considered to be timed out.
            Long taskTime = runningAttempts.get(entry.getKey());
            if (taskTime != null && currentTime > taskTime + taskTimeOut) {
              // task is lost, remove from the list and raise lost event
              iterator.remove();
              eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry
                  .getKey(), "AttemptID:" + entry.getKey().toString()
                  + " Timed out after " + taskTimeOut / 1000 + " secs"));
              eventHandler.handle(new TaskAttemptEvent(entry.getKey(),
                  TaskAttemptEventType.TA_TIMED_OUT));
            }

          }
        }
        try {
          Thread.sleep(taskTimeOutCheckInterval);
        } catch (InterruptedException e) {
          LOG.info("TaskHeartbeatHandler thread interrupted");
          break;
        }
      }
    }
  }

}
TOP

Related Classes of org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler$PingChecker

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.