Source Code of org.apache.hadoop.mapreduce.task.reduce.EventFetcher

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.task.reduce;

import java.io.IOException;
import java.net.URI;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.TaskAttemptID;

@SuppressWarnings("deprecation")
class EventFetcher<K,V> extends Thread {
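  // Polling interval (ms) between event fetches, the maximum number of
  // events requested per RPC, and the retry policy (max consecutive
  // failures and back-off in ms) for transient fetch errors.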
  private static final long SLEEP_TIME = 1000;
  private static final int MAX_EVENTS_TO_FETCH = 10000;
  private static final int MAX_RETRIES = 10;
  private static final int RETRY_PERIOD = 5000;
  private static final Log LOG = LogFactory.getLog(EventFetcher.class);

  private final TaskAttemptID reduce;
  private final TaskUmbilicalProtocol umbilical;
  private final ShuffleScheduler<K,V> scheduler;
  private int fromEventId = 0;
  private ExceptionReporter exceptionReporter = null;
 
  private int maxMapRuntime = 0;

  private volatile boolean stopped = false;
 
  public EventFetcher(TaskAttemptID reduce,
                      TaskUmbilicalProtocol umbilical,
                      ShuffleScheduler<K,V> scheduler,
                      ExceptionReporter reporter) {
    setName("EventFetcher for fetching Map Completion Events");
    setDaemon(true);   
    this.reduce = reduce;
    this.umbilical = umbilical;
    this.scheduler = scheduler;
    exceptionReporter = reporter;
  }

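  /**
   * Polls the umbilical for map-completion events until the fetcher is
   * stopped or interrupted, sleeping {@code SLEEP_TIME} ms between polls
   * and giving up after {@code MAX_RETRIES} consecutive fetch failures.
   */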
  @Override
  public void run() {
    int failures = 0;
    LOG.info(reduce + " Thread started: " + getName());
   
    try {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        try {
          int numNewMaps = getMapCompletionEvents();
          failures = 0;
          if (numNewMaps > 0) {
            LOG.info(reduce + ": Got " + numNewMaps + " new map-outputs");
          }
          LOG.debug("GetMapEventsThread about to sleep for " + SLEEP_TIME);
          if (!Thread.currentThread().isInterrupted()) {
            Thread.sleep(SLEEP_TIME);
          }
        } catch (InterruptedException e) {
          LOG.info("EventFetcher is interrupted.. Returning");
          return;
        } catch (IOException ie) {
          LOG.info("Exception in getting events", ie);
          // check to see whether to abort
          if (++failures >= MAX_RETRIES) {
            throw new IOException("too many failures downloading events", ie);
          }
          // sleep for a bit
          if (!Thread.currentThread().isInterrupted()) {
            Thread.sleep(RETRY_PERIOD);
          }
        }
      }
    } catch (InterruptedException e) {
      return;
    } catch (Throwable t) {
      exceptionReporter.reportException(t);
      return;
    }
  }

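  /**
   * Signals the fetcher to stop, interrupts the thread, and waits up to
   * five seconds for it to finish.
   */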
  public void shutDown() {
    this.stopped = true;
    interrupt();
    try {
      join(5000);
    } catch(InterruptedException ie) {
      LOG.warn("Got interrupted while joining " + getName(), ie);
    }
  }
 
  /**
   * Queries the task tracker, via the umbilical protocol, for a batch of
   * map-completion events starting from the current event ID.
   * @return the number of map outputs that newly completed successfully
   * @throws IOException
   */
  private int getMapCompletionEvents() throws IOException {
   
    int numNewMaps = 0;
   
    MapTaskCompletionEventsUpdate update =
      umbilical.getMapCompletionEvents((org.apache.hadoop.mapred.JobID)
                                       reduce.getJobID(),
                                       fromEventId,
                                       MAX_EVENTS_TO_FETCH,
                                       (org.apache.hadoop.mapred.TaskAttemptID)
                                         reduce);
    TaskCompletionEvent[] events = update.getMapTaskCompletionEvents();
    LOG.debug("Got " + events.length + " map completion events from " +
             fromEventId);
     
    // Check if the reset is required.
    // Since there is no ordering of the task completion events at the
    // reducer, the only option to sync with the new jobtracker is to reset
    // the events index
    if (update.shouldReset()) {
      fromEventId = 0;
      scheduler.resetKnownMaps();
    }
   
    // Update the last seen event ID
    fromEventId += events.length;
   
    // Process the TaskCompletionEvents:
    // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
    // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
    //    fetching from those maps.
    // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
    //    outputs at all.
    for (TaskCompletionEvent event : events) {
      switch (event.getTaskStatus()) {
        case SUCCEEDED:
          URI u = getBaseURI(event.getTaskTrackerHttp());
          scheduler.addKnownMapOutput(u.getHost() + ":" + u.getPort(),
                                      u.toString(),
                                      event.getTaskAttemptId());
          numNewMaps ++;
          int duration = event.getTaskRunTime();
          if (duration > maxMapRuntime) {
            maxMapRuntime = duration;
            scheduler.informMaxMapRunTime(maxMapRuntime);
          }
          break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
          scheduler.obsoleteMapOutput(event.getTaskAttemptId());
          LOG.info("Ignoring obsolete output of " + event.getTaskStatus() +
                   " map-task: '" + event.getTaskAttemptId() + "'");
          break;
        case TIPFAILED:
          scheduler.tipFailed(event.getTaskAttemptId().getTaskID());
          LOG.info("Ignoring output of failed map TIP: '"
               event.getTaskAttemptId() + "'");
          break;
      }
    }
    return numNewMaps;
  }
 
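  /**
   * Builds the base URL for fetching this reduce's partition of a map
   * output, of the form
   * {@code <tracker-http>/mapOutput?job=<jobid>&reduce=<partition>&map=},
   * leaving the map parameter empty so attempt IDs can be appended later.
   */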
  private URI getBaseURI(String url) {
    StringBuffer baseUrl = new StringBuffer(url);
    if (!url.endsWith("/")) {
      baseUrl.append("/");
    }
    baseUrl.append("mapOutput?job=");
    baseUrl.append(reduce.getJobID());
    baseUrl.append("&reduce=");
    baseUrl.append(reduce.getTaskID().getId());
    baseUrl.append("&map=");
    URI u = URI.create(baseUrl.toString());
    return u;
  }
}
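
For orientation, here is a minimal sketch of how a shuffle implementation in the same package might drive this class. The method and variable names below (runEventFetcher, reduceId, and so on) are illustrative placeholders, not part of the Hadoop API.

  // Illustrative sketch only: a hypothetical helper in the same package.
  // The caller is assumed to supply the reduce attempt ID, the umbilical
  // proxy, the shuffle scheduler, and an exception reporter.
  private static <K,V> void runEventFetcher(TaskAttemptID reduceId,
                                            TaskUmbilicalProtocol umbilical,
                                            ShuffleScheduler<K,V> scheduler,
                                            ExceptionReporter reporter) {
    EventFetcher<K,V> eventFetcher =
        new EventFetcher<K,V>(reduceId, umbilical, scheduler, reporter);
    eventFetcher.start();    // begins the polling loop in run()

    // ... copy and merge map outputs while events keep arriving ...

    eventFetcher.shutDown(); // stops polling and joins for up to 5 seconds
  }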