Package org.apache.tez.dag.app.rm

Source Code of org.apache.tez.dag.app.rm.TaskScheduler$HeldContainer

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.dag.app.rm;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.RackResolver;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.TezUncheckedException;
import org.apache.tez.dag.app.AppContext;
import org.apache.tez.dag.app.DAGAppMasterState;
import org.apache.tez.dag.app.rm.TaskScheduler.TaskSchedulerAppCallback.AppFinalStatus;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/* TODO not yet updating cluster nodes on every allocate response
* from RMContainerRequestor
   import org.apache.tez.dag.app.rm.node.AMNodeEventNodeCountUpdated;
    if (clusterNmCount != lastClusterNmCount) {
      LOG.info("Num cluster nodes changed from " + lastClusterNmCount + " to "
          + clusterNmCount);
      eventHandler.handle(new AMNodeEventNodeCountUpdated(clusterNmCount));
    }
*/
public class TaskScheduler extends AbstractService
                             implements AMRMClientAsync.CallbackHandler {
  private static final Log LOG = LogFactory.getLog(TaskScheduler.class);

  public interface TaskSchedulerAppCallback {
    public class AppFinalStatus {
      public final FinalApplicationStatus exitStatus;
      public final String exitMessage;
      public final String postCompletionTrackingUrl;
      public AppFinalStatus(FinalApplicationStatus exitStatus,
                             String exitMessage,
                             String posCompletionTrackingUrl) {
        this.exitStatus = exitStatus;
        this.exitMessage = exitMessage;
        this.postCompletionTrackingUrl = posCompletionTrackingUrl;
      }
    }
    // upcall to app must be outside locks
    public void taskAllocated(Object task,
                               Object appCookie,
                               Container container);
    // this may end up being called for a task+container pair that the app
    // has not heard about. this can happen because of a race between
    // taskAllocated() upcall and deallocateTask() downcall
    public void containerCompleted(Object taskLastAllocated,
                                    ContainerStatus containerStatus);
    public void containerBeingReleased(ContainerId containerId);
    public void nodesUpdated(List<NodeReport> updatedNodes);
    public void appShutdownRequested();
    public void setApplicationRegistrationData(
                                Resource maxContainerCapability,
                                Map<ApplicationAccessType, String> appAcls,
                                ByteBuffer clientAMSecretKey
                                );
    public void onError(Throwable t);
    public float getProgress();
    public AppFinalStatus getFinalAppStatus();
  }

  public interface ContainerSignatureMatcher {
    /**
     * Checks the compatibility between the specified container signatures.
     *
     * @return true if the first signature is a super set of the second
     *         signature.
     */
    public boolean isSuperSet(Object cs1, Object cs2);
   
    /**
     * Checks if the container signatures match exactly
     * @return true if exact match
     */
    public boolean isExactMatch(Object cs1, Object cs2);
  }

  final TezAMRMClientAsync<CookieContainerRequest> amRmClient;
  final TaskSchedulerAppCallback realAppClient;
  final TaskSchedulerAppCallback appClientDelegate;
  final ContainerSignatureMatcher containerSignatureMatcher;
  ExecutorService appCallbackExecutor;

  // Container Re-Use configuration
  private boolean shouldReuseContainers;
  private boolean reuseRackLocal;
  private boolean reuseNonLocal;

  Map<Object, CookieContainerRequest> taskRequests =
                  new HashMap<Object, CookieContainerRequest>();
  // LinkedHashMap is need in getProgress()
  LinkedHashMap<Object, Container> taskAllocations =
                  new LinkedHashMap<Object, Container>();
  /**
   * Tracks last task assigned to a known container.
   */
  Map<ContainerId, Object> containerAssignments =
                  new HashMap<ContainerId, Object>();
  HashMap<ContainerId, Object> releasedContainers =
                  new HashMap<ContainerId, Object>();
  /**
   * Map of containers currently being held by the TaskScheduler.
   */
  Map<ContainerId, HeldContainer> heldContainers =
      new HashMap<ContainerId, HeldContainer>();
 
  Resource totalResources = Resource.newInstance(0, 0);
  Resource allocatedResources = Resource.newInstance(0, 0);
 
  final String appHostName;
  final int appHostPort;
  final String appTrackingUrl;
  final AppContext appContext;

  boolean isStopped = false;

  private ContainerAssigner NODE_LOCAL_ASSIGNER = new NodeLocalContainerAssigner();
  private ContainerAssigner RACK_LOCAL_ASSIGNER = new RackLocalContainerAssigner();
  private ContainerAssigner NON_LOCAL_ASSIGNER = new NonLocalContainerAssigner();

  DelayedContainerManager delayedContainerManager;
  long localitySchedulingDelay;
  long sessionDelay;
 
  class CRCookie {
    // Do not use these variables directly. Can caused mocked unit tests to fail.
    private Object task;
    private Object appCookie;
    private Object containerSignature;
   
    CRCookie(Object task, Object appCookie, Object containerSignature) {
      this.task = task;
      this.appCookie = appCookie;
      this.containerSignature = containerSignature;
    }

    Object getTask() {
      return task;
    }

    Object getAppCookie() {
      return appCookie;
    }
   
    Object getContainerSignature() {
      return containerSignature;
    }
  }

  class CookieContainerRequest extends ContainerRequest {
    CRCookie cookie;

    public CookieContainerRequest(
        Resource capability,
        String[] hosts,
        String[] racks,
        Priority priority,
        CRCookie cookie) {
      super(capability, hosts, racks, priority);
      this.cookie = cookie;
    }

    CRCookie getCookie() {
      return cookie;
    }
  }

  public TaskScheduler(TaskSchedulerAppCallback appClient,
                        ContainerSignatureMatcher containerSignatureMatcher,
                        String appHostName,
                        int appHostPort,
                        String appTrackingUrl,
                        AppContext appContext) {
    super(TaskScheduler.class.getName());
    this.realAppClient = appClient;
    this.appCallbackExecutor = createAppCallbackExecutorService();
    this.containerSignatureMatcher = containerSignatureMatcher;
    this.appClientDelegate = createAppCallbackDelegate(appClient);
    this.amRmClient = TezAMRMClientAsync.createAMRMClientAsync(1000, this);
    this.appHostName = appHostName;
    this.appHostPort = appHostPort;
    this.appTrackingUrl = appTrackingUrl;
    this.appContext = appContext;
  }

  @Private
  @VisibleForTesting
  TaskScheduler(TaskSchedulerAppCallback appClient,
      ContainerSignatureMatcher containerSignatureMatcher,
      String appHostName,
      int appHostPort,
      String appTrackingUrl,
      TezAMRMClientAsync<CookieContainerRequest> client,
      AppContext appContext) {
    super(TaskScheduler.class.getName());
    this.realAppClient = appClient;
    this.appCallbackExecutor = createAppCallbackExecutorService();
    this.containerSignatureMatcher = containerSignatureMatcher;
    this.appClientDelegate = createAppCallbackDelegate(appClient);
    this.amRmClient = client;
    this.appHostName = appHostName;
    this.appHostPort = appHostPort;
    this.appTrackingUrl = appTrackingUrl;
    this.appContext = appContext;
  }

  private ExecutorService createAppCallbackExecutorService() {
    return Executors.newSingleThreadExecutor(new ThreadFactoryBuilder()
        .setNameFormat("TaskSchedulerAppCaller #%d").setDaemon(true).build());
  }
 
  public Resource getAvailableResources() {
    return amRmClient.getAvailableResources();
  }

  public int getClusterNodeCount() {
    return amRmClient.getClusterNodeCount();
  }

  TaskSchedulerAppCallback createAppCallbackDelegate(
      TaskSchedulerAppCallback realAppClient) {
    return new TaskSchedulerAppCallbackWrapper(realAppClient,
        appCallbackExecutor);
  }
 
  // AbstractService methods
  @Override
  public synchronized void serviceInit(Configuration conf) {

    amRmClient.init(conf);
    int heartbeatIntervalMax = conf.getInt(
        TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX,
        TezConfiguration.TEZ_AM_RM_HEARTBEAT_INTERVAL_MS_MAX_DEFAULT);
    amRmClient.setHeartbeatInterval(heartbeatIntervalMax);

    shouldReuseContainers = conf.getBoolean(
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED,
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_ENABLED_DEFAULT);
    reuseRackLocal = conf.getBoolean(
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED,
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED_DEFAULT);
    reuseNonLocal = conf
      .getBoolean(
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED,
        TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED_DEFAULT);
    Preconditions.checkArgument(
      ((!reuseRackLocal && !reuseNonLocal) || (reuseRackLocal)),
      "Re-use Rack-Local cannot be disabled if Re-use Non-Local has been"
      + " enabled");

    localitySchedulingDelay = conf.getLong(
      TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS,
      TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS_DEFAULT);
    Preconditions.checkArgument(localitySchedulingDelay >= 0,
        "Locality Scheduling delay should be >=0");

    sessionDelay = conf.getLong(
        TezConfiguration.TEZ_AM_CONTAINER_SESSION_DELAY_ALLOCATION_MILLIS,
        TezConfiguration.TEZ_AM_CONTAINER_SESSION_DELAY_ALLOCATION_MILLIS_DEFAULT);
    Preconditions.checkArgument(sessionDelay >= 0 || sessionDelay == -1,
      "Session delay should be either -1 or >=0");

    delayedContainerManager = new DelayedContainerManager();
    LOG.info("TaskScheduler initialized with configuration: " +
            "maxRMHeartbeatInterval: " + heartbeatIntervalMax +
            ", containerReuseEnabled: " + shouldReuseContainers +
            ", reuseRackLocal: " + reuseRackLocal +
            ", reuseNonLocal: " + reuseNonLocal +
            ", localitySchedulingDelay: " + localitySchedulingDelay +
            ", sessionDelay=" + sessionDelay);
  }

  @Override
  public void serviceStart() {
    try {
      RegisterApplicationMasterResponse response;
      synchronized (this) {
        amRmClient.start();
        response = amRmClient.registerApplicationMaster(appHostName,
                                                        appHostPort,
                                                        appTrackingUrl);
      }
      // upcall to app outside locks
      appClientDelegate.setApplicationRegistrationData(
          response.getMaximumResourceCapability(),
          response.getApplicationACLs(),
          response.getClientToAMTokenMasterKey());

      delayedContainerManager.start();
    } catch (YarnException e) {
      LOG.error("Yarn Exception while registering", e);
      throw new TezUncheckedException(e);
    } catch (IOException e) {
      LOG.error("IO Exception while registering", e);
      throw new TezUncheckedException(e);
    }
  }

  @Override
  public void serviceStop() throws InterruptedException {
    // upcall to app outside of locks
    AppFinalStatus status = appClientDelegate.getFinalAppStatus();
    try {
      delayedContainerManager.shutdown();
      // Wait for contianers to be released.
      delayedContainerManager.join(2000l);
      // TODO TEZ-36 dont unregister automatically after reboot sent by RM
      synchronized (this) {
        isStopped = true;
        amRmClient.unregisterApplicationMaster(status.exitStatus,
                                               status.exitMessage,
                                               status.postCompletionTrackingUrl);
      }

      // call client.stop() without lock client will attempt to stop the callback
      // operation and at the same time the callback operation might be trying
      // to get our lock.
      amRmClient.stop();
      appCallbackExecutor.shutdown();
      appCallbackExecutor.awaitTermination(1000l, TimeUnit.MILLISECONDS);
    } catch (YarnException e) {
      LOG.error("Yarn Exception while unregistering ", e);
      throw new TezUncheckedException(e);
    } catch (IOException e) {
      LOG.error("IOException while unregistering ", e);
      throw new TezUncheckedException(e);
    }
  }

  // AMRMClientAsync interface methods
  @Override
  public void onContainersCompleted(List<ContainerStatus> statuses) {
    if(isStopped) {
      return;
    }
    Map<Object, ContainerStatus> appContainerStatus =
                        new HashMap<Object, ContainerStatus>(statuses.size());
    synchronized (this) {
      for(ContainerStatus containerStatus : statuses) {
        ContainerId completedId = containerStatus.getContainerId();
        HeldContainer delayedContainer = heldContainers.get(completedId);

        Object task = releasedContainers.remove(completedId);
        if(task != null){
          if (delayedContainer != null) {
            LOG.warn("Held container should be null since releasedContainer is not");
          }
          // TODO later we may want to check if exit code matched expectation
          // e.g. successful container should not come back fail exit code after
          // being released
          // completion of a container we had released earlier
          // an allocated container completed. notify app
          LOG.info("Released container completed:" + completedId +
                   " last allocated to task: " + task);
          appContainerStatus.put(task, containerStatus);
          continue;
        }

        // not found in released containers. check currently allocated containers
        // no need to release this container as the RM has already completed it
        task = unAssignContainer(completedId, false);
        if (delayedContainer != null) {
          heldContainers.remove(completedId);
          Resources.subtract(allocatedResources, delayedContainer.getContainer().getResource());
        } else {
          LOG.warn("Held container expected to be not null for a non-AM-released container");
        }
        if(task != null) {
          // completion of a container we have allocated currently
          // an allocated container completed. notify app
          LOG.info("Allocated container completed:" + completedId +
                   " last allocated to task: " + task);
          appContainerStatus.put(task, containerStatus);
          continue;
        }

        // container neither allocated nor released
        LOG.info("Ignoring unknown container: " + containerStatus.getContainerId());
      }
    }

    // upcall to app must be outside locks
    for (Entry<Object, ContainerStatus> entry : appContainerStatus.entrySet()) {
      appClientDelegate.containerCompleted(entry.getKey(), entry.getValue());
    }
  }

  @Override
  public void onContainersAllocated(List<Container> containers) {
    if (isStopped) {
      return;
    }
    Map<CookieContainerRequest, Container> assignedContainers;

    if (LOG.isDebugEnabled()) {
      StringBuilder sb = new StringBuilder();
      for (Container container: containers) {
        sb.append(container.getId()).append(", ");
      }
      LOG.debug("Assigned New Containers: " + sb.toString());
    }

    synchronized (this) {
      if (!shouldReuseContainers) {
        List<Container> modifiableContainerList = Lists.newLinkedList(containers);
        assignedContainers = assignNewlyAllocatedContainers(
            modifiableContainerList);
      } else {
        // unify allocations
        pushNewContainerToDelayed(containers);
        return;
      }
    }

    // upcall to app must be outside locks
    informAppAboutAssignments(assignedContainers);
  }

  /**
   * Tries assigning the list of specified containers. Optionally, release
   * containers or add them to the delayed container queue.
   *
   * The flags apply to all containers in the specified lists. So, separate
   * calls should be made based on the expected behaviour.
   *
   * @param containers
   *          The list of containers to be assigned. The list *may* be modified
   *          in place based on allocations and releases.
   * @return Assignments.
   */
  private synchronized Map<CookieContainerRequest, Container>
      assignNewlyAllocatedContainers(Iterable<Container> containers) {

    Map<CookieContainerRequest, Container> assignedContainers =
        new HashMap<CookieContainerRequest, Container>();
    assignNewContainersWithLocation(containers,
      NODE_LOCAL_ASSIGNER, assignedContainers);
    assignNewContainersWithLocation(containers,
      RACK_LOCAL_ASSIGNER, assignedContainers);
    assignNewContainersWithLocation(containers,
      NON_LOCAL_ASSIGNER, assignedContainers);

    // Release any unassigned containers given by the RM
    releaseUnassignedContainers(containers);

    return assignedContainers;
  }

  private synchronized Map<CookieContainerRequest, Container>
      tryAssignReUsedContainers(Iterable<Container> containers) {

    Map<CookieContainerRequest, Container> assignedContainers =
      new HashMap<CookieContainerRequest, Container>();

    // Honor locality and match as many as possible
    assignReUsedContainersWithLocation(containers,
      NODE_LOCAL_ASSIGNER, assignedContainers, true);
    assignReUsedContainersWithLocation(containers,
      RACK_LOCAL_ASSIGNER, assignedContainers, true);
    assignReUsedContainersWithLocation(containers,
      NON_LOCAL_ASSIGNER, assignedContainers, true);

    return assignedContainers;
  }

  /**
   * Try to assign a re-used container
   * @param heldContainer Container to be used to assign to tasks
   * @return Assigned container map
   */

  private synchronized Map<CookieContainerRequest, Container>
      assignDelayedContainer(HeldContainer heldContainer) {

    DAGAppMasterState state = appContext.getAMState();
    boolean isNew = heldContainer.isNew();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Trying to assign a delayed container"
        + ", containerId=" + heldContainer.getContainer().getId()
        + ", nextScheduleTime=" + heldContainer.getNextScheduleTime()
        + ", containerExpiryTime=" + heldContainer.getContainerExpiryTime()
        + ", AMState=" + state
        + ", matchLevel=" + heldContainer.getLocalityMatchLevel()
        + ", taskRequestsCount=" + taskRequests.size()
        + ", heldContainers=" + heldContainers.size()
        + ", delayedContainers=" + delayedContainerManager.delayedContainers.size()
        + ", isNew=" + isNew);
    }

    if (state.equals(DAGAppMasterState.IDLE) || taskRequests.isEmpty()) {
      // reset locality level on held container
      // if sessionDelay defined, push back into delayed queue if not already
      // done so

      heldContainer.resetLocalityMatchLevel();
      long currentTime = System.currentTimeMillis();
      if (isNew || (heldContainer.getContainerExpiryTime() <= currentTime
          && sessionDelay != -1)) {
        LOG.info("No taskRequests. Container's session delay expired or is new. " +
          "Releasing container"
          + ", containerId=" + heldContainer.container.getId()
          + ", containerExpiryTime="
          + heldContainer.getContainerExpiryTime()
          + ", sessionDelay=" + sessionDelay
          + ", taskRequestsCount=" + taskRequests.size()
          + ", heldContainers=" + heldContainers.size()
          + ", delayedContainers=" + delayedContainerManager.delayedContainers.size()
          + ", isNew=" + isNew);
        releaseUnassignedContainers(
            Lists.newArrayList(heldContainer.container));
      } else {
        if (!appContext.isSession()) {
          releaseUnassignedContainers(
            Lists.newArrayList(heldContainer.container));
        } else {
          // only put back in queue if this is a session
          heldContainer.resetLocalityMatchLevel();
          delayedContainerManager.addDelayedContainer(
            heldContainer.getContainer(),
            currentTime + localitySchedulingDelay);
        }
      }
    } else if (state.equals(DAGAppMasterState.RUNNING)) {
      HeldContainer.LocalityMatchLevel localityMatchLevel =
        heldContainer.getLocalityMatchLevel();
      Map<CookieContainerRequest, Container> assignedContainers =
        new HashMap<CookieContainerRequest, Container>();

      Container containerToAssign = heldContainer.container;

      // Each time a container is seen, we try node, rack and non-local in that
      // order depending on matching level allowed

      // if match level is NEW or NODE, match only at node-local
      // always try node local matches for other levels
      if (isNew
          || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NEW)
          || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NODE)
          || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.RACK)
          || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NON_LOCAL)) {
        assignReUsedContainerWithLocation(containerToAssign,
            NODE_LOCAL_ASSIGNER, assignedContainers, true);
        if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
          LOG.info("Failed to assign tasks to delayed container using node"
            + ", containerId=" + heldContainer.getContainer().getId());
        }
      }

      // if re-use allowed at rack
      // match against rack if match level is RACK or NON-LOCAL
      // if scheduling delay is 0, match at RACK allowed without a sleep
      if (assignedContainers.isEmpty()) {
        if ((reuseRackLocal || isNew) && (localitySchedulingDelay == 0 ||
          (localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.RACK)
            || localityMatchLevel.equals(
              HeldContainer.LocalityMatchLevel.NON_LOCAL)))) {
          assignReUsedContainerWithLocation(containerToAssign,
              RACK_LOCAL_ASSIGNER, assignedContainers, false);
          if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
            LOG.info("Failed to assign tasks to delayed container using rack"
              + ", containerId=" + heldContainer.getContainer().getId());
          }
        }
      }

      // if re-use allowed at non-local
      // match against rack if match level is NON-LOCAL
      // if scheduling delay is 0, match at NON-LOCAL allowed without a sleep
      if (assignedContainers.isEmpty()) {
        if ((reuseNonLocal || isNew) && (localitySchedulingDelay == 0
            || localityMatchLevel.equals(
                HeldContainer.LocalityMatchLevel.NON_LOCAL))) {
         assignReUsedContainerWithLocation(containerToAssign,
              NON_LOCAL_ASSIGNER, assignedContainers, false);
          if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
            LOG.info("Failed to assign tasks to delayed container using non-local"
                + ", containerId=" + heldContainer.getContainer().getId());
          }
        }
      }

      if (assignedContainers.isEmpty()) {

        long currentTime = System.currentTimeMillis();

        // Release container if final expiry time is reached
        // Dont release a new container. The RM may not give us new ones
        if (!isNew && heldContainer.getContainerExpiryTime() <= currentTime
          && sessionDelay != -1) {
          LOG.info("Container's session delay expired. Releasing container"
            + ", containerId=" + heldContainer.container.getId()
            + ", containerExpiryTime="
            + heldContainer.getContainerExpiryTime()
            + ", sessionDelay=" + sessionDelay);
          releaseUnassignedContainers(
            Lists.newArrayList(heldContainer.container));
        } else {

          // Let's decide if this container has hit the end of the road

          // EOL true if container's match level is NON-LOCAL
          boolean hitFinalMatchLevel = localityMatchLevel.equals(
            HeldContainer.LocalityMatchLevel.NON_LOCAL);
          if (!hitFinalMatchLevel) {
            // EOL also true if locality delay is 0
            // or rack-local or non-local is disabled
            heldContainer.incrementLocalityMatchLevel();
            if (localitySchedulingDelay == 0 ||
                (!reuseRackLocal
                  || (!reuseNonLocal &&
                    heldContainer.getLocalityMatchLevel().equals(
                        HeldContainer.LocalityMatchLevel.NON_LOCAL)))) {
              hitFinalMatchLevel = true;
            }
            // the above if-stmt does not apply to new containers since they will
            // be matched at all locality levels. So there finalMatchLevel cannot
            // be short-circuited
            if (localitySchedulingDelay > 0 && isNew) {
              hitFinalMatchLevel = false;
            }
          }
         
          if (hitFinalMatchLevel) {
            boolean safeToRelease = true;
            Priority topPendingPriority = amRmClient.getTopPriority();
            Priority containerPriority = heldContainer.container.getPriority();
            if (isNew && topPendingPriority != null &&
                containerPriority.compareTo(topPendingPriority) < 0) {
              // this container is of lower priority and given to us by the RM for
              // a task that will be matched after the current top priority. Keep
              // this container for those pending tasks since the RM is not going
              // to give this container to us again
              safeToRelease = false;
            }
           
            // Are there any pending requests at any priority?
            // release if there are tasks or this is not a session
            if (safeToRelease &&
                (!taskRequests.isEmpty() || !appContext.isSession())) {
              LOG.info("Releasing held container as either there are pending but "
                + " unmatched requests or this is not a session"
                + ", containerId=" + heldContainer.container.getId()
                + ", pendingTasks=" + !taskRequests.isEmpty()
                + ", isSession=" + appContext.isSession()
                + ". isNew=" + isNew);
              releaseUnassignedContainers(
                Lists.newArrayList(heldContainer.container));
            } else {
              // if no tasks, treat this like an idle session
              heldContainer.resetLocalityMatchLevel();
              delayedContainerManager.addDelayedContainer(
                heldContainer.getContainer(),
                currentTime + localitySchedulingDelay);
            }
          } else {
            // Schedule delay container to match at a later try
            delayedContainerManager.addDelayedContainer(
                heldContainer.getContainer(),
                currentTime + localitySchedulingDelay);
          }
        }
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Delayed container assignment successful"
            + ", containerId=" + heldContainer.getContainer().getId());
      }

      return assignedContainers;
    } else {
      // ignore all other cases?
      LOG.warn("Received a request to assign re-used containers when AM was "
        + " in state: " + state + ". Ignoring request and releasing container"
        + ": " + heldContainer.getContainer().getId());
      releaseUnassignedContainers(Lists.newArrayList(heldContainer.container));
    }

    return null;
  }

  public synchronized void resetMatchLocalityForAllHeldContainers() {
    for (HeldContainer heldContainer : heldContainers.values()) {
      heldContainer.resetLocalityMatchLevel();
    }
    synchronized(delayedContainerManager) {
      delayedContainerManager.notify();
    }
  }

  @Override
  public void onShutdownRequest() {
    if(isStopped) {
      return;
    }
    // upcall to app must be outside locks
    appClientDelegate.appShutdownRequested();
  }

  @Override
  public void onNodesUpdated(List<NodeReport> updatedNodes) {
    if(isStopped) {
      return;
    }
    // ignore bad nodes for now
    // upcall to app must be outside locks
    appClientDelegate.nodesUpdated(updatedNodes);
  }

  @Override
  public float getProgress() {
    if(isStopped) {
      return 1;
    }

    if(totalResources.getMemory() == 0) {
      // assume this is the first allocate callback. nothing is allocated.
      // available resource = totalResource
      // TODO this will not handle dynamic changes in resources
      totalResources = Resources.clone(getAvailableResources());
      LOG.info("App total resource memory: " + totalResources.getMemory() +
               " cpu: " + totalResources.getVirtualCores() +
               " taskAllocations: " + taskAllocations.size());
    }

    preemptIfNeeded();

    return appClientDelegate.getProgress();
  }

  @Override
  public void onError(Throwable t) {
    if(isStopped) {
      return;
    }
    appClientDelegate.onError(t);
  }

  public Resource getTotalResources() {
    return totalResources;
  }

  public synchronized void allocateTask(
      Object task,
      Resource capability,
      String[] hosts,
      String[] racks,
      Priority priority,
      Object containerSignature,
      Object clientCookie) {

    // XXX Have ContainerContext implement an interface defined by TaskScheduler.
    // TODO check for nulls etc
    // TODO extra memory allocation
    CRCookie cookie = new CRCookie(task, clientCookie, containerSignature);
    CookieContainerRequest request = new CookieContainerRequest(
      capability, hosts, racks, priority, cookie);

    addTaskRequest(task, request);
    // See if any of the delayedContainers can be used for this task.
    delayedContainerManager.triggerScheduling(true);
    LOG.info("Allocation request for task: " + task +
      " with request: " + request +
      " host: " + ((hosts!=null&&hosts.length>0)?hosts[0]:"null") +
      " rack: " + ((racks!=null&&racks.length>0)?racks[0]:"null"));
  }

  /**
   * @param task
   *          the task to de-allocate.
   * @param taskSucceeded
   *          specify whether the task succeeded or failed.
   * @return true if a container is assigned to this task.
   */
  public boolean deallocateTask(Object task, boolean taskSucceeded) {
    Map<CookieContainerRequest, Container> assignedContainers = null;

    synchronized (this) {
      CookieContainerRequest request = removeTaskRequest(task);
      if (request != null) {
        // task not allocated yet
        LOG.info("Deallocating task: " + task + " before allocation");
        return false;
      }

      // task request not present. Look in allocations
      Container container = doBookKeepingForTaskDeallocate(task);
      if (container == null) {
        // task neither requested nor allocated.
        LOG.info("Ignoring removal of unknown task: " + task);
        return false;
      } else {
        LOG.info("Deallocated task: " + task + " from container: "
            + container.getId());

        if (!taskSucceeded || !shouldReuseContainers) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Releasing container, containerId=" + container.getId()
                + ", taskSucceeded=" + taskSucceeded
                + ", reuseContainersFlag=" + shouldReuseContainers);
          }
          releaseContainer(container.getId());
        } else {
          // Don't attempt to delay containers if delay is 0.
          HeldContainer heldContainer = heldContainers.get(container.getId());
          if (heldContainer != null) {
            heldContainer.resetLocalityMatchLevel();
            long currentTime = System.currentTimeMillis();
            if (sessionDelay > 0) {
              heldContainer.setContainerExpiryTime(currentTime + sessionDelay);
            }
            assignedContainers = assignDelayedContainer(heldContainer);
          } else {
            LOG.info("Skipping container after task deallocate as container is"
                + " no longer running, containerId=" + container.getId());
          }
        }
      }
    }

    // up call outside of the lock.
    if (assignedContainers != null && assignedContainers.size() == 1) {
      informAppAboutAssignments(assignedContainers);
    }
    return true;
  }
 
  public synchronized Object deallocateContainer(ContainerId containerId) {
    Object task = unAssignContainer(containerId, true);
    if(task != null) {
      LOG.info("Deallocated container: " + containerId +
        " from task: " + task);
      return task;
    }

    LOG.info("Ignoring dealloction of unknown container: " + containerId);
    return null;
  }

  synchronized void preemptIfNeeded() {
    Resource freeResources = Resources.subtract(totalResources,
      allocatedResources);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Allocated resource memory: " + allocatedResources.getMemory() +
        " cpu:" + allocatedResources.getVirtualCores() +
        " delayedContainers: " + delayedContainerManager.delayedContainers.size());
    }
    assert freeResources.getMemory() >= 0;
   
    if (delayedContainerManager.delayedContainers.size() > 0) {
      // if we are holding onto containers then nothing to preempt from outside
      return;
    }

    CookieContainerRequest highestPriRequest = null;
    for(CookieContainerRequest request : taskRequests.values()) {
      if(highestPriRequest == null) {
        highestPriRequest = request;
      } else if(isHigherPriority(request.getPriority(),
                                   highestPriRequest.getPriority())){
        highestPriRequest = request;
      }
    }
    if(highestPriRequest != null &&
       !fitsIn(highestPriRequest.getCapability(), freeResources)) {
      // highest priority request will not fit in existing free resources
      // free up some more
      // TODO this is subject to error wrt RM resource normalization
      Map.Entry<Object, Container> preemptedEntry = null;
      for(Map.Entry<Object, Container> entry : taskAllocations.entrySet()) {
        HeldContainer heldContainer = heldContainers.get(entry.getValue().getId());
        CookieContainerRequest lastTaskInfo = heldContainer.getLastTaskInfo();
        Priority taskPriority = lastTaskInfo.getPriority();
        Object signature = lastTaskInfo.getCookie().getContainerSignature();
        if(!isHigherPriority(highestPriRequest.getPriority(), taskPriority)) {
          // higher or same priority
          continue;
        }
        if (containerSignatureMatcher.isExactMatch(
            highestPriRequest.getCookie().getContainerSignature(),
            signature)) {
          // exact match with different priorities
          continue;
        }
        if(preemptedEntry == null ||
           !isHigherPriority(taskPriority,
               preemptedEntry.getValue().getPriority())) {
          // keep the lower priority or the one added later
          preemptedEntry = entry;
        }
      }
      if(preemptedEntry != null) {
        // found something to preempt
        LOG.info("Preempting task: " + preemptedEntry.getKey() +
            " to free resource for request: " + highestPriRequest +
            " . Current free resources: " + freeResources);
        deallocateContainer(preemptedEntry.getValue().getId());
        // app client will be notified when after container is killed
        // and we get its completed container status
      }
    }
  }

  private boolean fitsIn(Resource toFit, Resource resource) {
    // YARN-893 prevents using correct library code
    //return Resources.fitsIn(toFit, resource);
    return resource.getMemory() >= toFit.getMemory();
  }

  private CookieContainerRequest getMatchingRequestWithPriority(
      Container container,
      String location) {
    Priority priority = container.getPriority();
    Resource capability = container.getResource();
    List<? extends Collection<CookieContainerRequest>> requestsList =
        amRmClient.getMatchingRequests(priority, location, capability);

    if (!requestsList.isEmpty()) {
      // pick first one
      for (Collection<CookieContainerRequest> requests : requestsList) {
        for (CookieContainerRequest cookieContainerRequest : requests) {
          if (canAssignTaskToContainer(cookieContainerRequest, container)) {
            return cookieContainerRequest;
          }
        }
      }
    }

    return null;
  }

  private CookieContainerRequest getMatchingRequestWithoutPriority(
      Container container,
      String location) {
    Resource capability = container.getResource();
    List<? extends Collection<CookieContainerRequest>> pRequestsList =
      amRmClient.getMatchingRequestsForTopPriority(location, capability);
    if (pRequestsList == null || pRequestsList.isEmpty()) {
      return null;
    }
    for (Collection<CookieContainerRequest> requests : pRequestsList) {
      for (CookieContainerRequest cookieContainerRequest : requests) {
        if (canAssignTaskToContainer(cookieContainerRequest, container)) {
          return cookieContainerRequest;
        }
      }
    }
    return null;
  }

  private boolean canAssignTaskToContainer(
      CookieContainerRequest cookieContainerRequest, Container container) {
    HeldContainer heldContainer = heldContainers.get(container.getId());
    if (heldContainer == null || heldContainer.isNew()) { // New container.
      return true;
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Trying to match task to a held container, "
            + " containerId=" + heldContainer.container.getId());
      }
      if (containerSignatureMatcher.isSuperSet(heldContainer
          .getFirstContainerSignature(), cookieContainerRequest.getCookie()
          .getContainerSignature())) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Matched delayed container to task"
            + " containerId=" + heldContainer.container.getId());
        }
        return true;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Failed to match delayed container to task"
        + " containerId=" + heldContainer.container.getId());
    }
    return false;
  }

  private Object getTask(CookieContainerRequest request) {
    return request.getCookie().getTask();
  }

  private void releaseContainer(ContainerId containerId) {
    Object assignedTask = containerAssignments.remove(containerId);
    if (assignedTask != null) {
      // A task was assigned to this container at some point. Inform the app.
      appClientDelegate.containerBeingReleased(containerId);
    }
    HeldContainer delayedContainer = heldContainers.remove(containerId);
    if (delayedContainer != null) {
      Resources.subtractFrom(allocatedResources,
          delayedContainer.getContainer().getResource());
    }
    if (delayedContainer != null || !shouldReuseContainers) {
      amRmClient.releaseAssignedContainer(containerId);
    }
    if (assignedTask != null) {
      // A task was assigned at some point. Add to release list since we are
      // releasing the container.
      releasedContainers.put(containerId, assignedTask);
    }
  }

  private void assignContainer(Object task,
      Container container,
      CookieContainerRequest assigned) {
    CookieContainerRequest request = removeTaskRequest(task);
    assert request != null;
    //assert assigned.equals(request);

    Container result = taskAllocations.put(task, container);
    assert result == null;
    containerAssignments.put(container.getId(), task);
    HeldContainer heldContainer = heldContainers.get(container.getId());
    if (!shouldReuseContainers && heldContainer == null) {
      heldContainers.put(container.getId(), new HeldContainer(container,
        -1, -1, assigned));
      Resources.addTo(allocatedResources, container.getResource());
    } else {
      if (heldContainer.isNew()) {
        // check for existence before adding since the first container potentially
        // has the broadest signature as subsequent uses dont expand any dimension.
        // This will need to be enhanced to track other signatures too when we
        // think about preferring within vertex matching etc.
        heldContainers.put(container.getId(),
            new HeldContainer(container, heldContainer.getNextScheduleTime(),
                heldContainer.getContainerExpiryTime(), assigned));
      }
      heldContainer.setLastTaskInfo(assigned);
    }
  }
 
  private void pushNewContainerToDelayed(List<Container> containers){
    long expireTime = -1;
    if (sessionDelay > 0) {
      long currentTime = System.currentTimeMillis();
      expireTime = currentTime + sessionDelay;
    }

    synchronized (delayedContainerManager) {
      for (Container container : containers) {
        if (heldContainers.put(container.getId(), new HeldContainer(container,
            -1, expireTime, null)) != null) {
          throw new TezUncheckedException("New container " + container.getId()
              + " is already held.");
        }
        Resources.addTo(allocatedResources, container.getResource());
        delayedContainerManager.addDelayedContainer(container,
            delayedContainerManager.maxScheduleTimeSeen + 1);
      }
    }
    delayedContainerManager.triggerScheduling(false);     
  }

  private CookieContainerRequest removeTaskRequest(Object task) {
    CookieContainerRequest request = taskRequests.remove(task);
    if(request != null) {
      // remove all references of the request from AMRMClient
      amRmClient.removeContainerRequest(request);
    }
    return request;
  }

  private void addTaskRequest(Object task,
                                CookieContainerRequest request) {
    // TODO TEZ-37 fix duplicate handling
    taskRequests.put(task, request);
    amRmClient.addContainerRequest(request);
  }

  private Container doBookKeepingForTaskDeallocate(Object task) {
    Container container = taskAllocations.remove(task);
    if (container == null) {
      return null;
    }
    return container;
  }

  private Object unAssignContainer(ContainerId containerId,
                                    boolean releaseIfFound) {
    // Not removing. containerAssignments tracks the last task run on a
    // container.
    Object task = containerAssignments.get(containerId);
    if(task == null) {
      return null;
    }
    Container container = taskAllocations.remove(task);
    assert container != null;
    if(releaseIfFound) {
      releaseContainer(containerId);
    }
    return task;
  }

  private boolean isHigherPriority(Priority lhs, Priority rhs) {
    return lhs.getPriority() < rhs.getPriority();
  }

  private synchronized void assignNewContainersWithLocation(
      Iterable<Container> containers,
      ContainerAssigner assigner,
      Map<CookieContainerRequest, Container> assignedContainers) {

    Iterator<Container> containerIterator = containers.iterator();
    while (containerIterator.hasNext()) {
      Container container = containerIterator.next();
      CookieContainerRequest assigned =
        assigner.assignNewContainer(container);
      if (assigned != null) {
        assignedContainers.put(assigned, container);
        containerIterator.remove();
      }
    }
  }

  private synchronized void assignReUsedContainersWithLocation(
      Iterable<Container> containers,
      ContainerAssigner assigner,
      Map<CookieContainerRequest, Container> assignedContainers,
      boolean honorLocality) {

    Iterator<Container> containerIterator = containers.iterator();
    while (containerIterator.hasNext()) {
      Container container = containerIterator.next();
      if (assignReUsedContainerWithLocation(container, assigner,
          assignedContainers, honorLocality)) {
        containerIterator.remove();
      }
    }
  }

  private synchronized boolean assignReUsedContainerWithLocation(
    Container container,
    ContainerAssigner assigner,
    Map<CookieContainerRequest, Container> assignedContainers,
    boolean honorLocality) {

    Priority containerPriority = container.getPriority();
    Priority topPendingTaskPriority = amRmClient.getTopPriority();
    if (topPendingTaskPriority == null) {
      // nothing left to assign
      return false;
    }
   
    if (topPendingTaskPriority.compareTo(containerPriority) > 0) {
      // if the next task to assign is higher priority than the container then
      // dont assign this container to that task.
      // if task and container are equal priority - then its first use or reuse
      // within the same priority - safe to use
      // if task is lower priority than container then its we use a container that
      // is no longer needed by higher priority tasks All those higher pri tasks
      // have been assigned resources - safe to use (first use or reuse)
      // if task is higher priority than container then we may end up using a
      // container that was assigned by the RM for a lower priority pending task
      // that will be assigned after this higher priority task is assigned. If we
      // use that task's container now then we may not be able to match this
      // container to that task later on. However the RM has already assigned us
      // all containers and is not going to give us new containers. We will get
      // stuck for resources.
      return false;
    }
   
    CookieContainerRequest assigned =
      assigner.assignReUsedContainer(container, honorLocality);
    if (assigned != null) {
      assignedContainers.put(assigned, container);
      return true;
    }
    return false;
  }

  private void releaseUnassignedContainers(Iterable<Container> containers) {
    for (Container container : containers) {
      LOG.info("Releasing unused container: "
          + container.getId());
      releaseContainer(container.getId());
    }
  }

  private void informAppAboutAssignment(CookieContainerRequest assigned,
      Container container) {
    appClientDelegate.taskAllocated(getTask(assigned),
        assigned.getCookie().getAppCookie(), container);
  }

  private void informAppAboutAssignments(
      Map<CookieContainerRequest, Container> assignedContainers) {
    if (assignedContainers == null || assignedContainers.isEmpty()) {
      return;
    }
    for (Entry<CookieContainerRequest, Container> entry : assignedContainers
        .entrySet()) {
      informAppAboutAssignment(entry.getKey(), entry.getValue());
    }
  }

  private abstract class ContainerAssigner {

    protected final String locality;

    protected ContainerAssigner(String locality) {
      this.locality = locality;
    }

    public abstract CookieContainerRequest assignNewContainer(
        Container container);

    public abstract CookieContainerRequest assignReUsedContainer(
      Container container, boolean honorLocality);

    public void doBookKeepingForAssignedContainer(
        CookieContainerRequest assigned, Container container,
        String matchedLocation, boolean honorLocalityFlags) {
      if (assigned == null) {
        return;
      }
      Object task = getTask(assigned);
      assert task != null;

      LOG.info("Assigning container to task"
        + ", container=" + container
        + ", task=" + task
        + ", containerHost=" + container.getNodeId().getHost()
        + ", localityMatchType=" + locality
        + ", matchedLocation=" + matchedLocation
        + ", honorLocalityFlags=" + honorLocalityFlags
        + ", reusedContainer="
        + containerAssignments.containsKey(container.getId())
        + ", delayedContainers=" + delayedContainerManager.delayedContainers.size()
        + ", containerResourceMemory=" + container.getResource().getMemory()
        + ", containerResourceVCores="
        + container.getResource().getVirtualCores());

      assignContainer(task, container, assigned);
    }
  }
 
  private class NodeLocalContainerAssigner extends ContainerAssigner {

    NodeLocalContainerAssigner() {
      super("NodeLocal");
    }

    @Override
    public CookieContainerRequest assignNewContainer(Container container) {
      String location = container.getNodeId().getHost();
      CookieContainerRequest assigned = getMatchingRequestWithPriority(
          container, location);
      doBookKeepingForAssignedContainer(assigned, container, location, false);
      return assigned;
    }

    @Override
    public CookieContainerRequest assignReUsedContainer(Container container,
        boolean honorLocality) {
      String location = container.getNodeId().getHost();
      CookieContainerRequest assigned = getMatchingRequestWithoutPriority(
        container, location);
      doBookKeepingForAssignedContainer(assigned, container, location, true);
      return assigned;

    }
  }

  private class RackLocalContainerAssigner extends ContainerAssigner {

    RackLocalContainerAssigner() {
      super("RackLocal");
    }

    @Override
    public CookieContainerRequest assignNewContainer(Container container) {
      String location = RackResolver.resolve(container.getNodeId().getHost())
          .getNetworkLocation();
      CookieContainerRequest assigned = getMatchingRequestWithPriority(container,
          location);
      doBookKeepingForAssignedContainer(assigned, container, location, false);
      return assigned;
    }

    @Override
    public CookieContainerRequest assignReUsedContainer(
      Container container, boolean honorLocality) {
      // TEZ-586 this is not match an actual rackLocal request unless honorLocality
      // is false. This method is useless if honorLocality=true
      if (!honorLocality) {
        String location = RackResolver.resolve(container.getNodeId().getHost())
          .getNetworkLocation();
        CookieContainerRequest assigned = getMatchingRequestWithoutPriority(
            container, location);
        doBookKeepingForAssignedContainer(assigned, container, location,
            honorLocality);
        return assigned;
      }
      return null;
    }
  }

  private class NonLocalContainerAssigner extends ContainerAssigner {

    NonLocalContainerAssigner() {
      super("NonLocal");
    }

    @Override
    public CookieContainerRequest assignNewContainer(Container container) {
      String location = ResourceRequest.ANY;
      CookieContainerRequest assigned = getMatchingRequestWithPriority(container,
          location);
      doBookKeepingForAssignedContainer(assigned, container, location, false);
      return assigned;
    }

    @Override
    public CookieContainerRequest assignReUsedContainer(Container container,
        boolean honorLocality) {
      if (!honorLocality) {
        String location = ResourceRequest.ANY;
        CookieContainerRequest assigned = getMatchingRequestWithoutPriority(
          container, location);
        doBookKeepingForAssignedContainer(assigned, container, location,
            honorLocality);
        return assigned;
      }
      return null;
    }

  }
 
 
  @VisibleForTesting
  class DelayedContainerManager extends Thread {

    class HeldContainerTimerComparator implements Comparator<HeldContainer> {

      @Override
      public int compare(HeldContainer c1,
          HeldContainer c2) {
        return (int) (c1.getNextScheduleTime() - c2.getNextScheduleTime());
      }
    }

    private PriorityBlockingQueue<HeldContainer> delayedContainers =
      new PriorityBlockingQueue<HeldContainer>(20,
        new HeldContainerTimerComparator());

    private volatile boolean tryAssigningAll = false;
    private volatile boolean running = true;
    private long maxScheduleTimeSeen = -1;
   
    // used for testing only
    @VisibleForTesting
    AtomicBoolean drainedDelayedContainers = null;

    DelayedContainerManager() {
      super.setName("DelayedContainerManager");
    }
   
    @Override
    public void run() {
      while(running) {
        // Try assigning all containers if there's a request to do so.
        if (tryAssigningAll) {
          doAssignAll();
          tryAssigningAll = false;
        }

        // Try allocating containers which have timed out.
        // Required since these containers may get assigned without
        // locality at this point.
        if (delayedContainers.peek() == null) {
          try {
            if (drainedDelayedContainers != null) {
              drainedDelayedContainers.set(true);
              synchronized (drainedDelayedContainers) {
                drainedDelayedContainers.notifyAll();
              }
            }
            synchronized(this) {
              this.wait();
            }
            // Re-loop to see if tryAssignAll is set.
            continue;
          } catch (InterruptedException e) {
            LOG.info("AllocatedContainerManager Thread interrupted");
          }
        } else {
          HeldContainer delayedContainer = delayedContainers.peek();
          if (delayedContainer == null) {
            continue;
          }
          if (LOG.isDebugEnabled()) {
            LOG.debug("Considering HeldContainer: "
              + delayedContainer + " for assignment");
          }
          long currentTs = System.currentTimeMillis();
          long nextScheduleTs = delayedContainer.getNextScheduleTime();
          if (currentTs >= nextScheduleTs) {
            // Remove the container and try scheduling it.
            // TEZ-587 what if container is released by RM after this
            // in onContainerCompleted()
            delayedContainer = delayedContainers.poll();
            if (delayedContainer == null) {
              continue;
            }
            Map<CookieContainerRequest, Container> assignedContainers = null;
            synchronized(TaskScheduler.this) {
              if (null !=
                  heldContainers.get(delayedContainer.getContainer().getId())) {
                assignedContainers = assignDelayedContainer(delayedContainer);
              } else {
                LOG.info("Skipping delayed container as container is no longer"
                  + " running, containerId="
                  + delayedContainer.getContainer().getId());
              }
            }
            // Inform App should be done outside of the lock
            informAppAboutAssignments(assignedContainers);
          } else {
            synchronized(this) {
              try {
                // Wait for the next container to be assignable
                delayedContainer = delayedContainers.peek();
                long diff = localitySchedulingDelay;
                if (delayedContainer != null) {
                  diff = delayedContainer.getNextScheduleTime() - currentTs;
                }
                if (diff > 0) {
                  this.wait(diff);
                }
              } catch (InterruptedException e) {
                LOG.info("AllocatedContainerManager Thread interrupted");
              }
            }
          }
        }
      }
      releasePendingContainers();
    }
   
    private void doAssignAll() {
      // The allocatedContainers queue should not be modified in the middle of an iteration over it.
      // Synchronizing here on TaskScheduler.this to prevent this from happening.
      // The call to assignAll from within this method should NOT add any
      // elements back to the allocatedContainers list. Since they're all
      // delayed elements, de-allocation should not happen either - leaving the
      // list of delayed containers intact, except for the contaienrs which end
      // up getting assigned.
      if (delayedContainers.isEmpty()) {
        return;
      }

      Map<CookieContainerRequest, Container> assignedContainers;
      synchronized(TaskScheduler.this) {
        // honor reuse-locality flags (container not timed out yet), Don't queue
        // (already in queue), don't release (release happens when containers
        // time-out)
        if (LOG.isDebugEnabled()) {
          LOG.debug("Trying to assign all delayed containers to newly received"
            + " tasks");
        }
        assignedContainers = tryAssignReUsedContainers(
          new ContainerIterable(delayedContainers));
      }
      // Inform app
      informAppAboutAssignments(assignedContainers);
    }
   
    /**
     * Indicate that an attempt should be made to allocate all available containers.
     * Intended to be used in cases where new Container requests come in
     */
    public void triggerScheduling(boolean scheduleAll) {
      this.tryAssigningAll = scheduleAll;
      synchronized(this) {
        this.notify();
      }
    }

    public void shutdown() {
      this.running = false;
      this.interrupt();
    }
   
    private void releasePendingContainers() {
      List<HeldContainer> pendingContainers = Lists.newArrayListWithCapacity(
        delayedContainers.size());
      delayedContainers.drainTo(pendingContainers);
      releaseUnassignedContainers(new ContainerIterable(pendingContainers));
    }

    private void addDelayedContainer(Container container,
        long nextScheduleTime) {
      HeldContainer delayedContainer = heldContainers.get(container.getId());
      if (delayedContainer == null) {
        LOG.warn("Attempting to add a non-running container to the"
            + " delayed container list, containerId=" + container.getId());
        return;
      } else {
        delayedContainer.setNextScheduleTime(nextScheduleTime);
      }
      if (maxScheduleTimeSeen < nextScheduleTime) {
        maxScheduleTimeSeen = nextScheduleTime;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding container to delayed queue"
          + ", containerId=" + delayedContainer.getContainer().getId()
          + ", nextScheduleTime=" + delayedContainer.getNextScheduleTime()
          + ", containerExpiry=" + delayedContainer.getContainerExpiryTime());
      }
      boolean added = delayedContainers.offer(delayedContainer);
      synchronized(this) {
        this.notify();
      }
      if (!added) {
        releaseUnassignedContainers(Lists.newArrayList(container));
      }
    }

  }

  private class ContainerIterable implements Iterable<Container> {

    private final Iterable<HeldContainer> delayedContainers;

    ContainerIterable(Iterable<HeldContainer> delayedContainers) {
      this.delayedContainers = delayedContainers;
    }

    @Override
    public Iterator<Container> iterator() {

      final Iterator<HeldContainer> delayedContainerIterator = delayedContainers
          .iterator();

      return new Iterator<Container>() {

        @Override
        public boolean hasNext() {
          return delayedContainerIterator.hasNext();
        }

        @Override
        public Container next() {
          return delayedContainerIterator.next().getContainer();
        }

        @Override
        public void remove() {
          delayedContainerIterator.remove();
        }
      };
    }
  }

  static class HeldContainer {

    enum LocalityMatchLevel {
      NEW,
      NODE,
      RACK,
      NON_LOCAL
    }

    Container container;
    private long nextScheduleTime;
    private Object firstContainerSignature;
    private LocalityMatchLevel localityMatchLevel;
    private long containerExpiryTime;
    private CookieContainerRequest lastTaskInfo;
   
    HeldContainer(Container container,
        long nextScheduleTime,
        long containerExpiryTime,
        CookieContainerRequest firstTaskInfo) {
      this.container = container;
      this.nextScheduleTime = nextScheduleTime;
      if (firstTaskInfo != null) {
        this.lastTaskInfo = firstTaskInfo;
        this.firstContainerSignature = firstTaskInfo.getCookie().getContainerSignature();
      }
      this.localityMatchLevel = LocalityMatchLevel.NODE;
      this.containerExpiryTime = containerExpiryTime;
    }
   
    boolean isNew() {
      return firstContainerSignature == null;
    }
   
    public Container getContainer() {
      return this.container;
    }
   
    public long getNextScheduleTime() {
      return this.nextScheduleTime;
    }
   
    public void setNextScheduleTime(long nextScheduleTime) {
      this.nextScheduleTime = nextScheduleTime;
    }

    public long getContainerExpiryTime() {
      return this.containerExpiryTime;
    }

    public void setContainerExpiryTime(long containerExpiryTime) {
      this.containerExpiryTime = containerExpiryTime;
    }

    public Object getFirstContainerSignature() {
      return this.firstContainerSignature;
    }
   
    public CookieContainerRequest getLastTaskInfo() {
      return this.lastTaskInfo;
    }
   
    public void setLastTaskInfo(CookieContainerRequest taskInfo) {
      lastTaskInfo = taskInfo;
    }

    public synchronized void resetLocalityMatchLevel() {
      localityMatchLevel = LocalityMatchLevel.NEW;
    }

    public synchronized void incrementLocalityMatchLevel() {
      if (localityMatchLevel.equals(LocalityMatchLevel.NEW)) {
        localityMatchLevel = LocalityMatchLevel.NODE;
      } else if (localityMatchLevel.equals(LocalityMatchLevel.NODE)) {
        localityMatchLevel = LocalityMatchLevel.RACK;
      } else if (localityMatchLevel.equals(LocalityMatchLevel.RACK)) {
        localityMatchLevel = LocalityMatchLevel.NON_LOCAL;
      } else if (localityMatchLevel.equals(LocalityMatchLevel.NON_LOCAL)) {
        throw new TezUncheckedException("Cannot increment locality level "
          + " from current NON_LOCAL for container: " + container.getId());
      }
    }

    public LocalityMatchLevel getLocalityMatchLevel() {
      return this.localityMatchLevel;
    }

    @Override
    public String toString() {
      return "HeldContainer: id: " + container.getId()
          + ", nextScheduleTime: " + nextScheduleTime
          + ", localityMatchLevel=" + localityMatchLevel
          + ", signature: "
          + (firstContainerSignature != null? firstContainerSignature.toString():"null");
    }
  }
}
TOP

Related Classes of org.apache.tez.dag.app.rm.TaskScheduler$HeldContainer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.
ew');