Source Code of eu.stratosphere.nephele.jobmanager.scheduler.AbstractScheduler

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/


package eu.stratosphere.nephele.jobmanager.scheduler;


import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;


import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;


import eu.stratosphere.nephele.execution.ExecutionState;
import eu.stratosphere.nephele.executiongraph.ExecutionEdge;
import eu.stratosphere.nephele.executiongraph.ExecutionGate;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGraphIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionPipeline;
import eu.stratosphere.nephele.executiongraph.ExecutionStage;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionVertexID;
import eu.stratosphere.nephele.executiongraph.InternalJobStatus;
import eu.stratosphere.nephele.instance.AbstractInstance;
import eu.stratosphere.nephele.instance.AllocatedResource;
import eu.stratosphere.nephele.instance.AllocationID;
import eu.stratosphere.nephele.instance.DummyInstance;
import eu.stratosphere.nephele.instance.InstanceException;
import eu.stratosphere.nephele.instance.InstanceListener;
import eu.stratosphere.nephele.instance.InstanceManager;
import eu.stratosphere.nephele.instance.InstanceRequestMap;
import eu.stratosphere.nephele.instance.InstanceType;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.DeploymentManager;
import eu.stratosphere.util.StringUtils;


/**
 * This abstract scheduler must be extended by a scheduler implementations for Nephele. The abstract class defines the
 * fundamental methods for scheduling and removing jobs. While Nephele's
 * {@link eu.stratosphere.nephele.jobmanager.JobManager} is responsible for requesting the required instances for the
 * job at the {@link eu.stratosphere.nephele.instance.InstanceManager}, the scheduler is in charge of assigning the
 * individual tasks to the instances.
 * 
 */
public abstract class AbstractScheduler implements InstanceListener {


  /**
   * The LOG object to report events within the scheduler.
   */
  protected static final Log LOG = LogFactory.getLog(AbstractScheduler.class);


  /**
   * The instance manager assigned to this scheduler.
   */
  private final InstanceManager instanceManager;


  /**
   * The deployment manager assigned to this scheduler.
   */
  private final DeploymentManager deploymentManager;


  /**
   * Stores the vertices to be restarted once they have switched to the <code>CANCELED</code> state.
   */
  private final Map<ExecutionVertexID, ExecutionVertex> verticesToBeRestarted = new ConcurrentHashMap<ExecutionVertexID, ExecutionVertex>();


  /**
   * Constructs a new abstract scheduler.
   * 
   * @param deploymentManager
   *        the deployment manager assigned to this scheduler
   * @param instanceManager
   *        the instance manager to be used with this scheduler
   */
  protected AbstractScheduler(final DeploymentManager deploymentManager, final InstanceManager instanceManager) {


    this.deploymentManager = deploymentManager;
    this.instanceManager = instanceManager;
    this.instanceManager.setInstanceListener(this);
  }


  /**
   * Adds a job represented by an {@link ExecutionGraph} object to the scheduler. The job is then executed according
   * to the strategies of the concrete scheduler implementation.
   * 
   * @param executionGraph
   *        the job to be added to the scheduler
   * @throws SchedulingException
   *         thrown if an error occurs and the scheduler does not accept the new job
   */
  public abstract void schedulJob(ExecutionGraph executionGraph) throws SchedulingException;


  /**
   * Returns the execution graph which is associated with the given job ID.
   * 
   * @param jobID
   *        the job ID to search the execution graph for
   * @return the execution graph which belongs to the given job ID or <code>null</code if no such execution graph
   *         exists
   */
  public abstract ExecutionGraph getExecutionGraphByID(JobID jobID);


  /**
   * Returns the {@link InstanceManager} object which is used by the current scheduler.
   * 
   * @return the {@link InstanceManager} object which is used by the current scheduler
   */
  public InstanceManager getInstanceManager() {
    return this.instanceManager;
  }


  // void removeJob(JobID jobID);


  /**
   * Shuts the scheduler down. After shut down no jobs can be added to the scheduler.
   */
  public abstract void shutdown();


  /**
   * Collects the instances required to run the job from the given {@link ExecutionStage} and requests them at the
   * loaded instance manager.
   * 
   * @param executionStage
   *        the execution stage to collect the required instances from
   * @throws InstanceException
   *         thrown if the given execution graph is already processing its final stage
   */
  protected void requestInstances(final ExecutionStage executionStage) throws InstanceException {


    final ExecutionGraph executionGraph = executionStage.getExecutionGraph();
    final InstanceRequestMap instanceRequestMap = new InstanceRequestMap();


    synchronized (executionStage) {


      executionStage.collectRequiredInstanceTypes(instanceRequestMap, ExecutionState.CREATED);


      final Iterator<Map.Entry<InstanceType, Integer>> it = instanceRequestMap.getMinimumIterator();
      LOG.info("Requesting the following instances for job " + executionGraph.getJobID());
      while (it.hasNext()) {
        final Map.Entry<InstanceType, Integer> entry = it.next();
        LOG.info(" " + entry.getKey() + " [" + entry.getValue().intValue() + ", "
          + instanceRequestMap.getMaximumNumberOfInstances(entry.getKey()) + "]");
      }


      if (instanceRequestMap.isEmpty()) {
        return;
      }


      this.instanceManager.requestInstance(executionGraph.getJobID(), executionGraph.getJobConfiguration(),
        instanceRequestMap, null);


      // Switch vertex state to assigning
      final ExecutionGraphIterator it2 = new ExecutionGraphIterator(executionGraph, executionGraph
        .getIndexOfCurrentExecutionStage(), true, true);
      while (it2.hasNext()) {


        it2.next().compareAndUpdateExecutionState(ExecutionState.CREATED, ExecutionState.SCHEDULED);
      }
    }
  }


  void findVerticesToBeDeployed(final ExecutionVertex vertex,
      final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed,
      final Set<ExecutionVertex> alreadyVisited) {


    if (!alreadyVisited.add(vertex)) {
      return;
    }


    if (vertex.compareAndUpdateExecutionState(ExecutionState.ASSIGNED, ExecutionState.READY)) {
      final AbstractInstance instance = vertex.getAllocatedResource().getInstance();


      if (instance instanceof DummyInstance) {
        LOG.error("Inconsistency: Vertex " + vertex + " is about to be deployed on a DummyInstance");
      }


      List<ExecutionVertex> verticesForInstance = verticesToBeDeployed.get(instance);
      if (verticesForInstance == null) {
        verticesForInstance = new ArrayList<ExecutionVertex>();
        verticesToBeDeployed.put(instance, verticesForInstance);
      }


      verticesForInstance.add(vertex);
    }


    final int numberOfOutputGates = vertex.getNumberOfOutputGates();
    for (int i = 0; i < numberOfOutputGates; ++i) {


      final ExecutionGate outputGate = vertex.getOutputGate(i);
      boolean deployTarget;


      switch (outputGate.getChannelType()) {
      case NETWORK:
        deployTarget = false;
        break;
      case IN_MEMORY:
        deployTarget = true;
        break;
      default:
        throw new IllegalStateException("Unknown channel type");
      }


      if (deployTarget) {


        final int numberOfOutputChannels = outputGate.getNumberOfEdges();
        for (int j = 0; j < numberOfOutputChannels; ++j) {
          final ExecutionEdge outputChannel = outputGate.getEdge(j);
          final ExecutionVertex connectedVertex = outputChannel.getInputGate().getVertex();
          findVerticesToBeDeployed(connectedVertex, verticesToBeDeployed, alreadyVisited);
        }
      }
    }
  }


  /**
   * Collects all execution vertices with the state ASSIGNED starting from the given start vertex and
   * deploys them on the assigned {@link AllocatedResource} objects.
   * 
   * @param startVertex
   *        the execution vertex to start the deployment from
   */
  public void deployAssignedVertices(final ExecutionVertex startVertex) {


    final JobID jobID = startVertex.getExecutionGraph().getJobID();


    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();


    findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);


    if (!verticesToBeDeployed.isEmpty()) {


      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();


      while (it2.hasNext()) {


        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }


  /**
   * Collects all execution vertices with the state ASSIGNED from the given pipeline and deploys them on the assigned
   * {@link AllocatedResource} objects.
   * 
   * @param pipeline
   *        the execution pipeline to be deployed
   */
  public void deployAssignedPipeline(final ExecutionPipeline pipeline) {


    final JobID jobID = null;


    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();


    final Iterator<ExecutionVertex> it = pipeline.iterator();
    while (it.hasNext()) {
      findVerticesToBeDeployed(it.next(), verticesToBeDeployed, alreadyVisited);
    }


    if (!verticesToBeDeployed.isEmpty()) {


      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();


      while (it2.hasNext()) {


        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }


  /**
   * Collects all execution vertices with the state ASSIGNED starting from the given collection of start vertices and
   * deploys them on the assigned {@link AllocatedResource} objects.
   * 
   * @param startVertices
   *        the collection of execution vertices to start the deployment from
   */
  public void deployAssignedVertices(final Collection<ExecutionVertex> startVertices) {


    JobID jobID = null;


    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();


    for (final ExecutionVertex startVertex : startVertices) {


      if (jobID == null) {
        jobID = startVertex.getExecutionGraph().getJobID();
      }


      findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);
    }


    if (!verticesToBeDeployed.isEmpty()) {


      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();


      while (it2.hasNext()) {


        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }


  /**
   * Collects all execution vertices with the state ASSIGNED starting from the input vertices of the current execution
   * stage and deploys them on the assigned {@link AllocatedResource} objects.
   * 
   * @param executionGraph
   *        the execution graph to collect the vertices from
   */
  public void deployAssignedInputVertices(final ExecutionGraph executionGraph) {


    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final ExecutionStage executionStage = executionGraph.getCurrentExecutionStage();


    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();


    for (int i = 0; i < executionStage.getNumberOfStageMembers(); ++i) {


      final ExecutionGroupVertex startVertex = executionStage.getStageMember(i);
      if (!startVertex.isInputVertex()) {
        continue;
      }


      for (int j = 0; j < startVertex.getCurrentNumberOfGroupMembers(); ++j) {
        final ExecutionVertex vertex = startVertex.getGroupMember(j);
        findVerticesToBeDeployed(vertex, verticesToBeDeployed, alreadyVisited);
      }
    }


    if (!verticesToBeDeployed.isEmpty()) {


      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();


      while (it2.hasNext()) {


        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(executionGraph.getJobID(), entry.getKey(), entry.getValue());
      }
    }
  }




  @Override
  public void resourcesAllocated(final JobID jobID, final List<AllocatedResource> allocatedResources) {


    if (allocatedResources == null) {
      LOG.error("Resource to lock is null!");
      return;
    }


    for (final AllocatedResource allocatedResource : allocatedResources) {
      if (allocatedResource.getInstance() instanceof DummyInstance) {
        LOG.debug("Available instance is of type DummyInstance!");
        return;
      }
    }


    final ExecutionGraph eg = getExecutionGraphByID(jobID);


    if (eg == null) {
      /*
       * The job have have been canceled in the meantime, in this case
       * we release the instance immediately.
       */
      try {
        for (final AllocatedResource allocatedResource : allocatedResources) {
          getInstanceManager().releaseAllocatedResource(jobID, null, allocatedResource);
        }
      } catch (InstanceException e) {
        LOG.error(e);
      }
      return;
    }


    final Runnable command = new Runnable() {


      /**
       * {@inheritDoc}
       */
      @Override
      public void run() {


        final ExecutionStage stage = eg.getCurrentExecutionStage();


        synchronized (stage) {


          for (final AllocatedResource allocatedResource : allocatedResources) {


            AllocatedResource resourceToBeReplaced = null;
            // Important: only look for instances to be replaced in the current stage
            final Iterator<ExecutionGroupVertex> groupIterator = new ExecutionGroupVertexIterator(eg, true,
              stage.getStageNumber());
            while (groupIterator.hasNext()) {


              final ExecutionGroupVertex groupVertex = groupIterator.next();
              for (int i = 0; i < groupVertex.getCurrentNumberOfGroupMembers(); ++i) {


                final ExecutionVertex vertex = groupVertex.getGroupMember(i);


                if (vertex.getExecutionState() == ExecutionState.SCHEDULED
                  && vertex.getAllocatedResource() != null) {
                  // In local mode, we do not consider any topology, only the instance type
                  if (vertex.getAllocatedResource().getInstanceType().equals(
                    allocatedResource.getInstanceType())) {
                    resourceToBeReplaced = vertex.getAllocatedResource();
                    break;
                  }
                }
              }


              if (resourceToBeReplaced != null) {
                break;
              }
            }


            // For some reason, we don't need this instance
            if (resourceToBeReplaced == null) {
              LOG.error("Instance " + allocatedResource.getInstance() + " is not required for job"
                + eg.getJobID());
              try {
                getInstanceManager().releaseAllocatedResource(jobID, eg.getJobConfiguration(),
                  allocatedResource);
              } catch (InstanceException e) {
                LOG.error(e);
              }
              return;
            }


            // Replace the selected instance
            final Iterator<ExecutionVertex> it = resourceToBeReplaced.assignedVertices();
            while (it.hasNext()) {
              final ExecutionVertex vertex = it.next();
              vertex.setAllocatedResource(allocatedResource);
              vertex.updateExecutionState(ExecutionState.ASSIGNED);
            }
          }
        }


        // Deploy the assigned vertices
        deployAssignedInputVertices(eg);


      }


    };


    eg.executeCommand(command);
  }


  /**
   * Checks if the given {@link AllocatedResource} is still required for the
   * execution of the given execution graph. If the resource is no longer
   * assigned to a vertex that is either currently running or about to run
   * the given resource is returned to the instance manager for deallocation.
   * 
   * @param executionGraph
   *        the execution graph the provided resource has been used for so far
   * @param allocatedResource
   *        the allocated resource to check the assignment for
   */
  public void checkAndReleaseAllocatedResource(final ExecutionGraph executionGraph,
      final AllocatedResource allocatedResource) {


    if (allocatedResource == null) {
      LOG.error("Resource to lock is null!");
      return;
    }


    if (allocatedResource.getInstance() instanceof DummyInstance) {
      LOG.debug("Available instance is of type DummyInstance!");
      return;
    }


    boolean resourceCanBeReleased = true;
    final Iterator<ExecutionVertex> it = allocatedResource.assignedVertices();
    while (it.hasNext()) {
      final ExecutionVertex vertex = it.next();
      final ExecutionState state = vertex.getExecutionState();


      if (state != ExecutionState.CREATED && state != ExecutionState.FINISHED
        && state != ExecutionState.FAILED && state != ExecutionState.CANCELED) {


        resourceCanBeReleased = false;
        break;
      }
    }


    if (resourceCanBeReleased) {


      LOG.info("Releasing instance " + allocatedResource.getInstance());
      try {
        getInstanceManager().releaseAllocatedResource(executionGraph.getJobID(), executionGraph
          .getJobConfiguration(), allocatedResource);
      } catch (InstanceException e) {
        LOG.error(StringUtils.stringifyException(e));
      }
    }
  }


  DeploymentManager getDeploymentManager() {
    return this.deploymentManager;
  }


  protected void replayCheckpointsFromPreviousStage(final ExecutionGraph executionGraph) {


    final int currentStageIndex = executionGraph.getIndexOfCurrentExecutionStage();
    final ExecutionStage previousStage = executionGraph.getStage(currentStageIndex - 1);


    final List<ExecutionVertex> verticesToBeReplayed = new ArrayList<ExecutionVertex>();


    for (int i = 0; i < previousStage.getNumberOfOutputExecutionVertices(); ++i) {


      final ExecutionVertex vertex = previousStage.getOutputExecutionVertex(i);
      vertex.updateExecutionState(ExecutionState.ASSIGNED);
      verticesToBeReplayed.add(vertex);
    }


    deployAssignedVertices(verticesToBeReplayed);
  }


  /**
   * Returns a map of vertices to be restarted once they have switched to their <code>CANCELED</code> state.
   * 
   * @return the map of vertices to be restarted
   */
  Map<ExecutionVertexID, ExecutionVertex> getVerticesToBeRestarted() {


    return this.verticesToBeRestarted;
  }




  @Override
  public void allocatedResourcesDied(final JobID jobID, final List<AllocatedResource> allocatedResources) {


    final ExecutionGraph eg = getExecutionGraphByID(jobID);


    if (eg == null) {
      LOG.error("Cannot find execution graph for job with ID " + jobID);
      return;
    }


    final Runnable command = new Runnable() {


      /**
       * {@inheritDoc}
       */
      @Override
      public void run() {


        synchronized (eg) {


          for (final AllocatedResource allocatedResource : allocatedResources) {


            LOG.info("Resource " + allocatedResource.getInstance().getName() + " for Job " + jobID
              + " died.");


            final ExecutionGraph executionGraph = getExecutionGraphByID(jobID);


            if (executionGraph == null) {
              LOG.error("Cannot find execution graph for job " + jobID);
              return;
            }


            Iterator<ExecutionVertex> vertexIter = allocatedResource.assignedVertices();


            // Assign vertices back to a dummy resource.
            final DummyInstance dummyInstance = DummyInstance.createDummyInstance(allocatedResource
              .getInstance()
              .getType());
            final AllocatedResource dummyResource = new AllocatedResource(dummyInstance,
              allocatedResource.getInstanceType(), new AllocationID());


            while (vertexIter.hasNext()) {
              final ExecutionVertex vertex = vertexIter.next();
              vertex.setAllocatedResource(dummyResource);
            }


            final String failureMessage = allocatedResource.getInstance().getName() + " died";


            vertexIter = allocatedResource.assignedVertices();


            while (vertexIter.hasNext()) {
              final ExecutionVertex vertex = vertexIter.next();
              final ExecutionState state = vertex.getExecutionState();


              switch (state) {
              case ASSIGNED:
              case READY:
              case STARTING:
              case RUNNING:
              case FINISHING:


              vertex.updateExecutionState(ExecutionState.FAILED, failureMessage);


              break;
            default:
              }
          }


          // TODO: Fix this
          /*
           * try {
           * requestInstances(this.executionVertex.getGroupVertex().getExecutionStage());
           * } catch (InstanceException e) {
           * e.printStackTrace();
           * // TODO: Cancel the entire job in this case
           * }
           */
        }
      }


      final InternalJobStatus js = eg.getJobStatus();
      if (js != InternalJobStatus.FAILING && js != InternalJobStatus.FAILED) {


        // TODO: Fix this
        // deployAssignedVertices(eg);


        final ExecutionStage stage = eg.getCurrentExecutionStage();


        try {
          requestInstances(stage);
        } catch (InstanceException e) {
          e.printStackTrace();
          // TODO: Cancel the entire job in this case
        }
      }
    }
    };


    eg.executeCommand(command);
  }
}
Source Code of eu.stratosphere.nephele.jobmanager.scheduler.AbstractScheduler

Related Classes of eu.stratosphere.nephele.jobmanager.scheduler.AbstractScheduler