Package eu.stratosphere.nephele.jobmanager.scheduler

Source Code of eu.stratosphere.nephele.jobmanager.scheduler.AbstractScheduler

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.scheduler;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.nephele.execution.ExecutionState;
import eu.stratosphere.nephele.executiongraph.ExecutionEdge;
import eu.stratosphere.nephele.executiongraph.ExecutionGate;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGraphIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionPipeline;
import eu.stratosphere.nephele.executiongraph.ExecutionStage;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionVertexID;
import eu.stratosphere.nephele.executiongraph.InternalJobStatus;
import eu.stratosphere.nephele.instance.AbstractInstance;
import eu.stratosphere.nephele.instance.AllocatedResource;
import eu.stratosphere.nephele.instance.AllocationID;
import eu.stratosphere.nephele.instance.DummyInstance;
import eu.stratosphere.nephele.instance.InstanceException;
import eu.stratosphere.nephele.instance.InstanceListener;
import eu.stratosphere.nephele.instance.InstanceManager;
import eu.stratosphere.nephele.instance.InstanceRequestMap;
import eu.stratosphere.nephele.instance.InstanceType;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.DeploymentManager;
import eu.stratosphere.util.StringUtils;

/**
* This abstract scheduler must be extended by a scheduler implementations for Nephele. The abstract class defines the
* fundamental methods for scheduling and removing jobs. While Nephele's
* {@link eu.stratosphere.nephele.jobmanager.JobManager} is responsible for requesting the required instances for the
* job at the {@link eu.stratosphere.nephele.instance.InstanceManager}, the scheduler is in charge of assigning the
* individual tasks to the instances.
*
*/
public abstract class AbstractScheduler implements InstanceListener {

  /**
   * The LOG object to report events within the scheduler.
   */
  protected static final Log LOG = LogFactory.getLog(AbstractScheduler.class);

  /**
   * The instance manager assigned to this scheduler.
   */
  private final InstanceManager instanceManager;

  /**
   * The deployment manager assigned to this scheduler.
   */
  private final DeploymentManager deploymentManager;

  /**
   * Stores the vertices to be restarted once they have switched to the <code>CANCELED</code> state.
   */
  private final Map<ExecutionVertexID, ExecutionVertex> verticesToBeRestarted = new ConcurrentHashMap<ExecutionVertexID, ExecutionVertex>();

  /**
   * Constructs a new abstract scheduler.
   *
   * @param deploymentManager
   *        the deployment manager assigned to this scheduler
   * @param instanceManager
   *        the instance manager to be used with this scheduler
   */
  protected AbstractScheduler(final DeploymentManager deploymentManager, final InstanceManager instanceManager) {

    this.deploymentManager = deploymentManager;
    this.instanceManager = instanceManager;
    this.instanceManager.setInstanceListener(this);
  }

  /**
   * Adds a job represented by an {@link ExecutionGraph} object to the scheduler. The job is then executed according
   * to the strategies of the concrete scheduler implementation.
   *
   * @param executionGraph
   *        the job to be added to the scheduler
   * @throws SchedulingException
   *         thrown if an error occurs and the scheduler does not accept the new job
   */
  public abstract void schedulJob(ExecutionGraph executionGraph) throws SchedulingException;

  /**
   * Returns the execution graph which is associated with the given job ID.
   *
   * @param jobID
   *        the job ID to search the execution graph for
   * @return the execution graph which belongs to the given job ID or <code>null</code if no such execution graph
   *         exists
   */
  public abstract ExecutionGraph getExecutionGraphByID(JobID jobID);

  /**
   * Returns the {@link InstanceManager} object which is used by the current scheduler.
   *
   * @return the {@link InstanceManager} object which is used by the current scheduler
   */
  public InstanceManager getInstanceManager() {
    return this.instanceManager;
  }

  // void removeJob(JobID jobID);

  /**
   * Shuts the scheduler down. After shut down no jobs can be added to the scheduler.
   */
  public abstract void shutdown();

  /**
   * Collects the instances required to run the job from the given {@link ExecutionStage} and requests them at the
   * loaded instance manager.
   *
   * @param executionStage
   *        the execution stage to collect the required instances from
   * @throws InstanceException
   *         thrown if the given execution graph is already processing its final stage
   */
  protected void requestInstances(final ExecutionStage executionStage) throws InstanceException {

    final ExecutionGraph executionGraph = executionStage.getExecutionGraph();
    final InstanceRequestMap instanceRequestMap = new InstanceRequestMap();

    synchronized (executionStage) {

      executionStage.collectRequiredInstanceTypes(instanceRequestMap, ExecutionState.CREATED);

      final Iterator<Map.Entry<InstanceType, Integer>> it = instanceRequestMap.getMinimumIterator();
      LOG.info("Requesting the following instances for job " + executionGraph.getJobID());
      while (it.hasNext()) {
        final Map.Entry<InstanceType, Integer> entry = it.next();
        LOG.info(" " + entry.getKey() + " [" + entry.getValue().intValue() + ", "
          + instanceRequestMap.getMaximumNumberOfInstances(entry.getKey()) + "]");
      }

      if (instanceRequestMap.isEmpty()) {
        return;
      }

      this.instanceManager.requestInstance(executionGraph.getJobID(), executionGraph.getJobConfiguration(),
        instanceRequestMap, null);

      // Switch vertex state to assigning
      final ExecutionGraphIterator it2 = new ExecutionGraphIterator(executionGraph, executionGraph
        .getIndexOfCurrentExecutionStage(), true, true);
      while (it2.hasNext()) {

        it2.next().compareAndUpdateExecutionState(ExecutionState.CREATED, ExecutionState.SCHEDULED);
      }
    }
  }

  void findVerticesToBeDeployed(final ExecutionVertex vertex,
      final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed,
      final Set<ExecutionVertex> alreadyVisited) {

    if (!alreadyVisited.add(vertex)) {
      return;
    }

    if (vertex.compareAndUpdateExecutionState(ExecutionState.ASSIGNED, ExecutionState.READY)) {
      final AbstractInstance instance = vertex.getAllocatedResource().getInstance();

      if (instance instanceof DummyInstance) {
        LOG.error("Inconsistency: Vertex " + vertex + " is about to be deployed on a DummyInstance");
      }

      List<ExecutionVertex> verticesForInstance = verticesToBeDeployed.get(instance);
      if (verticesForInstance == null) {
        verticesForInstance = new ArrayList<ExecutionVertex>();
        verticesToBeDeployed.put(instance, verticesForInstance);
      }

      verticesForInstance.add(vertex);
    }

    final int numberOfOutputGates = vertex.getNumberOfOutputGates();
    for (int i = 0; i < numberOfOutputGates; ++i) {

      final ExecutionGate outputGate = vertex.getOutputGate(i);
      boolean deployTarget;

      switch (outputGate.getChannelType()) {
      case NETWORK:
        deployTarget = false;
        break;
      case IN_MEMORY:
        deployTarget = true;
        break;
      default:
        throw new IllegalStateException("Unknown channel type");
      }

      if (deployTarget) {

        final int numberOfOutputChannels = outputGate.getNumberOfEdges();
        for (int j = 0; j < numberOfOutputChannels; ++j) {
          final ExecutionEdge outputChannel = outputGate.getEdge(j);
          final ExecutionVertex connectedVertex = outputChannel.getInputGate().getVertex();
          findVerticesToBeDeployed(connectedVertex, verticesToBeDeployed, alreadyVisited);
        }
      }
    }
  }

  /**
   * Collects all execution vertices with the state ASSIGNED starting from the given start vertex and
   * deploys them on the assigned {@link AllocatedResource} objects.
   *
   * @param startVertex
   *        the execution vertex to start the deployment from
   */
  public void deployAssignedVertices(final ExecutionVertex startVertex) {

    final JobID jobID = startVertex.getExecutionGraph().getJobID();

    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

    findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);

    if (!verticesToBeDeployed.isEmpty()) {

      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();

      while (it2.hasNext()) {

        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * Collects all execution vertices with the state ASSIGNED from the given pipeline and deploys them on the assigned
   * {@link AllocatedResource} objects.
   *
   * @param pipeline
   *        the execution pipeline to be deployed
   */
  public void deployAssignedPipeline(final ExecutionPipeline pipeline) {

    final JobID jobID = null;

    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

    final Iterator<ExecutionVertex> it = pipeline.iterator();
    while (it.hasNext()) {
      findVerticesToBeDeployed(it.next(), verticesToBeDeployed, alreadyVisited);
    }

    if (!verticesToBeDeployed.isEmpty()) {

      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();

      while (it2.hasNext()) {

        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * Collects all execution vertices with the state ASSIGNED starting from the given collection of start vertices and
   * deploys them on the assigned {@link AllocatedResource} objects.
   *
   * @param startVertices
   *        the collection of execution vertices to start the deployment from
   */
  public void deployAssignedVertices(final Collection<ExecutionVertex> startVertices) {

    JobID jobID = null;

    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

    for (final ExecutionVertex startVertex : startVertices) {

      if (jobID == null) {
        jobID = startVertex.getExecutionGraph().getJobID();
      }

      findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);
    }

    if (!verticesToBeDeployed.isEmpty()) {

      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();

      while (it2.hasNext()) {

        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * Collects all execution vertices with the state ASSIGNED starting from the input vertices of the current execution
   * stage and deploys them on the assigned {@link AllocatedResource} objects.
   *
   * @param executionGraph
   *        the execution graph to collect the vertices from
   */
  public void deployAssignedInputVertices(final ExecutionGraph executionGraph) {

    final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
    final ExecutionStage executionStage = executionGraph.getCurrentExecutionStage();

    final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

    for (int i = 0; i < executionStage.getNumberOfStageMembers(); ++i) {

      final ExecutionGroupVertex startVertex = executionStage.getStageMember(i);
      if (!startVertex.isInputVertex()) {
        continue;
      }

      for (int j = 0; j < startVertex.getCurrentNumberOfGroupMembers(); ++j) {
        final ExecutionVertex vertex = startVertex.getGroupMember(j);
        findVerticesToBeDeployed(vertex, verticesToBeDeployed, alreadyVisited);
      }
    }

    if (!verticesToBeDeployed.isEmpty()) {

      final Iterator<Map.Entry<AbstractInstance, List<ExecutionVertex>>> it2 = verticesToBeDeployed
        .entrySet()
        .iterator();

      while (it2.hasNext()) {

        final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry = it2.next();
        this.deploymentManager.deploy(executionGraph.getJobID(), entry.getKey(), entry.getValue());
      }
    }
  }


  @Override
  public void resourcesAllocated(final JobID jobID, final List<AllocatedResource> allocatedResources) {

    if (allocatedResources == null) {
      LOG.error("Resource to lock is null!");
      return;
    }

    for (final AllocatedResource allocatedResource : allocatedResources) {
      if (allocatedResource.getInstance() instanceof DummyInstance) {
        LOG.debug("Available instance is of type DummyInstance!");
        return;
      }
    }

    final ExecutionGraph eg = getExecutionGraphByID(jobID);

    if (eg == null) {
      /*
       * The job have have been canceled in the meantime, in this case
       * we release the instance immediately.
       */
      try {
        for (final AllocatedResource allocatedResource : allocatedResources) {
          getInstanceManager().releaseAllocatedResource(jobID, null, allocatedResource);
        }
      } catch (InstanceException e) {
        LOG.error(e);
      }
      return;
    }

    final Runnable command = new Runnable() {

      /**
       * {@inheritDoc}
       */
      @Override
      public void run() {

        final ExecutionStage stage = eg.getCurrentExecutionStage();

        synchronized (stage) {

          for (final AllocatedResource allocatedResource : allocatedResources) {

            AllocatedResource resourceToBeReplaced = null;
            // Important: only look for instances to be replaced in the current stage
            final Iterator<ExecutionGroupVertex> groupIterator = new ExecutionGroupVertexIterator(eg, true,
              stage.getStageNumber());
            while (groupIterator.hasNext()) {

              final ExecutionGroupVertex groupVertex = groupIterator.next();
              for (int i = 0; i < groupVertex.getCurrentNumberOfGroupMembers(); ++i) {

                final ExecutionVertex vertex = groupVertex.getGroupMember(i);

                if (vertex.getExecutionState() == ExecutionState.SCHEDULED
                  && vertex.getAllocatedResource() != null) {
                  // In local mode, we do not consider any topology, only the instance type
                  if (vertex.getAllocatedResource().getInstanceType().equals(
                    allocatedResource.getInstanceType())) {
                    resourceToBeReplaced = vertex.getAllocatedResource();
                    break;
                  }
                }
              }

              if (resourceToBeReplaced != null) {
                break;
              }
            }

            // For some reason, we don't need this instance
            if (resourceToBeReplaced == null) {
              LOG.error("Instance " + allocatedResource.getInstance() + " is not required for job"
                + eg.getJobID());
              try {
                getInstanceManager().releaseAllocatedResource(jobID, eg.getJobConfiguration(),
                  allocatedResource);
              } catch (InstanceException e) {
                LOG.error(e);
              }
              return;
            }

            // Replace the selected instance
            final Iterator<ExecutionVertex> it = resourceToBeReplaced.assignedVertices();
            while (it.hasNext()) {
              final ExecutionVertex vertex = it.next();
              vertex.setAllocatedResource(allocatedResource);
              vertex.updateExecutionState(ExecutionState.ASSIGNED);
            }
          }
        }

        // Deploy the assigned vertices
        deployAssignedInputVertices(eg);

      }

    };

    eg.executeCommand(command);
  }

  /**
   * Checks if the given {@link AllocatedResource} is still required for the
   * execution of the given execution graph. If the resource is no longer
   * assigned to a vertex that is either currently running or about to run
   * the given resource is returned to the instance manager for deallocation.
   *
   * @param executionGraph
   *        the execution graph the provided resource has been used for so far
   * @param allocatedResource
   *        the allocated resource to check the assignment for
   */
  public void checkAndReleaseAllocatedResource(final ExecutionGraph executionGraph,
      final AllocatedResource allocatedResource) {

    if (allocatedResource == null) {
      LOG.error("Resource to lock is null!");
      return;
    }

    if (allocatedResource.getInstance() instanceof DummyInstance) {
      LOG.debug("Available instance is of type DummyInstance!");
      return;
    }

    boolean resourceCanBeReleased = true;
    final Iterator<ExecutionVertex> it = allocatedResource.assignedVertices();
    while (it.hasNext()) {
      final ExecutionVertex vertex = it.next();
      final ExecutionState state = vertex.getExecutionState();

      if (state != ExecutionState.CREATED && state != ExecutionState.FINISHED
        && state != ExecutionState.FAILED && state != ExecutionState.CANCELED) {

        resourceCanBeReleased = false;
        break;
      }
    }

    if (resourceCanBeReleased) {

      LOG.info("Releasing instance " + allocatedResource.getInstance());
      try {
        getInstanceManager().releaseAllocatedResource(executionGraph.getJobID(), executionGraph
          .getJobConfiguration(), allocatedResource);
      } catch (InstanceException e) {
        LOG.error(StringUtils.stringifyException(e));
      }
    }
  }

  DeploymentManager getDeploymentManager() {
    return this.deploymentManager;
  }

  protected void replayCheckpointsFromPreviousStage(final ExecutionGraph executionGraph) {

    final int currentStageIndex = executionGraph.getIndexOfCurrentExecutionStage();
    final ExecutionStage previousStage = executionGraph.getStage(currentStageIndex - 1);

    final List<ExecutionVertex> verticesToBeReplayed = new ArrayList<ExecutionVertex>();

    for (int i = 0; i < previousStage.getNumberOfOutputExecutionVertices(); ++i) {

      final ExecutionVertex vertex = previousStage.getOutputExecutionVertex(i);
      vertex.updateExecutionState(ExecutionState.ASSIGNED);
      verticesToBeReplayed.add(vertex);
    }

    deployAssignedVertices(verticesToBeReplayed);
  }

  /**
   * Returns a map of vertices to be restarted once they have switched to their <code>CANCELED</code> state.
   *
   * @return the map of vertices to be restarted
   */
  Map<ExecutionVertexID, ExecutionVertex> getVerticesToBeRestarted() {

    return this.verticesToBeRestarted;
  }


  @Override
  public void allocatedResourcesDied(final JobID jobID, final List<AllocatedResource> allocatedResources) {

    final ExecutionGraph eg = getExecutionGraphByID(jobID);

    if (eg == null) {
      LOG.error("Cannot find execution graph for job with ID " + jobID);
      return;
    }

    final Runnable command = new Runnable() {

      /**
       * {@inheritDoc}
       */
      @Override
      public void run() {

        synchronized (eg) {

          for (final AllocatedResource allocatedResource : allocatedResources) {

            LOG.info("Resource " + allocatedResource.getInstance().getName() + " for Job " + jobID
              + " died.");

            final ExecutionGraph executionGraph = getExecutionGraphByID(jobID);

            if (executionGraph == null) {
              LOG.error("Cannot find execution graph for job " + jobID);
              return;
            }

            Iterator<ExecutionVertex> vertexIter = allocatedResource.assignedVertices();

            // Assign vertices back to a dummy resource.
            final DummyInstance dummyInstance = DummyInstance.createDummyInstance(allocatedResource
              .getInstance()
              .getType());
            final AllocatedResource dummyResource = new AllocatedResource(dummyInstance,
              allocatedResource.getInstanceType(), new AllocationID());

            while (vertexIter.hasNext()) {
              final ExecutionVertex vertex = vertexIter.next();
              vertex.setAllocatedResource(dummyResource);
            }

            final String failureMessage = allocatedResource.getInstance().getName() + " died";

            vertexIter = allocatedResource.assignedVertices();

            while (vertexIter.hasNext()) {
              final ExecutionVertex vertex = vertexIter.next();
              final ExecutionState state = vertex.getExecutionState();

              switch (state) {
              case ASSIGNED:
              case READY:
              case STARTING:
              case RUNNING:
              case FINISHING:

              vertex.updateExecutionState(ExecutionState.FAILED, failureMessage);

              break;
            default:
              }
          }

          // TODO: Fix this
          /*
           * try {
           * requestInstances(this.executionVertex.getGroupVertex().getExecutionStage());
           * } catch (InstanceException e) {
           * e.printStackTrace();
           * // TODO: Cancel the entire job in this case
           * }
           */
        }
      }

      final InternalJobStatus js = eg.getJobStatus();
      if (js != InternalJobStatus.FAILING && js != InternalJobStatus.FAILED) {

        // TODO: Fix this
        // deployAssignedVertices(eg);

        final ExecutionStage stage = eg.getCurrentExecutionStage();

        try {
          requestInstances(stage);
        } catch (InstanceException e) {
          e.printStackTrace();
          // TODO: Cancel the entire job in this case
        }
      }
    }
    };

    eg.executeCommand(command);
  }
}
TOP

Related Classes of eu.stratosphere.nephele.jobmanager.scheduler.AbstractScheduler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.