// Source Code of org.apache.flink.runtime.executiongraph.Execution

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.flink.runtime.executiongraph;


import static org.apache.flink.runtime.execution.ExecutionState.CANCELED;
import static org.apache.flink.runtime.execution.ExecutionState.CANCELING;
import static org.apache.flink.runtime.execution.ExecutionState.CREATED;
import static org.apache.flink.runtime.execution.ExecutionState.DEPLOYING;
import static org.apache.flink.runtime.execution.ExecutionState.FAILED;
import static org.apache.flink.runtime.execution.ExecutionState.FINISHED;
import static org.apache.flink.runtime.execution.ExecutionState.RUNNING;
import static org.apache.flink.runtime.execution.ExecutionState.SCHEDULED;


import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;


import org.apache.flink.runtime.JobException;
import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.instance.AllocatedSlot;
import org.apache.flink.runtime.instance.Instance;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint;
import org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException;
import org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit;
import org.apache.flink.runtime.jobmanager.scheduler.Scheduler;
import org.apache.flink.runtime.jobmanager.scheduler.SlotAllocationFuture;
import org.apache.flink.runtime.jobmanager.scheduler.SlotAllocationFutureAction;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.taskmanager.TaskOperationResult;
import org.apache.flink.util.ExceptionUtils;
import org.slf4j.Logger;


import com.google.common.base.Preconditions;


/**
 * A single execution of a vertex. While an {@link ExecutionVertex} can be executed multiple times (for recovery,
 * or other re-computation), this class tracks the state of a single execution of that vertex and the resources.
 * 
 * NOTE ABOUT THE DESIGN RATIONAL:
 * 
 * In several points of the code, we need to deal with possible concurrent state changes and actions.
 * For example, while the call to deploy a task (send it to the TaskManager) happens, the task gets cancelled.
 * 
 * We could lock the entire portion of the code (decision to deploy, deploy, set state to running) such that
 * it is guaranteed that any "cancel command" will only pick up after deployment is done and that the "cancel
 * command" call will never overtake the deploying call.
 * 
 * This blocks the threads big time, because the remote calls may take long. Depending of their locking behavior, it
 * may even result in distributed deadlocks (unless carefully avoided). We therefore use atomic state updates and
 * occasional double-checking to ensure that the state after a completed call is as expected, and trigger correcting
 * actions if it is not. Many actions are also idempotent (like canceling).
 */
public class Execution {


  private static final AtomicReferenceFieldUpdater<Execution, ExecutionState> STATE_UPDATER =
      AtomicReferenceFieldUpdater.newUpdater(Execution.class, ExecutionState.class, "state");
  
  private static final Logger LOG = ExecutionGraph.LOG;
  
  private static final int NUM_CANCEL_CALL_TRIES = 3;
  
  // --------------------------------------------------------------------------------------------
  
  private final ExecutionVertex vertex;
  
  private final ExecutionAttemptID attemptId;
  
  private final long[] stateTimestamps;
  
  private final int attemptNumber;
  
  
  private volatile ExecutionState state = CREATED;
  
  private volatile AllocatedSlot assignedResource;  // once assigned, never changes
  
  private volatile Throwable failureCause;          // once assigned, never changes
  
  // --------------------------------------------------------------------------------------------
  
  public Execution(ExecutionVertex vertex, int attemptNumber, long startTimestamp) {
    Preconditions.checkNotNull(vertex);
    Preconditions.checkArgument(attemptNumber >= 0);
    
    this.vertex = vertex;
    this.attemptId = new ExecutionAttemptID();
    this.attemptNumber = attemptNumber;
    
    this.stateTimestamps = new long[ExecutionState.values().length];
    markTimestamp(ExecutionState.CREATED, startTimestamp);
  }
  
  // --------------------------------------------------------------------------------------------
  //   Properties
  // --------------------------------------------------------------------------------------------
  
  public ExecutionVertex getVertex() {
    return vertex;
  }
  
  public ExecutionAttemptID getAttemptId() {
    return attemptId;
  }


  public int getAttemptNumber() {
    return attemptNumber;
  }
  
  public ExecutionState getState() {
    return state;
  }
  
  public AllocatedSlot getAssignedResource() {
    return assignedResource;
  }
  
  public Throwable getFailureCause() {
    return failureCause;
  }
  
  public long[] getStateTimestamps() {
    return stateTimestamps;
  }
  
  public long getStateTimestamp(ExecutionState state) {
    return this.stateTimestamps[state.ordinal()];
  }
  
  public boolean isFinished() {
    return state == FINISHED || state == FAILED || state == CANCELED;
  }
  
  // --------------------------------------------------------------------------------------------
  //  Actions
  // --------------------------------------------------------------------------------------------
  
  /**
   * NOTE: This method only throws exceptions if it is in an illegal state to be scheduled, or if the tasks needs
   *       to be scheduled immediately and no resource is available. If the task is accepted by the schedule, any
   *       error sets the vertex state to failed and triggers the recovery logic.
   * 
   * @param scheduler
   * 
   * @throws IllegalStateException Thrown, if the vertex is not in CREATED state, which is the only state that permits scheduling.
   * @throws NoResourceAvailableException Thrown is no queued scheduling is allowed and no resources are currently available.
   */
  public void scheduleForExecution(Scheduler scheduler, boolean queued) throws NoResourceAvailableException {
    if (scheduler == null) {
      throw new NullPointerException();
    }
    
    final SlotSharingGroup sharingGroup = vertex.getJobVertex().getSlotSharingGroup();
    final CoLocationConstraint locationConstraint = vertex.getLocationConstraint();
    
    // sanity check
    if (locationConstraint != null && sharingGroup == null) {
      throw new RuntimeException("Trying to schedule with co-location constraint but without slot sharing allowed.");
    }
    
    if (transitionState(CREATED, SCHEDULED)) {
      
      ScheduledUnit toSchedule = locationConstraint == null ?
        new ScheduledUnit(this, sharingGroup) :
        new ScheduledUnit(this, sharingGroup, locationConstraint);
    
      // IMPORTANT: To prevent leaks of cluster resources, we need to make sure that slots are returned
      //     in all cases where the deployment failed. we use many try {} finally {} clauses to assure that
      if (queued) {
        SlotAllocationFuture future = scheduler.scheduleQueued(toSchedule);
        
        future.setFutureAction(new SlotAllocationFutureAction() {
          @Override
          public void slotAllocated(AllocatedSlot slot) {
            try {
              deployToSlot(slot);
            }
            catch (Throwable t) {
              try {
                slot.releaseSlot();
              } finally {
                markFailed(t);
              }
            }
          }
        });
      }
      else {
        AllocatedSlot slot = scheduler.scheduleImmediately(toSchedule);
        try {
          deployToSlot(slot);
        }
        catch (Throwable t) {
          try {
            slot.releaseSlot();
          } finally {
            markFailed(t);
          }
        }
      }
    }
    else {
      // call race, already deployed
      return;
    }
  }
  
  public void deployToSlot(final AllocatedSlot slot) throws JobException {
    // sanity checks
    if (slot == null) {
      throw new NullPointerException();
    }
    if (!slot.isAlive()) {
      throw new JobException("Traget slot for deployment is not alive.");
    }
    
    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    ExecutionState previous = this.state;
    if (previous == SCHEDULED || previous == CREATED) {
      if (!transitionState(previous, DEPLOYING)) {
        // race condition, someone else beat us to the deploying call.
        // this should actually not happen and indicates a race somewhere else
        throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
      }
    }
    else {
      // vertex may have been cancelled, or it was already scheduled
      throw new IllegalStateException("The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous);
    }
    
    try {
      // good, we are allowed to deploy
      if (!slot.setExecutedVertex(this)) {
        throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
      }
      this.assignedResource = slot;
      
      // race double check, did we fail/cancel and do we need to release the slot?
      if (this.state != DEPLOYING) {
        slot.releaseSlot();
        return;
      }
      
      if (LOG.isInfoEnabled()) {
        LOG.info(String.format("Deploying %s (attempt #%d) to %s", vertex.getSimpleName(),
            attemptNumber, slot.getInstance().getInstanceConnectionInfo().getHostname()));
      }
      
      final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor(attemptId, slot);
      
      // register this execution at the execution graph, to receive call backs
      vertex.getExecutionGraph().registerExecution(this);
      
      // we execute the actual deploy call in a concurrent action to prevent this call from blocking for long
      Runnable deployaction = new Runnable() {
  
        @Override
        public void run() {
          try {
            Instance instance = slot.getInstance();


            TaskOperationResult result = instance.getTaskManagerProxy().submitTask(deployment);
            if (result == null) {
              markFailed(new Exception("Failed to deploy the task to slot " + slot + ": TaskOperationResult was null"));
            }
            else if (!result.getExecutionId().equals(attemptId)) {
              markFailed(new Exception("Answer execution id does not match the request execution id."));
            }
            else if (result.isSuccess()) {
              switchToRunning();
            }
            else {
              // deployment failed :(
              markFailed(new Exception("Failed to deploy the task " + getVertexWithAttempt() + " to slot " + slot + ": " + result.getDescription()));
            }
          }
          catch (Throwable t) {
            // some error occurred. fail the task
            markFailed(t);
          }
        }
      };
      
      vertex.execute(deployaction);
    }
    catch (Throwable t) {
      markFailed(t);
      ExceptionUtils.rethrow(t);
    }
  }
  
  
  public void cancel() {
    // depending on the previous state, we go directly to cancelled (no cancel call necessary)
    // -- or to canceling (cancel call needs to be sent to the task manager)
    
    // because of several possibly previous states, we need to again loop until we make a
    // successful atomic state transition
    while (true) {
      
      ExecutionState current = this.state;
      
      if (current == CANCELING || current == CANCELED) {
        // already taken care of, no need to cancel again
        return;
      }
        
      // these two are the common cases where we need to send a cancel call
      else if (current == RUNNING || current == DEPLOYING) {
        // try to transition to canceling, if successful, send the cancel call
        if (transitionState(current, CANCELING)) {
          sendCancelRpcCall();
          return;
        }
        // else: fall through the loop
      }
      
      else if (current == FINISHED || current == FAILED) {
        // nothing to do any more. finished failed before it could be cancelled.
        // in any case, the task is removed from the TaskManager already
        return;
      }
      else if (current == CREATED || current == SCHEDULED) {
        // from here, we can directly switch to cancelled, because the no task has been deployed
        if (transitionState(current, CANCELED)) {
          
          // we skip the canceling state. set the timestamp, for a consistent appearance
          markTimestamp(CANCELING, getStateTimestamp(CANCELED));
          
          try {
            vertex.executionCanceled();
          }
          finally {
            vertex.getExecutionGraph().deregisterExecution(this);
            if (assignedResource != null) {
              assignedResource.releaseSlot();
            }
          }
          return;
        }
        // else: fall through the loop
      }
      else {
        throw new IllegalStateException(current.name());
      }
    }
  }
  
  /**
   * This method fails the vertex due to an external condition. The task will move to state FAILED.
   * If the task was in state RUNNING or DEPLOYING before, it will send a cancel call to the TaskManager.
   * 
   * @param t The exception that caused the task to fail.
   */
  public void fail(Throwable t) {
    processFail(t, false);
  }
  
  // --------------------------------------------------------------------------------------------
  //   Callbacks
  // --------------------------------------------------------------------------------------------
  
  /**
   * This method marks the task as failed, but will make no attempt to remove task execution from the task manager.
   * It is intended for cases where the task is known not to be running, or then the TaskManager reports failure
   * (in which case it has already removed the task).
   * 
   * @param t The exception that caused the task to fail.
   */
  void markFailed(Throwable t) {
    processFail(t, true);
  }
  
  void markFinished() {
    
    // this call usually comes during RUNNING, but may also come while still in deploying (very fast tasks!)
    while (true) {
      ExecutionState current = this.state;
      
      if (current == RUNNING || current == DEPLOYING) {
      
        if (transitionState(current, FINISHED)) {
          try {
            assignedResource.releaseSlot();
            vertex.getExecutionGraph().deregisterExecution(this);
          }
          finally {
            vertex.executionFinished();
          }
          return;
        }
      }
      else if (current == CANCELING) {
        // we sent a cancel call, and the task manager finished before it arrived. We
        // will never get a CANCELED call back from the job manager
        cancelingComplete();
        return;
      }
      else if (current == CANCELED || current == FAILED) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Task FINISHED, but concurrently went to state " + state);
        }
        return;
      }
      else {
        // this should not happen, we need to fail this
        markFailed(new Exception("Vertex received FINISHED message while being in state " + state));
        return;
      }
    }
  }
  
  void cancelingComplete() {
    
    // the taskmanagers can themselves cancel tasks without an external trigger, if they find that the
    // network stack is canceled (for example by a failing / canceling receiver or sender
    // this is an artifact of the old network runtime, but for now we need to support task transitions
    // from running directly to canceled
    
    while (true) {
      ExecutionState current = this.state;
      
      if (current == CANCELED) {
        return;
      }
      else if (current == CANCELING || current == RUNNING || current == DEPLOYING) {
        if (transitionState(current, CANCELED)) {
          try {
            assignedResource.releaseSlot();
            vertex.getExecutionGraph().deregisterExecution(this);
          }
          finally {
            vertex.executionCanceled();
          }
          return;
        }
        
        // else fall through the loop
      } 
      else {
        // failing in the meantime may happen and is no problem.
        // anything else is a serious problem !!!
        if (current != FAILED) {
          String message = String.format("Asynchronous race: Found state %s after successful cancel call.", state);
          LOG.error(message);
          vertex.getExecutionGraph().fail(new Exception(message));
        }
        return;
      }
    }
  }
  
  // --------------------------------------------------------------------------------------------
  //  Internal Actions
  // --------------------------------------------------------------------------------------------
  
  private boolean processFail(Throwable t, boolean isCallback) {
    
    // damn, we failed. This means only that we keep our books and notify our parent JobExecutionVertex
    // the actual computation on the task manager is cleaned up by the TaskManager that noticed the failure
    
    // we may need to loop multiple times (in the presence of concurrent calls) in order to
    // atomically switch to failed 
    while (true) {
      ExecutionState current = this.state;
      
      if (current == FAILED) {
        // already failed. It is enough to remember once that we failed (its sad enough)
        return false;
      }
      
      if (current == CANCELED) {
        // we are already aborting or are already aborted
        if (LOG.isDebugEnabled()) {
          LOG.debug(String.format("Ignoring transition of vertex %s to %s while being %s", 
              getVertexWithAttempt(), FAILED, CANCELED));
        }
        return false;
      }
      
      if (transitionState(current, FAILED, t)) {
        // success (in a manner of speaking)
        this.failureCause = t;
        
        try {
          if (assignedResource != null) {
            assignedResource.releaseSlot();
          }
          vertex.getExecutionGraph().deregisterExecution(this);
        }
        finally {
          vertex.executionFailed(t);
        }
        
        if (!isCallback && (current == RUNNING || current == DEPLOYING)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Sending out cancel request, to remove task execution from TaskManager.");
          }
          
          try {
            if (assignedResource != null) {
              sendCancelRpcCall();
            }
          } catch (Throwable tt) {
            // no reason this should ever happen, but log it to be safe
            LOG.error("Error triggering cancel call while marking task as failed.", tt);
          }
        }
        
        // leave the loop
        return true;
      }
    }
  }
  
  private boolean switchToRunning() {
    
    if (transitionState(DEPLOYING, RUNNING)) {
      return true;
    }
    else {
      // something happened while the call was in progress.
      // it can mean:
      //  - canceling, while deployment was in progress. state is now canceling, or canceled, if the response overtook
      //  - finishing (execution and finished call overtook the deployment answer, which is possible and happens for fast tasks)
      //  - failed (execution, failure, and failure message overtook the deployment answer)
      
      ExecutionState currentState = this.state;
      
      if (currentState == FINISHED || currentState == CANCELED) {
        // do nothing, the task was really fast (nice)
        // or it was canceled really fast
      }
      else if (currentState == CANCELING || currentState == FAILED) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(String.format("Concurrent canceling/failing of %s while deployment was in progress.", getVertexWithAttempt()));
        }
        sendCancelRpcCall();
      }
      else {
        String message = String.format("Concurrent unexpected state transition of task %s to %s while deployment was in progress.",
            getVertexWithAttempt(), currentState);
        
        if (LOG.isDebugEnabled()) {
          LOG.debug(message);
        }
        
        // undo the deployment
        sendCancelRpcCall();
        
        // record the failure
        markFailed(new Exception(message));
      }
      
      return false;
    }
  }
  
  private void sendCancelRpcCall() {
    final AllocatedSlot slot = this.assignedResource;
    if (slot == null) {
      return;
    }
    
    Runnable cancelAction = new Runnable() {
      
      @Override
      public void run() {
        Throwable exception = null;
        
        for (int triesLeft = NUM_CANCEL_CALL_TRIES; triesLeft > 0; --triesLeft) {
          
          try {
            // send the call. it may be that the task is not really there (asynchronous / overtaking messages)
            // in which case it is fine (the deployer catches it)
            TaskOperationResult result = slot.getInstance().getTaskManagerProxy().cancelTask(attemptId);
            
            if (!result.isSuccess()) {
              // the task was not found, which may be when the task concurrently finishes or fails, or
              // when the cancel call overtakes the deployment call
              if (LOG.isDebugEnabled()) {
                LOG.debug("Cancel task call did not find task. Probably RPC call race.");
              }
            }
            
            // in any case, we need not call multiple times, so we quit
            return;
          }
          catch (Throwable t) {
            if (exception == null) {
              exception = t;
            }
            LOG.error("Canceling vertex " + getVertexWithAttempt() + " failed (" + triesLeft + " tries left): " + t.getMessage() , t);
          }
        }
        
        // dang, utterly unsuccessful - the target node must be down, in which case the tasks are lost anyways
        fail(new Exception("Task could not be canceled.", exception));
      }
    };
    
    vertex.execute(cancelAction);
  }
  
  // --------------------------------------------------------------------------------------------
  //  Miscellaneous
  // --------------------------------------------------------------------------------------------
  
  private boolean transitionState(ExecutionState currentState, ExecutionState targetState) {
    return transitionState(currentState, targetState, null);
  }
  
  private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) {
    if (STATE_UPDATER.compareAndSet(this, currentState, targetState)) {
      markTimestamp(targetState);
      
      // make sure that the state transition completes normally.
      // potential errors (in listeners may not affect the main logic)
      try {
        vertex.notifyStateTransition(attemptId, targetState, error);
      }
      catch (Throwable t) {
        LOG.error("Error while notifying execution graph of execution state trnsition.", t);
      }
      return true;
    } else {
      return false;
    }
  }
  
  private void markTimestamp(ExecutionState state) {
    markTimestamp(state, System.currentTimeMillis());
  }
  
  private void markTimestamp(ExecutionState state, long timestamp) {
    this.stateTimestamps[state.ordinal()] = timestamp;
  }
  
  public String getVertexWithAttempt() {
    return vertex.getSimpleName() + " - execution #" + attemptNumber;
  }
  
  @Override
  public String toString() {
    return String.format("Attempt #%d (%s) @ %s - [%s]", attemptNumber, vertex.getSimpleName(),
        (assignedResource == null ? "(unassigned)" : assignedResource.toString()), state);
  }
}
// Source Code of org.apache.flink.runtime.executiongraph.Execution
//
// Related Classes of org.apache.flink.runtime.executiongraph.Execution