Package org.apache.ambari.server.actionmanager

Source Code of org.apache.ambari.server.actionmanager.ActionScheduler

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.server.actionmanager;

import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.reflect.TypeToken;
import com.google.inject.persist.UnitOfWork;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.Role;
import org.apache.ambari.server.RoleCommand;
import org.apache.ambari.server.ServiceComponentHostNotFoundException;
import org.apache.ambari.server.ServiceComponentNotFoundException;
import org.apache.ambari.server.agent.ActionQueue;
import org.apache.ambari.server.agent.CommandReport;
import org.apache.ambari.server.agent.ExecutionCommand;
import org.apache.ambari.server.configuration.Configuration;
import org.apache.ambari.server.controller.HostsMap;
import org.apache.ambari.server.serveraction.ServerAction;
import org.apache.ambari.server.serveraction.ServerActionManager;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
import org.apache.ambari.server.state.Host;
import org.apache.ambari.server.state.HostState;
import org.apache.ambari.server.state.Service;
import org.apache.ambari.server.state.ServiceComponent;
import org.apache.ambari.server.state.ServiceComponentHost;
import org.apache.ambari.server.state.ServiceComponentHostEvent;
import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
import org.apache.ambari.server.state.svccomphost.ServiceComponentHostOpFailedEvent;
import org.apache.ambari.server.utils.StageUtils;
import org.apache.commons.collections.MultiMap;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;



/**
* This class encapsulates the action scheduler thread.
* The action scheduler periodically examines the action database and
* determines whether there is an action that can be scheduled.
*/
class ActionScheduler implements Runnable {

  private static Logger LOG = LoggerFactory.getLogger(ActionScheduler.class);
  private final long actionTimeout;
  private final long sleepTime;
  private final UnitOfWork unitOfWork;
  private volatile boolean shouldRun = true;
  private Thread schedulerThread = null;
  private final ActionDBAccessor db;
  private final short maxAttempts;
  private final ActionQueue actionQueue;
  private final Clusters fsmObject;
  private boolean taskTimeoutAdjustment = true;
  private final HostsMap hostsMap;
  private final Object wakeupSyncObject = new Object();
  private final ServerActionManager serverActionManager;
  private final Configuration configuration;

  private final Set<String> requestsInProgress = new HashSet<String>();

  /**
   * true if scheduler should run ASAP.
   * We need this flag to avoid sleep in situations, when
   * we receive awake() request during running a scheduler iteration.
   */
  private boolean activeAwakeRequest = false;
  //Cache for clusterHostinfo, key - stageId-requestId
  private Cache<String, Map<String, Set<String>>> clusterHostInfoCache;

  public ActionScheduler(long sleepTimeMilliSec, long actionTimeoutMilliSec,
      ActionDBAccessor db, ActionQueue actionQueue, Clusters fsmObject,
      int maxAttempts, HostsMap hostsMap, ServerActionManager serverActionManager,
      UnitOfWork unitOfWork, Configuration configuration) {
    this.sleepTime = sleepTimeMilliSec;
    this.hostsMap = hostsMap;
    this.actionTimeout = actionTimeoutMilliSec;
    this.db = db;
    this.actionQueue = actionQueue;
    this.fsmObject = fsmObject;
    this.maxAttempts = (short) maxAttempts;
    this.serverActionManager = serverActionManager;
    this.unitOfWork = unitOfWork;
    this.clusterHostInfoCache = CacheBuilder.newBuilder().
        expireAfterAccess(5, TimeUnit.MINUTES).
        build();
    this.configuration = configuration;
  }

  public void start() {
    schedulerThread = new Thread(this);
    schedulerThread.start();
  }

  public void stop() {
    shouldRun = false;
    schedulerThread.interrupt();
  }

  /**
   * Should be called from another thread when we want scheduler to
   * make a run ASAP (for example, to process desired configs of SCHs).
   * The method is guaranteed to return quickly.
   */
  public void awake() {
    synchronized (wakeupSyncObject) {
      activeAwakeRequest = true;
      wakeupSyncObject.notify();
    }
  }

  @Override
  public void run() {
    while (shouldRun) {
      try {
        synchronized (wakeupSyncObject) {
          if (!activeAwakeRequest) {
              wakeupSyncObject.wait(sleepTime);
          }
          activeAwakeRequest = false;
        }
        doWork();
      } catch (InterruptedException ex) {
        LOG.warn("Scheduler thread is interrupted going to stop", ex);
        shouldRun = false;
      } catch (Exception ex) {
        LOG.warn("Exception received", ex);
        requestsInProgress.clear();
      } catch (Throwable t) {
        LOG.warn("ERROR", t);
        requestsInProgress.clear();
      }
    }
  }

  public void doWork() throws AmbariException {
    try {
      unitOfWork.begin();
      Set<String> runningRequestIds = new HashSet<String>();
      Set<String> affectedHosts = new HashSet<String>();
      List<Stage> stages = db.getStagesInProgress();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Scheduler wakes up");
        LOG.debug("Processing {} in progress stages ", stages.size());
      }
      if (stages == null || stages.isEmpty()) {
        //Nothing to do
        if (LOG.isDebugEnabled()) {
          LOG.debug("No stage in progress..nothing to do");
        }
        return;
      }
      int i_stage = 0;
      for (Stage s : stages) {
        // Check if we can process this stage in parallel with another stages
        i_stage ++;

        long requestId = s.getRequestId();
        // Convert to string to avoid glitches with boxing/unboxing
        String requestIdStr = String.valueOf(requestId);
        LOG.debug("==> STAGE_i = " + i_stage + "(requestId=" + requestIdStr + ",StageId=" + s.getStageId() + ")");
        if (runningRequestIds.contains(requestIdStr)) {
          // We don't want to process different stages from the same request in parallel
          LOG.debug("==> We don't want to process different stages from the same request in parallel" );
          continue;
        } else {
          runningRequestIds.add(requestIdStr);
          if (!requestsInProgress.contains(requestIdStr)) {
            requestsInProgress.add(requestIdStr);
            db.startRequest(requestId);
          }
        }

        List<String> stageHosts = s.getHosts();
        boolean conflict = false;
        for (String host : stageHosts) {
          if (affectedHosts.contains(host)) {
            conflict = true;
            break;
          }
        }
        if (conflict) {
          // Also we don't want to perform stages in parallel at the same hosts
          continue;
        } else {
          affectedHosts.addAll(stageHosts);
        }

        // Commands that will be scheduled in current scheduler wakeup
        List<ExecutionCommand> commandsToSchedule = new ArrayList<ExecutionCommand>();
        Map<String, RoleStats> roleStats = processInProgressStage(s, commandsToSchedule);
        // Check if stage is failed
        boolean failed = false;
        for (String role : roleStats.keySet()) {
          RoleStats stats = roleStats.get(role);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Stats for role:" + role + ", stats=" + stats);
          }
          if (stats.isRoleFailed()) {
            failed = true;
            break;
          }
        }

        if(!failed) {
          // Prior stage may have failed and it may need to fail the whole request
          failed = hasPreviousStageFailed(s);
        }

        if (failed) {
          LOG.warn("Operation completely failed, aborting request id:"
              + s.getRequestId());
          abortOperationsForStage(s);
          return;
        }

        List<ExecutionCommand> commandsToStart = new ArrayList<ExecutionCommand>();
        List<ExecutionCommand> commandsToUpdate = new ArrayList<ExecutionCommand>();

        //Schedule what we have so far

        for (ExecutionCommand cmd : commandsToSchedule) {
          if (Role.valueOf(cmd.getRole()).equals(Role.AMBARI_SERVER_ACTION)) {
            /**
             * We don't forbid executing any stages in parallel with
             * AMBARI_SERVER_ACTION. That  should be OK as AMBARI_SERVER_ACTION
             * is not used as of now. The general motivation has been to update
             * Request status when last task associated with the
             * Request is finished.
             */
            executeServerAction(s, cmd);
          } else {
            processHostRole(s, cmd, commandsToStart, commandsToUpdate);
          }
        }

        LOG.debug("==> Commands to start: {}", commandsToStart.size());
        LOG.debug("==> Commands to update: {}", commandsToUpdate.size());

        //Multimap is analog of Map<Object, List<Object>> but allows to avoid nested loop
        ListMultimap<String, ServiceComponentHostEvent> eventMap = formEventMap(s, commandsToStart);
        List<ExecutionCommand> commandsToAbort = new ArrayList<ExecutionCommand>();
        if (!eventMap.isEmpty()) {
          LOG.debug("==> processing {} serviceComponentHostEvents...", eventMap.size());
          Cluster cluster = fsmObject.getCluster(s.getClusterName());
          if (cluster != null) {
            List<ServiceComponentHostEvent> failedEvents =
              cluster.processServiceComponentHostEvents(eventMap);
            LOG.debug("==> {} events failed.", failedEvents.size());

            for (Iterator<ExecutionCommand> iterator = commandsToUpdate.iterator(); iterator.hasNext(); ) {
              ExecutionCommand cmd = iterator.next();
              for (ServiceComponentHostEvent event : failedEvents) {
                if (StringUtils.equals(event.getHostName(), cmd.getHostname()) &&
                  StringUtils.equals(event.getServiceComponentName(), cmd.getRole())) {
                  iterator.remove();
                  commandsToAbort.add(cmd);
                  break;
                }
              }
            }
          } else {
            LOG.warn("There was events to process but cluster {} not found", s.getClusterName());
          }
        }

        LOG.debug("==> Scheduling {} tasks...", commandsToUpdate.size());
        db.bulkHostRoleScheduled(s, commandsToUpdate);

        LOG.debug("==> Aborting {} tasks...", commandsToAbort.size());
        db.bulkAbortHostRole(s, commandsToAbort);

        LOG.debug("==> Adding {} tasks to queue...", commandsToUpdate.size());
        for (ExecutionCommand cmd : commandsToUpdate) {
          actionQueue.enqueue(cmd.getHostname(), cmd);
        }
        LOG.debug("==> Finished.");

        if (! configuration.getParallelStageExecution()) { // If disabled
          return;
        }
      }

      requestsInProgress.retainAll(runningRequestIds);

    } finally {
      LOG.debug("Scheduler finished work.");
      unitOfWork.end();
    }
  }


  /**
   * Executes internal ambari-server action
   */
  private void executeServerAction(Stage s, ExecutionCommand cmd) {
    try {
      LOG.trace("Executing server action: request_id={}, stage_id={}, task_id={}",
        s.getRequestId(), s.getStageId(), cmd.getTaskId());
      long now = System.currentTimeMillis();
      String hostName = cmd.getHostname();
      String roleName = cmd.getRole();

      s.setStartTime(hostName, roleName, now);
      s.setLastAttemptTime(hostName, roleName, now);
      s.incrementAttemptCount(hostName, roleName);
      s.setHostRoleStatus(hostName, roleName, HostRoleStatus.QUEUED);
      db.hostRoleScheduled(s, hostName, roleName);
      String actionName = cmd.getRoleParams().get(ServerAction.ACTION_NAME);
      this.serverActionManager.executeAction(actionName, cmd.getCommandParams());
      reportServerActionSuccess(s, cmd);

    } catch (AmbariException e) {
      LOG.warn("Could not execute server action " + cmd.toString(), e);
      reportServerActionFailure(s, cmd, e.getMessage());
    }
  }

  private boolean hasPreviousStageFailed(Stage stage) {
    boolean failed = false;
    long prevStageId = stage.getStageId() - 1;
    if (prevStageId > 0) {
      // Find previous stage instance
      List<Stage> allStages = db.getAllStages(stage.getRequestId());
      Stage prevStage = null;
      for (Stage s : allStages) {
        if (s.getStageId() == prevStageId) {
          prevStage = s;
          break;
        }
      }

      //It may be null for test scenarios
      if(prevStage != null) {
        Map<Role, Integer> hostCountsForRoles = new HashMap<Role, Integer>();
        Map<Role, Integer> failedHostCountsForRoles = new HashMap<Role, Integer>();

        for (String host : prevStage.getHostRoleCommands().keySet()) {
          Map<String, HostRoleCommand> roleCommandMap = prevStage.getHostRoleCommands().get(host);
          for (String role : roleCommandMap.keySet()) {
            HostRoleCommand c = roleCommandMap.get(role);
            if (hostCountsForRoles.get(c.getRole()) == null) {
              hostCountsForRoles.put(c.getRole(), 0);
              failedHostCountsForRoles.put(c.getRole(), 0);
            }
            int hostCount = hostCountsForRoles.get(c.getRole());
            hostCountsForRoles.put(c.getRole(), hostCount + 1);
            if (c.getStatus().isFailedState()) {
              int failedHostCount = failedHostCountsForRoles.get(c.getRole());
              failedHostCountsForRoles.put(c.getRole(), failedHostCount + 1);
            }
          }
        }

        for (Role role : hostCountsForRoles.keySet()) {
          float failedHosts = failedHostCountsForRoles.get(role);
          float totalHosts = hostCountsForRoles.get(role);
          if (((totalHosts - failedHosts) / totalHosts) < prevStage.getSuccessFactor(role)) {
            failed = true;
          }
        }
      }
    }
    return failed;
  }

  private void reportServerActionSuccess(Stage stage, ExecutionCommand cmd) {
    CommandReport report = new CommandReport();
    report.setStatus(HostRoleStatus.COMPLETED.toString());
    report.setExitCode(0);
    report.setStdOut("Server action succeeded");
    report.setStdErr("");
    db.updateHostRoleState(cmd.getHostname(), stage.getRequestId(), stage.getStageId(),
            cmd.getRole(), report);
  }

  private void reportServerActionFailure(Stage stage, ExecutionCommand cmd, String message) {
    CommandReport report = new CommandReport();
    report.setStatus(HostRoleStatus.FAILED.toString());
    report.setExitCode(1);
    report.setStdOut("Server action failed");
    report.setStdErr(message);
    db.updateHostRoleState(cmd.getHostname(), stage.getRequestId(), stage.getStageId(),
            cmd.getRole(), report);
  }

  /**
   * @return Stats for the roles in the stage. It is used to determine whether stage
   * has succeeded or failed.
   * Side effects:
   * This method processes command timeouts and retry attempts, and
   * adds new (pending) execution commands to commandsToSchedule list.
   */
  private Map<String, RoleStats> processInProgressStage(Stage s,
      List<ExecutionCommand> commandsToSchedule) throws AmbariException {
    LOG.debug("==> Collecting commands to schedule...");
    // Map to track role status
    Map<String, RoleStats> roleStats = initRoleStats(s);
    long now = System.currentTimeMillis();
    long taskTimeout = actionTimeout;
    if (taskTimeoutAdjustment) {
      taskTimeout = actionTimeout + s.getStageTimeout();
    }

    Cluster cluster = null;
    if (null != s.getClusterName()) {
      cluster = fsmObject.getCluster(s.getClusterName());
    }

    for (String host : s.getHosts()) {
      List<ExecutionCommandWrapper> commandWrappers = s.getExecutionCommands(host);
      Host hostObj = fsmObject.getHost(host);
      int i_my = 0;
      LOG.trace("===>host=" + host);
      for(ExecutionCommandWrapper wrapper : commandWrappers) {
        ExecutionCommand c = wrapper.getExecutionCommand();
        String roleStr = c.getRole();
        HostRoleStatus status = s.getHostRoleStatus(host, roleStr);
        i_my ++;
        if (LOG.isTraceEnabled()) {
          LOG.trace("Host task " + i_my + ") id = " + c.getTaskId() + " status = " + status.toString() +
            " (role=" + roleStr + "), roleCommand = "+ c.getRoleCommand());
        }
        boolean hostDeleted = false;
        if (null != cluster) {
          Service svc = null;
          if (c.getServiceName() != null && !c.getServiceName().isEmpty()) {
            svc = cluster.getService(c.getServiceName());
          }

          ServiceComponent svcComp = null;
          Map<String, ServiceComponentHost> scHosts = null;
          try {
            if (svc != null) {
              svcComp = svc.getServiceComponent(roleStr);
              scHosts = svcComp.getServiceComponentHosts();
            }
          } catch (ServiceComponentNotFoundException scnex) {
            String msg = String.format(
                    "%s is not not a service component, assuming its an action",
                    roleStr);
            LOG.debug(msg);
          }

          hostDeleted = (scHosts != null && !scHosts.containsKey(host));
          if (hostDeleted) {
            String message = String.format(
              "Host component information has not been found.  Details:" +
              "cluster=%s; host=%s; service=%s; component=%s; ",
              c.getClusterName(), host,
              svcComp == null ? "null" : svcComp.getServiceName(),
              svcComp == null ? "null" : svcComp.getName());
            LOG.warn(message);
          }
        }

        // Check that service host component is not deleted
        if (hostDeleted) {
         
          String message = String.format(
            "Host not found when trying to schedule an execution command. " +
            "The most probable reason for that is that host or host component " +
            "has been deleted recently. The command has been aborted and dequeued." +
            "Execution command details: " +
            "cmdId: %s; taskId: %s; roleCommand: %s",
            c.getCommandId(), c.getTaskId(), c.getRoleCommand());
          LOG.warn("Host {} has been detected as non-available. {}", host, message);
          // Abort the command itself
          db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message);
          status = HostRoleStatus.ABORTED;
        } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, taskTimeout)) {
          // Process command timeouts
          LOG.info("Host:" + host + ", role:" + roleStr + ", actionId:" + s.getActionId() + " timed out");
          if (s.getAttemptCount(host, roleStr) >= maxAttempts) {
            LOG.warn("Host:" + host + ", role:" + roleStr + ", actionId:" + s.getActionId() + " expired");
            db.timeoutHostRole(host, s.getRequestId(), s.getStageId(), c.getRole());
            //Reinitialize status
            status = s.getHostRoleStatus(host, roleStr);

            if (null != cluster) {
              transitionToFailedState(cluster.getClusterName(), c.getServiceName(), roleStr, host, now, false);
            }

            // Dequeue command
            LOG.info("Removing command from queue, host={}, commandId={} ", host, c.getCommandId());
            actionQueue.dequeue(host, c.getCommandId());
          } else {
            // reschedule command
            commandsToSchedule.add(c);
            LOG.trace("===> commandsToSchedule(reschedule)=" + commandsToSchedule.size());
          }
        } else if (status.equals(HostRoleStatus.PENDING)) {
          //Need to schedule first time
          commandsToSchedule.add(c);
          LOG.trace("===>commandsToSchedule(first_time)=" + commandsToSchedule.size());
        }

        this.updateRoleStats(status, roleStats.get(roleStr));
      }
    }
    LOG.debug("Collected {} commands to schedule in this wakeup.", commandsToSchedule.size());
    return roleStats;
  }

  /**
   * Generate a OPFailed event before aborting all operations in the stage
   * @param stage
   */
  private void abortOperationsForStage(Stage stage) {
    long now = System.currentTimeMillis();

    for (String hostName : stage.getHosts()) {
      List<ExecutionCommandWrapper> commandWrappers =
        stage.getExecutionCommands(hostName);

      for(ExecutionCommandWrapper wrapper : commandWrappers) {
        ExecutionCommand c = wrapper.getExecutionCommand();
        transitionToFailedState(stage.getClusterName(), c.getServiceName(),
          c.getRole(), hostName, now, true);
      }
    }

    db.abortOperation(stage.getRequestId());
  }

  /**
   * Raise a OPFailed event for a SCH
   * @param clusterName
   * @param serviceName
   * @param componentName
   * @param hostname
   * @param timestamp
   */
  private void transitionToFailedState(String clusterName, String serviceName,
                                       String componentName, String hostname,
                                       long timestamp,
                                       boolean ignoreTransitionException) {

    try {
      Cluster cluster = fsmObject.getCluster(clusterName);

      ServiceComponentHostOpFailedEvent timeoutEvent =
        new ServiceComponentHostOpFailedEvent(componentName,
          hostname, timestamp);

      Service svc = cluster.getService(serviceName);
      ServiceComponent svcComp = svc.getServiceComponent(componentName);
      ServiceComponentHost svcCompHost =
        svcComp.getServiceComponentHost(hostname);
      svcCompHost.handleEvent(timeoutEvent);

    } catch (ServiceComponentNotFoundException scnex) {
      LOG.debug(componentName + " associated with service " + serviceName +
        " is not a service component, assuming it's an action.");
    } catch (ServiceComponentHostNotFoundException e) {
      String msg = String.format("Service component host %s not found, " +
              "unable to transition to failed state.", componentName);
      LOG.warn(msg, e);
    } catch (InvalidStateTransitionException e) {
      if (ignoreTransitionException) {
        LOG.debug("Unable to transition to failed state.", e);
      } else {
        LOG.warn("Unable to transition to failed state.", e);
      }
    } catch (AmbariException e) {
      LOG.warn("Unable to transition to failed state.", e);
    }
  }


  /**
   * Populates a map < role_name, role_stats>.
   */
  private Map<String, RoleStats> initRoleStats(Stage s) {
    // Meaning: how many hosts are affected by commands for each role
    Map<Role, Integer> hostCountsForRoles = new HashMap<Role, Integer>();
    // < role_name, rolestats >
    Map<String, RoleStats> roleStats = new TreeMap<String, RoleStats>();

    for (String host : s.getHostRoleCommands().keySet()) {
      Map<String, HostRoleCommand> roleCommandMap = s.getHostRoleCommands().get(host);
      for (String role : roleCommandMap.keySet()) {
        HostRoleCommand c = roleCommandMap.get(role);
        if (hostCountsForRoles.get(c.getRole()) == null) {
          hostCountsForRoles.put(c.getRole(), 0);
        }
        int val = hostCountsForRoles.get(c.getRole());
        hostCountsForRoles.put(c.getRole(), val + 1);
      }
    }

    for (Role r : hostCountsForRoles.keySet()) {
      RoleStats stats = new RoleStats(hostCountsForRoles.get(r),
          s.getSuccessFactor(r));
      roleStats.put(r.toString(), stats);
    }
    return roleStats;
  }

  private boolean timeOutActionNeeded(HostRoleStatus status, Stage stage,
      Host host, String role, long currentTime, long taskTimeout) throws
    AmbariException {
    if (( !status.equals(HostRoleStatus.QUEUED) ) &&
        ( ! status.equals(HostRoleStatus.IN_PROGRESS) )) {
      return false;
    }
    // Fast fail task if host state is unknown
    if (host.getState().equals(HostState.HEARTBEAT_LOST)) {
      LOG.debug("Timing out action since agent is not heartbeating.");
      return true;
    }
    if (currentTime > stage.getLastAttemptTime(host.getHostName(), role)
        + taskTimeout) {
      return true;
    }
    return false;
  }

  private ListMultimap<String, ServiceComponentHostEvent> formEventMap(Stage s, List<ExecutionCommand> commands) {
    ListMultimap<String, ServiceComponentHostEvent> serviceEventMap = ArrayListMultimap.create();
    for (ExecutionCommand cmd : commands) {
      String hostname = cmd.getHostname();
      String roleStr = cmd.getRole();
      if (RoleCommand.ACTIONEXECUTE != cmd.getRoleCommand()) {
          serviceEventMap.put(cmd.getServiceName(), s.getFsmEvent(hostname, roleStr).getEvent());
      }
    }
    return serviceEventMap;
  }

  private void processHostRole(Stage s, ExecutionCommand cmd, List<ExecutionCommand> commandsToStart,
                               List<ExecutionCommand> commandsToUpdate)
    throws AmbariException {
    long now = System.currentTimeMillis();
    String roleStr = cmd.getRole();
    String hostname = cmd.getHostname();

    // start time is -1 if host role command is not started yet
    if (s.getStartTime(hostname, roleStr) < 0) {

      commandsToStart.add(cmd);
      s.setStartTime(hostname,roleStr, now);
      s.setHostRoleStatus(hostname, roleStr, HostRoleStatus.QUEUED);
    }
    s.setLastAttemptTime(hostname, roleStr, now);
    s.incrementAttemptCount(hostname, roleStr);
    /** change the hostname in the command for the host itself **/
    cmd.setHostname(hostsMap.getHostMap(hostname));


    //Try to get clusterHostInfo from cache
    String stagePk = s.getStageId() + "-" + s.getRequestId();
    Map<String, Set<String>> clusterHostInfo = clusterHostInfoCache.getIfPresent(stagePk);

    if (clusterHostInfo == null) {
      Type type = new TypeToken<Map<String, Set<String>>>() {}.getType();
      clusterHostInfo = StageUtils.getGson().fromJson(s.getClusterHostInfo(), type);
      clusterHostInfoCache.put(stagePk, clusterHostInfo);
    }

    cmd.setClusterHostInfo(clusterHostInfo);

    commandsToUpdate.add(cmd);
  }

  private void scheduleHostRole(Stage s, ExecutionCommand cmd)
      throws InvalidStateTransitionException, AmbariException {
    long now = System.currentTimeMillis();
    String roleStr = cmd.getRole();
    String hostname = cmd.getHostname();

    // start time is -1 if host role command is not started yet
    if (s.getStartTime(hostname, roleStr) < 0) {
      if (RoleCommand.ACTIONEXECUTE != cmd.getRoleCommand()) {
        try {
          Cluster c = fsmObject.getCluster(s.getClusterName());
          Service svc = c.getService(cmd.getServiceName());
          ServiceComponent svcComp = svc.getServiceComponent(roleStr);
          ServiceComponentHost svcCompHost =
                  svcComp.getServiceComponentHost(hostname);
          svcCompHost.handleEvent(s.getFsmEvent(hostname, roleStr).getEvent());
        } catch (ServiceComponentNotFoundException scnex) {
          LOG.debug("Not a service component, assuming its an action");
        } catch (InvalidStateTransitionException e) {
          LOG.info(
              "Transition failed for host: " + hostname + ", role: "
                  + roleStr, e);
          throw e;
        } catch (AmbariException e) {
          LOG.warn("Exception in fsm: " + hostname + ", role: " + roleStr,
              e);
          throw e;
        }
      }
      s.setStartTime(hostname,roleStr, now);
      s.setHostRoleStatus(hostname, roleStr, HostRoleStatus.QUEUED);
    }
    s.setLastAttemptTime(hostname, roleStr, now);
    s.incrementAttemptCount(hostname, roleStr);
    LOG.debug("Scheduling command: "+cmd.toString()+" for host: "+hostname);
    /** change the hostname in the command for the host itself **/
    cmd.setHostname(hostsMap.getHostMap(hostname));
   

    //Try to get clusterHostInfo from cache
    String stagePk = s.getStageId() + "-" + s.getRequestId();
    Map<String, Set<String>> clusterHostInfo = clusterHostInfoCache.getIfPresent(stagePk);
   
    if (clusterHostInfo == null) {
      Type type = new TypeToken<Map<String, Set<String>>>() {}.getType();
      clusterHostInfo = StageUtils.getGson().fromJson(s.getClusterHostInfo(), type);
      clusterHostInfoCache.put(stagePk, clusterHostInfo);
    }
   
    cmd.setClusterHostInfo(clusterHostInfo);

    actionQueue.enqueue(hostname, cmd);
    db.hostRoleScheduled(s, hostname, roleStr);
  }

  private void updateRoleStats(HostRoleStatus status, RoleStats rs) {
    switch (status) {
    case COMPLETED:
      rs.numSucceeded++;
      break;
    case FAILED:
      rs.numFailed++;
      break;
    case QUEUED:
      rs.numQueued++;
      break;
    case PENDING:
      rs.numPending++;
      break;
    case TIMEDOUT:
      rs.numTimedOut++;
      break;
    case ABORTED:
      rs.numAborted++;
      break;
    case IN_PROGRESS:
      rs.numInProgress++;
      break;
    default:
      LOG.error("Unknown status " + status.name());
    }
  }
 
 
  public void setTaskTimeoutAdjustment(boolean val) {
    this.taskTimeoutAdjustment = val;
  }

  static class RoleStats {
    int numInProgress;
    int numQueued = 0;
    int numSucceeded = 0;
    int numFailed = 0;
    int numTimedOut = 0;
    int numPending = 0;
    int numAborted = 0;
    final int totalHosts;
    final float successFactor;

    RoleStats(int total, float successFactor) {
      this.totalHosts = total;
      this.successFactor = successFactor;
    }

    /**
     * Role successful means the role is successful enough to
     */
    boolean isSuccessFactorMet() {
      int minSuccessNeeded = (int) Math.ceil(successFactor * totalHosts);
      if (minSuccessNeeded <= numSucceeded) {
        return true;
      } else {
        return false;
      }
    }

    private boolean isRoleInProgress() {
      return (numPending+numQueued+numInProgress > 0);
    }

    /**
     * Role failure means role is no longer in progress and success factor is
     * not met.
     */
    boolean isRoleFailed() {
      if (isRoleInProgress() || isSuccessFactorMet()) {
        return false;
      } else {
        return true;
      }
    }

    public String toString() {
      StringBuilder builder = new StringBuilder();
      builder.append("numQueued="+numQueued);
      builder.append(", numInProgress="+numInProgress);
      builder.append(", numSucceeded="+numSucceeded);
      builder.append(", numFailed="+numFailed);
      builder.append(", numTimedOut="+numTimedOut);
      builder.append(", numPending="+numPending);
      builder.append(", numAborted="+numAborted);
      builder.append(", totalHosts="+totalHosts);
      builder.append(", successFactor="+successFactor);
      return builder.toString();
    }
  }
}
TOP

Related Classes of org.apache.ambari.server.actionmanager.ActionScheduler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.