/***************************************************************************
 * Copyright (c) 2012-2014 VMware, Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/

package com.vmware.bdd.manager;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.log4j.Logger;
import org.springframework.batch.core.JobParameter;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.beans.factory.annotation.Autowired;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.vmware.bdd.apitypes.ClusterCreate;
import com.vmware.bdd.apitypes.ClusterRead;
import com.vmware.bdd.apitypes.ClusterStatus;
import com.vmware.bdd.apitypes.LimitInstruction;
import com.vmware.bdd.apitypes.NodeGroupCreate;
import com.vmware.bdd.apitypes.NodeGroupRead;
import com.vmware.bdd.apitypes.NodeRead;
import com.vmware.bdd.apitypes.NodeStatus;
import com.vmware.bdd.apitypes.PlacementPolicy.GroupAssociation;
import com.vmware.bdd.apitypes.Priority;
import com.vmware.bdd.apitypes.TaskRead;
import com.vmware.bdd.entity.ClusterEntity;
import com.vmware.bdd.entity.NetworkEntity;
import com.vmware.bdd.entity.NodeEntity;
import com.vmware.bdd.entity.NodeGroupEntity;
import com.vmware.bdd.exception.BddException;
import com.vmware.bdd.exception.ClusterConfigException;
import com.vmware.bdd.exception.ClusterHealServiceException;
import com.vmware.bdd.exception.ClusterManagerException;
import com.vmware.bdd.exception.VcProviderException;
import com.vmware.bdd.manager.intf.IClusterEntityManager;
import com.vmware.bdd.service.IClusterHealService;
import com.vmware.bdd.service.IClusteringService;
import com.vmware.bdd.service.IExecutionService;
import com.vmware.bdd.service.job.JobConstants;
import com.vmware.bdd.service.resmgmt.INetworkService;
import com.vmware.bdd.service.resmgmt.IResourceService;
import com.vmware.bdd.service.utils.VcResourceUtils;
import com.vmware.bdd.software.mgmt.plugin.intf.SoftwareManager;
import com.vmware.bdd.software.mgmt.plugin.model.HadoopStack;
import com.vmware.bdd.specpolicy.ClusterSpecFactory;
import com.vmware.bdd.specpolicy.CommonClusterExpandPolicy;
import com.vmware.bdd.spectypes.IronfanStack;
import com.vmware.bdd.spectypes.VcCluster;
import com.vmware.bdd.utils.AuAssert;
import com.vmware.bdd.utils.CommonUtil;
import com.vmware.bdd.utils.Constants;
import com.vmware.bdd.utils.JobUtils;
import com.vmware.bdd.utils.ValidationUtils;

public class ClusterManager {
   static final Logger logger = Logger.getLogger(ClusterManager.class);
   private ClusterConfigManager clusterConfigMgr;

   private INetworkService networkManager;

   private JobManager jobManager;

   private IResourceService resMgr;

   private IClusterEntityManager clusterEntityMgr;

   private RackInfoManager rackInfoMgr;

   private IClusteringService clusteringService;

   private IClusterHealService clusterHealService;

   private IExecutionService executionService;
   private SoftwareManagerCollector softwareManagerCollector;

   @Autowired
   private UnsupportedOpsBlocker opsBlocker;

   public JobManager getJobManager() {
      return jobManager;
   }

   public void setJobManager(JobManager jobManager) {
      this.jobManager = jobManager;
   }

   public ClusterConfigManager getClusterConfigMgr() {
      return clusterConfigMgr;
   }

   public void setClusterConfigMgr(ClusterConfigManager clusterConfigMgr) {
      this.clusterConfigMgr = clusterConfigMgr;
   }

   public INetworkService getNetworkManager() {
      return networkManager;
   }

   public void setNetworkManager(INetworkService networkManager) {
      this.networkManager = networkManager;
   }

   public IResourceService getResMgr() {
      return resMgr;
   }

   @Autowired
   public void setResMgr(IResourceService resMgr) {
      this.resMgr = resMgr;
   }

   public IClusterEntityManager getClusterEntityMgr() {
      return clusterEntityMgr;
   }

   @Autowired
   public void setClusterEntityMgr(IClusterEntityManager clusterEntityMgr) {
      this.clusterEntityMgr = clusterEntityMgr;
   }

   public RackInfoManager getRackInfoMgr() {
      return rackInfoMgr;
   }

   @Autowired
   public void setRackInfoMgr(RackInfoManager rackInfoMgr) {
      this.rackInfoMgr = rackInfoMgr;
   }

   public IClusteringService getClusteringService() {
      return clusteringService;
   }

   @Autowired
   public void setClusteringService(IClusteringService clusteringService) {
      this.clusteringService = clusteringService;
   }

   public IClusterHealService getClusterHealService() {
      return clusterHealService;
   }

   @Autowired
   public void setClusterHealService(IClusterHealService clusterHealService) {
      this.clusterHealService = clusterHealService;
   }

   public IExecutionService getExecutionService() {
      return executionService;
   }

   @Autowired
   public void setExecutionService(IExecutionService executionService) {
      this.executionService = executionService;
   }

   @Autowired
   public void setSoftwareManagerCollector(SoftwareManagerCollector softwareManagerCollector) {
        this.softwareManagerCollector = softwareManagerCollector;
   }

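   /**
    * Assemble the manifest that describes a cluster: for Ironfan-managed
    * clusters, the cloud provider attributes and the distro-expanded cluster
    * definition; plus the latest cluster data read back from the meta-db and
    * the optional list of target node names.
    */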
   public Map<String, Object> getClusterConfigManifest(String clusterName,
         List<String> targets, boolean needAllocIp) {
      ClusterCreate clusterConfig =
            clusterConfigMgr.getClusterConfig(clusterName, needAllocIp);
      Map<String, String> cloudProvider = resMgr.getCloudProviderAttributes();

      ClusterRead read = clusterEntityMgr.toClusterRead(clusterName, true);

      Map<String, Object> attrs = new HashMap<String, Object>();

      if (Constants.IRONFAN.equalsIgnoreCase(clusterConfig.getAppManager())) {
         SoftwareManager softwareManager = clusterConfigMgr.getSoftwareManager(clusterConfig.getAppManager());
         IronfanStack stack = (IronfanStack)filterDistroFromAppManager(softwareManager, clusterConfig.getDistro());
         CommonClusterExpandPolicy.expandDistro(clusterConfig, stack);
        
         attrs.put("cloud_provider", cloudProvider);
         attrs.put("cluster_definition", clusterConfig);        
      }

      if (read != null) {
         attrs.put("cluster_data", read);
      }
      if (targets != null && !targets.isEmpty()) {
         attrs.put("targets", targets);
      }
      return attrs;
   }

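   /**
    * Serialize the manifest to pretty-printed JSON (only fields annotated
    * with @Expose are included) and write it to the given file in UTF-8.
    * I/O failures are wrapped in an internal BddException.
    */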
   private void writeJsonFile(Map<String, Object> clusterConfig, File file) {
      Gson gson =
            new GsonBuilder().excludeFieldsWithoutExposeAnnotation()
                  .setPrettyPrinting().create();
      String jsonStr = gson.toJson(clusterConfig);

      AuAssert.check(jsonStr != null);
      logger.debug("writing cluster manifest in json " + jsonStr + " to file "
            + file);
      BufferedWriter out = null;
      try {
         out =
               new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
                     file), "UTF-8"));
         out.write(jsonStr);
      } catch (IOException ex) {
         logger.error(ex.getMessage()
               + "\n failed to write cluster manifest to file " + file);
         throw BddException.INTERNAL(ex, "Failed to write cluster manifest.");
      } finally {
         if (out != null) {
            try {
               out.close();
            } catch (IOException e) {
               logger.error("falied to close writer" + out, e);
            }
         }
      }
   }

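   /**
    * Write the spec file for a single target into workDir. The cluster name
    * is taken as the prefix of targetName before the first '-', and the file
    * is named after the cluster, e.g. (hypothetical target name):
    *
    * <pre>
    * // writes /tmp/myCluster.json, scoped to the "myCluster-worker" target
    * File spec = writeClusterSpecFile("myCluster-worker", new File("/tmp"), true);
    * </pre>
    */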
   public File writeClusterSpecFile(String targetName, File workDir,
         boolean needAllocIp) {
      String clusterName = targetName.split("-")[0];
      String fileName = clusterName + ".json";
      List<String> targets = new ArrayList<String>(1);
      targets.add(targetName);
      Map<String, Object> clusterConfig =
            getClusterConfigManifest(clusterName, targets, needAllocIp);

      File file = new File(workDir, fileName);
      writeJsonFile(clusterConfig, file);

      return file;
   }

   private void refreshClusterStatus(String clusterName) {
      List<String> clusterNames = new ArrayList<String>();
      clusterNames.add(clusterName);
      refreshClusterStatus(clusterNames);
   }

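   /**
    * Launch a query-cluster job for each named cluster and block, polling
    * every second, until the job reaches a terminal state (COMPLETED, FAILED,
    * ABANDONED or STOPPED). Failures are logged rather than rethrown.
    */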
   private void refreshClusterStatus(List<String> clusterNames) {
      for (String clusterName : clusterNames) {
         Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
         param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(
               new Date()));
         param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
               clusterName));
         JobParameters jobParameters = new JobParameters(param);
         try {
            long jobExecutionId =
                  jobManager.runJob(JobConstants.QUERY_CLUSTER_JOB_NAME,
                        jobParameters);
            TaskRead status = jobManager.getJobExecutionStatus(jobExecutionId);
            while (status.getStatus() != TaskRead.Status.COMPLETED
                  && status.getStatus() != TaskRead.Status.FAILED
                  && status.getStatus() != TaskRead.Status.ABANDONED
                  && status.getStatus() != TaskRead.Status.STOPPED) {
               Thread.sleep(1000);
               status = jobManager.getJobExecutionStatus(jobExecutionId);
            }
         } catch (Exception ex) {
            logger.error("failed to run query cluster job: " + clusterName, ex);
         }
      }
   }

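   /**
    * Read a cluster from the meta-db. When realTime is true and the cluster's
    * status supports syncing, the status is first refreshed through a
    * synchronous query-cluster job.
    */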
   public ClusterRead getClusterByName(String clusterName, boolean realTime) {
      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      // refresh the cluster status first, so the latest data is returned from the db;
      // for a cluster that is not running, we don't sync up status from chef
      if (realTime && cluster.getStatus().isSyncServiceStatus()) {
         refreshClusterStatus(clusterName);
      }
      }

      return clusterEntityMgr.toClusterRead(clusterName);
   }

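   /**
    * Export the cluster spec with environment-specific attributes (placement,
    * datastore/network patterns, distro details, password, etc.) nulled out;
    * the slimmed-down spec can then be exported and reused.
    */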
   public ClusterCreate getClusterSpec(String clusterName) {
      ClusterCreate spec = clusterConfigMgr.getClusterConfig(clusterName);
      spec.setVcClusters(null);
      spec.setTemplateId(null);
      spec.setDistroMap(null);
      spec.setSharedDatastorePattern(null);
      spec.setLocalDatastorePattern(null);
      spec.setNetworkings(null);
      spec.setRpNames(null);
      spec.setDsNames(null);
      spec.setNetworkConfig(null);
      spec.setName(null);
      spec.setDistro(null);
      spec.setValidateConfig(null);
      spec.setSpecFile(null);
      spec.setTopologyPolicy(null);
      spec.setHostToRackMap(null);
      spec.setHttpProxy(null);
      spec.setNoProxy(null);
      spec.setDistroVendor(null);
      spec.setDistroVersion(null);
      spec.setPassword(null);
      NodeGroupCreate[] groups = spec.getNodeGroups();
      if (groups != null) {
         for (NodeGroupCreate group : groups) {
            group.setVcClusters(null);
            group.getStorage().setImagestoreNamePattern(null);
            group.getStorage().setDiskstoreNamePattern(null);
            group.setVmFolderPath(null);
            group.getStorage().setSplitPolicy(null);
            group.getStorage().setControllerType(null);
            group.getStorage().setAllocType(null);
            if (group.getPlacementPolicies() != null) {
               List<GroupAssociation> associations =
                     group.getPlacementPolicies().getGroupAssociations();
               if (associations != null && associations.isEmpty()) {
                  group.getPlacementPolicies().setGroupAssociations(null);
               }
            }
         }
      }
      return spec;
   }

   public List<ClusterRead> getClusters(Boolean realTime) {
      List<ClusterRead> clusters = new ArrayList<ClusterRead>();
      List<ClusterEntity> clusterEntities = clusterEntityMgr.findAllClusters();
      for (ClusterEntity entity : clusterEntities) {
         clusters.add(getClusterByName(entity.getName(), realTime));
      }
      return clusters;
   }

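   /**
    * Create a cluster: resolve the distro through the selected application
    * manager, create automatic resource pools if vc clusters are given,
    * validate the customized spec plus datastore and network accessibility,
    * persist the configuration, and launch the create-cluster job.
    *
    * A minimal caller-side sketch (spec construction elided):
    *
    * <pre>
    * // clusterManager: a wired ClusterManager instance (hypothetical caller)
    * ClusterCreate spec = ...; // built from the user's cluster definition
    * Long jobExecutionId = clusterManager.createCluster(spec);
    * TaskRead task = clusterManager.getJobManager()
    *       .getJobExecutionStatus(jobExecutionId);
    * </pre>
    *
    * @return the job execution id of the create-cluster job
    */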
   public Long createCluster(ClusterCreate createSpec) throws Exception {
      SoftwareManager softMgr = softwareManagerCollector.getSoftwareManager(createSpec.getAppManager());
      // TODO: if a hadoop stack is specified, we can get it by stack name; otherwise, we will get a default hadoop stack.
      HadoopStack stack = clusterConfigMgr.filterDistroFromAppManager(softMgr, createSpec.getDistro());

      // if the distro is not specified by the REST Client, add it.
      if(CommonUtil.isBlank(createSpec.getDistro())) {
         createSpec.setDistro(stack.getDistro());
      }
      createSpec.setDistroVendor(stack.getVendor());
      createSpec.setDistroVersion(stack.getFullVersion());

      // create auto rps if vc cluster/rp is specified
      createAutoRps(createSpec);
      ClusterCreate clusterSpec =
            ClusterSpecFactory.getCustomizedSpec(createSpec, softMgr.getType());
      createSpec.verifyClusterNameLength();
      clusterSpec.validateNodeGroupNames();
      // check the maximum cpu and memory configuration against the vm hardware version
      if (clusterSpec != null && clusterSpec.getNodeGroups() != null) {
         for (NodeGroupCreate ng : clusterSpec.getNodeGroups()) {
            String templateVmId = clusteringService.getTemplateVmId();
            if (templateVmId != null) {
               VcResourceUtils.checkVmMaxConfiguration(templateVmId,
                     ng.getCpuNum() == null ? 0 : ng.getCpuNum(),
                     ng.getMemCapacityMB() == null ? 0 : ng.getMemCapacityMB());
            }
         }
      }
      String name = clusterSpec.getName();
      logger.info("ClusteringService, creating cluster " + name);

      List<String> dsNames = getUsedDS(clusterSpec.getDsNames());
      if (dsNames.isEmpty()) {
         throw ClusterConfigException.NO_DATASTORE_ADDED();
      }
      List<VcCluster> vcClusters = getUsedVcClusters(clusterSpec.getRpNames());
      if (vcClusters == null || vcClusters.isEmpty()) {
         throw ClusterConfigException.NO_RESOURCE_POOL_ADDED();
      }
      // validate accessibility
      validateDatastore(dsNames, vcClusters);
      validateNetworkAccessibility(createSpec.getName(), createSpec.getNetworkNames(), vcClusters);
      //save configuration into meta-db, and extend configuration using default spec
      clusterConfigMgr.createClusterConfig(clusterSpec);
      clusterEntityMgr.updateClusterStatus(name, ClusterStatus.PROVISIONING);
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.PROVISION_ERROR.name()));
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            createSpec.getName()));
      param.put(JobConstants.VERIFY_NODE_STATUS_SCOPE_PARAM, new JobParameter(
            JobConstants.CLUSTER_NODE_SCOPE_VALUE));
      JobParameters jobParameters = new JobParameters(param);
      return jobManager.runJob(JobConstants.CREATE_CLUSTER_JOB_NAME,
            jobParameters);
   }

   private void createAutoRps(ClusterCreate createSpec) {
      if (createSpec.getVcClusters() == null) {
         return;
      }
      // the user specified vc resource pools directly; create auto resource pools in the meta-db dynamically
      List<VcCluster> vcClusters = createSpec.getVcClusters();
      List<String> rpNames =
            clusterConfigMgr.getRpMgr().addAutoResourcePools(vcClusters, true);
      logger.info("added automation resource pools: " + rpNames);
      createSpec.setRpNames(rpNames);
   }

   private void validateNetworkAccessibility(final String clusterName,
         List<String> networkList, List<VcCluster> clusters) {
      if (networkList == null || networkList.isEmpty()) {
         throw ClusterConfigException.NETWORK_IS_NOT_SPECIFIED(clusterName);
      }
      Set<String> networkNames = new HashSet<String>();
      networkNames.addAll(networkList);
      logger.info("start to validate network if exsit in db.");
      verifyNetworkNamesExsitInDB(networkNames, clusterName);
      logger.info("start to validate network accessibility.");
      if (!resMgr.isNetworkAccessibleByCluster(networkList, clusters)) {
         List<String> clusterNames = new ArrayList<String>();
         for (VcCluster cluster : clusters) {
            clusterNames.add(cluster.getName());
         }
         throw ClusterConfigException.NETWORK_UNACCESSIBLE(networkList,
               clusterNames);
      }
   }

   private void validateDatastore(List<String> dsNames, List<VcCluster> clusters) {
      // validate that at least one datastore is accessible by at least one cluster
      logger.info("start to validate accessibility for datastores: " + dsNames
            + ", and clusters: " + clusters);
      boolean found = false;
      for (String dsName : dsNames) {
         for (VcCluster vcCluster : clusters) {
            if (resMgr.isDatastoreAccessibleByCluster(dsName,
                  vcCluster.getName())) {
               found = true;
               break;
            }
         }
      }
      if (!found) {
         // no datastore is accessible by the specified clusters
         List<String> vcClusterNames = new ArrayList<String>();
         for (VcCluster vcCluster : clusters) {
            vcClusterNames.add(vcCluster.getName());
         }
         throw ClusterConfigException.DATASTORE_UNACCESSIBLE(vcClusterNames,
             dsNames);
      }
   }

   private List<String> getUsedDS(List<String> specifiedDsNames) {
      if (specifiedDsNames == null || specifiedDsNames.isEmpty()) {
         specifiedDsNames = new ArrayList<String>();
         specifiedDsNames.addAll(clusterConfigMgr.getDatastoreMgr()
               .getAllDatastoreNames());
      }
      return specifiedDsNames;
   }

   private List<VcCluster> getUsedVcClusters(List<String> rpNames) {
      List<VcCluster> clusters = null;
      if (rpNames == null || rpNames.isEmpty()) {
         clusters = clusterConfigMgr.getRpMgr().getAllVcResourcePool();
      } else {
         clusters = new ArrayList<VcCluster>();
         StringBuffer nonexistentRpNames = new StringBuffer();
         for (String rpName : rpNames) {
            List<VcCluster> vcClusters =
                  clusterConfigMgr.getRpMgr().getVcResourcePoolByName(rpName);

            if (vcClusters == null) {
               nonexistentRpNames.append(rpName).append(",");
            } else {
               clusters.addAll(vcClusters);
            }
         }
         if (nonexistentRpNames.length() > 0) {
            nonexistentRpNames.delete(nonexistentRpNames.length()-1, nonexistentRpNames.length());
            throw VcProviderException
                  .RESOURCE_POOL_NOT_FOUND(nonexistentRpNames.toString());
         }
      }
      return clusters;
   }

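   /**
    * Re-apply cluster configuration. Only allowed when the cluster service is
    * active or the cluster is in CONFIGURE_ERROR or SERVICE_STOPPED state;
    * the status is set to CONFIGURING and a config-cluster job is launched,
    * rolling back to CONFIGURE_ERROR if the job cannot be started.
    */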
   public Long configCluster(String clusterName, ClusterCreate createSpec)
         throws Exception {
      opsBlocker.blockUnsupportedOpsByCluster("configCluster", clusterName);

      logger.info("ClusterManager, config cluster " + clusterName);
      ClusterEntity cluster;

      if ((cluster = clusterEntityMgr.findByName(clusterName)) == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      if (!cluster.getStatus().isActiveServiceStatus()
            && !ClusterStatus.CONFIGURE_ERROR.equals(cluster.getStatus())
            && !ClusterStatus.SERVICE_STOPPED.equals(cluster.getStatus())) {
         logger.error("can not config cluster: " + clusterName + ", "
               + cluster.getStatus());
         throw ClusterManagerException.UPDATE_NOT_ALLOWED_ERROR(clusterName,
               "To update a cluster, its status must be RUNNING, CONFIGURE_ERROR or SERVICE_ERROR");
      }
      clusterConfigMgr.updateAppConfig(clusterName, createSpec);

      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.CONFIGURE_ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName,
            ClusterStatus.CONFIGURING);
      clusterEntityMgr.cleanupActionError(clusterName);
      try {
         return jobManager.runJob(JobConstants.CONFIG_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to configure cluster " + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.CONFIGURE_ERROR);
         throw e;
      }
   }

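   /**
    * Resume creation of a cluster that failed provisioning. Requires
    * PROVISION_ERROR status; re-validates datastore and network
    * accessibility, then relaunches provisioning via the resume-cluster job.
    */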
   public Long resumeClusterCreation(String clusterName) throws Exception {
      logger.info("ClusterManager, resume cluster creation " + clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);

      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      if (cluster.getStatus() != ClusterStatus.PROVISION_ERROR) {
         logger.error("can not resume creation of cluster: " + clusterName
               + ", " + cluster.getStatus());
         throw ClusterManagerException.UPDATE_NOT_ALLOWED_ERROR(clusterName,
               "To update a cluster, its status must be PROVISION_ERROR or SERVICE_ERROR");
      }
      List<String> dsNames = getUsedDS(cluster.getVcDatastoreNameList());
      if (dsNames.isEmpty()) {
         throw ClusterConfigException.NO_DATASTORE_ADDED();
      }
      List<VcCluster> vcClusters = getUsedVcClusters(cluster.getVcRpNameList());
      if (vcClusters.isEmpty()) {
         throw ClusterConfigException.NO_RESOURCE_POOL_ADDED();
      }
      // validate accessibility
      validateDatastore(dsNames, vcClusters);
      validateNetworkAccessibility(cluster.getName(), cluster.fetchNetworkNameList(), vcClusters);
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.PROVISION_ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName,
            ClusterStatus.PROVISIONING);
      clusterEntityMgr.cleanupActionError(clusterName);
      try {
         return jobManager.runJob(JobConstants.RESUME_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to resume cluster creation for cluster "
               + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.PROVISION_ERROR);
         throw e;
      }
   }

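   /**
    * Delete a cluster. The cluster must be in a stable status; its status is
    * set to DELETING and a delete-cluster job is launched, falling back to
    * ERROR if the job cannot be started.
    */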
   public Long deleteClusterByName(String clusterName) throws Exception {
      logger.info("ClusterManager, deleting cluster " + clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);

      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      if (!cluster.getStatus().isStableStatus()) {
         logger.error("cluster: " + clusterName
               + " cannot be deleted, it is in " + cluster.getStatus()
               + " status");
         throw ClusterManagerException.DELETION_NOT_ALLOWED_ERROR(clusterName,
               "To delete a cluster, its status must be RUNNING, STOPPED, ERROR, PROVISION_ERROR, CONFIGURE_ERROR or UPGRADE_ERROR");
      }
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.DELETING);
      clusterEntityMgr.cleanupActionError(clusterName);
      try {
         return jobManager.runJob(JobConstants.DELETE_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to delete cluster " + clusterName, e);
         cluster = clusterEntityMgr.findByName(clusterName);
         if (cluster != null) {
            clusterEntityMgr.updateClusterStatus(clusterName,
                  ClusterStatus.ERROR);
         }
         throw e;
      }
   }

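   /**
    * Upgrade a cluster to the latest version. Rejected if the cluster is
    * already up to date or not in a stable status; node actions are marked
    * as upgrading before the upgrade-cluster job is launched.
    */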
   public Long upgradeClusterByName(String clusterName) throws Exception {
      logger.info("ClusterManager, upgrading cluster " + clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);

      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      if (!clusterEntityMgr.needUpgrade(clusterName)) {
         logger.error("cluster " + clusterName + " is the latest version already");
         throw ClusterManagerException.ALREADY_LATEST_VERSION_ERROR(clusterName);
      }

      if (!cluster.getStatus().isStableStatus()) {
         logger.error("cluster: " + clusterName
               + " cannot be upgraded, it is in " + cluster.getStatus()
               + " status");
         throw ClusterManagerException.UPGRADE_NOT_ALLOWED_ERROR(clusterName,
               "To upgrade a cluster, its status must be RUNNING, STOPPED, ERROR, CONFIGURE_ERROR or UPGRADE_ERROR");
      }
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM, new JobParameter(ClusterStatus.UPGRADE_ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.storeClusterLastStatus(clusterName);
      clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.UPGRADING);
      clusterEntityMgr.updateNodesActionForUpgrade(clusterName, Constants.NODE_ACTION_UPGRADING);
      clusterEntityMgr.cleanupErrorForClusterUpgrade(clusterName);
      try {
         return jobManager.runJob(JobConstants.UPGRADE_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to upgrade cluster " + clusterName, e);
         cluster = clusterEntityMgr.findByName(clusterName);
         if (cluster != null) {
            clusterEntityMgr.updateClusterStatus(clusterName,
                  ClusterStatus.UPGRADE_ERROR);
         }
         throw e;
      }
   }

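   /**
    * Start a stopped cluster. Only allowed from STOPPED or ERROR status; the
    * VHM target number is reset to -1 before the start-cluster job runs.
    */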
   public Long startCluster(String clusterName) throws Exception {
      logger.info("ClusterManager, starting cluster " + clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      if (cluster.getStatus().isActiveServiceStatus()) {
         logger.error("cluster " + clusterName + " is running already");
         throw ClusterManagerException.ALREADY_STARTED_ERROR(clusterName);
      }

      if (!ClusterStatus.STOPPED.equals(cluster.getStatus())
            && !ClusterStatus.ERROR.equals(cluster.getStatus())) {
         logger.error("cluster " + clusterName
               + " cannot be started, it is in " + cluster.getStatus()
               + " status");
         throw ClusterManagerException.START_NOT_ALLOWED_ERROR(clusterName,
               "To start a cluster, its status must be STOPPED or ERROR");
      }

      cluster.setVhmTargetNum(-1);
      clusterEntityMgr.update(cluster);
      clusterEntityMgr.cleanupActionError(clusterName);
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.STARTING);
      try {
         return jobManager.runJob(JobConstants.START_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to start cluster " + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.ERROR);
         throw e;
      }
   }

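   /**
    * Stop a cluster. Allowed while the service is active or the cluster is
    * in SERVICE_STOPPED, SERVICE_WARNING or ERROR status; launches the
    * stop-cluster job and marks the cluster ERROR if the job cannot start.
    */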
   public Long stopCluster(String clusterName) throws Exception {
      logger.info("ClusterManager, stopping cluster " + clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      if (ClusterStatus.STOPPED.equals(cluster.getStatus())) {
         logger.error("cluster " + clusterName + " is stopped already");
         throw ClusterManagerException.ALREADY_STOPPED_ERROR(clusterName);
      }

      if (!cluster.getStatus().isActiveServiceStatus()
            && !ClusterStatus.SERVICE_STOPPED.equals(cluster.getStatus())
            && !ClusterStatus.ERROR.equals(cluster.getStatus())
            && !ClusterStatus.SERVICE_WARNING.equals(cluster.getStatus())) {
         logger.error("cluster " + clusterName
               + " cannot be stopped, it is in " + cluster.getStatus()
               + " status");
         throw ClusterManagerException.STOP_NOT_ALLOWED_ERROR(clusterName,
               "To stop a cluster, its status must be RUNNING, ERROR, SERVICE_WARNING or SERVICE_STOPPED");
      }
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.STOPPED.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.ERROR.name()));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.STOPPING);
      clusterEntityMgr.cleanupActionError(clusterName);
      try {
         return jobManager.runJob(JobConstants.STOP_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to stop cluster " + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.ERROR);
         throw e;
      }
   }

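   /**
    * Scale out a node group to the given instance number. Shrinking is not
    * supported, the group's roles must support scaling under the cluster's
    * software manager, and the new defined instance number is rolled back if
    * the resize-cluster job fails to start.
    */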
   public Long resizeCluster(String clusterName, String nodeGroupName,
         int instanceNum) throws Exception {
      logger.info("ClusterManager, updating node group " + nodeGroupName
            + " in cluster " + clusterName + " reset instance number to "
            + instanceNum);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      List<String> dsNames = getUsedDS(cluster.getVcDatastoreNameList());
      if (dsNames.isEmpty()) {
         throw ClusterConfigException.NO_DATASTORE_ADDED();
      }

      List<VcCluster> vcClusters = getUsedVcClusters(cluster.getVcRpNameList());
      if (vcClusters.isEmpty()) {
         throw ClusterConfigException.NO_RESOURCE_POOL_ADDED();
      }

      // validate accessibility
      validateDatastore(dsNames, vcClusters);
      validateNetworkAccessibility(cluster.getName(), cluster.fetchNetworkNameList(), vcClusters);

      NodeGroupEntity group =
            clusterEntityMgr.findByName(cluster, nodeGroupName);
      if (group == null) {
         logger.error("nodegroup " + nodeGroupName + " of cluster "
               + clusterName + " does not exist");
         throw ClusterManagerException.NODEGROUP_NOT_FOUND_ERROR(nodeGroupName);
      }

      AuAssert.check(!group.getRoleNameList().isEmpty(), "roles should not be empty");
      SoftwareManager softMgr =
            softwareManagerCollector
                  .getSoftwareManager(cluster.getAppManager());
      List<String> unsupportedRoles =
            softMgr.validateScaling(clusterEntityMgr
                  .toNodeGroupInfo(clusterName, nodeGroupName));
      if (!unsupportedRoles.isEmpty()) {
         logger.info("can not resize node group with role: " + unsupportedRoles);
         throw ClusterManagerException.ROLES_NOT_SUPPORTED(unsupportedRoles);
      }

      if (!cluster.getStatus().isActiveServiceStatus()) {
         logger.error("cluster " + clusterName
               + " can be resized only in RUNNING status, it is now in "
               + cluster.getStatus() + " status");
         throw ClusterManagerException.UPDATE_NOT_ALLOWED_ERROR(clusterName,
               "To update a cluster, its status must be RUNNING");
      }

      if (instanceNum <= group.getDefineInstanceNum()) {
         logger.error("node group " + nodeGroupName
               + " cannot be shrinked from " + group.getDefineInstanceNum()
               + " to " + instanceNum + " nodes");
         throw ClusterManagerException.SHRINK_OP_NOT_SUPPORTED(nodeGroupName,
               instanceNum, group.getDefineInstanceNum());
      }

      Integer instancePerHost = group.getInstancePerHost();
      if (instancePerHost != null && instanceNum % instancePerHost != 0) {
         throw BddException
               .INVALID_PARAMETER(
                     "instance number",
                     new StringBuilder(100)
                           .append(instanceNum)
                           .append(
                                 ". instanceNum must be evenly divisible by instancePerHost")
                           .toString());
      }

      ValidationUtils.validHostNumber(clusterEntityMgr, group, instanceNum);
      ValidationUtils.hasEnoughHost(rackInfoMgr, clusterEntityMgr, group,
            instanceNum);

      int oldInstanceNum = group.getDefineInstanceNum();
      group.setDefineInstanceNum(instanceNum);

      clusterEntityMgr.update(group);
      clusterEntityMgr.cleanupActionError(clusterName);
      // create job
      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      param.put(JobConstants.GROUP_NAME_JOB_PARAM, new JobParameter(
            nodeGroupName));
      param.put(JobConstants.GROUP_INSTANCE_NEW_NUMBER_JOB_PARAM,
            new JobParameter(Long.valueOf(instanceNum)));
      param.put(JobConstants.GROUP_INSTANCE_OLD_NUMBER_JOB_PARAM,
            new JobParameter(Long.valueOf(oldInstanceNum)));
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.VERIFY_NODE_STATUS_SCOPE_PARAM, new JobParameter(
            JobConstants.GROUP_NODE_SCOPE_VALUE));
      JobParameters jobParameters = new JobParameters(param);
      clusterEntityMgr.updateClusterStatus(clusterName, ClusterStatus.UPDATING);
      try {
         return jobManager.runJob(JobConstants.RESIZE_CLUSTER_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to resize cluster " + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.RUNNING);
         group.setDefineInstanceNum(oldInstanceNum);
         clusterEntityMgr.update(group);
         throw e;
      }
   }

   /**
    * Set cluster elasticity parameters synchronously.
    *
    * @param clusterName          name of the target cluster
    * @param activeComputeNodeNum target number of powered-on compute nodes
    *                             (the VHM target)
    * @param minComputeNodeNum    lower bound for automatic elasticity
    * @param maxComputeNodeNum    upper bound for automatic elasticity
    * @param enableAuto           whether automatic elasticity is enabled
    * @param ioPriority           disk I/O share level for the cluster's VMs
    * @return names of the node groups affected by the elasticity settings
    * @throws Exception
    */
   @SuppressWarnings("unchecked")
   public List<String> syncSetParam(String clusterName,
         Integer activeComputeNodeNum, Integer minComputeNodeNum, Integer maxComputeNodeNum,
         Boolean enableAuto, Priority ioPriority) throws Exception {

      // setting ioshares alone is always allowed; block unsupported elasticity operations otherwise.
      if(enableAuto != null || activeComputeNodeNum != null ||
            maxComputeNodeNum != null || minComputeNodeNum != null) {
         opsBlocker.blockUnsupportedOpsByCluster("syncSetElasticity", clusterName);
      }

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      ClusterRead clusterRead = getClusterByName(clusterName, false);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      clusterEntityMgr.cleanupActionError(clusterName);
      //update vm ioshares
      if (ioPriority != null) {
         prioritizeCluster(clusterName, ioPriority);
      }

      // prioritizeCluster updates the cluster entity, so re-fetch it here to avoid overwriting its changes
      cluster = clusterEntityMgr.findByName(clusterName);

      if (enableAuto != null && enableAuto != cluster.getAutomationEnable()) {
         if (enableAuto && cluster.getDistroVendor().equalsIgnoreCase(Constants.MAPR_VENDOR)) {
            logger.error("cluster " + clusterName + " is a MAPR distro, which cannot be auto scaled");
            throw BddException.NOT_ALLOWED_SCALING("cluster", clusterName);
         }
         cluster.setAutomationEnable(enableAuto);
      }

      if (minComputeNodeNum != null
            && minComputeNodeNum != cluster.getVhmMinNum()) {
         cluster.setVhmMinNum(minComputeNodeNum);
      }

      if (maxComputeNodeNum != null
            && maxComputeNodeNum != cluster.getVhmMaxNum()) {
         cluster.setVhmMaxNum(maxComputeNodeNum);
      }

      List<String> nodeGroupNames = new ArrayList<String>();
      if ((enableAuto != null || minComputeNodeNum != null || maxComputeNodeNum != null || activeComputeNodeNum != null)
            && !clusterRead.validateSetManualElasticity(nodeGroupNames)) {
         throw BddException.INVALID_PARAMETER("cluster", clusterName);
      }

      if (activeComputeNodeNum != null) {
         if (!activeComputeNodeNum.equals(cluster.getVhmTargetNum())) {
            cluster.setVhmTargetNum(activeComputeNodeNum);
         }
      }

      // enableAuto may only be changed while the cluster service is running;
      // other elasticity attributes may be changed while the cluster is running or stopped
      if ((enableAuto != null)
            && !cluster.getStatus().isActiveServiceStatus()) {
         logger.error("Cannot change elasticity mode, when cluster "
               + clusterName + " is in " + cluster.getStatus() + " status");
         throw ClusterManagerException.SET_AUTO_ELASTICITY_NOT_ALLOWED_ERROR(
               clusterName, "The cluster's status must be RUNNING");
      }
      if (!cluster.getStatus().isActiveServiceStatus()
            && !ClusterStatus.SERVICE_STOPPED.equals(cluster.getStatus())
            && !ClusterStatus.STOPPED.equals(cluster.getStatus())) {
         logger.error("Cannot change elasticity parameters, when cluster "
               + clusterName + " is in " + cluster.getStatus() + " status");
         throw ClusterManagerException.SET_AUTO_ELASTICITY_NOT_ALLOWED_ERROR(
               clusterName, "The cluster's status must be RUNNING, SERVICE_WARNING, SERVICE_ERROR or STOPPED");
      }

      clusterEntityMgr.update(cluster);

      //update vhm extra config file
      if (enableAuto != null || minComputeNodeNum != null || maxComputeNodeNum != null) {
         boolean success =
               clusteringService.setAutoElasticity(clusterName, false);
         if (!success) {
            throw ClusterManagerException
                  .FAILED_TO_SET_AUTO_ELASTICITY_ERROR(clusterName,
                        "Could not update elasticity configuration file");
         }
      }

      // wait for manual mode if switching to manual and the target node number is unset
      if (enableAuto != null && !enableAuto
            && cluster.getVhmTargetNum() == null) {
         JobUtils.waitForManual(clusterName, executionService);
      }

      return nodeGroupNames;
   }

   /**
    * Set cluster elasticity parameters asynchronously: the parameters are
    * first applied synchronously, then a job is launched that sends the
    * matching limit instruction to VHM (waitForManual when
    * activeComputeNodeNum is null, unlimit when it is -1, setTarget
    * otherwise).
    *
    * @param clusterName          name of the target cluster
    * @param activeComputeNodeNum target number of powered-on compute nodes
    * @param minComputeNodeNum    lower bound for automatic elasticity
    * @param maxComputeNodeNum    upper bound for automatic elasticity
    * @param enableAuto           whether automatic elasticity is enabled
    * @param ioPriority           disk I/O share level for the cluster's VMs
    * @return the job execution id of the elasticity job
    * @throws Exception
    */
   public Long asyncSetParam(String clusterName, Integer activeComputeNodeNum,
         Integer minComputeNodeNum, Integer maxComputeNodeNum, Boolean enableAuto,
         Priority ioPriority)
         throws Exception {
      // setting ioshares alone is always allowed; block unsupported elasticity operations otherwise.
      if(enableAuto != null || activeComputeNodeNum != null ||
            maxComputeNodeNum != null || minComputeNodeNum != null) {
         opsBlocker.blockUnsupportedOpsByCluster("asyncSetElasticity", clusterName);
      }

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      syncSetParam(clusterName, activeComputeNodeNum, minComputeNodeNum, maxComputeNodeNum,
            enableAuto, ioPriority);

      ClusterRead cluster = getClusterByName(clusterName, false);
      // cluster must be running status
      if (!cluster.getStatus().isActiveServiceStatus()) {
         String msg = "Cluster "+ clusterName +" is not running.";
         logger.error(msg);
         throw ClusterManagerException
               .SET_MANUAL_ELASTICITY_NOT_ALLOWED_ERROR(msg);
      }

      Map<String, JobParameter> param = new TreeMap<String, JobParameter>();
      param.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter(new Date()));
      param.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter(
            clusterName));
      // TODO: transfer SET_TARGET/UNLIMIT from CLI directly
      if (activeComputeNodeNum == null) {
         param.put(JobConstants.VHM_ACTION_JOB_PARAM, new JobParameter(
               LimitInstruction.actionWaitForManual));
      } else if (activeComputeNodeNum == -1) {
         param.put(JobConstants.VHM_ACTION_JOB_PARAM, new JobParameter(
               LimitInstruction.actionUnlimit));
      } else {
         param.put(JobConstants.VHM_ACTION_JOB_PARAM, new JobParameter(
               LimitInstruction.actionSetTarget));
      }
      if (activeComputeNodeNum != null) {
         param.put(JobConstants.ACTIVE_COMPUTE_NODE_NUMBER_JOB_PARAM,
               new JobParameter(Long.valueOf(activeComputeNodeNum)));
      }
      param.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      param.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM,
            new JobParameter(ClusterStatus.RUNNING.name()));
      JobParameters jobParameters = new JobParameters(param);
      try {
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.VHM_RUNNING);
         return jobManager.runJob(JobConstants.SET_MANUAL_ELASTICITY_JOB_NAME,
               jobParameters);
      } catch (Exception e) {
         logger.error("Failed to set manual elasticity for cluster "
               + clusterName, e);
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.RUNNING);
         throw e;
      }
   }

   /*
    * Change the disk I/O priority of all nodes in the cluster
    */
   public void prioritizeCluster(String clusterName, Priority ioShares)
         throws Exception {
      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }

      if (ioShares.equals(cluster.getIoShares())) {
         return;
      }

      logger.info("Change all nodes' disk I/O shares to " + ioShares
            + " in the cluster " + clusterName);

      // cluster must be in RUNNING or STOPPED status
      if (!cluster.getStatus().isActiveServiceStatus()
            && !ClusterStatus.STOPPED.equals(cluster.getStatus())) {
         String msg = "The cluster's status must be RUNNING or STOPPED";
         logger.error(msg);
         throw ClusterManagerException.PRIORITIZE_CLUSTER_NOT_ALLOWED_ERROR(
               clusterName, msg);
      }

      // get the target nodes of the cluster
      List<NodeEntity> targetNodes = clusterEntityMgr.findAllNodes(clusterName);

      if (targetNodes.isEmpty()) {
         throw ClusterManagerException.PRIORITIZE_CLUSTER_NOT_ALLOWED_ERROR(
               clusterName, "Target node set is empty");
      }

      clusterEntityMgr.cleanupActionError(clusterName);
      // call clustering service to set the io shares
      Map<String, String> failedNodes =
            clusteringService
                  .configIOShares(clusterName, targetNodes, ioShares);
      if (failedNodes.isEmpty()) {
         logger.info("configured " + targetNodes.size() + " nodes' IO share level to "
               + ioShares.toString());
      } else {
         // update node table
         for (String name : failedNodes.keySet()) {
            NodeEntity node = clusterEntityMgr.findNodeByName(name);
            node.setActionFailed(true);
            node.setErrMessage(failedNodes.get(name));
            clusterEntityMgr.update(node);
         }
         throw ClusterManagerException.PRIORITIZE_CLUSTER_FAILED(clusterName,
               failedNodes.size(), targetNodes.size());
      }

      // update io shares in db
      cluster.setIoShares(ioShares);
      clusterEntityMgr.update(cluster);
   }

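   /**
    * Fix disk failures on worker nodes (groups without management roles) of
    * a RUNNING cluster, either for one node group or for all of them. A
    * fix-disk sub-job is scheduled for each node that reports bad disks.
    */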
   public Long fixDiskFailures(String clusterName, String groupName)
         throws Exception {
      opsBlocker.blockUnsupportedOpsByCluster("fixDisk", clusterName);

      ClusterEntity cluster = clusterEntityMgr.findByName(clusterName);
      if (cluster == null) {
         logger.error("cluster " + clusterName + " does not exist");
         throw BddException.NOT_FOUND("Cluster", clusterName);
      }
      SoftwareManager softMgr =
         softwareManagerCollector
               .getSoftwareManager(cluster.getAppManager());

      ValidationUtils.validateVersion(clusterEntityMgr, clusterName);

      ClusterStatus oldStatus = cluster.getStatus();

      if (!oldStatus.isActiveServiceStatus()) {
         throw ClusterHealServiceException.NOT_SUPPORTED(clusterName,
               "The cluster status must be RUNNING");
      }

      List<NodeGroupEntity> nodeGroups;

      if (groupName != null) {
         NodeGroupEntity nodeGroup =
               clusterEntityMgr.findByName(clusterName, groupName);
         if (nodeGroup == null) {
            logger.error("node group " + groupName + " does not exist");
            throw BddException.NOT_FOUND("group", groupName);
         }

         nodeGroups = new ArrayList<NodeGroupEntity>(1);
         nodeGroups.add(nodeGroup);
      } else {
         nodeGroups = clusterEntityMgr.findAllGroups(clusterName);
      }

      // only fix worker nodes that have datanode or tasktracker roles
      boolean workerNodesFound = false;
      JobParametersBuilder parametersBuilder = new JobParametersBuilder();
      List<JobParameters> jobParameterList = new ArrayList<JobParameters>();

      for (NodeGroupEntity nodeGroup : nodeGroups) {
         List<String> roles = nodeGroup.getRoleNameList();

         // TODO: more fine control on node roles
         if (softMgr.hasMgmtRole(roles)) {
            logger.info("node group " + nodeGroup.getName()
                  + " contains management roles, pass it");
            continue;
         }

         workerNodesFound = true;
         for (NodeEntity node : clusterEntityMgr.findAllNodes(clusterName,
               nodeGroup.getName())) {
            if (node.isObsoleteNode()) {
               logger.info("Ingore node " + node.getVmName()
                     + ", for it violate VM name convention."
                     + "or exceed defined group instance number. ");
               continue;
            }
            if (clusterHealService.hasBadDisks(node.getVmName())) {
               logger.warn("node " + node.getVmName()
                     + " has bad disks. Fixing it..");

               boolean vmPowerOn =
                     (node.getStatus().ordinal() != NodeStatus.POWERED_OFF
                           .ordinal());

               JobParameters nodeParameters =
                     parametersBuilder
                           .addString(JobConstants.CLUSTER_NAME_JOB_PARAM,
                                 clusterName)
                           .addString(JobConstants.TARGET_NAME_JOB_PARAM,
                                 node.getVmName())
                           .addString(JobConstants.GROUP_NAME_JOB_PARAM,
                                 nodeGroup.getName())
                           .addString(JobConstants.SUB_JOB_NODE_NAME,
                                 node.getVmName())
                           .addString(JobConstants.IS_VM_POWER_ON,
                                 String.valueOf(vmPowerOn)).toJobParameters();
               jobParameterList.add(nodeParameters);
            }
         }
      }

      if (!workerNodesFound) {
         throw ClusterHealServiceException
               .NOT_SUPPORTED(clusterName,
                     "only support fixing disk failures for worker/non-management nodes");
      }

      // all target nodes are healthy, nothing needs to be fixed
      if (jobParameterList.isEmpty()) {
         logger.info("all target nodes are healthy, nothing needs to be fixed");
         throw ClusterHealServiceException.NOT_NEEDED(clusterName);
      }

      try {
         clusterEntityMgr.updateClusterStatus(clusterName,
               ClusterStatus.MAINTENANCE);
         clusterEntityMgr.cleanupActionError(clusterName);
         return jobManager.runSubJobForNodes(
               JobConstants.FIX_NODE_DISK_FAILURE_JOB_NAME, jobParameterList,
               clusterName, oldStatus, oldStatus);
      } catch (Exception e) {
         logger.error("failed to fix disk failures, " + e.getMessage());
         throw e;
      }
   }

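   /**
    * Build the rack topology map of the cluster's nodes for the given
    * topology type, defaulting to the cluster's own topology policy when
    * none is specified; requires that a rack topology file was uploaded.
    */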
   public Map<String, String> getRackTopology(String clusterName, String topology) {
      ClusterRead cluster = clusterEntityMgr.toClusterRead(clusterName);
      Set<String> hosts = new HashSet<String>();
      Set<String> racks = new HashSet<String>();
      List<NodeRead> nodes = new ArrayList<NodeRead>();
      for (NodeGroupRead nodeGroup : cluster.getNodeGroups()) {
         for (NodeRead node : nodeGroup.getInstances()) {
            hosts.add(node.getHostName());
            racks.add(node.getRack());
            nodes.add(node);
         }
      }

      if (CommonUtil.isBlank(topology)) {
         topology = cluster.getTopologyPolicy().toString();
      }

      AuAssert.check(hosts.size() > 0);
      clusterConfigMgr.validateRackTopologyUploaded(hosts, topology);

      return clusterConfigMgr.buildTopology(nodes, topology);
   }

   public HadoopStack filterDistroFromAppManager(
         SoftwareManager softwareManager, String distroName) {
      return clusterConfigMgr.filterDistroFromAppManager(softwareManager,
            distroName);
   }

   private void verifyNetworkNamesExistInDB(Set<String> networkNames,
         String clusterName) {
      NetworkEntity networkEntity = null;
      for (String networkName : networkNames) {
         networkEntity = networkManager.getNetworkEntityByName(networkName);
         if (networkEntity == null) {
            throw ClusterConfigException.NETWORK_IS_NOT_FOUND(networkName,
                  clusterName);
         }
      }
   }
}