Package org.apache.hadoop.yarn.server.resourcemanager

Source Code of org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.yarn.server.resourcemanager;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.AMRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.AMResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider;
import org.apache.hadoop.yarn.service.AbstractService;

@SuppressWarnings("unchecked")
@Private
public class ApplicationMasterService extends AbstractService implements
    AMRMProtocol {
  private static final Log LOG = LogFactory.getLog(ApplicationMasterService.class);
  private final AMLivelinessMonitor amLivelinessMonitor;
  private YarnScheduler rScheduler;
  private InetSocketAddress bindAddress;
  private Server server;
  private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
  private final ConcurrentMap<ApplicationAttemptId, AMResponse> responseMap =
      new ConcurrentHashMap<ApplicationAttemptId, AMResponse>();
  private final AMResponse reboot = recordFactory.newRecordInstance(AMResponse.class);
  private final RMContext rmContext;

  public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) {
    super(ApplicationMasterService.class.getName());
    this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor();
    this.rScheduler = scheduler;
    this.reboot.setReboot(true);
//    this.reboot.containers = new ArrayList<Container>();
    this.rmContext = rmContext;
  }

  @Override
  public void start() {
    Configuration conf = getConfig();
    YarnRPC rpc = YarnRPC.create(conf);

    InetSocketAddress masterServiceAddress = conf.getSocketAddr(
        YarnConfiguration.RM_SCHEDULER_ADDRESS,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
        YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);

    this.server =
      rpc.getServer(AMRMProtocol.class, this, masterServiceAddress,
          conf, this.rmContext.getApplicationTokenSecretManager(),
          conf.getInt(YarnConfiguration.RM_SCHEDULER_CLIENT_THREAD_COUNT,
              YarnConfiguration.DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT));
   
    // Enable service authorization?
    if (conf.getBoolean(
        CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
        false)) {
      refreshServiceAcls(conf, new RMPolicyProvider());
    }
   
    this.server.start();

    this.bindAddress =
        conf.updateConnectAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS,
                               server.getListenerAddress());
    super.start();
  }

  @Private
  public InetSocketAddress getBindAddress() {
    return this.bindAddress;
  }

  private void authorizeRequest(ApplicationAttemptId appAttemptID)
      throws YarnRemoteException {

    if (!UserGroupInformation.isSecurityEnabled()) {
      return;
    }

    String appAttemptIDStr = appAttemptID.toString();

    UserGroupInformation remoteUgi;
    try {
      remoteUgi = UserGroupInformation.getCurrentUser();
    } catch (IOException e) {
      String msg = "Cannot obtain the user-name for ApplicationAttemptID: "
          + appAttemptIDStr + ". Got exception: "
          + StringUtils.stringifyException(e);
      LOG.warn(msg);
      throw RPCUtil.getRemoteException(msg);
    }

    if (!remoteUgi.getUserName().equals(appAttemptIDStr)) {
      String msg = "Unauthorized request from ApplicationMaster. "
          + "Expected ApplicationAttemptID: " + remoteUgi.getUserName()
          + " Found: " + appAttemptIDStr;
      LOG.warn(msg);
      throw RPCUtil.getRemoteException(msg);
    }
  }

  @Override
  public RegisterApplicationMasterResponse registerApplicationMaster(
      RegisterApplicationMasterRequest request) throws YarnRemoteException {

    ApplicationAttemptId applicationAttemptId = request
        .getApplicationAttemptId();
    authorizeRequest(applicationAttemptId);

    ApplicationId appID = applicationAttemptId.getApplicationId();
    AMResponse lastResponse = responseMap.get(applicationAttemptId);
    if (lastResponse == null) {
      String message = "Application doesn't exist in cache "
          + applicationAttemptId;
      LOG.error(message);
      RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(),
          AuditConstants.REGISTER_AM, message, "ApplicationMasterService",
          "Error in registering application master", appID,
          applicationAttemptId);
      throw RPCUtil.getRemoteException(message);
    }

    // Allow only one thread in AM to do registerApp at a time.
    synchronized (lastResponse) {

      LOG.info("AM registration " + applicationAttemptId);
      this.amLivelinessMonitor.receivedPing(applicationAttemptId);

      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppAttemptRegistrationEvent(applicationAttemptId, request
              .getHost(), request.getRpcPort(), request.getTrackingUrl()));

      RMApp app = this.rmContext.getRMApps().get(appID);
      RMAuditLogger.logSuccess(app.getUser(),
          AuditConstants.REGISTER_AM, "ApplicationMasterService", appID,
          applicationAttemptId);

      // Pick up min/max resource from scheduler...
      RegisterApplicationMasterResponse response = recordFactory
          .newRecordInstance(RegisterApplicationMasterResponse.class);
      response.setMinimumResourceCapability(rScheduler
          .getMinimumResourceCapability());
      response.setMaximumResourceCapability(rScheduler
          .getMaximumResourceCapability());
      response.setApplicationACLs(app.getRMAppAttempt(applicationAttemptId)
          .getSubmissionContext().getAMContainerSpec().getApplicationACLs());
      return response;
    }
  }

  @Override
  public FinishApplicationMasterResponse finishApplicationMaster(
      FinishApplicationMasterRequest request) throws YarnRemoteException {

    ApplicationAttemptId applicationAttemptId = request
        .getApplicationAttemptId();
    authorizeRequest(applicationAttemptId);

    AMResponse lastResponse = responseMap.get(applicationAttemptId);
    if (lastResponse == null) {
      String message = "Application doesn't exist in cache "
          + applicationAttemptId;
      LOG.error(message);
      throw RPCUtil.getRemoteException(message);
    }

    // Allow only one thread in AM to do finishApp at a time.
    synchronized (lastResponse) {

      this.amLivelinessMonitor.receivedPing(applicationAttemptId);

      rmContext.getDispatcher().getEventHandler().handle(
          new RMAppAttemptUnregistrationEvent(applicationAttemptId, request
              .getTrackingUrl(), request.getFinalApplicationStatus(), request
              .getDiagnostics()));

      FinishApplicationMasterResponse response = recordFactory
          .newRecordInstance(FinishApplicationMasterResponse.class);
      return response;
    }
  }

  @Override
  public AllocateResponse allocate(AllocateRequest request)
      throws YarnRemoteException {

    ApplicationAttemptId appAttemptId = request.getApplicationAttemptId();
    authorizeRequest(appAttemptId);

    this.amLivelinessMonitor.receivedPing(appAttemptId);

    /* check if its in cache */
    AllocateResponse allocateResponse = recordFactory
        .newRecordInstance(AllocateResponse.class);
    AMResponse lastResponse = responseMap.get(appAttemptId);
    if (lastResponse == null) {
      LOG.error("AppAttemptId doesnt exist in cache " + appAttemptId);
      allocateResponse.setAMResponse(reboot);
      return allocateResponse;
    }
    if ((request.getResponseId() + 1) == lastResponse.getResponseId()) {
      /* old heartbeat */
      allocateResponse.setAMResponse(lastResponse);
      return allocateResponse;
    } else if (request.getResponseId() + 1 < lastResponse.getResponseId()) {
      LOG.error("Invalid responseid from appAttemptId " + appAttemptId);
      // Oh damn! Sending reboot isn't enough. RM state is corrupted. TODO:
      allocateResponse.setAMResponse(reboot);
      return allocateResponse;
    }

    // Allow only one thread in AM to do heartbeat at a time.
    synchronized (lastResponse) { // BUG TODO: Locking order is screwed.

      // Send the status update to the appAttempt.
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMAppAttemptStatusupdateEvent(appAttemptId, request
              .getProgress()));

      List<ResourceRequest> ask = request.getAskList();
      List<ContainerId> release = request.getReleaseList();

      // Send new requests to appAttempt.
      Allocation allocation =
          this.rScheduler.allocate(appAttemptId, ask, release);

      RMApp app = this.rmContext.getRMApps().get(appAttemptId.getApplicationId());
      RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId);

      AMResponse response = recordFactory.newRecordInstance(AMResponse.class);
      response.setAllocatedContainers(allocation.getContainers());
      response.setCompletedContainersStatuses(appAttempt
          .pullJustFinishedContainers());
      response.setResponseId(lastResponse.getResponseId() + 1);
      response.setAvailableResources(allocation.getResourceLimit());
      responseMap.put(appAttemptId, response);
      allocateResponse.setAMResponse(response);
      allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes());
      return allocateResponse;
    }
  }

  public void registerAppAttempt(ApplicationAttemptId attemptId) {
    AMResponse response = recordFactory.newRecordInstance(AMResponse.class);
    response.setResponseId(0);
    LOG.info("Registering " + attemptId);
    responseMap.put(attemptId, response);
  }

  public void unregisterAttempt(ApplicationAttemptId attemptId) {
    AMResponse lastResponse = responseMap.get(attemptId);
    if (lastResponse != null) {
      synchronized (lastResponse) {
        responseMap.remove(attemptId);
      }
    }
  }

  public void refreshServiceAcls(Configuration configuration,
      PolicyProvider policyProvider) {
    this.server.refreshServiceAcl(configuration, policyProvider);
  }
 
  @Override
  public void stop() {
    if (this.server != null) {
      this.server.stop();
    }
    super.stop();
  }
}
TOP

Related Classes of org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.