Package org.apache.giraph.yarn

Source Code of org.apache.giraph.yarn.GiraphApplicationMaster$LaunchContainerRunnable

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.yarn;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import com.google.common.collect.Maps;
import java.security.PrivilegedAction;
import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.AMRMProtocol;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ContainerManager;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords
  .FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords
  .RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.AMResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

import org.apache.log4j.Logger;

import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

/**
* The YARN Application Master for Giraph is launched when the GiraphYarnClient
* successfully requests an execution container from the Resource Manager. The
* Application Master is provided by Giraph to manage all requests for resources
* (worker nodes, memory, jar files, job configuration metadata, etc.) that
* Giraph will need to perform the job. When Giraph runs in a non-YARN context,
* the role of the Application Master is played by Hadoop when it launches our
* GraphMappers (worker/master task nodes) to run the job.
*/
public class GiraphApplicationMaster {
  /** Logger */
  private static final Logger LOG =
    Logger.getLogger(GiraphApplicationMaster.class);
  /** Exit code for YARN containers that were manually killed/aborted */
  private static final int YARN_ABORT_EXIT_STATUS = -100;
  /** Exit code for successfully run YARN containers */
  private static final int YARN_SUCCESS_EXIT_STATUS = 0;
  /** millis to sleep between heartbeats during long loops */
  private static final int SLEEP_BETWEEN_HEARTBEATS_MSECS = 900;
  /** A reusable map of resources already in HDFS; each task copies these to
   * its local environment and uses them to launch its GiraphYarnTask. */
  private static Map<String, LocalResource> LOCAL_RESOURCES;
  /** Initialize the Configuration class with the resource file exported by
   * the YarnClient. We will need to export this resource to the tasks also.
   * Construct the HEARTBEAT to use to ping the RM about job progress/health.
   */
  static {
    // pick up new conf XML file and populate it with stuff exported from client
    Configuration.addDefaultResource(GiraphConstants.GIRAPH_YARN_CONF_FILE);
  }

  /** Handle to AppMaster's RPC connection to YARN and the RM. */
  private final AMRMProtocol resourceManager;
  /** bootstrap handle to YARN RPC service */
  private final YarnRPC rpc;
  /** GiraphApplicationMaster's application attempt id */
  private final ApplicationAttemptId appAttemptId;
  /** GiraphApplicationMaster container id. Leave me here, I'm very useful */
  private final ContainerId containerId;
  /** number of containers Giraph needs (conf.getMaxWorkers() + 1 master) */
  private final int containersToLaunch;
  /** MB of JVM heap per Giraph task container */
  private final int heapPerContainer;
  /** Giraph configuration for this job, transported here by YARN framework */
  private final ImmutableClassesGiraphConfiguration giraphConf;
  /** Completed Containers Counter */
  private final AtomicInteger completedCount;
  /** Failed Containers Counter */
  private final AtomicInteger failedCount;
  /** Number of containers requested (hopefully '-w' from our conf) */
  private final AtomicInteger allocatedCount;
  /** Number of successfully completed containers in this job run. */
  private final AtomicInteger successfulCount;
  /** the ACK #'s for AllocateRequests + heartbeats == last response # */
  private AtomicInteger lastResponseId;
  /** Executor to attempt asynchronous launches of Giraph containers */
  private ExecutorService executor;
  /** YARN progress is a <code>float</code> between 0.0f and 1.0f */
  private float progress;
  /** An empty resource request with which to send heartbeats + progress */
  private AllocateRequest heartbeat;

  /**
   * Construct the GiraphAppMaster, populate fields using env vars
   * set up by YARN framework in this execution container.
   * @param cId the ContainerId
   * @param aId the ApplicationAttemptId
   */
  protected GiraphApplicationMaster(ContainerId cId, ApplicationAttemptId aId)
    throws IOException {
    containerId = cId; // future good stuff will need me to operate.
    appAttemptId = aId;
    progress = 0.0f;
    lastResponseId = new AtomicInteger(0);
    giraphConf =
      new ImmutableClassesGiraphConfiguration(new GiraphConfiguration());
    completedCount = new AtomicInteger(0);
    failedCount = new AtomicInteger(0);
    allocatedCount = new AtomicInteger(0);
    successfulCount = new AtomicInteger(0);
    rpc = YarnRPC.create(giraphConf);
    resourceManager = getHandleToRm();
    containersToLaunch = giraphConf.getMaxWorkers() + 1;
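    // size the launcher pool to one thread per expected container so each
    // allocated container can be set up and started without waiting on others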
    executor = Executors.newFixedThreadPool(containersToLaunch);
    heapPerContainer = giraphConf.getYarnTaskHeapMb();
  }

  /**
   * Coordinates all requests for Giraph's worker/master task containers, and
   * manages application liveness heartbeat, completion status, teardown, etc.
   */
  private void run() {
    // register Application Master with the YARN Resource Manager so we can
    // begin requesting resources. The response contains useful cluster info.
    try {
      resourceManager.registerApplicationMaster(getRegisterAppMasterRequest());
    } catch (IOException ioe) {
      throw new IllegalStateException(
        "GiraphApplicationMaster failed to register with RM.", ioe);
    }

    try {
      // make the request only ONCE; only request more on container failure etc.
      AMResponse amResponse = sendAllocationRequest();
      logClusterResources(amResponse);
      // loop here, waiting for TOTAL # REQUESTED containers to be available
      // and launch them piecemeal as they are reported to us in heartbeat pings.
      launchContainersAsynchronously(amResponse);
      // wait for the containers to finish & tally success/fails
      awaitJobCompletion(); // all launched tasks are done before complete call
    } finally {
      // if we get here w/o problems, the executor is already long finished.
      if (null != executor && !executor.isTerminated()) {
        executor.shutdownNow(); // force kill, especially if got here by throw
      }
      // When the application completes, it should send a "finish request" to RM
      try {
        resourceManager.finishApplicationMaster(buildFinishAppMasterRequest());
      } catch (YarnRemoteException yre) {
        LOG.error("GiraphApplicationMaster failed to un-register with RM", yre);
      }
      if (null != rpc) {
        rpc.stopProxy(resourceManager, giraphConf);
      }
    }
  }

  /**
   * Reports the cluster resources in the AM response to our initial ask.
   * @param amResponse the AM response from YARN.
   */
  private void logClusterResources(final AMResponse amResponse) {
    // Check what the current available resources in the cluster are
    Resource availableResources = amResponse.getAvailableResources();
    LOG.info("Initial Giraph resource request for " + containersToLaunch +
      " containers has been submitted. " +
      "The RM reports cluster headroom is: " + availableResources);
  }

  /**
   * Utility to build the final "job run is finished" request to the RM.
   * @return the finish app master request, to send to the RM.
   */
  private FinishApplicationMasterRequest buildFinishAppMasterRequest() {
    LOG.info("Application completed. Signalling finish to RM");
    FinishApplicationMasterRequest finishRequest =
      Records.newRecord(FinishApplicationMasterRequest.class);
    finishRequest.setAppAttemptId(appAttemptId);
    FinalApplicationStatus appStatus;
    String appMessage = "Container Diagnostics:" +
      " allocated=" + allocatedCount.get() +
      ", completed=" + completedCount.get() +
      ", succeeded=" + successfulCount.get() +
      ", failed=" + failedCount.get();
    if (successfulCount.get() == containersToLaunch) {
      appStatus = FinalApplicationStatus.SUCCEEDED;
    } else {
      appStatus = FinalApplicationStatus.FAILED;
    }
    finishRequest.setDiagnostics(appMessage);
    finishRequest.setFinishApplicationStatus(appStatus);
    return finishRequest;
  }

  /**
   * Loop and check the status of the containers until all are finished,
   * logging how each container meets its end: success, error, or abort.
   */
  private void awaitJobCompletion() {
    List<ContainerStatus> completedContainers;
    do {
      try {
        Thread.sleep(SLEEP_BETWEEN_HEARTBEATS_MSECS);
      } catch (InterruptedException ignored) {
        final int notFinished = containersToLaunch - completedCount.get();
        LOG.info("GiraphApplicationMaster interrupted from sleep while " +
          " waiting for " + notFinished + "containers to finish job.");
      }
      updateProgress();
      completedContainers =
          sendHeartbeat().getAMResponse().getCompletedContainersStatuses();
      for (ContainerStatus containerStatus : completedContainers) {
        LOG.info("Got container status for containerID= " +
          containerStatus.getContainerId() +
          ", state=" + containerStatus.getState() +
          ", exitStatus=" + containerStatus.getExitStatus() +
          ", diagnostics=" + containerStatus.getDiagnostics());
        switch (containerStatus.getExitStatus()) {
        case YARN_SUCCESS_EXIT_STATUS:
          successfulCount.incrementAndGet();
          break;
        case YARN_ABORT_EXIT_STATUS:
          break; // not success or fail
        default:
          failedCount.incrementAndGet();
          break;
        }
        completedCount.incrementAndGet();
      } // end completion check loop
    } while (completedCount.get() < containersToLaunch);
  }

  /** Update the progress value for our next heartbeat (allocate request) */
  private void updateProgress() {
    // set progress to "half done + ratio of completed containers so far"
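    // the allocation phase (startContainerLaunchingThreads) reports progress
    // in [0, 0.5]; completed containers fill in the remaining [0.5, 1.0] here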
    final float ratio = completedCount.get() / (float) containersToLaunch;
    progress = 0.5f + ratio / 2.0f;
  }

  /**
   * Loop while checking container request status, handing each new bundle of
   * allocated containers to our executor, which launches a Giraph BSP task on
   * each. Giraph's full resource request was sent ONCE, but these
   * containers will become available in groups, over a period of time.
   * @param amResponse metadata about our AllocateRequest's results.
   */
  private void launchContainersAsynchronously(AMResponse amResponse) {
    List<Container> allocatedContainers;
    do {
      // get fresh report on # alloc'd containers, sleep between checks
      if (null == amResponse) {
        amResponse = sendHeartbeat().getAMResponse();
      }
      allocatedContainers = amResponse.getAllocatedContainers();
      allocatedCount.addAndGet(allocatedContainers.size());
      LOG.info("Waiting for task containers: " + allocatedCount.get() +
        " allocated out of " + containersToLaunch + " required.");
      startContainerLaunchingThreads(allocatedContainers);
      amResponse = null;
      try {
        Thread.sleep(SLEEP_BETWEEN_HEARTBEATS_MSECS);
      } catch (InterruptedException ignored) {
        LOG.info("launchContainerAsynchronously() raised InterruptedException");
      }
    } while (containersToLaunch > allocatedCount.get());
  }

  /**
   * For each container successfully allocated, attempt to set up and launch
   * a Giraph worker/master task.
   * @param allocatedContainers the containers we have currently allocated.
   */
  private void startContainerLaunchingThreads(final List<Container>
    allocatedContainers) {
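    // allocation-phase progress: fraction of containers allocated so far,
    // scaled into [0, 0.5]; updateProgress() covers [0.5, 1.0] as they finish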
    progress = allocatedCount.get() / (2.0f * containersToLaunch);
    int placeholder = 0;
    for (Container allocatedContainer : allocatedContainers) {
      LOG.info("Launching shell command on a new container." +
        ", containerId=" + allocatedContainer.getId() +
        ", containerNode=" + allocatedContainer.getNodeId().getHost() +
        ":" + allocatedContainer.getNodeId().getPort() +
        ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() +
        ", containerState=" + allocatedContainer.getState() +
        ", containerResourceMemory=" +
        allocatedContainer.getResource().getMemory());
      // Launch and start the container on a separate thread to keep the main
      // thread unblocked as all containers may not be allocated at one go.
      LaunchContainerRunnable launchThread =
        new LaunchContainerRunnable(allocatedContainer, heapPerContainer);
      executor.execute(launchThread);
    }
  }

  /**
   * Sends heartbeat messages that include progress amounts. These are in the
   * form of a YARN AllocateRequest object that asks for 0 resources.
   * @return the AllocateResponse, which we may or may not need.
   */
  private AllocateResponse sendHeartbeat() {
    heartbeat.setProgress(progress);
    heartbeat.setResponseId(lastResponseId.incrementAndGet());
    AllocateResponse allocateResponse = null;
    try {
      allocateResponse = resourceManager.allocate(heartbeat);
      final int responseId = allocateResponse.getAMResponse().getResponseId();
      if (responseId != lastResponseId.get()) {
        lastResponseId.set(responseId);
      }
      checkForRebootFlag(allocateResponse.getAMResponse());
      return allocateResponse;
    } catch (YarnRemoteException yre) {
      throw new IllegalStateException("sendHeartbeat() failed with " +
        "YarnRemoteException: ", yre);
    }
  }

  /**
   * Compose and send the allocation request for our Giraph BSP worker/master
   * compute nodes. Right now the requested containers are identical, mirroring
   * Giraph's behavior when running on Hadoop MRv1. Giraph could use YARN
   * to set fine-grained capabilities for each container, including host choice.
   * @return The AM resource descriptor with our container allocations.
   */
  private AMResponse sendAllocationRequest() {
    AllocateRequest allocRequest = Records.newRecord(AllocateRequest.class);
    try {
      List<ResourceRequest> containerList = buildResourceRequests();
      allocRequest.addAllAsks(containerList);
      List<ContainerId> releasedContainers = Lists.newArrayListWithCapacity(0);
      allocRequest.setResponseId(lastResponseId.get());
      allocRequest.setApplicationAttemptId(appAttemptId);
      allocRequest.addAllReleases(releasedContainers);
      allocRequest.setProgress(progress);
      AllocateResponse allocResponse = resourceManager.allocate(allocRequest);
      AMResponse amResponse = allocResponse.getAMResponse();
      if (amResponse.getResponseId() != lastResponseId.get()) {
        lastResponseId.set(amResponse.getResponseId());
      }
      checkForRebootFlag(amResponse);
      // now, make THIS our new HEARTBEAT object, but with ZERO new requests!
      initHeartbeatRequestObject(allocRequest);
      return amResponse;
    } catch (YarnRemoteException yre) {
      throw new IllegalStateException("Giraph Application Master could not " +
        "successfully allocate the specified containers from the RM.", yre);
    }
  }

  /**
   * If the YARN RM gets way out of sync with our App Master, it's time to
   * fail the job/restart. This should trigger the job end and cleanup.
   * @param amResponse RPC response from YARN RM to check for reboot flag.
   */
  private void checkForRebootFlag(AMResponse amResponse) {
    if (amResponse.getReboot()) {
      LOG.error("AMResponse: " + amResponse + " raised YARN REBOOT FLAG!");
      throw new RuntimeException("AMResponse " + amResponse +
        " signaled GiraphApplicationMaster with REBOOT FLAG. Failing job.");
    }
  }


  /**
   * Reuses the initial container request (switched to "0 asks" so no new allocs
   * occur) and sends all heartbeats using that request object.
   * @param allocRequest the allocation request object to use as heartbeat.
   */
  private void initHeartbeatRequestObject(AllocateRequest allocRequest) {
    allocRequest.clearAsks();
    allocRequest.addAllAsks(Lists.<ResourceRequest>newArrayListWithCapacity(0));
    heartbeat = allocRequest;
  }

  /**
   * Utility to construct the ResourceRequest for our resource ask: all the
   * Giraph containers we need, and their memory/priority requirements.
   * @return a list of ResourceRequests to send (just one, for Giraph tasks)
   */
  private List<ResourceRequest> buildResourceRequests() {
    // set up resource request for our Giraph BSP application
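    // a single ResourceRequest with numContainers == containersToLaunch asks
    // for all (identical) task containers at once; the RM typically grants
    // them in batches across later allocate() calls, which
    // launchContainersAsynchronously() handles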
    ResourceRequest resourceRequest = Records.newRecord(ResourceRequest.class);
    resourceRequest.setHostName("*"); // hand pick our worker locality someday
    Priority pri = Records.newRecord(Priority.class);
    pri.setPriority(GiraphConstants.GIRAPH_YARN_PRIORITY);
    resourceRequest.setPriority(pri);
    Resource capability = Records.newRecord(Resource.class);
    capability.setVirtualCores(1); // new YARN API, won't work version < 2.0.3
    capability.setMemory(heapPerContainer);
    resourceRequest.setCapability(capability);
    resourceRequest.setNumContainers(containersToLaunch);
    return ImmutableList.of(resourceRequest);
  }

  /**
   * Obtain handle to RPC connection to Resource Manager.
   * @return the AMRMProtocol handle to YARN RPC.
   */
  private AMRMProtocol getHandleToRm() {
    YarnConfiguration yarnConf = new YarnConfiguration(giraphConf);
    final InetSocketAddress rmAddress = yarnConf.getSocketAddr(
      YarnConfiguration.RM_SCHEDULER_ADDRESS,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT);
    LOG.info("Connecting to ResourceManager at " + rmAddress);
    if (UserGroupInformation.isSecurityEnabled()) {
      UserGroupInformation currentUser;
      try {
        currentUser = UserGroupInformation.getCurrentUser();
      } catch (IOException ioe) {
        throw new IllegalStateException("Could not obtain UGI for user.", ioe);
      }
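      // in secure mode, YARN exports the AM token (URL-encoded) into this
      // container's environment; decode it, bind it to the RM scheduler
      // address, and attach it to the current UGI so the proxy created in
      // doAs() authenticates as this ApplicationMaster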
      String tokenURLEncodedStr = System.getenv(
        ApplicationConstants.APPLICATION_MASTER_TOKEN_ENV_NAME);
      Token<? extends TokenIdentifier> token = new Token<TokenIdentifier>();
      try {
        token.decodeFromUrlString(tokenURLEncodedStr);
      } catch (IOException ioe) {
        throw new IllegalStateException("Could not decode token from URL", ioe);
      }
      SecurityUtil.setTokenService(token, rmAddress);
      if (LOG.isDebugEnabled()) {
        LOG.debug("AppMasterToken is: " + token);
      }
      currentUser.addToken(token);
      return currentUser.doAs(new PrivilegedAction<AMRMProtocol>() {
        @Override
        public AMRMProtocol run() {
          return (AMRMProtocol) rpc.getProxy(AMRMProtocol.class,
            rmAddress, giraphConf);
        }
      });
    } else { // non-secure
      return (AMRMProtocol) rpc.getProxy(AMRMProtocol.class,
        rmAddress, yarnConf);
    }
  }

  /**
   * Get the request to register this Application Master with the RM.
   * @return the populated AM request.
   */
  private RegisterApplicationMasterRequest getRegisterAppMasterRequest() {
    RegisterApplicationMasterRequest appMasterRequest =
        Records.newRecord(RegisterApplicationMasterRequest.class);
    appMasterRequest.setApplicationAttemptId(appAttemptId);
    try {
      appMasterRequest.setHost(InetAddress.getLocalHost().getHostName());
    } catch (UnknownHostException uhe) {
      throw new IllegalStateException(
        "Cannot resolve GiraphApplicationMaster's local hostname.", uhe);
    }
    // useful for a Giraph WebUI or whatever: play with these
    // appMasterRequest.setRpcPort(appMasterRpcPort);
    // appMasterRequest.setTrackingUrl(appMasterTrackingUrl);
    return appMasterRequest;
  }

  /**
   * Lazily compose the map of jar and file names to LocalResource records for
   * inclusion in GiraphYarnTask container requests. Can re-use the same map
   * as Giraph tasks need identical HDFS-based resources (jars etc.) to run.
   * @return the resource map for a ContainerLaunchContext
   */
  private Map<String, LocalResource> getTaskResourceMap() {
    // Set the local resources: just send the copies already in HDFS
    if (null == LOCAL_RESOURCES) {
      LOCAL_RESOURCES = Maps.newHashMap();
      try {
        // if you have to update the giraphConf for export to tasks, do it now
        updateGiraphConfForExport();
        YarnUtils.addFsResourcesToMap(LOCAL_RESOURCES, giraphConf,
          appAttemptId.getApplicationId());
      } catch (IOException ioe) {
        // fail fast, this container will never launch.
        throw new IllegalStateException("Could not configure the container" +
          "launch context for GiraphYarnTasks.", ioe);
      }
    }
    // else, return the prepopulated copy to reuse for each GiraphYarnTask
    return LOCAL_RESOURCES;
  }

  /**
   * If you're going to make ANY CHANGES to your local GiraphConfiguration
   * while running the GiraphApplicationMaster, put them here.
   * This method replaces the GiraphConfiguration XML file currently
   * stored in HDFS with the copy you have modified locally in memory.
   */
  private void updateGiraphConfForExport()
    throws IOException {
    // Giraph expects this MapReduce stuff
    giraphConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID,
      appAttemptId.getAttemptId());
    // now republish the giraph-conf.xml in HDFS
    YarnUtils.exportGiraphConfiguration(giraphConf,
      appAttemptId.getApplicationId());
  }

  /**
   * Thread to connect to the {@link ContainerManager} and launch the container
   * that will house one of our Giraph worker (or master) tasks.
   */
  private class LaunchContainerRunnable implements Runnable {
    /** Allocated container */
    private Container container;
    /** Handle to communicate with ContainerManager */
    private ContainerManager containerManager;
    /** Heap memory in MB to allocate for this JVM in the launched container */
    private final int heapSize;

    /**
     * Constructor.
     * @param newGiraphTaskContainer Allocated container
     * @param heapMb the <code>-Xmx</code> setting for each launched task.
     */
    public LaunchContainerRunnable(final Container newGiraphTaskContainer,
      final int heapMb) {
      this.container = newGiraphTaskContainer;
      this.heapSize = heapMb;
    }

    /**
     * Helper function to connect to ContainerManager, which resides on the
     * same compute node as this Giraph task's container. The CM starts tasks.
     */
    private void connectToCM() {
      LOG.debug("Connecting to CM for containerid=" + container.getId());
      String cmIpPortStr = container.getNodeId().getHost() + ":" +
        container.getNodeId().getPort();
      InetSocketAddress cmAddress = NetUtils.createSocketAddr(cmIpPortStr);
      LOG.info("Connecting to CM at " + cmIpPortStr);
      this.containerManager = (ContainerManager)
        rpc.getProxy(ContainerManager.class, cmAddress, giraphConf);
    }

    /**
     * Connects to CM, sets up container launch context
     * for shell command and eventually dispatches the container
     * start request to the CM.
     */
    public void run() {
      // Connect to ContainerManager
      connectToCM();
      // configure the launcher for the Giraph task it will host
      StartContainerRequest startReq =
        Records.newRecord(StartContainerRequest.class);
      startReq.setContainerLaunchContext(buildContainerLaunchContext());
      // request CM to start this container as spec'd in ContainerLaunchContext
      try {
        containerManager.startContainer(startReq);
      } catch (YarnRemoteException yre) {
        LOG.error("StartContainerRequest failed for containerId=" +
                    container.getId(), yre);
      }
    }

    /**
     * Boilerplate to set up the ContainerLaunchContext to tell the Container
     * Manager how to launch our Giraph task in the execution container we have
     * already allocated.
     * @return a populated ContainerLaunchContext object.
     */
    private ContainerLaunchContext buildContainerLaunchContext() {
      LOG.info("Setting up container launch container for containerid=" +
        container.getId());
      ContainerLaunchContext launchContext = Records
        .newRecord(ContainerLaunchContext.class);
      launchContext.setContainerId(container.getId());
      launchContext.setResource(container.getResource());
      // args inject the CLASSPATH, heap MB, and TaskAttemptID for launched task
      final List<String> commands = generateShellExecCommand();
      launchContext.setCommands(commands);
      // add user information to the job
      String jobUserName = "ERROR_UNKNOWN_USER";
      UserGroupInformation ugi = null;
      try {
        ugi = UserGroupInformation.getCurrentUser();
        jobUserName = ugi.getUserName();
      } catch (IOException ioe) {
        jobUserName =
          System.getenv(ApplicationConstants.Environment.USER.name());
      }
      launchContext.setUser(jobUserName);
      LOG.info("Setting username in ContainerLaunchContext to: " + jobUserName);
      // Set the environment variables to inject into remote task's container
      buildEnvironment(launchContext);
      // Set the local resources: just send the copies already in HDFS
      launchContext.setLocalResources(getTaskResourceMap());
      return launchContext;
    }

    /**
     * Generates our command line string used to launch our Giraph tasks.
     * @return the BASH shell commands to launch the job.
     */
    private List<String> generateShellExecCommand() {
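      // For illustration only (heap size and ids below are hypothetical), the
      // assembled command looks roughly like:
      //   java -Xmx1024M -Xms1024M -cp .:${CLASSPATH} \
      //     org.apache.giraph.yarn.GiraphYarnTask \
      //     <clusterTimestamp> <appId> <containerId> <attemptId> \
      //     1><LOG_DIR>/task-<containerId>-stdout.log \
      //     2><LOG_DIR>/task-<containerId>-stderr.log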
      return ImmutableList.of("java " +
        "-Xmx" + heapSize + "M " +
        "-Xms" + heapSize + "M " +
        "-cp .:${CLASSPATH} " +
        "org.apache.giraph.yarn.GiraphYarnTask " +
        appAttemptId.getApplicationId().getClusterTimestamp() + " " +
        appAttemptId.getApplicationId().getId() + " " +
        container.getId().getId() + " " +
        appAttemptId.getAttemptId() + " " +
        "1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR +
        "/task-" + container.getId().getId() + "-stdout.log " +
        "2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR +
        "/task-" + container.getId().getId() + "-stderr.log "
      );
    }

    /**
     * Utility to populate the environment vars we wish to inject into the new
     * container's env when the Giraph BSP task is executed.
     * @param launchContext the launch context which will set our environment
     *                      vars in the app master's execution container.
     */
    private void buildEnvironment(final ContainerLaunchContext launchContext) {
      Map<String, String> classPathForEnv = Maps.<String, String>newHashMap();
      // pick up the local classpath so when we instantiate a Configuration
      // remotely, we also get the "mapred-site.xml" and "yarn-site.xml"
      YarnUtils.addLocalClasspathToEnv(classPathForEnv, giraphConf);
      // set this map of env vars into the launch context.
      launchContext.setEnvironment(classPathForEnv);
    }
  }

  /**
   * Application entry point
   * @param args command-line args (set by GiraphYarnClient, if any)
   */
  public static void main(final String[] args) {
    String containerIdString =
        System.getenv(ApplicationConstants.AM_CONTAINER_ID_ENV);
    if (containerIdString == null) {
      // container id should always be set in the env by the framework
      throw new IllegalArgumentException("ContainerId not found in env vars.");
    }
    ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
    ApplicationAttemptId appAttemptId = containerId.getApplicationAttemptId();
    try {
      GiraphApplicationMaster giraphAppMaster =
        new GiraphApplicationMaster(containerId, appAttemptId);
      giraphAppMaster.run();
      // CHECKSTYLE: stop IllegalCatch
    } catch (Throwable t) {
      // CHECKSTYLE: resume IllegalCatch
      LOG.error("GiraphApplicationMaster caught a " +
                  "top-level exception in main.", t);
      System.exit(2);
    }
    System.exit(0);
  }
}