Package org.apache.hadoop.yarn.api.protocolrecords

Examples of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse


          lastResponseID,
          0.0f,
          createBSPTaskRequest(numBSPTasks - allocatedContainers.size(),
              taskMemoryInMb, priority), releasedContainers);

      AllocateResponse allocateResponse = resourceManager.allocate(req);
      AMResponse amResponse = allocateResponse.getAMResponse();
      LOG.info("Got response! ID: " + amResponse.getResponseId()
          + " with num of containers: "
          + amResponse.getAllocatedContainers().size()
          + " and following resources: "
          + amResponse.getAvailableResources().getMemory() + "mb");
View Full Code Here


    int allocatedContainerCount = 0;
    int iterationsLeft = 2;
    Set<Container> containers = new TreeSet<Container>();
    while (allocatedContainerCount < containersRequestedAny
        && iterationsLeft > 0) {
      AllocateResponse allocResponse = rmClient.allocate(0.1f);

      allocatedContainerCount += allocResponse.getAllocatedContainers().size();
      for(Container container : allocResponse.getAllocatedContainers()) {
        containers.add(container);
      }
      if (!allocResponse.getNMTokens().isEmpty()) {
        for (NMToken token : allocResponse.getNMTokens()) {
          NMTokenCache.setNMToken(token.getNodeId().toString(),
              token.getToken());
        }
      }
      if(allocatedContainerCount < containersRequestedAny) {
View Full Code Here

      super("AMRM Heartbeater thread");
    }
   
    public void run() {
      while (true) {
        AllocateResponse response = null;
        // synchronization ensures we don't send heartbeats after unregistering
        synchronized (unregisterHeartbeatLock) {
          if (!keepRunning) {
            return;
          }
           
          try {
            response = client.allocate(progress);
          } catch (YarnException ex) {
            LOG.error("Yarn exception on heartbeat", ex);
            savedException = ex;
            // interrupt handler thread in case it waiting on the queue
            handlerThread.interrupt();
            return;
          } catch (IOException e) {
            LOG.error("IO exception on heartbeat", e);
            savedException = e;
            // interrupt handler thread in case it waiting on the queue
            handlerThread.interrupt();
            return;
          }
        }
        if (response != null) {
          while (true) {
            try {
              responseQueue.put(response);
              if (response.getAMCommand() == AMCommand.AM_RESYNC
                  || response.getAMCommand() == AMCommand.AM_SHUTDOWN) {
                return;
              }
              break;
            } catch (InterruptedException ex) {
              LOG.info("Interrupted while waiting to put on response queue", ex);
View Full Code Here

      while (true) {
        if (!keepRunning) {
          return;
        }
        try {
          AllocateResponse response;
          if(savedException != null) {
            LOG.error("Stopping callback due to: ", savedException);
            handler.onError(savedException);
            return;
          }
          try {
            response = responseQueue.take();
          } catch (InterruptedException ex) {
            LOG.info("Interrupted while waiting for queue", ex);
            continue;
          }

          if (response.getAMCommand() != null) {
            switch(response.getAMCommand()) {
            case AM_RESYNC:
            case AM_SHUTDOWN:
              handler.onShutdownRequest();
              LOG.info("Shutdown requested. Stopping callback.");
              return;
            default:
              String msg =
                    "Unhandled value of RM AMCommand: " + response.getAMCommand();
              LOG.error(msg);
              throw new YarnRuntimeException(msg);
            }
          }
          List<NodeReport> updatedNodes = response.getUpdatedNodes();
          if (!updatedNodes.isEmpty()) {
            handler.onNodesUpdated(updatedNodes);
          }

          List<ContainerStatus> completed =
              response.getCompletedContainersStatuses();
          if (!completed.isEmpty()) {
            handler.onContainersCompleted(completed);
          }

          List<Container> allocated = response.getAllocatedContainers();
          if (!allocated.isEmpty()) {
            handler.onContainersAllocated(allocated);
          }

          progress = handler.getProgress();
View Full Code Here

  @Override
  public AllocateResponse allocate(float progressIndicator)
      throws YarnException, IOException {
    Preconditions.checkArgument(progressIndicator >= 0,
        "Progress indicator should not be negative");
    AllocateResponse allocateResponse = null;
    ArrayList<ResourceRequest> askList = null;
    ArrayList<ContainerId> releaseList = null;
    AllocateRequest allocateRequest = null;
   
    try {
      synchronized (this) {
        askList = new ArrayList<ResourceRequest>(ask.size());
        for(ResourceRequest r : ask) {
          // create a copy of ResourceRequest as we might change it while the
          // RPC layer is using it to send info across
          askList.add(ResourceRequest.newInstance(r.getPriority(),
              r.getResourceName(), r.getCapability(), r.getNumContainers(),
              r.getRelaxLocality()));
        }
        releaseList = new ArrayList<ContainerId>(release);
        // optimistically clear this collection assuming no RPC failure
        ask.clear();
        release.clear();
        allocateRequest =
            AllocateRequest.newInstance(lastResponseId, progressIndicator,
              askList, releaseList, null);
      }

      allocateResponse = rmClient.allocate(allocateRequest);

      synchronized (this) {
        // update these on successful RPC
        clusterNodeCount = allocateResponse.getNumClusterNodes();
        lastResponseId = allocateResponse.getResponseId();
        clusterAvailableResources = allocateResponse.getAvailableResources();
        if (!allocateResponse.getNMTokens().isEmpty()) {
          populateNMTokens(allocateResponse);
        }
      }
    } finally {
      // TODO how to differentiate remote yarn exception vs error in rpc
View Full Code Here

      List<ContainerId> releaseContainerList =
          new ArrayList<ContainerId>();
      HashMap<String, Token> nmTokens = new HashMap<String, Token>();

      // initially requesting 2 containers.
      AllocateResponse response =
          am.allocate("h1", 1000, 2, releaseContainerList);
      nm1.nodeHeartbeat(true);
      Assert.assertEquals(0, response.getAllocatedContainers().size());
      allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 2,
          nmTokens);
      Assert.assertEquals(1, nmTokens.size());

     
      // requesting 2 more containers.
      response = am.allocate("h1", 1000, 2, releaseContainerList);
      nm1.nodeHeartbeat(true);
      Assert.assertEquals(0, response.getAllocatedContainers().size());
      allocateContainersAndValidateNMTokens(am, containersReceivedForNM1, 4,
          nmTokens);
      Assert.assertEquals(1, nmTokens.size());
     
     
      // We will be simulating NM restart so restarting newly added h2:1234
      // NM 2 now registers.
      MockNM nm2 = rm.registerNode("h2:1234", 10000);
      nm2.nodeHeartbeat(true);
      ArrayList<Container> containersReceivedForNM2 =
          new ArrayList<Container>();
     
      response = am.allocate("h2", 1000, 2, releaseContainerList);
      nm2.nodeHeartbeat(true);
      Assert.assertEquals(0, response.getAllocatedContainers().size());
      allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 2,
          nmTokens);
      Assert.assertEquals(2, nmTokens.size());
     
      // Simulating NM-2 restart.
      nm2 = rm.registerNode("h2:1234", 10000);
      nm2.nodeHeartbeat(true);
     
      int interval = 40;
      // Wait for nm Token to be cleared.
      while (nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
              nm2.getNodeId()) && interval-- > 0) {
        LOG.info("waiting for nmToken to be cleared for : " + nm2.getNodeId());
        Thread.sleep(1000);
      }
      Assert.assertTrue(nmTokenSecretManager
          .isApplicationAttemptRegistered(attempt.getAppAttemptId()));
     
      // removing NMToken for h2:1234
      nmTokens.remove(nm2.getNodeId().toString());
      Assert.assertEquals(1, nmTokens.size());
     
      // We should again receive the NMToken.
      response = am.allocate("h2", 1000, 2, releaseContainerList);
      nm2.nodeHeartbeat(true);
      Assert.assertEquals(0, response.getAllocatedContainers().size());
      allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 4,
          nmTokens);
      Assert.assertEquals(2, nmTokens.size());

      // Now rolling over NMToken masterKey. it should resend the NMToken in
      // next allocate call.
      Assert.assertTrue(nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
              nm1.getNodeId()));
      Assert.assertTrue(nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
              nm2.getNodeId()));
     
      nmTokenSecretManager.rollMasterKey();
      nmTokenSecretManager.activateNextMasterKey();
     
      Assert.assertFalse(nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
              nm1.getNodeId()));
      Assert.assertFalse(nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
              nm2.getNodeId()));
      // It should not remove application attempt entry.
      Assert.assertTrue(nmTokenSecretManager
          .isApplicationAttemptRegistered(attempt.getAppAttemptId()));

      nmTokens.clear();
      Assert.assertEquals(0, nmTokens.size());
      // We should again receive the NMToken.
      response = am.allocate("h2", 1000, 1, releaseContainerList);
      nm2.nodeHeartbeat(true);
      Assert.assertEquals(0, response.getAllocatedContainers().size());
      allocateContainersAndValidateNMTokens(am, containersReceivedForNM2, 5,
          nmTokens);
      Assert.assertEquals(1, nmTokens.size());
      Assert.assertTrue(nmTokenSecretManager
          .isApplicationAttemptNMTokenPresent(attempt.getAppAttemptId(),
View Full Code Here

  protected void allocateContainersAndValidateNMTokens(MockAM am,
      ArrayList<Container> containersReceived, int totalContainerRequested,
      HashMap<String, Token> nmTokens) throws Exception, InterruptedException {
    ArrayList<ContainerId> releaseContainerList = new ArrayList<ContainerId>();
    AllocateResponse response;
    ArrayList<ResourceRequest> resourceRequest =
        new ArrayList<ResourceRequest>();     
    while (containersReceived.size() < totalContainerRequested) {
      LOG.info("requesting containers..");
      response =
          am.allocate(resourceRequest, releaseContainerList);
      containersReceived.addAll(response.getAllocatedContainers());
      if (!response.getNMTokens().isEmpty()) {
        for (NMToken nmToken : response.getNMTokens()) {
          String nodeId = nmToken.getNodeId().toString();
          if (nmTokens.containsKey(nodeId)) {
            Assert.fail("Duplicate NMToken received for : " + nodeId);
          }
          nmTokens.put(nodeId, nmToken.getToken());
View Full Code Here

  protected AllocateResponse makeRemoteRequest() throws IOException {
    AllocateRequest allocateRequest =
        AllocateRequest.newInstance(lastResponseID,
          super.getApplicationProgress(), new ArrayList<ResourceRequest>(ask),
          new ArrayList<ContainerId>(release), null);
    AllocateResponse allocateResponse;
    try {
      allocateResponse = scheduler.allocate(allocateRequest);
    } catch (YarnException e) {
      throw new IOException(e);
    }
    lastResponseID = allocateResponse.getResponseId();
    availableResources = allocateResponse.getAvailableResources();
    lastClusterNmCount = clusterNmCount;
    clusterNmCount = allocateResponse.getNumClusterNodes();

    if (ask.size() > 0 || release.size() > 0) {
      LOG.info("getResources() for " + applicationId + ":" + " ask="
          + ask.size() + " release= " + release.size() + " newContainers="
          + allocateResponse.getAllocatedContainers().size()
          + " finishedContainers="
          + allocateResponse.getCompletedContainersStatuses().size()
          + " resourcelimit=" + availableResources + " knownNMs="
          + clusterNmCount);
    }

    ask.clear();
View Full Code Here

      int containers) throws Exception {
    requests.addAll(createReq(hosts, memory, priority, containers));
  }

  public AllocateResponse schedule() throws Exception {
    AllocateResponse response = allocate(requests, releases);
    requests.clear();
    releases.clear();
    return response;
  }
View Full Code Here

 
  @SuppressWarnings("unchecked")
  private List<Container> getResources() throws Exception {
    int headRoom = getAvailableResources() != null
        ? getAvailableResources().getMemory() : 0;//first time it would be null
    AllocateResponse response;
    /*
     * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
     * milliseconds before aborting. During this interval, AM will still try
     * to contact the RM.
     */
    try {
      response = makeRemoteRequest();
      // Reset retry count if no exception occurred.
      retrystartTime = System.currentTimeMillis();
    } catch (Exception e) {
      // This can happen when the connection to the RM has gone down. Keep
      // re-trying until the retryInterval has expired.
      if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
        LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
        eventHandler.handle(new JobEvent(this.getJob().getID(),
                                         JobEventType.INTERNAL_ERROR));
        throw new YarnRuntimeException("Could not contact RM after " +
                                retryInterval + " milliseconds.");
      }
      // Throw this up to the caller, which may decide to ignore it and
      // continue to attempt to contact the RM.
      throw e;
    }
    if (response.getAMCommand() != null) {
      switch(response.getAMCommand()) {
      case AM_RESYNC:
      case AM_SHUTDOWN:
        // This can happen if the RM has been restarted. If it is in that state,
        // this application must clean itself up.
        eventHandler.handle(new JobEvent(this.getJob().getID(),
                                         JobEventType.JOB_AM_REBOOT));
        throw new YarnRuntimeException("Resource Manager doesn't recognize AttemptId: " +
                                 this.getContext().getApplicationID());
      default:
        String msg =
              "Unhandled value of AMCommand: " + response.getAMCommand();
        LOG.error(msg);
        throw new YarnRuntimeException(msg);
      }
    }
    int newHeadRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0;
    List<Container> newContainers = response.getAllocatedContainers();
    // Setting NMTokens
    if (response.getNMTokens() != null) {
      for (NMToken nmToken : response.getNMTokens()) {
        NMTokenCache.setNMToken(nmToken.getNodeId().toString(),
            nmToken.getToken());
      }
    }
   
    List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
    if (newContainers.size() + finishedContainers.size() > 0 || headRoom != newHeadRoom) {
      //something changed
      recalculateReduceSchedule = true;
      if (LOG.isDebugEnabled() && headRoom != newHeadRoom) {
        LOG.debug("headroom=" + newHeadRoom);
View Full Code Here

TOP

Related Classes of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.