Package org.apache.hadoop.yarn.server.api.protocolrecords

Examples of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse


      } finally {
        heartBeatID++;
      }
      NodeStatus nodeStatus = request.getNodeStatus();
      nodeStatus.setResponseId(heartBeatID);
      NodeHeartbeatResponse nhResponse =
          YarnServerBuilderUtils.newNodeHeartbeatResponse(heartBeatID,
                                                          heartBeatNodeAction,
                                                          null, null, null,
                                                          null, 1000L);
      return nhResponse;
View Full Code Here


    public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
        throws YarnException, IOException {
      NodeStatus nodeStatus = request.getNodeStatus();
      nodeStatus.setResponseId(heartBeatID++);

      NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils.
          newNodeHeartbeatResponse(heartBeatID, NodeAction.NORMAL, null,
              null, null, null, 1000L);
      return nhResponse;
    }
View Full Code Here

        new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>());
    Assert.assertTrue(allocResponse.getAMCommand() == AMCommand.AM_RESYNC);
   
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
   
    // new NM to represent NM re-register
    nm1 = rm2.registerNode("127.0.0.1:1234", 15120);
    nm2 = rm2.registerNode("127.0.0.2:5678", 15120);

    // verify no more reboot response sent
    hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
   
    // assert app1 attempt is saved
    attempt1 = loadedApp1.getCurrentAppAttempt();
    attemptId1 = attempt1.getAppAttemptId();
    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
View Full Code Here

          new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION));
      return shutDown;
    }
   
    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.getLastNodeHeartBeatResponse();
    if (remoteNodeStatus.getResponseId() + 1 == lastNodeHeartbeatResponse
        .getResponseId()) {
      LOG.info("Received duplicate heartbeat from node "
          + rmNode.getNodeAddress());
      return lastNodeHeartbeatResponse;
    } else if (remoteNodeStatus.getResponseId() + 1 < lastNodeHeartbeatResponse
        .getResponseId()) {
      String message =
          "Too far behind rm response id:"
              + lastNodeHeartbeatResponse.getResponseId() + " nm response id:"
              + remoteNodeStatus.getResponseId();
      LOG.info(message);
      resync.setDiagnosticsMessage(message);
      // TODO: Just sending reboot is not enough. Think more.
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING));
      return resync;
    }

    // Heartbeat response
    NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils
        .newNodeHeartbeatResponse(lastNodeHeartbeatResponse.
            getResponseId() + 1, NodeAction.NORMAL, null, null, null, null,
            nextHeartBeatInterval);
    rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
View Full Code Here

            @Override
            public NodeHeartbeatResponse nodeHeartbeat(
                NodeHeartbeatRequest request) throws YarnException,
                IOException {
              NodeHeartbeatResponse response;
              try {
                response = rt.nodeHeartbeat(request);
              } catch (YarnException e) {
                LOG.info("Exception in heartbeat from node " +
                    request.getNodeStatus().getNodeId(), e);
View Full Code Here

    Assert.assertNotNull(nmToken
        + "Registration should cause a key-update!", nmTokenMasterKey);
   
    dispatcher.await();

    NodeHeartbeatResponse response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "First heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "First heartbeat after registration shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Even second heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Even second heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
   
    dispatcher.await();

    // Let's force a roll-over
    rm.getRMContext().getContainerTokenSecretManager().rollMasterKey();
    rm.getRMContext().getNMTokenSecretManager().rollMasterKey();

    // Heartbeats after roll-over and before activation should be fine.
    response = nm.nodeHeartbeat(true);
    Assert.assertNotNull(containerToken +
        "Heartbeats after roll-over and before activation should not err out.",
        response.getContainerTokenMasterKey());
    Assert.assertNotNull(nmToken +
        "Heartbeats after roll-over and before activation should not err out.",
        response.getNMTokenMasterKey());
   
    Assert.assertEquals(containerToken +
        "Roll-over should have incremented the key-id only by one!",
        containerTokenMasterKey.getKeyId() + 1,
        response.getContainerTokenMasterKey().getKeyId());
    Assert.assertEquals(nmToken +
        "Roll-over should have incremented the key-id only by one!",
        nmTokenMasterKey.getKeyId() + 1,
        response.getNMTokenMasterKey().getKeyId());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Second heartbeat after roll-over shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Second heartbeat after roll-over shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    // Let's force activation
    rm.getRMContext().getContainerTokenSecretManager().activateNextMasterKey();
    rm.getRMContext().getNMTokenSecretManager().activateNextMasterKey();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken
        + "Activation shouldn't cause any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken
        + "Activation shouldn't cause any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Even second heartbeat after activation shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Even second heartbeat after activation shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    rm.stop();
  }
View Full Code Here

      /**
       * Heartbeat loop. While {@code isStopped} is false, it builds a node
       * status, sends it through {@code resourceTracker.nodeHeartbeat}, and
       * acts on the response: updates master keys, handles SHUTDOWN / RESYNC
       * node actions, and dispatches container and application cleanup events.
       *
       * <p>The loop ends when {@code isStopped} becomes true, when the response
       * asks for SHUTDOWN or RESYNC, or when a {@code ConnectException} is
       * rethrown as a {@code YarnRuntimeException}. Any other {@code Throwable}
       * is logged and the loop continues.
       */
      public void run() {
        // Response id of the last heartbeat that was fully processed; fed into
        // the next node status.
        int lastHeartBeatID = 0;
        while (!isStopped) {
          // Send heartbeat
          try {
            NodeHeartbeatResponse response = null;
            NodeStatus nodeStatus = getNodeStatus(lastHeartBeatID);
           
            // The request carries the node status plus the current container-token
            // and NM-token keys held by this node's secret managers.
            NodeHeartbeatRequest request =
                NodeHeartbeatRequest.newInstance(nodeStatus,
                  NodeStatusUpdaterImpl.this.context
                    .getContainerTokenSecretManager().getCurrentKey(),
                  NodeStatusUpdaterImpl.this.context.getNMTokenSecretManager()
                    .getCurrentKey());
            response = resourceTracker.nodeHeartbeat(request);
            //get next heartbeat interval from response
            nextHeartBeatInterval = response.getNextHeartBeatInterval();
            updateMasterKeys(response);

            // SHUTDOWN: mark the context decommissioned, dispatch a SHUTDOWN
            // event and leave the loop.
            if (response.getNodeAction() == NodeAction.SHUTDOWN) {
              LOG
                .warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
                    + " hence shutting down.");
              LOG.warn("Message from ResourceManager: "
                  + response.getDiagnosticsMessage());
              context.setDecommissioned(true);
              dispatcher.getEventHandler().handle(
                  new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
              break;
            }
            // RESYNC: invalidate the RM identifier, dispatch a RESYNC event and
            // leave the loop.
            if (response.getNodeAction() == NodeAction.RESYNC) {
              LOG.warn("Node is out of sync with ResourceManager,"
                  + " hence resyncing.");
              LOG.warn("Message from ResourceManager: "
                  + response.getDiagnosticsMessage());
              // Invalidate the RMIdentifier while resync
              NodeStatusUpdaterImpl.this.rmIdentifier =
                  ResourceManagerConstants.RM_INVALID_IDENTIFIER;
              dispatcher.getEventHandler().handle(
                  new NodeManagerEvent(NodeManagerEventType.RESYNC));
              break;
            }

            // Explicitly put this method after checking the resync response. We
            // don't want to remove the completed containers before resync
            // because these completed containers will be reported back to RM
            // when NM re-registers with RM.
            removeCompletedContainersFromContext();

            // Only reached for responses that were neither SHUTDOWN nor RESYNC.
            lastHeartBeatID = response.getResponseId();
            List<ContainerId> containersToCleanup = response
                .getContainersToCleanup();
            if (!containersToCleanup.isEmpty()) {
              dispatcher.getEventHandler().handle(
                  new CMgrCompletedContainersEvent(containersToCleanup,
                    CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER));
            }
            List<ApplicationId> appsToCleanup =
                response.getApplicationsToCleanup();
            //Only start tracking for keepAlive on FINISH_APP
            trackAppsForKeepAlive(appsToCleanup);
            if (!appsToCleanup.isEmpty()) {
              dispatcher.getEventHandler().handle(
                  new CMgrCompletedAppsEvent(appsToCleanup,
                      CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER));
            }
          } catch (ConnectException e) {
            //catch and throw the exception if tried MAX wait time to connect RM
            // A SHUTDOWN event is dispatched before the exception is rethrown
            // wrapped in a YarnRuntimeException.
            dispatcher.getEventHandler().handle(
                new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
            throw new YarnRuntimeException(e);
          } catch (Throwable e) {

            // TODO Better error handling. Thread can die with the rest of the
            // NM still running.
            LOG.error("Caught exception in status-updater", e);
          } finally {
            // This block also runs on the break and throw paths above, so the
            // wait below happens before the loop is actually left.
            synchronized (heartbeatMonitor) {
              // Fall back to the default interval when the current value is
              // zero or negative.
              nextHeartBeatInterval = nextHeartBeatInterval <= 0 ?
                  YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS :
                    nextHeartBeatInterval;
              try {
                heartbeatMonitor.wait(nextHeartBeatInterval);
              } catch (InterruptedException e) {
                // Do Nothing
                // The interrupt is swallowed; the loop condition re-checks
                // isStopped on the next iteration.
              }
            }
          }
        }
      }

      private void updateMasterKeys(NodeHeartbeatResponse response) {
        // See if the master-key has rolled over
        MasterKey updatedMasterKey = response.getContainerTokenMasterKey();
        if (updatedMasterKey != null) {
          // Will be non-null only on roll-over on RM side
          context.getContainerTokenSecretManager().setMasterKey(updatedMasterKey);
        }
       
        updatedMasterKey = response.getNMTokenMasterKey();
        if (updatedMasterKey != null) {
          context.getNMTokenSecretManager().setMasterKey(updatedMasterKey);
        }
      }
    };
View Full Code Here

    ns.setNodeId(node.getNodeID());
    ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
    ns.setResponseId(RESPONSE_ID ++);
    ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
    beatRequest.setNodeStatus(ns);
    NodeHeartbeatResponse beatResponse =
        rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
    if (! beatResponse.getContainersToCleanup().isEmpty()) {
      // remove from queue
      synchronized(releasedContainerList) {
        for (ContainerId containerId : beatResponse.getContainersToCleanup()){
          if (amContainerList.contains(containerId)) {
            // AM container (not killed?, only release)
            synchronized(amContainerList) {
              amContainerList.remove(containerId);
            }
            LOG.debug(MessageFormat.format("NodeManager {0} releases " +
                "an AM ({1}).", node.getNodeID(), containerId));
          } else {
            cs = runningContainers.remove(containerId);
            containerQueue.remove(cs);
            releasedContainerList.add(containerId);
            LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
                "container ({1}).", node.getNodeID(), containerId));
          }
        }
      }
    }
    if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
      lastStep();
    }
  }
View Full Code Here

        ConcurrentMap<ContainerId, Container> activeContainers =
            this.context.getContainers();
        Assert.assertEquals(2, activeContainers.size());
      }

      NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils.
          newNodeHeartbeatResponse(heartBeatID, null, null, null, null, null,
            1000L);
      return nhResponse;
    }
View Full Code Here

    public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
        throws YarnException, IOException {
      NodeStatus nodeStatus = request.getNodeStatus();
      nodeStatus.setResponseId(heartBeatID++);
     
      NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils.
          newNodeHeartbeatResponse(heartBeatID, heartBeatNodeAction, null,
              null, null, null, 1000L);
      nhResponse.setDiagnosticsMessage(shutDownMessage);
      return nhResponse;
    }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.