Package org.apache.hadoop.yarn.server.api.protocolrecords

Examples of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse


      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION));
      return shutDown;
    }

    NodeHeartbeatResponse nodeHeartBeatResponse = recordFactory
        .newRecordInstance(NodeHeartbeatResponse.class);
   
    // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat
    HeartbeatResponse lastHeartbeatResponse = rmNode.getLastHeartBeatResponse();
    if (remoteNodeStatus.getResponseId() + 1 == lastHeartbeatResponse
        .getResponseId()) {
      LOG.info("Received duplicate heartbeat from node "
          + rmNode.getNodeAddress());
      nodeHeartBeatResponse.setHeartbeatResponse(lastHeartbeatResponse);
      return nodeHeartBeatResponse;
    } else if (remoteNodeStatus.getResponseId() + 1 < lastHeartbeatResponse
        .getResponseId()) {
      LOG.info("Too far behind rm response id:"
          + lastHeartbeatResponse.getResponseId() + " nm response id:"
          + remoteNodeStatus.getResponseId());
      // TODO: Just sending reboot is not enough. Think more.
      this.rmContext.getDispatcher().getEventHandler().handle(
          new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING));
      return reboot;
    }

    // Heartbeat response
    HeartbeatResponse latestResponse = recordFactory
        .newRecordInstance(HeartbeatResponse.class);
    latestResponse.setResponseId(lastHeartbeatResponse.getResponseId() + 1);
    rmNode.updateHeartbeatResponseForCleanup(latestResponse);
    latestResponse.setNodeAction(NodeAction.NORMAL);

    // Check if node's masterKey needs to be updated and if the currentKey has
    // roller over, send it across
    if (isSecurityEnabled()) {

      boolean shouldSendMasterKey = false;

      MasterKey nextMasterKeyForNode =
          this.containerTokenSecretManager.getNextKey();
      if (nextMasterKeyForNode != null) {
        // nextMasterKeyForNode can be null if there is no outstanding key that
        // is in the activation period.
        MasterKey nodeKnownMasterKey = request.getLastKnownMasterKey();
        if (nodeKnownMasterKey.getKeyId() != nextMasterKeyForNode.getKeyId()) {
          shouldSendMasterKey = true;
        }
      }
      if (shouldSendMasterKey) {
        latestResponse.setMasterKey(nextMasterKeyForNode);
      }
    }

    // 4. Send status to RMNode, saving the latest response.
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
            remoteNodeStatus.getContainersStatuses(),
            remoteNodeStatus.getKeepAliveApplications(), latestResponse));

    nodeHeartBeatResponse.setHeartbeatResponse(latestResponse);
    return nodeHeartBeatResponse;
  }
View Full Code Here


          return new ResourceTracker() {

            @Override
            public NodeHeartbeatResponse nodeHeartbeat(
                NodeHeartbeatRequest request) throws YarnRemoteException {
              NodeHeartbeatResponse response = recordFactory.newRecordInstance(
                  NodeHeartbeatResponse.class);
              try {
                response.setHeartbeatResponse(rt.nodeHeartbeat(request)
                    .getHeartbeatResponse());
              } catch (IOException ioe) {
                LOG.info("Exception in heartbeat from node " +
                    request.getNodeStatus().getNodeId(), ioe);
                throw RPCUtil.getRemoteException(ioe);
              }
              return response;
            }

            @Override
            public RegisterNodeManagerResponse registerNodeManager(
                RegisterNodeManagerRequest request)
                throws YarnRemoteException {
              RegisterNodeManagerResponse response = recordFactory.
                  newRecordInstance(RegisterNodeManagerResponse.class);
              try {
                response.setRegistrationResponse(rt
                    .registerNodeManager(request)
                    .getRegistrationResponse());
              } catch (IOException ioe) {
                LOG.info("Exception in node registration from "
                    + request.getNodeId().toString(), ioe);
View Full Code Here

      public void run() {
        int lastHeartBeatID = 0;
        while (!isStopped) {
          // Send heartbeat
          try {
            NodeHeartbeatResponse response = null;
            NodeStatus nodeStatus = getNodeStatusAndUpdateContainersInContext();
            nodeStatus.setResponseId(lastHeartBeatID);
           
            NodeHeartbeatRequest request = recordFactory
                .newRecordInstance(NodeHeartbeatRequest.class);
            request.setNodeStatus(nodeStatus);
            request
              .setLastKnownContainerTokenMasterKey(NodeStatusUpdaterImpl.this.context
                .getContainerTokenSecretManager().getCurrentKey());
            request
              .setLastKnownNMTokenMasterKey(NodeStatusUpdaterImpl.this.context
                .getNMTokenSecretManager().getCurrentKey());
            response = resourceTracker.nodeHeartbeat(request);
            //get next heartbeat interval from response
            nextHeartBeatInterval = response.getNextHeartBeatInterval();
            updateMasterKeys(response);

            if (response.getNodeAction() == NodeAction.SHUTDOWN) {
              LOG
                .warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
                    + " hence shutting down.");
              LOG.warn("Message from ResourceManager: "
                  + response.getDiagnosticsMessage());
              dispatcher.getEventHandler().handle(
                  new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
              break;
            }
            if (response.getNodeAction() == NodeAction.RESYNC) {
              LOG.warn("Node is out of sync with ResourceManager,"
                  + " hence resyncing.");
              LOG.warn("Message from ResourceManager: "
                  + response.getDiagnosticsMessage());
              // Invalidate the RMIdentifier while resync
              NodeStatusUpdaterImpl.this.rmIdentifier =
                  ResourceManagerConstants.RM_INVALID_IDENTIFIER;
              dispatcher.getEventHandler().handle(
                  new NodeManagerEvent(NodeManagerEventType.RESYNC));
              break;
            }

            lastHeartBeatID = response.getResponseId();
            List<ContainerId> containersToCleanup = response
                .getContainersToCleanup();
            if (containersToCleanup.size() != 0) {
              dispatcher.getEventHandler().handle(
                  new CMgrCompletedContainersEvent(containersToCleanup,
                      CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER));
            }
            List<ApplicationId> appsToCleanup =
                response.getApplicationsToCleanup();
            //Only start tracking for keepAlive on FINISH_APP
            trackAppsForKeepAlive(appsToCleanup);
            if (appsToCleanup.size() != 0) {
              dispatcher.getEventHandler().handle(
                  new CMgrCompletedAppsEvent(appsToCleanup));
            }
          } catch (ConnectException e) {
            //catch and throw the exception if tried MAX wait time to connect RM
            dispatcher.getEventHandler().handle(
                new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
            throw new YarnRuntimeException(e);
          } catch (Throwable e) {

            // TODO Better error handling. Thread can die with the rest of the
            // NM still running.
            LOG.error("Caught exception in status-updater", e);
          } finally {
            synchronized (heartbeatMonitor) {
              nextHeartBeatInterval = nextHeartBeatInterval <= 0 ?
                  YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS :
                    nextHeartBeatInterval;
              try {
                heartbeatMonitor.wait(nextHeartBeatInterval);
              } catch (InterruptedException e) {
                // Do Nothing
              }
            }
          }
        }
      }

      private void updateMasterKeys(NodeHeartbeatResponse response) {
        // See if the master-key has rolled over
        MasterKey updatedMasterKey = response.getContainerTokenMasterKey();
        if (updatedMasterKey != null) {
          // Will be non-null only on roll-over on RM side
          context.getContainerTokenSecretManager().setMasterKey(updatedMasterKey);
        }
       
        updatedMasterKey = response.getNMTokenMasterKey();
        if (updatedMasterKey != null) {
          context.getNMTokenSecretManager().setMasterKey(updatedMasterKey);
        }
      }
    };
View Full Code Here

            @Override
            public NodeHeartbeatResponse nodeHeartbeat(
                NodeHeartbeatRequest request) throws YarnException,
                IOException {
              NodeHeartbeatResponse response = recordFactory.newRecordInstance(
                  NodeHeartbeatResponse.class);
              try {
                response = rt.nodeHeartbeat(request);
              } catch (YarnException e) {
                LOG.info("Exception in heartbeat from node " +
View Full Code Here

    Assert.assertNotNull(nmToken
        + "Registration should cause a key-update!", nmTokenMasterKey);
   
    dispatcher.await();

    NodeHeartbeatResponse response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "First heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "First heartbeat after registration shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Even second heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Even second heartbeat after registration shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
   
    dispatcher.await();

    // Let's force a roll-over
    rm.getRMContainerTokenSecretManager().rollMasterKey();
    rm.getRMNMTokenSecretManager().rollMasterKey();

    // Heartbeats after roll-over and before activation should be fine.
    response = nm.nodeHeartbeat(true);
    Assert.assertNotNull(containerToken +
        "Heartbeats after roll-over and before activation should not err out.",
        response.getContainerTokenMasterKey());
    Assert.assertNotNull(nmToken +
        "Heartbeats after roll-over and before activation should not err out.",
        response.getNMTokenMasterKey());
   
    Assert.assertEquals(containerToken +
        "Roll-over should have incremented the key-id only by one!",
        containerTokenMasterKey.getKeyId() + 1,
        response.getContainerTokenMasterKey().getKeyId());
    Assert.assertEquals(nmToken +
        "Roll-over should have incremented the key-id only by one!",
        nmTokenMasterKey.getKeyId() + 1,
        response.getNMTokenMasterKey().getKeyId());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Second heartbeat after roll-over shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Second heartbeat after roll-over shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    // Let's force activation
    rm.getRMContainerTokenSecretManager().activateNextMasterKey();
    rm.getRMNMTokenSecretManager().activateNextMasterKey();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken
        + "Activation shouldn't cause any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken
        + "Activation shouldn't cause any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    response = nm.nodeHeartbeat(true);
    Assert.assertNull(containerToken +
        "Even second heartbeat after activation shouldn't get any key updates!",
        response.getContainerTokenMasterKey());
    Assert.assertNull(nmToken +
        "Even second heartbeat after activation shouldn't get any key updates!",
        response.getNMTokenMasterKey());
    dispatcher.await();

    rm.stop();
  }
View Full Code Here

    rm.start();

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);

    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(4000, nodeHeartbeat.getNextHeartBeatInterval());

    NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true);
    Assert.assertEquals(4000, nodeHeartbeat2.getNextHeartBeatInterval());

  }
View Full Code Here

   
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int metricCount = metrics.getNumDecommisionedNMs();

    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    // To test that IPs also work
    String ip = NetUtils.normalizeHostName("localhost");
    writeToHostsFile("host1", ip);

    rm.getNodesListManager().refreshNodes(conf);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    Assert
        .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue("Node is not decommisioned.", NodeAction.SHUTDOWN
        .equals(nodeHeartbeat.getNodeAction()));

    checkDecommissionedNMCount(rm, ++metricCount);

    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    Assert.assertEquals(metricCount, ClusterMetrics.getMetrics()
      .getNumDecommisionedNMs());
  }
View Full Code Here

    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    MockNM nm3 = rm.registerNode("localhost:4433", 1024);

    int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs();

    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));

    // To test that IPs also work
    String ip = NetUtils.normalizeHostName("localhost");
    writeToHostsFile("host2", ip);

    rm.getNodesListManager().refreshNodes(conf);

    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertTrue("The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
    checkDecommissionedNMCount(rm, ++metricCount);

    nodeHeartbeat = nm3.nodeHeartbeat(true);
    Assert.assertTrue("The decommisioned metrics are not updated",
        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
    checkDecommissionedNMCount(rm, ++metricCount);
  }
View Full Code Here

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int initialMetricCount = metrics.getNumDecommisionedNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    writeToHostsFile("host1");
    conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile
        .getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node should not have been decomissioned.",
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals("Node should have been decomissioned but is in state" +
        nodeHeartbeat.getNodeAction(),
        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

    MockNM nm1 = rm.registerNode("host1:1234", 5120);
    MockNM nm2 = rm.registerNode("host2:5678", 10240);
    ClusterMetrics metrics = ClusterMetrics.getMetrics();
    assert(metrics != null);
    int initialMetricCount = metrics.getNumDecommisionedNMs();
    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals(
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    writeToHostsFile("host2");
    conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile
        .getAbsolutePath());
    rm.getNodesListManager().refreshNodes(conf);
    nodeHeartbeat = nm1.nodeHeartbeat(true);
    Assert.assertEquals(
        "Node should not have been decomissioned.",
        NodeAction.NORMAL,
        nodeHeartbeat.getNodeAction());
    nodeHeartbeat = nm2.nodeHeartbeat(true);
    Assert.assertEquals("Node should have been decomissioned but is in state" +
        nodeHeartbeat.getNodeAction(),
        NodeAction.SHUTDOWN, nodeHeartbeat.getNodeAction());
    checkDecommissionedNMCount(rm, ++initialMetricCount);
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.