Package com.cloudera.flume.agent

Source Code of com.cloudera.flume.agent.LivenessManager$HeartbeatThread

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.cloudera.flume.agent;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.agent.durability.WALCompletionNotifier;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.FlumeConfigData;
import com.cloudera.flume.handlers.endtoend.AckListener.Empty;
import com.cloudera.util.Clock;
import com.google.common.base.Preconditions;

/**
* This manages heartbeating the node with the master, notifications of
* configuration updates, and spawning/decommissioning of logical nodes.
*
* TODO (jon) rename to HeartbeatManager
*/
public class LivenessManager {
  static final Logger LOG = LoggerFactory.getLogger(LivenessManager.class);
  final long BACKOFF_MILLIS;

  MasterRPC master;
  LogicalNodeManager nodesman;
  HeartbeatThread t;
  final WALAckManager ackcheck;
  final WALCompletionNotifier walman;

  class RetryAckListener extends Empty {
    @Override
    public void end(String group) throws IOException {
      walman.toAcked(group);
    }

    @Override
    public void expired(String group) throws IOException {
      walman.retry(group);
    }
  };

  /**
   * Create a liveness manager with the specified managers.
   *
   * LogicalNodeManager is necessary for tracking physical/logical node
   * mappings. MasterRPC is the connection to the master, WALCompletionNotifier
   * is necessary for check on acks
   */
  public LivenessManager(LogicalNodeManager nodesman, MasterRPC master,
      WALCompletionNotifier walman) {
    Preconditions.checkNotNull(nodesman);
    Preconditions.checkNotNull(master);
    BACKOFF_MILLIS = FlumeConfiguration.get().getHeartbeatBackoff();
    this.walman = walman;
    this.nodesman = nodesman;
    this.master = master;
    this.t = new HeartbeatThread();
    this.ackcheck = new WALAckManager(master, new RetryAckListener(),
        FlumeConfiguration.get().getAgentAckedRetransmit());
  }

  /**
   * Checks against the master to get new physical nodes or to learn about
   * decommissioned logical nodes
   *
   * Invariant: There is always at least logical per physical node. When there
   * is one, it has the same name as the physical node.
   */
  public void checkLogicalNodes() throws IOException, InterruptedException {
    // TODO (jon) Make this a single batched rpc call instead of
    // multiple calls

    String physNode = nodesman.getPhysicalNodeName();
    // get logical nodes list for this node.
    List<String> lns = master.getLogicalNodes(physNode);
    if (!lns.contains(physNode)) {
      // physical node node present? make sure it stays around.
      lns = new ArrayList<String>(lns); // copy the unmodifiable list
      lns.add(physNode);
    }
    for (String ln : lns) {
      // a logical node is not present? spawn it.
      if (nodesman.get(ln) == null) {
        try {
          nodesman.spawn(ln, "null", "null");
        } catch (FlumeSpecException e) {
          LOG.error("This should never happen", e);
        }
      }
    }
    // Update the Chokeinformation for the ChokeManager

    FlumeNode.getInstance().getChokeManager().updateChokeLimitMap(
        master.getChokeMap(physNode));

    nodesman.decommissionAllBut(lns);
  }

  /**
   * Checks registered nodes to see if they need a new configuraiton.
   */
  public void checkLogicalNodeConfigs() throws IOException {
    // TODO (jon) batch all these rpc requests into one multi-part rpc
    // request.

    for (LogicalNode nd : nodesman.getNodes()) {
      boolean needsCfg = master.heartbeat(nd);
      if (needsCfg) {
        final FlumeConfigData data = master.getConfig(nd);
        if (data == null) {
          LOG.debug("Logical Node '" + nd.getName()
              + "' not configured on master");
        }
        final LogicalNode node = nd;
        // TODO This is quite gross, but prevents heartbeat from blocking
        new Thread("SpawningLogicalNode " + nd.getName()) {
          public void run() {
            node.checkConfig(data);
          }
        }.start();
      }
    }
  }

  /**
   * All the core functionality of a heartbeat accessible without having to be
   * in the heartbeat thread.
   */
  public void heartbeatChecks() throws IOException, InterruptedException {
    // these will call ensure open on the master
    checkLogicalNodes();

    checkLogicalNodeConfigs();

    // check for end to end acks.
    ackcheck.checkAcks(); // check for acks on master

    // check local ack ages. If too old, retry those event groups.
    ackcheck.checkRetry();

  }

  /**
   * This thread periodically contacts the master with a heartbeat.
   */
  class HeartbeatThread extends Thread {
    volatile boolean done = false;
    long backoff = BACKOFF_MILLIS;
    long backoffLimit = FlumeConfiguration.get().getNodeHeartbeatBackoffLimit();
    long heartbeatPeriod = FlumeConfiguration.get().getConfigHeartbeatPeriod();
    CountDownLatch stopped = new CountDownLatch(1);

    HeartbeatThread() {
      super("Heartbeat");
    }

    public void run() {
      try {
        while (!done) {
          try {
            heartbeatChecks();
            backoff = BACKOFF_MILLIS; // was successful, reset backoff

            Clock.sleep(heartbeatPeriod);

          } catch (Exception e) {
            backoff *= 2; // sleep twice as long
            backoff = backoff > backoffLimit ? backoffLimit : backoff;

            LOG.warn("Connection to master(s) failed, " + e.getMessage()
                + ". Backing off for " + backoff + " ms ");
            LOG.debug("Current master is " + master.toString(), e);

            try {
              master.close();
            } catch (IOException e1) {
              LOG.error("Failed when attempting to close master", e1);
            }

            Clock.sleep(backoff);
          }
        }

      } catch (InterruptedException e) {
        LOG.error("Heartbeat interrupted, this is not expected!", e);
      }
      stopped.countDown();
    }

  };

  /**
   * Starts the heartbeat thread and then returns.
   */
  public void start() {
    t.start();
  }

  public void stop() {
    CountDownLatch stopped = t.stopped;
    t.done = true;
    try {
      stopped.await();
    } catch (InterruptedException e) {
      LOG.error("Problem waiting for livenessManager to stop", e);
    }
  }

  public WALAckManager getAckChecker() {
    return ackcheck;
  }
}
TOP

Related Classes of com.cloudera.flume.agent.LivenessManager$HeartbeatThread

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.