Package com.cloudera.flume.agent.diskfailover

Source Code of com.cloudera.flume.agent.diskfailover.TestDiskFailoverAgent

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.agent.diskfailover;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.IOException;

import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.agent.DirectMasterRPC;
import com.cloudera.flume.agent.FlumeNode;
import com.cloudera.flume.agent.LivenessManager;
import com.cloudera.flume.agent.LogicalNode;
import com.cloudera.flume.agent.MasterRPC;
import com.cloudera.flume.agent.durability.NaiveFileWALDeco;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.core.Driver;
import com.cloudera.flume.core.Driver.DriverState;
import com.cloudera.flume.master.FlumeMaster;
import com.cloudera.util.FileUtil;
import com.cloudera.util.NetUtils;

// TODO on failures, these tests leak data to the flume-${user} dir.
public class TestDiskFailoverAgent {
  public static final Logger LOG = LoggerFactory
      .getLogger(TestDiskFailoverAgent.class);

  FlumeMaster master = null;
  FlumeConfiguration cfg;
  File tmpDir;

  @Before
  public void setCfg() throws IOException {
    LOG.info("============================================================");

    tmpDir = FileUtil.mktempdir();

    // Isolate tests by only using simple cfg store
    cfg = FlumeConfiguration.createTestableConfiguration();
    cfg.set(FlumeConfiguration.MASTER_STORE, "memory");
    cfg.set(FlumeConfiguration.WEBAPPS_PATH, "build/webapps");
    cfg.set(FlumeConfiguration.AGENT_LOG_DIR_NEW, tmpDir.getAbsolutePath());
    org.apache.log4j.Logger.getLogger(NaiveFileWALDeco.class).setLevel(
        Level.DEBUG);
  }

  @After
  public void shutdownMaster() throws IOException {
    if (master != null) {
      master.shutdown();
      master = null;
    }
    FileUtil.rmr(tmpDir);
  }

  /**
   * This test starts a DFO agent that attempts to go to a port that shouldn't
   * be open. This triggers the error recovery mechanism. We then wait for 10s
   * and then simulate the node doing a heartbeat to a master which tells that
   * that node is decomissioned. As part of decomissioning, the act of closing
   * the dfo agent should not hang the node.
   *
   * @throws IOException
   * @throws FlumeSpecException
   */
  @Test
  public void testActiveDFOClose() throws InterruptedException, IOException,
      FlumeSpecException {
    final String lnode = "DFOSimple";
    final FlumeMaster master = new FlumeMaster(cfg);
    MasterRPC rpc = new DirectMasterRPC(master);

    final FlumeNode node = new FlumeNode(rpc, false, false);
    // should have nothing.
    assertEquals(0, node.getLogicalNodeManager().getNodes().size());
    LivenessManager liveMan = node.getLivenessManager();
    // update config node to something that will be interrupted.
    LOG.info("setting to invalid dfo host");
    master.getSpecMan().setConfig(lnode, "flow", "asciisynth(0)",
        "agentDFOSink(\"localhost\", 12345)");
    master.getSpecMan().addLogicalNode(NetUtils.localhost(), lnode);
    liveMan.heartbeatChecks();

    LogicalNode n = node.getLogicalNodeManager().get(lnode);
    Driver d = n.getDriver();
    assertTrue("Attempting to start driver timed out",
        d.waitForAtLeastState(DriverState.ACTIVE, 10000));

    // update config node to something that will be interrupted.
    LOG.info("!!! decommissioning node on master");
    master.getSpecMan().removeLogicalNode(lnode);

    // as node do heartbeat and update due to decommission
    liveMan.heartbeatChecks();
    LOG.info("!!! node should be decommissioning on node");
    assertTrue("Attempting to decommission driver timed out",
        d.waitForAtLeastState(DriverState.IDLE, 10000));

    assertEquals("Only expected default logical node", 1, node
        .getLogicalNodeManager().getNodes().size());
    assertNull(node.getLogicalNodeManager().get(lnode));
  }

  /**
   * This test starts a DFO agent that attempts to go to a port that shouldn't
   * be open. This triggers the error recovery mechanism. We then wait for 10s
   * and then simulate the node doing a heartbeat to a master which tells that
   * that node is decommissioned. As part of decommissioning, the act of closing
   * the dfo agent should not hang the node.
   *
   * This test differs from the previous by having an bad dns name/request that
   * will eventually fail (ubuntu/java1.6 takes about 10s)
   *
   * @throws IOException
   * @throws FlumeSpecException
   */
  @Test
  public void testActiveDFOCloseBadDNS() throws InterruptedException,
      IOException, FlumeSpecException {
    final String lnode = "DFOBadDNS";
    final FlumeMaster master = new FlumeMaster(cfg);
    MasterRPC rpc = new DirectMasterRPC(master);

    final FlumeNode node = new FlumeNode(rpc, false, false);
    // should have nothing.
    assertEquals(0, node.getLogicalNodeManager().getNodes().size());
    LivenessManager liveMan = node.getLivenessManager();
    // update config node to something that will be interrupted.
    LOG.info("setting to invalid dfo host");
    master.getSpecMan().setConfig("node2", "flow", "asciisynth(0)",
        "agentDFOSink(\"invalid\", 12346)");
    master.getSpecMan().addLogicalNode(NetUtils.localhost(), lnode);
    liveMan.heartbeatChecks();

    LogicalNode n = node.getLogicalNodeManager().get(lnode);
    Driver d = n.getDriver();
    assertTrue("Attempting to start driver timed out",
        d.waitForAtLeastState(DriverState.ACTIVE, 20000));

    // update config node to something that will be interrupted.
    LOG.info("!!! decommissioning node on master");
    master.getSpecMan().removeLogicalNode(lnode);

    liveMan.heartbeatChecks();
    LOG.info("!!! logical node should be decommissioning on node");
    assertTrue("Attempting to start driver timed out",
        d.waitForAtLeastState(DriverState.IDLE, 20000));
    LOG.info("Clean close.");

    // false means timeout, takes about 10 seconds to shutdown.
    assertTrue("Attempting to decommission driver timed out",
        d.waitForAtLeastState(DriverState.IDLE, 10000));

    assertEquals("Only expected default logical node", 1, node
        .getLogicalNodeManager().getNodes().size());
    assertNull(node.getLogicalNodeManager().get(lnode));
  }

  /**
   * This test starts a E2E agent that attempts to go to a port that shouldn't
   * be open. This triggers the error recovery mechanism. We then wait for 10s
   * and then simulate the node doing a heartbeat to a master which tells that
   * that node is decommissioned. As part of decommissioning, the act of closing
   * the E2E agent should not hang the node.
   */
  @Test
  public void testActiveE2ECloseSimple() throws InterruptedException,
      IOException, FlumeSpecException {
    final String lnode = "e2eSimple";
    final FlumeMaster master = new FlumeMaster(cfg);
    MasterRPC rpc = new DirectMasterRPC(master);

    final FlumeNode node = new FlumeNode(rpc, false, false);
    // should have nothing.
    assertEquals(0, node.getLogicalNodeManager().getNodes().size());

    LivenessManager liveMan = node.getLivenessManager();
    // update config node to something that will be interrupted.
    LOG.info("setting to invalid e2e host");
    master.getSpecMan().setConfig(lnode, "flow", "asciisynth(0)",
        "agentE2ESink(\"localhost\", 12347)");
    master.getSpecMan().addLogicalNode(NetUtils.localhost(), lnode);
    liveMan.heartbeatChecks();

    // TODO It we only wait for opening state, this test can hang
    LogicalNode n = node.getLogicalNodeManager().get(lnode);
    Driver d = n.getDriver();
    assertTrue("Attempting to start driver timed out",
        d.waitForAtLeastState(DriverState.ACTIVE, 10000));

    // update config node to something that will be interrupted.
    LOG.info("!!! decommissioning node on master");
    master.getSpecMan().removeLogicalNode(lnode);
    liveMan.heartbeatChecks();
    assertTrue("Attempting to stop driver timed out",
        d.waitForAtLeastState(DriverState.ERROR, 15000));
  }

  /**
   * This test starts a E2E agent that attempts to go to a port that shouldn't
   * be open. This triggers the error recovery mechanism. We then wait for 10s
   * and then simulate the node doing a heartbeat to a master which tells that
   * that node is decommissioned. As part of decommissioning, the act of closing
   * the E2E agent should not hang the node.
   *
   * This test differs from the previous by having an bad dns name/request that
   * will eventually fail (ubuntu/java1.6 takes about 10s)
   *
   * @throws IOException
   * @throws FlumeSpecException
   */
  @Test
  public void testActiveE2ECloseBadDNS() throws InterruptedException,
      IOException, FlumeSpecException {
    final String lnode = "e2eBadDNS";
    final FlumeMaster master = new FlumeMaster(cfg);
    MasterRPC rpc = new DirectMasterRPC(master);

    final FlumeNode node = new FlumeNode(rpc, false, false);
    // should have nothing.
    assertEquals(0, node.getLogicalNodeManager().getNodes().size());

    LivenessManager liveMan = node.getLivenessManager();
    // update config node to something that will be interrupted.
    LOG.info("setting to invalid e2e host");
    master.getSpecMan().setConfig(lnode, "flow", "asciisynth(0)",
        "agentE2ESink(\"localhost\", 12348)");
    master.getSpecMan().addLogicalNode(NetUtils.localhost(), lnode);
    liveMan.heartbeatChecks();

    // TODO It we only wait for opening state, this test can hang
    LogicalNode n = node.getLogicalNodeManager().get(lnode);
    Driver d = n.getDriver();
    assertTrue("Attempting to start driver timed out",
        d.waitForAtLeastState(DriverState.ACTIVE, 15000));

    // update config node to something that will be interrupted.
    LOG.info("!!! decommissioning node on master");
    master.getSpecMan().removeLogicalNode(lnode);
    liveMan.heartbeatChecks();
    assertTrue("Attempting to stop driver timed out",
        d.waitForAtLeastState(DriverState.ERROR, 15000));
  }

}
TOP

Related Classes of com.cloudera.flume.agent.diskfailover.TestDiskFailoverAgent

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.