/*
Copyright (c) 2012 LinkedIn Corp.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.linkedin.d2.loadbalancer;
import com.linkedin.d2.D2BaseTest;
import com.linkedin.d2.balancer.clients.DynamicClient;
import com.linkedin.d2.balancer.util.LoadBalancerClientCli;
import com.linkedin.d2.balancer.util.LoadBalancerEchoServer;
import com.linkedin.d2.balancer.util.LoadBalancerUtil;
import com.linkedin.d2.quorum.ZKPeer;
import com.linkedin.d2.quorum.ZKQuorum;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.AfterTest;
import org.testng.annotations.Test;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
@Test (groups = {"d2integration"})
public class TestD2ZKQuorumFailover extends D2BaseTest
{
private static final Logger _log = LoggerFactory.getLogger(TestD2ZKQuorumFailover.class);
private static final int QUORUM_SIZE = 9;
private List<LoadBalancerEchoServer> _echoServers;
private String[] _zkHosts;
private ZKQuorum _quorum;
private LoadBalancerClientCli _cli;
private DynamicClient _client;
private String _zkUriString;
private void setup() throws IOException, Exception
{
// Start _quorum
_quorum = new ZKQuorum(QUORUM_SIZE);
_quorum.startAll();
_quorum.assertAllPeersUp();
_zkUriString = "zk://"+_quorum.getHosts();
_zkHosts = _quorum.getHosts().split(",");
// Register clusters/services with zookeeper _quorum
LoadBalancerClientCli.runDiscovery(_quorum.getHosts(), "/d2", D2_CONFIG_DATA);
// Echo servers startup
startAllEchoServers();
assertAllEchoServersRunning(_echoServers);
// Get LoadBalancer Client
_cli = new LoadBalancerClientCli(_quorum.getHosts(), "/d2");
_client = _cli.createZKFSTogglingLBClient(_quorum.getHosts(), "/d2", null);
assertAllEchoServersRegistered(_cli.getZKClient(), _zkUriString, _echoServers);
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
@AfterMethod
public void teardownMethod() throws Exception
{
teardown();
}
@AfterTest
public void teardownTest() throws Exception
{
teardown();
}
//D2 With _quorum Follower Peer shutdown, restart
public void testD2WithQuorumFollowerPeerServerDown() throws Exception
{
setup();
// Shutdown zk quorum follower server (non-leader member of zk _quorum)
ZKPeer follower = _quorum.getQuorumFollower();
follower.shutdownPeerZkServer();
assertFalse(follower.getZKServer().isRunning(), "Quorum Peer #"+follower.getId()+"/port "+follower.getClientPort()+" server has not shut down.");
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
// Restart follower zk server
follower.startupPeerZkServer();
// Assert requests are processed while follower is restarting
long start = System.currentTimeMillis();
while (follower.getZKServer() == null && System.currentTimeMillis() < start + TIMEOUT)
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
_quorum.assertAllPeersUp();
// Assert requests are processed after follower restarted
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
public void testD2WithQuorumFollowerPeerDown() throws Exception
{
setup();
// Shutdown zk _quorum follower peer (non-leader member of a zk _quorum)
ZKPeer follower = _quorum.getQuorumFollower();
follower.shutdownPeer();
// Assert requests are processed while follower goes down
long start = System.currentTimeMillis();
while (follower.getZKServer() != null && System.currentTimeMillis() < start + TIMEOUT)
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
assertFalse(follower.isRunning(), "_quorum Peer #"+follower.getId()+"/port "+follower.getClientPort()+" server has not shut down.");
// Restart peer
_quorum.restart(follower.getId());
// Assert requests processed while follower is restarting
start = System.currentTimeMillis();
while ((_quorum.getPeerZKServer(follower.getId()) == null || ! _quorum.getPeerZKServer(follower.getId()).isRunning()) && System.currentTimeMillis() < start + TIMEOUT)
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
// Assert requests are processed after peer restart
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
public void testD2WithQuorumFollowerServerKilled() throws Exception
{
setup();
// Kill quorum peer follower zk server
ZKPeer follower = _quorum.getQuorumFollower();
// Simulate zk server crash
follower.killPeerZkServer();
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
assertFalse(follower.getZKServer().isRunning(), "quorum Peer #"+follower.getId()+"/port "+follower.getClientPort()+" server has not shut down.");
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
// D2 With _quorum Leader Peer shutdown, restart
public void testD2WithQuorumLeaderPeerServerDown() throws Exception
{
setup();
// Shutdown quorum leader zk server
ZKPeer peer = _quorum.getQuorumLeader();
if (peer != null)
{
peer.shutdownPeerZkServer();
assertFalse(peer.getZKServer().isRunning(), "_quorum Peer #"+peer.getId()+"/port "+peer.getClientPort()+" server has not shut down.");
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
// Restart leader zk server
peer.startupPeerZkServer();
// Assert requests are processed while peer is restarting
long start = System.currentTimeMillis();
while (peer.getZKServer() == null && System.currentTimeMillis() < start + TIMEOUT)
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
_quorum.assertAllPeersUp();
// After restart
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
else
{
fail("Quorum is unable to identify zk leader. No tests were executed.");
}
}
public void testD2WithQuorumLeaderPeerDown() throws Exception
{
setup();
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
// Shutdown zk _quorum leader peer
ZKPeer peer = _quorum.getQuorumLeader();
if (peer != null)
{
int peerId = peer.getId();
peer.shutdownPeer();
// Verify requests are processed while leader goes down
long start = System.currentTimeMillis();
while (_quorum.getQuorumLeader() != null && _quorum.getQuorumLeader().equals(peer) && System.currentTimeMillis() < (start + TIMEOUT))
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
start = System.currentTimeMillis();
while (_quorum.getQuorumLeader() == null && System.currentTimeMillis() < (start + TIMEOUT))
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA); // wait while zookeeper is establishing a new leader
}
if (_quorum.getQuorumLeader() == null)
{
_quorum.restartAll();
}
else
{
assertTrue(! _quorum.getQuorumLeader().equals(peer), "No new _quorum leader was established. New leader id=" + _quorum.getQuorumLeader().getId());
// Restart leader peer
_quorum.restart(peerId);
}
start = System.currentTimeMillis();
peer = _quorum.get(peerId);
// Verify requests are processed while peer is restarting
while (System.currentTimeMillis() < (start + TIMEOUT) && ( peer.getZKServer() == null || ! _quorum.areAllPeersUp() || ! peer.getZKServer().isRunning()))
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
// After restart
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
else
{
fail("Quorum is unable to identify zk leader. No tests were executed.");
}
}
public void testD2WithQuorumLeaderServerKilled() throws Exception
{
setup();
// Kill _quorum leader peer zk server
ZKPeer peer = _quorum.getQuorumLeader();
if (peer != null)
{
peer.killPeerZkServer();
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
assertFalse(peer.getZKServer().isRunning(),"_quorum Peer #"+peer.getId()+"/port "+peer.getClientPort()+" server has not shut down.");
// Restart peer zk server
peer.startupPeerZkServer();
// Assert requests are processed while peer is restarting
long start = System.currentTimeMillis();
while (peer.getZKServer() == null && System.currentTimeMillis() < start + TIMEOUT)
{
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
_quorum.assertAllPeersUp();
// After restart
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
else
{
fail("Quorum unable to identify zk leader. No tests were executed.");
}
}
public void testD2WithQuorumPeerAdded() throws Exception
{
setup();
// Add new peer to ZK _quorum
int newPeerId = _quorum.addPeer();
_quorum.start(newPeerId);
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
// Remove peer
_quorum.get(newPeerId).shutdown(true);
assertQuorumProcessAllRequests(D2_CONFIG_DATA);
}
private void assertQuorumProcessAllRequests(String clustersData) throws Exception
{
_quorum.printPeersStats();
assertQuorumProcessAllRequests(1, clustersData, _zkUriString, _cli, _client, null);
}
private void startAllEchoServers() throws Exception
{
_echoServers = new ArrayList<LoadBalancerEchoServer>();
Map<Integer, Double> partitionWeight = new HashMap<Integer, Double>();
partitionWeight.put(new Integer(1), new Double(1.0d));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT1_1, "cluster-1", "service-1_1", "service-1_2", "service-1_3" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT1_2, "cluster-1", "service-1_1", "service-1_2", "service-1_3" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT2_1, "cluster-2", "service-2_1", "service-2_2", "service-2_3" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT2_2, "cluster-2", "service-2_1", "service-2_2", "service-2_3" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT3, "cluster-3", "service-3_1", "service-3_2", "service-3_3" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT4, "cluster-4", partitionWeight, "service-4_11", "service-4_12", "service-4_13" ));
_echoServers.add(startEchoServer(getHost(_zkHosts[0]), getPort(_zkHosts[0]), ECHO_SERVER_HOST, ECHO_SERVER_PORT5, "cluster-4", partitionWeight, "service-4_11", "service-4_12", "service-4_13" ));
}
private void teardown() throws Exception
{
try
{
_quorum.shutdownAll(true);
}
catch (Exception e)
{
}
try
{
LoadBalancerUtil.syncShutdownClient(_client, _log);
}
catch (Exception e)
{
}
try
{
_cli.shutdown();
}
catch (Exception e)
{
}
try
{
stopAllEchoServers(_echoServers);
}
catch (Exception e)
{
}
cleanupTempDir();
}
}