/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.FileOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import java.util.ArrayList;
import java.util.Random;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.util.concurrent.atomic.AtomicInteger;
import junit.framework.Assert;
import org.apache.zookeeper.server.NIOServerCnxnFactory;
import org.apache.zookeeper.server.ZooKeeperServer;
import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.server.ServerConfig;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster.ShutdownInterface;
import org.apache.hadoop.hdfs.MiniDFSCluster.ShutDownUtil;
import org.apache.hadoop.hdfs.protocol.AvatarConstants;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.AvatarNode;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.Standby;
import org.apache.hadoop.hdfs.server.namenode.NNStorageDirectoryRetentionManager;
import org.apache.hadoop.hdfs.server.datanode.AvatarDataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.net.StaticMapping;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.DNSToSwitchMapping;
/**
 * This class manages an Avatar/HDFS cluster with all nodes running
 * locally. To synchronize the AvatarNodes, it uses a local ZooKeeper
 * server.
 */
public class MiniAvatarCluster {
// Prefix used to synthesize nameservice ids when federation is requested
// but no ids were supplied in the configuration.
public static final String NAMESERVICE_ID_PREFIX = "nameserviceId";
// Next nameservice id suffix; presumably consumed by getNSId() (defined
// elsewhere in this file) — TODO confirm.
public static int currNSId = 0;
// Number of attempts when instantiating an AvatarNode; overridable via
// Builder.instantionRetries().
public static int instantiationRetries = 15;
// Journal id under which this cluster's edits live in the journal cluster.
public static final String JID = "test-journal";
/**
 * Bundle of a running AvatarDataNode together with the configuration and
 * command-line arguments it was started with, so the node can later be
 * restarted or shut down.
 */
public static class DataNodeProperties implements ShutdownInterface {
  public AvatarDataNode datanode;
  public Configuration conf;
  public String[] dnArgs;

  DataNodeProperties(AvatarDataNode node, Configuration nodeConf,
      String[] nodeArgs) {
    this.datanode = node;
    this.conf = nodeConf;
    this.dnArgs = nodeArgs;
  }

  /** Stops the wrapped datanode, if one was ever assigned. */
  @Override
  public void shutdown() throws IOException {
    AvatarDataNode dn = this.datanode;
    if (dn == null) {
      return;
    }
    dn.shutdown();
  }
}
/** State of an AvatarNode instance as tracked by the cluster. */
public static enum AvatarState {
  ACTIVE,   // serving as the primary namenode
  STANDBY,  // following the primary
  DEAD      // not running
}
/**
 * Handle on a single AvatarNode plus the ports it was assigned and the
 * startup option (node zero / node one) it was launched with.
 */
public static class AvatarInfo implements ShutdownInterface {
  public AvatarNode avatar;  // the node; null if instantiation failed
  AvatarState state;         // ACTIVE/STANDBY/DEAD as tracked by the cluster
  int nnPort;                // client RPC port
  int nnDnPort;              // datanode protocol port
  int httpPort;              // HTTP info port
  int rpcPort;               // generic RPC port
  String startupOption;      // NODEZERO/NODEONE name used at launch

  AvatarInfo(AvatarNode avatar, AvatarState state,
      int nnPort, int nnDnPort, int httpPort,
      int rpcPort, String startupOption) {
    this.avatar = avatar;
    this.state = state;
    this.nnPort = nnPort;
    this.nnDnPort = nnDnPort;
    this.httpPort = httpPort;
    this.rpcPort = rpcPort;
    this.startupOption = startupOption;
  }

  /** Shuts down the avatar node (passing true), if one exists. */
  @Override
  public void shutdown() throws IOException {
    if (this.avatar != null)
      this.avatar.shutdown(true);
  }
}
private static final Log LOG = LogFactory.getLog(MiniAvatarCluster.class);
// Fallback test directory when the "test.build.data" property is not set.
private static final String DEFAULT_TEST_DIR =
    "build/contrib/highavailability/test/data";
// Absolute root under which all cluster state (avatar dirs, datanode dirs,
// zookeeper files) is created.
public static final String TEST_DIR =
    new File(System.getProperty("test.build.data", DEFAULT_TEST_DIR)).
    getAbsolutePath();
// Monotonically increasing id handed to each cluster built in this JVM.
private static final AtomicInteger ClusterId = new AtomicInteger(1);
private static final String ZK_DATA_DIR = TEST_DIR + "/zk.data";
private static final String ZK_CONF_FILE = TEST_DIR + "/zk.conf";
// Port the in-process ZooKeeper server listens on; chosen once per JVM.
public static final int zkClientPort = MiniDFSCluster.getFreePort();
private static String baseAvatarDir;  // root for namenode image/edits dirs
private static String dataDir;        // root for datanode storage dirs
private int numDataNodes;             // number of datanodes to start
private boolean format;               // format namenodes/datanodes on start
private String[] racks;               // optional rack per datanode
private String[] hosts;               // optional hostname per datanode
private boolean federation;           // true when running >1 nameservice
private NameNodeInfo[] nameNodes;     // one entry per nameservice
private final boolean enableQJM;      // use quorum journal for shared dirs
private StartupOption startOpt;       // optional avatar startup option
private final int numJournalNodes;    // size of the journal cluster
private MiniJournalCluster journalCluster = null;  // non-null iff QJM in use
private Configuration conf;           // base configuration for the cluster
/**
 * Tells whether this cluster is backed by a quorum journal cluster.
 * Some test cases only work with FileJournalManager and use this to skip.
 */
public boolean isUsingJournalCluster() {
  return null != journalCluster;
}
/**
 * Returns the journal cluster backing this cluster.
 *
 * @throws IllegalArgumentException if QJM was not enabled
 */
public MiniJournalCluster getJournalCluster() {
  if (journalCluster != null) {
    return journalCluster;
  }
  throw new IllegalArgumentException(
      "MiniAvatarCluster not configured to use journal cluster");
}
/**
 * Per-nameservice bookkeeping: the directory layout, the pre-allocated
 * ports for the logical (failover) address and for each of the two avatar
 * instances, and the derived client/server configurations.
 */
public class NameNodeInfo {
  Configuration conf;
  // Both avatars of this nameservice; populated via setAvatarNodes().
  public ArrayList<AvatarInfo> avatars = null;
  // Shared image/edits dirs per instance (used when QJM is disabled).
  private final String fsimage0Dir;
  private final String fsimage1Dir;
  private final String fsedits0Dir;
  private final String fsedits1Dir;
  // Local dirs; contain a wildcard replaced with "zero"/"one" per instance.
  private final String fsimagelocalDir;
  private final String fseditslocalDir;
  // Logical ports plus the physical ports of instance 0 and instance 1.
  private final int nnPort;
  private final int nn0Port;
  private final int nn1Port;
  private final int nnDnPort;
  private final int nnDn0Port;
  private final int nnDn1Port;
  private final int httpPort;
  private final int http0Port;
  private final int http1Port;
  private final int rpcPort;
  private final int rpc0Port;
  private final int rpc1Port;
  private Configuration clientConf;  // client-side view of this nameservice
  private Configuration a0Conf;      // server conf for avatar instance 0
  private Configuration a1Conf;      // server conf for avatar instance 1
  private final String avatarDir;
  String nameserviceId;

  // Allocates every port up front so the configs below can be fully wired
  // before anything starts. (nnIndex is currently unused here.)
  NameNodeInfo(int nnIndex) {
    avatarDir = baseAvatarDir;
    fsimagelocalDir = avatarDir + "/fsimagelocal-"
        + FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD;
    fseditslocalDir = avatarDir + "/fseditslocal-"
        + FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD;
    fsimage0Dir = avatarDir + "/fsimage0";
    fsimage1Dir = avatarDir + "/fsimage1";
    fsedits0Dir = avatarDir + "/fsedits0";
    fsedits1Dir = avatarDir + "/fsedits1";
    rpcPort = nnPort = MiniDFSCluster.getFreePort();
    nnDnPort = MiniDFSCluster.getFreePort();
    httpPort = MiniDFSCluster.getFreePort();
    rpc0Port = nn0Port = MiniDFSCluster.getFreePorts(2);
    nnDn0Port = MiniDFSCluster.getFreePort();
    http0Port = MiniDFSCluster.getFreePort();
    rpc1Port = nn1Port = MiniDFSCluster.getFreePorts(2);
    nnDn1Port = MiniDFSCluster.getFreePort();
    http1Port = MiniDFSCluster.getFreePort();
  }

  public void setAvatarNodes(ArrayList<AvatarInfo> avatars) {
    this.avatars = avatars;
  }

  // Deletes the in_use.lock files of the given instance ("zero" or "one")
  // so its storage directories can be reused.
  void unlockStorageDirectory(String instance) {
    if (!instance.equals("zero") && !instance.equals("one")) {
      throw new IllegalArgumentException(
          "Specify one or zero, invalid argument : " + instance);
    }
    new File(fsimagelocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, instance), "in_use.lock")
        .delete();
    new File(fseditslocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, instance), "in_use.lock")
        .delete();
  }

  // Builds the client configuration: logical address plus the "0"/"1"
  // suffixed per-instance addresses used by DistributedAvatarFileSystem.
  public void initClientConf(Configuration conf) {
    clientConf = new Configuration(conf);
    clientConf.set("fs.default.name", "hdfs://localhost:" + nnPort);
    clientConf.set("fs.default.name0", "hdfs://127.0.0.1:" + nn0Port);
    clientConf.set("fs.default.name1", "hdfs://127.0.0.1:" + nn1Port);
    clientConf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY, "localhost:"
        + nnPort);
    clientConf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY + "0", "127.0.0.1:"
        + nn0Port);
    clientConf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY + "1", "127.0.0.1:"
        + nn1Port);
    clientConf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort);
    clientConf.set("dfs.namenode.dn-address0", "127.0.0.1:" + nnDn0Port);
    clientConf.set("dfs.namenode.dn-address1", "127.0.0.1:" + nnDn1Port);
    clientConf.set("fs.hdfs.impl",
        "org.apache.hadoop.hdfs.DistributedAvatarFileSystem");
    clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
    // Lower the number of retries to close connections quickly.
    clientConf.setInt("ipc.client.connect.max.retries", 3);
  }

  // Wires this nameservice's ports, shared dirs (QJM or local), and test
  // knobs into the shared cluster configuration.
  public void initGeneralConf(Configuration conf, String nameserviceId) {
    // overwrite relevant settings
    initClientConf(conf);
    this.nameserviceId = nameserviceId;
    // avatar nodes
    if (federation) {
      conf.set("dfs.namenode.rpc-address0", "127.0.0.1:" + rpc0Port);
      conf.set("dfs.namenode.rpc-address1", "127.0.0.1:" + rpc1Port);
    } else {
      conf.set("fs.default.name", "hdfs://localhost:" + nnPort);
      conf.set("fs.default.name0", "hdfs://localhost:" + nn0Port);
      conf.set("fs.default.name1", "hdfs://localhost:" + nn1Port);
      conf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort);
      conf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY, "localhost:" + nnPort);
      conf.set("dfs.http.address", "127.0.0.1:" + httpPort);
    }
    // Enable avatar testing framework for unit tests.
    conf.setFloat("dfs.avatarnode.failover.sample.percent", 1.0f);
    conf.set("dfs.avatarnode.failover.test.data.dir", avatarDir);
    conf.set("dfs.namenode.dn-address0", "127.0.0.1:" + nnDn0Port);
    conf.set("dfs.namenode.dn-address1", "127.0.0.1:" + nnDn1Port);
    conf.set("dfs.http.address0", "127.0.0.1:" + http0Port);
    conf.set("dfs.http.address1", "127.0.0.1:" + http1Port);
    conf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY + "0", "127.0.0.1:"
        + nn0Port);
    conf.set(NameNode.DFS_NAMENODE_RPC_ADDRESS_KEY + "1", "127.0.0.1:"
        + nn1Port);
    // set the shared edits and image dirs.
    if (enableQJM) {
      String journalURI = journalCluster.getQuorumJournalURI(JID).toString();
      // set the edits dir
      conf.set("dfs.name.edits.dir.shared0", journalURI + "/zero");
      conf.set("dfs.name.edits.dir.shared1", journalURI + "/one");
      // set the image dir
      conf.set("dfs.name.dir.shared0", journalURI + "/zero");
      conf.set("dfs.name.dir.shared1", journalURI + "/one");
      conf.setBoolean("dfs.force.remote.image", true);
    } else {
      conf.set("dfs.name.edits.dir.shared0", fsedits0Dir);
      conf.set("dfs.name.edits.dir.shared1", fsedits1Dir);
      conf.set("dfs.name.dir.shared0", fsimage0Dir);
      conf.set("dfs.name.dir.shared1", fsimage1Dir);
    }
    conf.setInt("dfs.safemode.extension", 1000);
    // These two ipc parameters help RPC connections to shut down quickly in
    // unit tests.
    conf.setInt("ipc.client.connect.max.retries", 3);
    conf.setInt("ipc.client.connect.timeout", 2000);
    // We need to disable the filesystem cache so that unit tests and
    // MiniAvatarCluster don't end up sharing FileSystem objects.
    if (federation) {
      // Move service-specific keys under their nameservice-qualified names.
      for (String key: AvatarNode.AVATARSERVICE_SPECIFIC_KEYS) {
        String value = conf.get(key);
        if (value != null) {
          String newKey = DFSUtil.getNameServiceIdKey(key, nameserviceId);
          conf.set(newKey, value);
          conf.set(key, "");
        }
      }
      String rpcKey = DFSUtil.getNameServiceIdKey(
          AvatarNode.DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId);
      conf.set(rpcKey, "localhost:" + rpcPort);
      String dnKey = DFSUtil.getNameServiceIdKey(
          NameNode.DATANODE_PROTOCOL_ADDRESS, nameserviceId);
      conf.set(dnKey, "localhost:" + nnDnPort);
      String httpKey = DFSUtil.getNameServiceIdKey(
          NameNode.DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId);
      conf.set(httpKey, "localhost:" + httpPort);
    }
  }

  // Snapshots newConf and derives the two per-instance server configs,
  // pointing each at the local image/edits dirs and its own checkpoint dir.
  public void updateAvatarConf(Configuration newConf) {
    conf = new Configuration(newConf);
    if (federation) {
      conf.set(FSConstants.DFS_FEDERATION_NAMESERVICE_ID, nameserviceId);
    }
    // server config for avatar nodes
    a0Conf = new Configuration(conf);
    a1Conf = new Configuration(conf);
    a0Conf.set("dfs.name.dir", fsimagelocalDir);
    a0Conf.set("dfs.name.edits.dir", fseditslocalDir);
    a0Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint0");
    a1Conf.set("dfs.name.dir", fsimagelocalDir);
    a1Conf.set("dfs.name.edits.dir", fseditslocalDir);
    a1Conf.set("fs.checkpoint.dir", avatarDir + "/checkpoint1");
  }

  // Creates all local and shared image/edits directories for both instances.
  public void createAvatarDirs() {
    new File(fsimagelocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "zero")).mkdirs();
    new File(fsimagelocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "one")).mkdirs();
    new File(fsimage0Dir).mkdirs();
    new File(fsimage1Dir).mkdirs();
    new File(fseditslocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "zero")).mkdirs();
    new File(fseditslocalDir.replaceAll(
        FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "one")).mkdirs();
    new File(fsedits0Dir).mkdirs();
    new File(fsedits1Dir).mkdirs();
  }

  // Recursively deletes every directory created by createAvatarDirs().
  public void cleanupAvatarDirs() throws IOException {
    String[] files = new String[] {
        fsimagelocalDir.replaceAll(
            FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "zero"),
        fsimagelocalDir.replaceAll(
            FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "one"),
        fsimage0Dir,
        fsimage1Dir,
        fseditslocalDir.replaceAll(
            FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "zero"),
        fseditslocalDir.replaceAll(
            FSConstants.DFS_NAMENODE_NAME_DIR_WILDCARD, "one"),
        fsedits0Dir, fsedits1Dir
    };
    for (String filename : files) {
      FileUtil.fullyDelete(new File(filename));
    }
  }

  public String getNameserviceId() {
    return nameserviceId;
  }
}
// Single shared in-process ZooKeeper server used by all clusters in the JVM.
private static ZooKeeperServer zooKeeper;
// Connection factory serving client connections for the server above.
private static NIOServerCnxnFactory cnxnFactory;
// Properties of every datanode started by this cluster, in start order.
private ArrayList<DataNodeProperties> dataNodes =
    new ArrayList<DataNodeProperties>();

static {
  // Seed the datanode "secure" random with a plain Random so tests do not
  // block waiting for entropy.
  DataNode.setSecureRandom(new Random());
}
/**
 * Fluent builder for {@link MiniAvatarCluster}. Defaults: one namenode
 * pair, one datanode, formatting enabled, QJM enabled with three journal
 * nodes, no federation.
 */
public static class Builder {
  private Configuration conf;
  private int numDataNodes = 1;
  private boolean format = true;
  private String[] racks = null;
  private String[] hosts = null;
  private int numNameNodes = 1;
  private boolean federation = false;
  private long[] simulatedCapacities = null;
  private int numJournalNodes = 3;
  private boolean enableQJM = true;
  private MiniJournalCluster journalCluster = null;
  private StartupOption startOpt = null;
  private int instantiationRetries = 15;

  public Builder(Configuration conf) {
    this.conf = conf;
  }

  /** Startup option passed to the avatar nodes (may be null). */
  public Builder startOpt(StartupOption startOpt) {
    this.startOpt = startOpt;
    return this;
  }

  /**
   * Number of attempts when instantiating an AvatarNode.
   *
   * @deprecated misspelled; use {@link #instantiationRetries(int)}. Kept
   *             for backward compatibility with existing callers.
   */
  @Deprecated
  public Builder instantionRetries(int instantionRetries) {
    return instantiationRetries(instantionRetries);
  }

  /** Number of attempts when instantiating an AvatarNode. */
  public Builder instantiationRetries(int instantiationRetries) {
    this.instantiationRetries = instantiationRetries;
    return this;
  }

  public Builder numDataNodes(int numDataNodes) {
    this.numDataNodes = numDataNodes;
    return this;
  }

  /** Whether to format namenodes and wipe datanode dirs before starting. */
  public Builder format(boolean format) {
    this.format = format;
    return this;
  }

  public Builder racks(String[] racks) {
    this.racks = racks;
    return this;
  }

  public Builder hosts(String[] hosts) {
    this.hosts = hosts;
    return this;
  }

  /** Must be 1 unless {@link #federation(boolean)} is enabled. */
  public Builder numNameNodes(int numNameNodes) {
    this.numNameNodes = numNameNodes;
    return this;
  }

  public Builder federation(boolean federation) {
    this.federation = federation;
    return this;
  }

  public Builder simulatedCapacities(long[] simulatedCapacities) {
    this.simulatedCapacities = simulatedCapacities;
    return this;
  }

  public Builder numJournalNodes(int numJournalNodes) {
    this.numJournalNodes = numJournalNodes;
    return this;
  }

  public Builder enableQJM(boolean enableQJM) {
    this.enableQJM = enableQJM;
    return this;
  }

  /** Reuses an existing journal cluster; implies QJM is enabled. */
  public Builder setJournalCluster(MiniJournalCluster journalCluster) {
    this.journalCluster = journalCluster;
    this.enableQJM = true;
    return this;
  }

  public MiniAvatarCluster build()
      throws IOException, ConfigException, InterruptedException {
    return new MiniAvatarCluster(this);
  }
}
/**
 * Convenience constructor: single non-federated namenode pair.
 * See {@link Builder} for parameter semantics.
 */
public MiniAvatarCluster(Configuration conf,
    int numDataNodes,
    boolean format,
    String[] racks,
    String[] hosts)
    throws IOException, ConfigException, InterruptedException {
  this(new Builder(conf).numDataNodes(numDataNodes).format(format)
      .racks(racks).hosts(hosts));
}
/**
 * Convenience constructor allowing multiple namenodes / federation.
 * See {@link Builder} for parameter semantics.
 */
public MiniAvatarCluster(Configuration conf,
    int numDataNodes,
    boolean format,
    String[] racks,
    String[] hosts,
    int numNameNodes,
    boolean federation)
    throws IOException, ConfigException, InterruptedException {
  this(new Builder(conf).numDataNodes(numDataNodes).format(format)
      .racks(racks)
      .hosts(hosts)
      .numNameNodes(numNameNodes)
      .federation(federation));
}
/**
 * Modify the config and start up the servers. The rpc and info ports for
 * servers are guaranteed to use free ports.
 * <p>
 * NameNode and DataNode directory creation and configuration will be
 * managed by this class.
 *
 * @param conf the base configuration to use in starting the servers. This
 *          will be modified as necessary.
 * @param numDataNodes Number of DataNodes to start; may be zero
 * @param format if true, format the NameNode and DataNodes before starting up
 * @param racks array of strings indicating the rack that each DataNode is on
 * @param hosts array of strings indicating the hostname of each DataNode
 * @param numNameNodes Number of NameNodes to start;
 * @param federation if true, we start it with federation configure;
 * @param simulatedCapacities optional per-datanode simulated capacity
 *          (bytes); null to use real storage
 * @throws IOException on startup failure
 * @throws ConfigException if the zookeeper config cannot be parsed
 * @throws InterruptedException if startup is interrupted
 */
public MiniAvatarCluster(Configuration conf,
    int numDataNodes,
    boolean format,
    String[] racks,
    String[] hosts,
    int numNameNodes,
    boolean federation,
    long[] simulatedCapacities)
    throws IOException, ConfigException, InterruptedException {
  this(new Builder(conf).numDataNodes(numDataNodes).format(format)
      .racks(racks)
      .hosts(hosts)
      .numNameNodes(numNameNodes)
      .federation(federation)
      .simulatedCapacities(simulatedCapacities));
}
/**
 * Builds and starts the whole cluster from a {@link Builder}: configures
 * test-friendly settings, optionally starts a journal cluster, prepares
 * per-nameservice configs and directories, registers the primaries in
 * ZooKeeper, then starts datanodes followed by avatar nodes and waits for
 * safe-mode exit and the first checkpoint.
 */
public MiniAvatarCluster(Builder b)
    throws IOException, ConfigException, InterruptedException {
  // Speed up standby checkpoint retries for tests.
  Standby.CHECKPOINT_SLEEP_BEFORE_RETRY = 100;
  this.conf = b.conf;
  // Scope all on-disk state under a per-cluster-id subdirectory.
  final String testDir = TEST_DIR + "/" + conf.get(MiniDFSCluster.DFS_CLUSTER_ID, "");
  baseAvatarDir = testDir + "/avatar";
  dataDir = testDir + "/data";
  this.instantiationRetries = b.instantiationRetries;
  this.numDataNodes = b.numDataNodes;
  this.format = b.format;
  this.racks = b.racks;
  this.hosts = b.hosts;
  this.numJournalNodes = b.numJournalNodes;
  this.enableQJM = b.enableQJM;
  this.startOpt = b.startOpt;
  this.journalCluster = b.journalCluster;
  // Unique id/name for this cluster instance.
  int clusterId = ClusterId.getAndIncrement();
  conf.setInt(FSConstants.DFS_CLUSTER_ID, clusterId);
  conf.set(FSConstants.DFS_CLUSTER_NAME, "MiniAvatarCluster-" + clusterId);
  conf.setInt("dfs.secondary.info.port", 0);
  // ZooKeeper coordination settings (in-process server on zkClientPort).
  conf.set("fs.ha.zookeeper.prefix", "/hdfs");
  conf.set("fs.ha.zookeeper.quorum", "localhost:" + zkClientPort);
  conf.setInt("fs.ha.zookeeper.connect.timeout", 30000);
  conf.setInt("fs.ha.zookeeper.timeout", 30000);
  // datanodes
  conf.setInt("dfs.datanode.fullblockreport.delay", 1000);
  conf.setInt("dfs.datanode.blockreceived.retry.internval", 1000);
  conf.set(FSConstants.DFS_DATANODE_ADDRESS_KEY, "localhost:0");
  conf.set("dfs.datanode.http.address", "localhost:0");
  conf.set("dfs.datanode.ipc.address", "localhost:0");
  // Bind DNS lookups to the loopback interface for portability.
  String loopBack = getLoopBackInterface();
  LOG.info("LoopBack interface is : " + loopBack);
  conf.set(FSConstants.DFS_DATANODE_DNS_INTERFACE, loopBack);
  conf.set(FSConstants.DFS_NAMENODE_DNS_INTERFACE, loopBack);
  // other settings
  conf.setBoolean("dfs.permissions", false);
  conf.setBoolean("dfs.persist.blocks", true);
  conf.set("fs.hdfs.impl",
      "org.apache.hadoop.hdfs.DistributedAvatarFileSystem");
  conf.setLong("dfs.blockreport.initialDelay", 0);
  conf.setClass("topology.node.switch.mapping.impl",
      StaticMapping.class, DNSToSwitchMapping.class);
  if (conf.get("dfs.ingest.retries") == null) {
    conf.setInt("dfs.ingest.retries", 2);
  }
  conf.setLong("rpc.polling.interval", 10);
  conf.setLong("lease.check.interval", 10);
  conf.set("dfs.secondary.http.address", "0.0.0.0:0");
  // enable checkpoint by default
  if(conf.get("fs.checkpoint.enabled") == null) {
    conf.setBoolean("fs.checkpoint.enabled", true);
  }
  //http image download timeout - 5s
  if(conf.get("dfs.image.transfer.timeout") == null) {
    conf.setInt("dfs.image.transfer.timeout", 5 * 1000);
  }
  // make the standby actions (e.g., checkpoint trigger) quicker
  conf.setInt("hdfs.avatarnode.sleep", 1000);
  // disable standby backup limits
  conf.setInt(NNStorageDirectoryRetentionManager.NN_IMAGE_DAYS_TOKEEP, 0);
  conf.setInt(NNStorageDirectoryRetentionManager.NN_IMAGE_COPIES_TOKEEP, 0);
  // start the JournalCluster.
  if (this.enableQJM) {
    startJournalCluster();
  }
  this.federation = b.federation;
  // Multiple configured nameservices imply federation even if not asked for.
  Collection<String> nameserviceIds = DFSUtil.getNameServiceIds(conf);
  if(nameserviceIds.size() > 1)
    this.federation = true;
  if (!federation && b.numNameNodes != 1) {
    throw new IOException("Only 1 namenode is allowed in non-federation cluster.");
  }
  nameNodes = new NameNodeInfo[b.numNameNodes];
  for (int nnIndex = 0; nnIndex < b.numNameNodes; nnIndex++) {
    nameNodes[nnIndex] = new NameNodeInfo(nnIndex);
    if (format)
      nameNodes[nnIndex].cleanupAvatarDirs();
    nameNodes[nnIndex].createAvatarDirs();
  }
  if (!federation) {
    nameNodes[0].initGeneralConf(conf, null);
  } else {
    // Synthesize nameservice ids when the configuration supplied none.
    if (nameserviceIds.isEmpty()) {
      for (int i = 0; i < nameNodes.length; i++) {
        nameserviceIds.add(NAMESERVICE_ID_PREFIX + getNSId());
      }
    }
    initFederationConf(conf, nameserviceIds);
  }
  if (this.format) {
    File data_dir = new File(dataDir);
    if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
      throw new IOException("Cannot remove data directory: " + data_dir);
    }
  }
  // Need to start datanodes before avatarnodes, since the primary starts up
  // in safemode and when the standby starts up, it waits for the primary to
  // exit safemode. So if we start avatarnodes first with non-empty FSImage
  // and FSEdits, the primary avatar would wait for datanode block reports and
  // the standby would wait for the primary to exit safemode and since we
  // wouldn't return from the standby initialization we would never start the
  // datanodes and hence we enter a deadlock.
  registerZooKeeperNodes();
  startDataNodes(b.simulatedCapacities);
  startAvatarNodes();
  waitAvatarNodesActive();
  waitDataNodesActive();
  waitExitSafeMode();
  waitForTheFirstCheckpoint();
}
/**
 * Retrieves the name of the loopback interface in a platform independent
 * way, falling back to the conventional name "lo" when no interface is
 * flagged as loopback.
 */
private static String getLoopBackInterface() throws IOException {
  Enumeration<NetworkInterface> all = NetworkInterface.getNetworkInterfaces();
  while (all.hasMoreElements()) {
    NetworkInterface candidate = all.nextElement();
    if (candidate.isLoopback()) {
      return candidate.getName();
    }
  }
  return "lo";
}
/** Lazily brings up a MiniJournalCluster unless one was already supplied. */
private void startJournalCluster() throws IOException {
  if (journalCluster != null) {
    return;
  }
  journalCluster = new MiniJournalCluster.Builder(conf)
      .numJournalNodes(numJournalNodes).build();
}
/**
 * Wires each nameservice into the shared configuration and records the
 * comma-separated nameservice list under DFS_FEDERATION_NAMESERVICES.
 *
 * @param conf cluster configuration, modified in place
 * @param nameserviceIds one id per entry of {@code nameNodes}, in order
 */
private void initFederationConf(Configuration conf,
    Collection<String> nameserviceIds) {
  // Use a StringBuilder instead of repeated String concatenation to build
  // the comma separated list of nameserviceIds.
  StringBuilder nameserviceIdList = new StringBuilder();
  int nnIndex = 0;
  for (String nameserviceId : nameserviceIds) {
    if (nameserviceIdList.length() > 0) {
      nameserviceIdList.append(',');
    }
    nameserviceIdList.append(nameserviceId);
    nameNodes[nnIndex].initGeneralConf(conf, nameserviceId);
    nnIndex++;
  }
  conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES,
      nameserviceIdList.toString());
}
/**
 * Writes a fresh ZooKeeper properties file under TEST_DIR and parses it
 * into a ServerConfig.
 *
 * @return the parsed server configuration
 * @throws IOException if the conf file cannot be written
 * @throws ConfigException if the written file cannot be parsed
 */
private static ServerConfig createZooKeeperConf()
    throws IOException, ConfigException {
  // create conf file
  File zkConfDir = new File(TEST_DIR);
  zkConfDir.mkdirs();
  File zkConfFile = new File(ZK_CONF_FILE);
  zkConfFile.delete();
  zkConfFile.createNewFile();
  Properties zkConfProps = new Properties();
  zkConfProps.setProperty("tickTime", "2000");
  zkConfProps.setProperty("dataDir", ZK_DATA_DIR);
  zkConfProps.setProperty("clientPort", Integer.toString(zkClientPort));
  zkConfProps.setProperty("maxClientCnxns", "500");
  // Properties.store() does not close the stream it is handed; close it
  // explicitly so the file descriptor is not leaked.
  FileOutputStream out = new FileOutputStream(zkConfFile);
  try {
    zkConfProps.store(out, "");
  } finally {
    out.close();
  }
  // create config object
  ServerConfig zkConf = new ServerConfig();
  zkConf.parse(ZK_CONF_FILE);
  return zkConf;
}
/**
 * Returns the ZooKeeper server config, parsing the existing conf file if
 * present and creating a fresh one otherwise.
 */
private static ServerConfig getZooKeeperConf() throws Exception {
  if (!new File(ZK_CONF_FILE).exists()) {
    return createZooKeeperConf();
  }
  ServerConfig zkConf = new ServerConfig();
  zkConf.parse(ZK_CONF_FILE);
  return zkConf;
}
/**
 * Deletes the ZooKeeper transaction-log and data directories.
 * Note: short-circuits — the data dir is only deleted if removing the
 * log dir succeeded, matching the original contract.
 *
 * @return true iff both directories were fully deleted
 */
public static boolean clearZooKeeperData() throws Exception {
  ServerConfig zkConf = getZooKeeperConf();
  return FileUtil.fullyDelete(new File(zkConf.getDataLogDir()))
      && FileUtil.fullyDelete(new File(zkConf.getDataDir()));
}
/**
 * Creates a fresh ZooKeeper config, then starts an in-process ZooKeeper
 * server and its NIO connection factory on {@link #zkClientPort}.
 */
public static void createAndStartZooKeeper()
    throws IOException, ConfigException, InterruptedException {
  logStateChange("Creating zookeeper server");
  // Speed up AvatarShell retries for tests.
  AvatarShell.retrySleep = 1000;
  ServerConfig zkConf = createZooKeeperConf();
  zooKeeper = new ZooKeeperServer();
  // Transaction log and snapshot storage for the server.
  FileTxnSnapLog ftxn = new
      FileTxnSnapLog(new File(zkConf.getDataLogDir()),
      new File(zkConf.getDataDir()));
  zooKeeper.setTxnLogFactory(ftxn);
  zooKeeper.setTickTime(zkConf.getTickTime());
  zooKeeper.setMinSessionTimeout(zkConf.getMinSessionTimeout());
  zooKeeper.setMaxSessionTimeout(zkConf.getMaxSessionTimeout());
  cnxnFactory = new NIOServerCnxnFactory();
  cnxnFactory.configure(zkConf.getClientPortAddress(),
      zkConf.getMaxClientCnxns());
  // startup() binds the port and begins serving.
  cnxnFactory.startup(zooKeeper);
  logStateChange("Creating zookeeper server - completed");
}
/**
 * Publishes the given primary ports in ZooKeeper under the nameservice's
 * logical addresses, retrying up to 5 times.
 *
 * @throws IOException if all attempts fail; the last failure is attached
 *           as the cause
 */
private void registerZooKeeperNode(int nnPrimaryPort, int nnDnPrimaryPort,
    int httpPrimaryPort, int rpcPrimaryPort, NameNodeInfo nni) throws IOException {
  int retries = 5;
  IOException lastFailure = null;
  for (int i = 0; i < retries; i++) {
    try {
      AvatarZooKeeperClient zkClient =
          new AvatarZooKeeperClient(nni.conf, null, false);
      // Ensure the client is shut down even when registration fails, so a
      // failed attempt does not leak a zookeeper connection.
      try {
        zkClient.registerPrimary("localhost:" + nni.nnPort,
            "127.0.0.1:" + nnPrimaryPort, true);
        zkClient.registerPrimary("localhost:" + nni.nnDnPort,
            "127.0.0.1:" + nnDnPrimaryPort, true);
        zkClient.registerPrimary("localhost:" + nni.httpPort,
            "127.0.0.1:" + httpPrimaryPort, true);
        zkClient.registerPrimary("localhost:" + nni.rpcPort,
            "127.0.0.1:" + rpcPrimaryPort, true);
      } finally {
        try {
          zkClient.shutdown();
        } catch (InterruptedException ie) {
          // Restore the interrupt status for the caller before converting.
          Thread.currentThread().interrupt();
          throw new IOException("zkClient.shutdown() interrupted");
        }
      }
      LOG.info("Closed zk client connection for registerZookeeper");
      return;
    } catch (IOException e) {
      lastFailure = e;
      LOG.info("Got exception when registering to zk, retrying", e);
      sleep(1000);
    }
  }
  // Chain the last failure so callers can see why registration failed.
  throw new IOException("Cannot talk to ZK.", lastFailure);
}
/**
 * Removes the primary registrations of the given namenode from ZooKeeper,
 * retrying up to 5 times.
 *
 * @param nnIndex index into {@code nameNodes}
 * @throws IOException if all attempts fail; the last failure is attached
 *           as the cause
 */
public void clearZooKeeperNode(int nnIndex) throws IOException {
  int retries = 5;
  IOException lastFailure = null;
  for (int i = 0; i < retries; i++) {
    try {
      NameNodeInfo nni = this.nameNodes[nnIndex];
      AvatarZooKeeperClient zkClient =
          new AvatarZooKeeperClient(nni.conf, null, false);
      // Ensure the client is shut down even when clearing fails, so a
      // failed attempt does not leak a zookeeper connection.
      try {
        zkClient.clearPrimary("localhost:" + nni.httpPort);
        zkClient.clearPrimary("localhost:" + nni.nnPort);
        zkClient.clearPrimary("localhost:" + nni.nnDnPort);
        zkClient.clearPrimary("localhost:" + nni.rpcPort);
      } finally {
        try {
          zkClient.shutdown();
        } catch (InterruptedException ie) {
          // Restore the interrupt status for the caller before converting.
          Thread.currentThread().interrupt();
          throw new IOException("zkClient.shutdown() interrupted");
        }
      }
      LOG.info("Closed zk client connection for clearZKNode");
      return;
    } catch (IOException e) {
      lastFailure = e;
      LOG.info("Got exception when clearing zk, retrying", e);
      sleep(1000);
    }
  }
  // Chain the last failure so callers can see why clearing failed.
  throw new IOException("Cannot talk to ZK.", lastFailure);
}
/**
 * Returns a copy of the server configuration matching the given avatar
 * startup option (node zero or node one).
 *
 * @throws IllegalArgumentException for any other option
 */
static Configuration getServerConf(String startupOption,
    NameNodeInfo nni) {
  // namenode should use DFS, not DAFS
  if (startupOption.equals(
      AvatarConstants.StartupOption.NODEZERO.getName())) {
    return new Configuration(nni.a0Conf);
  }
  if (startupOption.equals(
      AvatarConstants.StartupOption.NODEONE.getName())) {
    return new Configuration(nni.a1Conf);
  }
  throw new IllegalArgumentException("invalid avatar");
}
/**
 * Refreshes each nameservice's avatar configs and publishes its node-zero
 * addresses as primary in ZooKeeper.
 */
public void registerZooKeeperNodes() throws IOException {
  for (NameNodeInfo info : nameNodes) {
    info.updateAvatarConf(conf);
    registerZooKeeperNode(info.nn0Port, info.nnDn0Port, info.http0Port,
        info.rpc0Port, info);
  }
}
/** Starts the avatar pair for every configured nameservice. */
private void startAvatarNodes() throws IOException {
  for (NameNodeInfo info : nameNodes) {
    info.updateAvatarConf(conf);
    startAvatarNode(info, startOpt);
  }
}
/**
 * Starts (and, when formatting, first force-formats) both avatars of one
 * nameservice: node zero is brought up first and recorded as ACTIVE, node
 * one second as STANDBY. Waits up to 10s for the ACTIVE to initialize.
 */
private void startAvatarNode(NameNodeInfo nni, StartupOption operation) throws IOException {
  // (Re)publish node zero's addresses as primary before anything starts.
  registerZooKeeperNode(nni.nn0Port, nni.nnDn0Port, nni.http0Port,
      nni.rpc0Port, nni);
  if (format) {
    LOG.info("formatting");
    // Start the NameNode
    String[] a0FormatArgs;
    ArrayList<String> argList = new ArrayList<String>();
    argList.add(AvatarConstants.StartupOption.
        NODEZERO.getName());
    argList.add(AvatarConstants.StartupOption.
        FORMATFORCE.getName());
    if (federation) {
      argList.add(StartupOption.SERVICE.getName());
      argList.add(nni.nameserviceId);
    }
    a0FormatArgs = new String[argList.size()];
    argList.toArray(a0FormatArgs);
    instantiateAvatarNode(a0FormatArgs,
        getServerConf(AvatarConstants.StartupOption.
            NODEZERO.getName(), nni));
  }
  ArrayList<AvatarInfo> avatars = new ArrayList<AvatarInfo>(2);
  {
    LOG.info("starting avatar 0");
    String[] a0Args;
    ArrayList<String> argList = new ArrayList<String>();
    // Optional extra startup option (e.g. upgrade) goes first.
    if (operation != null) {
      argList.add(operation.getName());
    }
    argList.add(AvatarConstants.StartupOption.NODEZERO.getName());
    if (federation) {
      argList.add(StartupOption.SERVICE.getName());
      argList.add(nni.nameserviceId);
    }
    a0Args = new String[argList.size()];
    argList.toArray(a0Args);
    AvatarNode a0 = instantiateAvatarNode(a0Args,
        getServerConf(AvatarConstants.
            StartupOption.
            NODEZERO.
            getName(), nni));
    avatars.add(new AvatarInfo(a0,
        AvatarState.ACTIVE,
        nni.nn0Port, nni.nnDn0Port, nni.http0Port, nni.rpc0Port,
        AvatarConstants.StartupOption.NODEZERO.
        getName()));
    // wait for up to 10 seconds until the ACTIVE is initialized
    for (int i = 0; i < 10; i++) {
      if (a0.isInitDone())
        break;
      LOG.info("Waiting for the ACTIVE to be initialized...");
      sleep(1000);
    }
    if (!a0.isInitDone()) {
      throw new IOException("The ACTIVE cannot be initialized");
    }
  }
  {
    LOG.info("starting avatar 1");
    String[] a1Args;
    ArrayList<String> argList = new ArrayList<String>();
    argList.add(AvatarConstants.StartupOption.NODEONE.getName());
    argList.add(AvatarConstants.StartupOption.STANDBY.getName());
    argList.add(AvatarConstants.StartupOption.REGULAR.getName());
    if (federation) {
      argList.add(StartupOption.SERVICE.getName());
      argList.add(nni.nameserviceId);
    }
    a1Args = new String[argList.size()];
    argList.toArray(a1Args);
    avatars.add(new AvatarInfo(
        instantiateAvatarNode(a1Args,
            getServerConf(AvatarConstants.
                StartupOption.
                NODEONE.
                getName(), nni)),
        AvatarState.STANDBY,
        nni.nn1Port, nni.nnDn1Port, nni.http1Port, nni.rpc1Port,
        AvatarConstants.StartupOption.NODEONE.
        getName()));
  }
  // Sanity-check both nodes exist and kept the block-persistence setting.
  for (AvatarInfo avatar: avatars) {
    if (avatar.avatar == null) {
      throw new IOException("Cannot create avatar nodes");
    }
    Assert.assertTrue(
        avatar.avatar.getConf().getBoolean("dfs.persist.blocks", false));
  }
  nni.setAvatarNodes(avatars);
  DFSUtil.setGenericConf(nni.conf, nni.nameserviceId,
      AvatarNode.AVATARSERVICE_SPECIFIC_KEYS);
  nni.updateAvatarConf(nni.conf);
}
/**
 * Shuts down and restarts every avatar pair without reformatting, then
 * waits for the cluster to become active and leave safe mode.
 */
public void restartAvatarNodes() throws Exception {
  logStateChange("Restarting avatar nodes");
  shutDownAvatarNodes();
  for (NameNodeInfo info : nameNodes) {
    info.avatars.clear();
  }
  // The restarted nodes must reuse the existing image/edits: never format.
  format = false;
  startAvatarNodes();
  waitAvatarNodesActive();
  waitDataNodesActive();
  waitExitSafeMode();
  logStateChange("Restarting avatar nodes - completed");
}
/*
 * Spawns one shutdown thread per datanode and adds each thread to the
 * caller's collection so it can be joined later.
 */
private void processDatanodesForShutdown(Collection<Thread> threads) {
  int index = 0;
  for (DataNodeProperties dnp : dataNodes) {
    LOG.info("Shutting down data node " + index);
    index++;
    Thread worker = new Thread(new ShutDownUtil(dnp));
    worker.start();
    threads.add(worker);
  }
}
/*
 * Spawns one shutdown thread per running (ACTIVE or STANDBY) avatar and
 * adds each thread to the caller's collection so it can be joined later.
 */
private void processNamenodesForShutdown(Collection<Thread> threads) {
  for (NameNodeInfo nni : this.nameNodes) {
    for (AvatarInfo avatar : nni.avatars) {
      boolean running = avatar.state == AvatarState.ACTIVE
          || avatar.state == AvatarState.STANDBY;
      if (!running) {
        continue;
      }
      LOG.info("Shutting down Avatar " + avatar.state);
      Thread worker = new Thread(new ShutDownUtil(avatar));
      worker.start();
      threads.add(worker);
    }
  }
}
/** Synchronously shuts down the i-th datanode. */
public void shutDownDataNode(int i) throws IOException, InterruptedException {
  logStateChange("Shutting down datanode: " + i);
  DataNodeProperties dnp = dataNodes.get(i);
  dnp.datanode.shutdown();
  logStateChange("Shutting down datanode: " + i + " - completed");
}
/** Shuts down all datanodes in parallel and waits for completion. */
public void shutDownDataNodes() throws IOException, InterruptedException {
  logStateChange("Shutting down avatar datanodes");
  ArrayList<Thread> workers = new ArrayList<Thread>();
  processDatanodesForShutdown(workers);
  MiniDFSCluster.joinThreads(workers);
  logStateChange("Shutting down avatar datanodes - completed");
}
/** Shuts down the journal cluster, if QJM was in use. */
private void shutDownJournalCluster() throws IOException {
  if (journalCluster == null) {
    return;
  }
  journalCluster.shutdown();
}
/**
 * Shuts down all live avatar nodes in parallel, joins the shutdown
 * threads, then pauses briefly to let the shutdown settle.
 */
public void shutDownAvatarNodes() throws IOException, InterruptedException {
  logStateChange("Shutting down avatar nodes");
  List<Thread> workers = new ArrayList<Thread>();
  processNamenodesForShutdown(workers);
  MiniDFSCluster.joinThreads(workers);
  try {
    Thread.sleep(1000);
  } catch (InterruptedException ignored) {
    // best-effort settle delay; interruption here is harmless
  }
  logStateChange("Shutting down avatar nodes - completed");
}
/**
 * Shut down the static ZooKeeper server and its connection factory.
 * NOTE(review): assumes the startup path already initialized
 * cnxnFactory and zooKeeper — calling this before startup would NPE;
 * confirm against the setup code.
 */
public static void shutDownZooKeeper() throws IOException, InterruptedException {
logStateChange("Shutting down zookeeper server");
cnxnFactory.shutdown();
// Wait for the connection factory thread to actually exit.
cnxnFactory.join();
LOG.info("Zookeeper Connection Factory shutdown");
// The factory shutdown may already have stopped the server.
if (zooKeeper.isRunning()) {
zooKeeper.shutdown();
}
logStateChange("Shutting down zookeeper server - completed");
}
/**
 * Shut down the cluster: all datanodes and namenodes in parallel
 * shutdown threads, followed by the journal cluster. The static
 * ZooKeeper server is not stopped here (see shutDownZooKeeper()).
 */
public void shutDown() throws IOException, InterruptedException {
  logStateChange("Shutting down Mini Avatar Cluster");
  List<Thread> workers = new ArrayList<Thread>();
  // Queue up datanode and namenode shutdowns, then join them all.
  processDatanodesForShutdown(workers);
  processNamenodesForShutdown(workers);
  MiniDFSCluster.joinThreads(workers);
  shutDownJournalCluster();
  logStateChange("Shutting down Mini Avatar Cluster - completed");
}
/**
 * Start datanodes with the given per-node simulated capacities, using
 * the cluster's configured datanode count, racks, hosts and conf.
 *
 * Bug fix: the 5-argument overload's signature is
 * (simulatedCapacities, numDataNodes, racks, hosts, conf); the previous
 * code passed hosts and racks in swapped positions, silently assigning
 * hostnames as racks and vice versa.
 */
private void startDataNodes(long[] simulatedCapacities) throws IOException {
  startDataNodes(simulatedCapacities, numDataNodes, racks, hosts, conf);
}
/**
 * Start datanodes using the cluster's configured count, racks, hosts
 * and configuration, with real (non-simulated) storage.
 */
private void startDataNodes() throws IOException {
startDataNodes(numDataNodes, racks, hosts, conf);
}
/**
 * Start the given number of datanodes with real (non-simulated)
 * storage; delegates with null simulated capacities.
 */
public void startDataNodes(int numDataNodes, String[] racks, String[] hosts,
Configuration conf) throws IOException {
startDataNodes(null, numDataNodes, racks, hosts, conf);
}
/**
 * Start {@code numDataNodes} additional datanodes, each on its own
 * startup thread, and wait for all of them.
 *
 * @param simulatedCapacities per-node simulated storage capacities, or
 *        null for real storage
 * @param numDataNodes number of new datanodes to start
 * @param racks rack assignment per node (may be null); must have at
 *        least numDataNodes entries when non-null
 * @param hosts hostname per node (may be null); names are generated
 *        when racks are given without hosts
 * @param conf base configuration for the new datanodes
 * @throws IOException if any datanode fails to start
 */
public void startDataNodes(long[] simulatedCapacities, int numDataNodes,
String[] racks, String[] hosts, Configuration conf) throws IOException {
// New nodes are numbered after any already-running ones.
int curDn = dataNodes.size();
if (racks != null && numDataNodes > racks.length ) {
throw new IllegalArgumentException( "The length of racks [" +
racks.length +
"] is less than the number " +
"of datanodes [" +
numDataNodes + "].");
}
if (hosts != null && numDataNodes > hosts.length ) {
throw new IllegalArgumentException( "The length of hosts [" +
hosts.length +
"] is less than the number " +
"of datanodes [" +
numDataNodes + "].");
}
//Generate some hostnames if required
// NOTE(review): this writes the local 'hosts' parameter, but the
// StartDatanodeUtil workers reference the enclosing class's 'hosts'
// field — confirm the generated names actually reach the workers.
if (racks != null && hosts == null) {
LOG.info("Generating host names for datanodes");
hosts = new String[numDataNodes];
for (int i = 0; i < numDataNodes; i++) {
hosts[i] = "host" + (curDn + i) + ".foo.com";
}
}
// Start each datanode on its own thread, then join them all.
ArrayList<Thread> threads = new ArrayList<Thread>();
for (int i = 0; i < numDataNodes; i++) {
Thread st = new Thread(new StartDatanodeUtil(i, curDn, simulatedCapacities));
st.start();
threads.add(st);
}
if(!MiniDFSCluster.joinThreads(threads)){
throw new IOException("Failed to startup the nodes");
}
// Record the new cluster-wide datanode total.
this.numDataNodes = dataNodes.size();
}
/**
 * Runnable that starts one datanode: creates its storage directories,
 * builds its configuration (including optional simulated storage),
 * registers rack mappings, instantiates the AvatarDataNode and launches
 * its daemon. Reads the enclosing cluster's fields (conf, dataDir,
 * hosts, racks, startOpt, dataNodes). Failures are logged, not
 * rethrown, so the datanode is simply not added on error.
 */
class StartDatanodeUtil implements Runnable {
// Index of this datanode within the batch being started.
private int i;
// Number of datanodes that existed before this batch.
private int curDn;
// Per-node simulated capacities, or null for real storage.
private long[] simulatedCapacities;
StartDatanodeUtil(int node, int curDn, long[] simulatedCapacities) {
this.i = node;
this.curDn = curDn;
this.simulatedCapacities = simulatedCapacities;
}
@Override
public void run() {
try {
// Regular startup unless the cluster-wide option is ROLLBACK.
String dnArg = StartupOption.REGULAR.getName();
if (startOpt != null && startOpt == StartupOption.ROLLBACK) {
dnArg = startOpt.getName();
}
String[] dnArgs = { dnArg };
// Global index of this datanode across all batches.
int iN = curDn + i;
Configuration dnConf = new Configuration(conf);
if (simulatedCapacities != null) {
dnConf.setBoolean("dfs.datanode.simulateddatastorage", true);
dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY,
simulatedCapacities[i]);
}
// Two storage directories per datanode: data(2n+1) and data(2n+2).
File dir1 = new File(dataDir, "data" + (2 * iN + 1));
File dir2 = new File(dataDir, "data" + (2 * iN + 2));
dir1.mkdirs();
dir2.mkdirs();
if (!dir1.isDirectory() || !dir2.isDirectory()) {
throw new IOException(
"Mkdirs failed to create directory for DataNode " + iN + ": "
+ dir1 + " or " + dir2);
}
dnConf.set("dfs.data.dir", dir1.getPath() + "," + dir2.getPath());
LOG.info("Starting DataNode " + iN + " with dfs.data.dir: "
+ dnConf.get("dfs.data.dir"));
// NOTE(review): 'hosts'/'racks' here are the enclosing class's
// fields, not the parameters of startDataNodes — verify they hold
// the intended per-batch values.
if (hosts != null) {
dnConf.set(FSConstants.SLAVE_HOST_NAME, hosts[i]);
LOG.info("Starting DataNode " + iN + " with hostname set to: "
+ dnConf.get(FSConstants.SLAVE_HOST_NAME));
}
if (racks != null) {
String name = hosts[i];
LOG.info("Adding node with hostname : " + name + " to rack "
+ racks[i]);
StaticMapping.addNodeToRack(name, racks[i]);
}
// Snapshot the config before instantiation mutates it.
Configuration newconf = new Configuration(dnConf); // save config
AvatarDataNode dn = instantiateDataNode(dnArgs, dnConf);
// since the HDFS does things based on IP:port, we need to add the
// mapping
// for IP:port to rackId
String ipAddr = dn.getSelfAddr().getAddress().getHostAddress();
if (racks != null) {
int port = dn.getSelfAddr().getPort();
System.out.println("Adding node with IP:port : " + ipAddr + ":"
+ port + " to rack " + racks[i]);
StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i]);
}
dn.runDatanodeDaemon();
// dataNodes is shared across concurrent startup threads.
synchronized (dataNodes) {
dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs));
}
} catch (IOException e) {
// Swallowed: the node is simply absent from dataNodes on failure.
LOG.error("Exception when creating datanode", e);
}
}
}
/** Block until every avatar of every namenode reports a DN address. */
public void waitAvatarNodesActive() {
  for (int i = 0; i < this.nameNodes.length; i++) {
    waitAvatarNodesActive(i);
  }
}
/**
 * Block until each avatar of the given namenode reports a non-null
 * datanode-facing address, polling every 200 ms.
 */
public void waitAvatarNodesActive(int nnIndex) {
  for (AvatarInfo avatar : this.nameNodes[nnIndex].avatars) {
    while (avatar.avatar.getNameNodeDNAddress() == null) {
      logStateChange("Waiting for avatar");
      try {
        Thread.sleep(200);
      } catch (InterruptedException ignored) {
        // keep polling
      }
    }
  }
}
/*
 * Wait for the datanodes of every namespace to register, unless
 * disabled via the "fs.datanodes.wait" configuration key.
 */
public void waitDataNodesActive() throws IOException {
  if (!conf.getBoolean("fs.datanodes.wait", true)) {
    LOG.info("Will not wait for datanodes");
    return;
  }
  for (int i = 0; i < this.nameNodes.length; i++) {
    waitDataNodesActive(i);
  }
}
/*
 * Wait for all datanodes to register with the given namenode, polling
 * its live-datanode stats every 200ms until the count matches
 * numDataNodes.
 */
public void waitDataNodesActive(int nnIndex) throws IOException {
DistributedAvatarFileSystem dafs = null;
logStateChange("Waiting for data nodes");
int liveDataNodes = 0;
// make sure all datanodes are alive
while(liveDataNodes != numDataNodes) {
try {
// Fresh filesystem handle each round; closed in the finally below.
// NOTE(review): FileSystem.get may hand back a cached instance;
// closing it here could affect other holders — confirm.
dafs = getFileSystem(nnIndex);
Thread.sleep(200);
liveDataNodes = dafs.getLiveDataNodeStats(false).length;
logStateChange("Waiting for data nodes : live=" + liveDataNodes + ", total=" + numDataNodes);
} catch (Exception e) {
// Namenode may not be serving yet; log and retry.
LOG.warn("Exception waiting for datanodes : ", e);
} finally {
if (dafs != null) {
dafs.close();
}
}
}
logStateChange("Waiting for data nodes - completed");
}
/**
 * Guard used by the no-index convenience overloads: rejects federated
 * (multi-namenode) clusters, where an explicit index is required.
 */
private void checkSingleNameNode() {
  if (nameNodes.length == 1) {
    return;
  }
  throw new IllegalArgumentException(
      "It's not a single namenode cluster, use index instead.");
}
/** Returns the ACTIVE (primary) avatar of the given namenode, or null. */
public AvatarInfo getPrimaryAvatar(int nnIndex) {
return getAvatarByState(nnIndex, AvatarState.ACTIVE);
}
/** Returns the STANDBY avatar of the given namenode, or null. */
public AvatarInfo getStandbyAvatar(int nnIndex) {
return getAvatarByState(nnIndex, AvatarState.STANDBY);
}
/** Returns the DEAD avatar of the given namenode, or null. */
private AvatarInfo getDeadAvatar(int nnIndex) {
return getAvatarByState(nnIndex, AvatarState.DEAD);
}
/**
 * Returns the first avatar of the given namenode that is in the
 * requested state, or null if none matches.
 */
private AvatarInfo getAvatarByState(int nnIndex, AvatarState state) {
  AvatarInfo match = null;
  for (AvatarInfo candidate : this.nameNodes[nnIndex].avatars) {
    if (candidate.state == state) {
      match = candidate;
      break;
    }
  }
  return match;
}
/**
 * Wait until the primary avatar of every namespace has completed its
 * first checkpoint; tests assert txids that depend on checkpoints
 * having happened. Waiting can be disabled via "fs.checkpoint.wait" or
 * "fs.checkpoint.enabled".
 */
private void waitForTheFirstCheckpoint() {
  if((!conf.getBoolean("fs.checkpoint.wait", true)) ||
     (!conf.getBoolean("fs.checkpoint.enabled", true))) {
    logStateChange("Waiting for checkpoint is disabled");
    return;
  }
  logStateChange("Waiting for first checkpoint");
  // wait for the first checkpoint to happen, as we
  // assert txids which depend on the checkpoints
  for (int nnIndex=0; nnIndex < this.nameNodes.length; nnIndex++) {
    while(!isCheckpointed(nnIndex)) {
      try {
        // Fix: the old message always said "avatar0" even when waiting
        // on other namenodes; report the actual index being polled.
        logStateChange("Waiting until avatar " + nnIndex
            + " has been checkpointed");
        Thread.sleep(50);
      } catch (InterruptedException ignore) {
        // do nothing
      }
    }
  }
  logStateChange("Waiting for first checkpoint - completed");
}
/**
 * Return whether the primary avatar of the given namenode exists and
 * has recorded at least one checkpoint (last checkpoint txid > -1).
 */
private boolean isCheckpointed(int nnIndex) {
  AvatarInfo primary = getPrimaryAvatar(nnIndex);
  if (primary == null) {
    return false;
  }
  return primary.avatar.getFSImage().getLastCheckpointTxId() > -1;
}
/**
 * Return true if the primary avatar of the given namenode exists and
 * has left safe mode.
 */
private boolean hasLeftSafeMode(int nnIndex) throws IOException {
  AvatarInfo primary = getPrimaryAvatar(nnIndex);
  if (primary == null || primary.avatar.isInSafeMode()) {
    return false;
  }
  // With no datanodes there is nothing to report; otherwise require a
  // non-zero first stats entry (assumed to reflect reported capacity —
  // TODO confirm against getStats()).
  return this.numDataNodes == 0 || primary.avatar.getStats()[0] != 0;
}
/**
 * Block until the primary avatar of every namenode has left safe mode,
 * polling every 50 ms.
 */
private void waitExitSafeMode() throws IOException {
  for (int nnIndex=0; nnIndex < this.nameNodes.length; nnIndex++) {
    // make sure all datanodes are alive
    while(!hasLeftSafeMode(nnIndex)) {
      try {
        // Fix: the old message always said "avatar0" even when waiting
        // on other namenodes; report the actual index being polled.
        logStateChange("Waiting until avatar " + nnIndex
            + " has left safe mode");
        Thread.sleep(50);
      } catch (InterruptedException ignore) {
        // do nothing
      }
    }
  }
}
/** Single-namenode convenience for {@link #getFileSystem(int)}. */
public DistributedAvatarFileSystem getFileSystem()
throws IOException {
checkSingleNameNode();
return getFileSystem(0);
}
/**
 * Get a DistributedAvatarFileSystem for the given namenode index.
 *
 * @throws IOException if the configured filesystem is not an avatar fs
 */
public DistributedAvatarFileSystem getFileSystem(int nnIndex)
  throws IOException {
  FileSystem fs = FileSystem.get(this.nameNodes[nnIndex].clientConf);
  if (fs instanceof DistributedAvatarFileSystem) {
    return (DistributedAvatarFileSystem) fs;
  }
  throw new IOException("fs is not avatar fs");
}
/**
 * Kill the primary avatar node of the single namenode, clearing its
 * zookeeper entry.
 */
public void killPrimary() throws IOException {
checkSingleNameNode();
killPrimary(0, true);
}
/** Kill the primary avatar of the given namenode, clearing zookeeper. */
public void killPrimary(int nnIndex) throws IOException {
killPrimary(nnIndex, true);
}
/**
 * Kill the primary avatar of the single namenode.
 * @param clearZK whether to clear the avatar's zookeeper node
 */
public void killPrimary(boolean clearZK) throws IOException {
checkSingleNameNode();
killPrimary(0, clearZK);
}
/**
 * Kill the primary avatar node of the given namenode.
 *
 * @param clearZK whether to clear the avatar's zookeeper node first
 * @throws IOException if there is no live primary avatar
 */
public void killPrimary(int nnIndex, boolean clearZK) throws IOException {
  logStateChange("Killing primary avatar: " + nnIndex);
  AvatarInfo primary = getPrimaryAvatar(nnIndex);
  if (primary == null) {
    throw new IOException("can't kill primary avatar, already dead");
  }
  if (clearZK) {
    clearZooKeeperNode(nnIndex);
  }
  primary.avatar.shutdown(true);
  primary.avatar = null;
  primary.state = AvatarState.DEAD;
  try {
    // Give the shutdown a moment to settle.
    Thread.sleep(1000);
  } catch (InterruptedException ignored) {
    // do nothing
  }
  logStateChange("Killing primary avatar: " + nnIndex + " - completed");
}
/** Kill the standby avatar of the single namenode. */
public void killStandby() throws IOException {
checkSingleNameNode();
killStandby(0);
}
/**
 * Kill the standby avatar node of the given namenode. Unlike
 * killPrimary, a missing standby is logged rather than treated as an
 * error.
 */
public void killStandby(int nnIndex) throws IOException {
  logStateChange("Killing standby avatar: " + nnIndex);
  AvatarInfo standby = getStandbyAvatar(nnIndex);
  if (standby == null) {
    logStateChange("Can't kill standby avatar, already dead");
    return;
  }
  standby.avatar.shutdown(true);
  standby.avatar = null;
  standby.state = AvatarState.DEAD;
  try {
    // Give the shutdown a moment to settle.
    Thread.sleep(1000);
  } catch (InterruptedException ignored) {
    // do nothing
  }
  logStateChange("Killing standby avatar: " + nnIndex + " - completed");
}
/** Non-forced failover for the single-namenode case. */
public void failOver() throws IOException {
failOver(false);
}
/**
 * Failover for the single-namenode case.
 * @param force passed through to the standby's quiesce call
 */
public void failOver(boolean force) throws IOException {
checkSingleNameNode();
failOver(0, force);
}
/**
 * Make standby avatar the new primary avatar. Kill the old
 * primary avatar first if necessary.
 */
public void failOver(int nnIndex) throws IOException {
failOver(nnIndex, false);
}
/**
 * Promote the standby avatar of the given namenode to primary, killing
 * the current primary first if one is still running. The sequence
 * (quiesce, delay, performFailover, zookeeper registration) is
 * order-sensitive — see inline comments.
 *
 * @param force passed to quiesceForFailover
 * @throws IOException if no standby avatar is running
 */
public void failOver(int nnIndex, boolean force) throws IOException {
logStateChange("Failover avatar: " + nnIndex);
if (getPrimaryAvatar(nnIndex) != null) {
LOG.info("killing primary avatar before failover");
killPrimary(nnIndex);
}
AvatarInfo standby = getStandbyAvatar(nnIndex);
if (standby == null) {
throw new IOException("no standby avatar running");
}
standby.avatar.quiesceForFailover(force);
// Introduce a synthetic delay since this is what will happen in practice.
// There will be some delay between both calls and this is to make sure
// there are no locking issues since this was earlier one RPC under a single
// lock and now its two RPCs which take the lock twice.
DFSTestUtil.waitNSecond(5);
standby.avatar.performFailover();
standby.state = AvatarState.ACTIVE;
// Publish the promoted avatar's addresses in zookeeper.
registerZooKeeperNode(standby.nnPort, standby.nnDnPort, standby.httpPort,
standby.rpcPort, this.nameNodes[nnIndex]);
logStateChange("Failover avatar: " + nnIndex + " : completed");
}
/** Single-namenode convenience for {@link #restartStandby(int)}. */
public void restartStandby() throws IOException {
checkSingleNameNode();
restartStandby(0);
}
/**
 * Restart a dead avatar node as a standby avatar. Requires a live
 * primary and a DEAD avatar entry for the given namenode.
 *
 * @throws IOException if the preconditions fail or the node cannot be
 *         started
 */
public void restartStandby(int nnIndex) throws IOException {
  AvatarInfo dead = getDeadAvatar(nnIndex);
  if (getPrimaryAvatar(nnIndex) == null || dead == null) {
    throw new IOException("cannot start standby avatar: " +
        "primary or dead avatar not found");
  }
  logStateChange("Restarting " + dead.startupOption + " as standby");
  NameNodeInfo nni = this.nameNodes[nnIndex];
  // Command line: instance selector, STANDBY role, regular startup,
  // plus the nameservice id when federated.
  ArrayList<String> argList = new ArrayList<String>();
  argList.add(dead.startupOption);
  argList.add(AvatarConstants.StartupOption.STANDBY.getName());
  argList.add(AvatarConstants.StartupOption.REGULAR.getName());
  if (federation) {
    argList.add(StartupOption.SERVICE.getName());
    argList.add(nni.nameserviceId);
  }
  String[] args = new String[argList.size()];
  argList.toArray(args);
  dead.avatar = instantiateAvatarNode(args, getServerConf(dead.startupOption, nni));
  if (dead.avatar == null) {
    // Fix: previously the state was flipped to STANDBY before this
    // check, leaving a STANDBY entry with a null avatar on failure.
    throw new IOException("cannot start avatar node");
  }
  dead.state = AvatarState.STANDBY;
  logStateChange("Restarting " + dead.startupOption + " as standby - completed");
}
/**
 * Return the NameNodeInfo for the given index.
 */
public NameNodeInfo getNameNode(int nnIndex) {
return this.nameNodes[nnIndex];
}
/** Returns the live list of datanode properties (not a copy). */
public ArrayList<DataNodeProperties> getDataNodeProperties() {
return dataNodes;
}
/**
 * Gets a list of the started DataNodes. May be empty.
 */
public ArrayList<AvatarDataNode> getDataNodes() {
  ArrayList<AvatarDataNode> result =
      new ArrayList<AvatarDataNode>(dataNodes.size());
  for (DataNodeProperties props : dataNodes) {
    result.add(props.datanode);
  }
  return result;
}
/*
 * Return the number of namenodes in the cluster.
 */
public int getNumNameNodes() {
return this.nameNodes.length;
}
/**
 * Add a namenode to cluster and start it. Configuration of datanodes
 * in the cluster is refreshed to register with the new namenode.
 * Only valid for federated clusters.
 * @return newly started namenode
 */
public NameNodeInfo addNameNode(Configuration conf)
throws IOException {
if(!federation) {
throw new IOException("cannot add namenode to non-federated cluster");
}
// Grow the nameNodes array by one slot for the new namenode.
int nnIndex = nameNodes.length;
int numNameNodes = nameNodes.length + 1;
NameNodeInfo[] newlist = new NameNodeInfo[numNameNodes];
System.arraycopy(nameNodes, 0, newlist, 0, nameNodes.length);
nameNodes = newlist;
nameNodes[nnIndex] = new NameNodeInfo(nnIndex);
NameNodeInfo nni = nameNodes[nnIndex];
nni.createAvatarDirs();
String nameserviceId = NAMESERVICE_ID_PREFIX + getNSId();
// NOTE(review): if DFS_FEDERATION_NAMESERVICES is unset this
// concatenates the literal string "null," — confirm the key is always
// set for federated clusters.
String nameserviceIds = conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES);
nameserviceIds += "," + nameserviceId;
nni.initGeneralConf(conf, nameserviceId);
conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds);
nni.updateAvatarConf(conf);
startAvatarNode(nni, null);
// Refresh datanodes with the newly started namenode
for (DataNodeProperties dn : dataNodes) {
DataNode datanode = dn.datanode;
datanode.refreshNamenodes(conf);
}
// Wait for new namenode to get registrations from all the datanodes
waitDataNodesActive(nnIndex);
return nni;
}
/**
 * Copies every federation-qualified (per-nameservice) key that is
 * present in srcConf into dstConf, covering both the avatar-specific
 * and the namenode-specific key sets.
 */
private void updateAvatarConfWithServiceId(Configuration dstConf, Configuration srcConf,
    String nameserviceId) {
  String[][] keySets = {
      AvatarNode.AVATARSERVICE_SPECIFIC_KEYS,
      NameNode.NAMESERVICE_SPECIFIC_KEYS };
  for (String[] keys : keySets) {
    for (String key : keys) {
      String federationKey = DFSUtil.getNameServiceIdKey(key, nameserviceId);
      String value = srcConf.get(federationKey);
      if (value != null) {
        dstConf.set(federationKey, value);
      }
    }
  }
}
/**
 * Add another cluster to current cluster and start it. Configuration of datanodes
 * in the cluster is refreshed to register with the new namenodes.
 * Both clusters must be federated, and the incoming cluster may not
 * have more datanodes than this one. Both clusters are shut down and
 * then restarted merged.
 */
public void addCluster(MiniAvatarCluster cluster, boolean format)
throws IOException, InterruptedException{
if(!federation || !cluster.federation) {
throw new IOException("Cannot handle non-federated cluster");
}
if (cluster.dataNodes.size() > this.dataNodes.size()) {
throw new IOException("Cannot merge: new cluster has more datanodes the old one.");
}
this.shutDown();
cluster.shutDown();
// Concatenate the two namenode arrays; the incoming cluster's
// namenodes are appended after ours.
int nnIndex = nameNodes.length;
int numNameNodes = nameNodes.length + cluster.nameNodes.length;
NameNodeInfo[] newlist = new NameNodeInfo[numNameNodes];
System.arraycopy(nameNodes, 0, newlist, 0, nameNodes.length);
System.arraycopy(cluster.nameNodes, 0, newlist, nameNodes.length,
cluster.nameNodes.length);
nameNodes = newlist;
// Merge the nameservice id lists from both configurations.
String newNameserviceIds = cluster.conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES);
String nameserviceIds = conf.get(FSConstants.DFS_FEDERATION_NAMESERVICES);
nameserviceIds += "," + newNameserviceIds;
this.format = format;
conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds);
int i;
// Push per-nameservice configuration down to every datanode; for the
// appended (incoming) namenodes also record their old data dirs for
// merging.
for (i = 0; i < nameNodes.length; i++) {
NameNodeInfo nni = nameNodes[i];
String nameserviceId = nni.nameserviceId;
nni.initGeneralConf(nni.conf, nni.nameserviceId);
nni.updateAvatarConf(nni.conf);
for (int dnIndex = 0; dnIndex < dataNodes.size(); dnIndex++) {
Configuration dstConf = dataNodes.get(dnIndex).conf;
if (i >= nnIndex) {
String dataStr = cluster.dataNodes.get(dnIndex).conf.get("dfs.data.dir");
dstConf.set("dfs.merge.data.dir." + nameserviceId, dataStr);
}
updateAvatarConfWithServiceId(dstConf, nni.conf, nameserviceId);
}
}
// Restart datanodes with the merged nameservice list.
for (DataNodeProperties dn : dataNodes) {
dn.conf.set(FSConstants.DFS_FEDERATION_NAMESERVICES, nameserviceIds);
dn.datanode = instantiateDataNode(dn.dnArgs, dn.conf);
dn.datanode.runDatanodeDaemon();
}
// Restart namenodes: pre-existing ones come up with UPGRADE, appended
// ones with a regular start.
for (i = 0; i < nameNodes.length; i++) {
NameNodeInfo nni = nameNodes[i];
Thread.sleep(2000);
if (i < nnIndex) {
startAvatarNode(nni, StartupOption.UPGRADE);
} else {
startAvatarNode(nni, null);
}
}
waitAvatarNodesActive();
waitDataNodesActive();
waitExitSafeMode();
}
/** Restart all datanodes and wait for them to re-register. */
public synchronized boolean restartDataNodes() throws IOException,
InterruptedException {
return restartDataNodes(true);
}
/**
 * Restart a single datanode in place, reusing its previous port so it
 * keeps the same host:port identity.
 *
 * @param waitActive if true, block until the datanode has re-registered
 *                   with every namenode
 * @param index      index of the datanode to restart
 */
public synchronized void restartDataNode(boolean waitActive, int index)
  throws IOException, InterruptedException {
  this.shutDownDataNode(index);
  DataNodeProperties props = dataNodes.get(index);
  LOG.info("Restart Datanode " + index);
  // Use the same port since dn is identified by host:port.
  int oldPort = props.datanode.getSelfAddr().getPort();
  props.conf.set(FSConstants.DFS_DATANODE_ADDRESS_KEY, "localhost:" + oldPort);
  props.datanode = instantiateDataNode(props.dnArgs, props.conf);
  props.datanode.runDatanodeDaemon();
  if (waitActive) {
    waitDataNodeInitialized(props.datanode);
  }
}
/*
 * Restart all datanodes: full shutdown first, then restart each node
 * in order.
 *
 * @param waitActive if true, block until all datanodes re-register
 * @return always true
 */
public synchronized boolean restartDataNodes(boolean waitActive)
  throws IOException, InterruptedException {
  logStateChange("Restarting avatar datanodes");
  shutDownDataNodes();
  int count = dataNodes.size();
  for (int idx = 0; idx < count; idx++) {
    restartDataNode(waitActive, idx);
  }
  if (waitActive) {
    waitDataNodesActive();
  }
  logStateChange("Restarting avatar datanodes - completed");
  return true;
}
/**
 * Wait until the Datanode has initialized against every namenode's
 * datanode-facing address, polling every 100ms. A null datanode is a
 * no-op.
 * @param dn the datanode to wait for
 * @throws IOException when some ServicePair threads are dead.
 */
public synchronized void waitDataNodeInitialized(AvatarDataNode dn) throws IOException {
if (dn == null) {
return ;
}
boolean initialized = false;
while (!initialized) {
// Assume initialized until one namenode says otherwise.
initialized = true;
for (int i = 0; i<nameNodes.length; i++) {
InetSocketAddress nameNodeAddr = new InetSocketAddress("localhost",
getNameNode(i).avatars.get(0).nnDnPort);
if (!dn.initialized(nameNodeAddr)) {
initialized = false;
break;
}
}
try {
Thread.sleep(100);
} catch (Exception e) {
// NOTE(review): swallows InterruptedException without restoring the
// interrupt flag — intentional best-effort polling, but confirm.
}
}
}
/** Returns the namespace id of the first avatar of the given namenode. */
public int getNamespaceId(int index) {
return this.nameNodes[index].avatars.get(0).avatar.getNamespaceID();
}
/**
 * Returns the next nameservice id counter value (post-increment).
 * NOTE(review): the increment is not atomic; fine if callers are
 * single-threaded — confirm, or switch the counter to AtomicInteger.
 */
static public int getNSId() {
return MiniAvatarCluster.currNSId++;
}
/**
 * Instantiate an AvatarDataNode, retrying up to instantiationRetries
 * times with a one-second pause between attempts. Each attempt gets a
 * fresh copy of the configuration; the last IOException is rethrown if
 * every attempt fails.
 */
public static AvatarDataNode instantiateDataNode(String[] dnArgs,
    Configuration conf) throws IOException {
  IOException lastFailure = null;
  for (int attempt = 0; attempt < instantiationRetries; attempt++) {
    try {
      return AvatarDataNode.instantiateDataNode(dnArgs,
          new Configuration(conf));
    } catch (IOException ioe) {
      lastFailure = ioe;
      LOG.info("Trying to instantiate datanode... ", lastFailure);
    }
    sleep(1000);
  }
  LOG.fatal("Exception when instantiating avatardatanode", lastFailure);
  throw lastFailure;
}
/**
 * Instantiate an AvatarNode, retrying up to instantiationRetries times
 * with a one-second pause between attempts; the last IOException is
 * rethrown if every attempt fails.
 */
public static AvatarNode instantiateAvatarNode(String argv[],
    Configuration conf) throws IOException {
  IOException lastFailure = null;
  for (int attempt = 0; attempt < instantiationRetries; attempt++) {
    try {
      return AvatarNode.createAvatarNode(argv, conf);
    } catch (IOException ioe) {
      lastFailure = ioe;
      LOG.info("Trying to instantiate avatarnode... ", lastFailure);
    }
    sleep(1000);
  }
  LOG.fatal("Exception when instantiating avatarnode", lastFailure);
  throw lastFailure;
}
/**
 * Best-effort recursive delete of the shared avatar base directory;
 * failures are logged and swallowed.
 */
public static void clearAvatarDir() {
  File dir = new File(baseAvatarDir);
  try {
    FileUtil.fullyDelete(dir);
  } catch (Exception e) {
    LOG.warn("Exception when deleting directory " + baseAvatarDir, e);
  }
}
/**
 * Sleep for the given number of milliseconds, converting an interrupt
 * into an IOException so callers treat it as a startup failure.
 *
 * @param time sleep duration in milliseconds
 * @throws IOException if the sleeping thread is interrupted
 */
private static void sleep(long time) throws IOException {
  try {
    Thread.sleep(time);
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can
    // still observe the interruption.
    Thread.currentThread().interrupt();
    LOG.fatal("Thread interrupted");
    // Preserve the original exception as the cause instead of only
    // its string form.
    IOException ioe = new IOException(e.toString());
    ioe.initCause(e);
    throw ioe;
  }
}
/** Log a cluster state transition with a visually distinct marker. */
private static void logStateChange(String msg) {
LOG.info("----- " + msg + " -----");
}
}