/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Proxy;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.net.UnknownHostException;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Collection;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.management.ObjectName;
import javax.security.auth.login.LoginException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.ReconfigurableBase;
import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.conf.ReconfigurationServlet;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HDFSPolicyProvider;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockPathInfo;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.WriteBlockHeader;
import org.apache.hadoop.hdfs.protocol.UnregisteredDatanodeException;
import org.apache.hadoop.hdfs.protocol.ProtocolCompatible;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.datanode.FSDataset.FSVolume;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.FileChecksumServlets;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.StreamFile;
import org.apache.hadoop.hdfs.server.protocol.BlockAlreadyCommittedException;
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
import org.apache.hadoop.hdfs.server.protocol.BlockMetaDataInfo;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.BlockReport;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReceivedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ConfiguredPolicy;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
import org.apache.hadoop.util.PulseChecker;
import org.apache.hadoop.util.PulseCheckable;
import org.apache.hadoop.util.VersionInfo;
import org.mortbay.util.ajax.JSON;
/**********************************************************
* DataNode is a class (and program) that stores a set of
* blocks for a DFS deployment. A single deployment can
* have one or many DataNodes. Each DataNode communicates
* regularly with a single NameNode. It also communicates
* with client code and other DataNodes from time to time.
*
* DataNodes store a series of named blocks. The DataNode
* allows client code to read these blocks, or to write new
* block data. The DataNode may also, in response to instructions
* from its NameNode, delete blocks or copy blocks to/from other
* DataNodes.
*
* The DataNode maintains just one critical table:
* block-> stream of bytes (of BLOCK_SIZE or less)
*
* This info is stored on a local disk. The DataNode
* reports the table's contents to the NameNode upon startup
* and every so often afterwards.
*
* DataNodes spend their lives in an endless loop of asking
* the NameNode for something to do. A NameNode cannot connect
* to a DataNode directly; a NameNode simply returns values from
* functions invoked by a DataNode.
*
* DataNodes maintain an open server socket so that client code
* or other DataNodes can read/write data. The host/port for
* this server is reported to the NameNode, which then sends that
* information to clients or other DataNodes that might be interested.
*
**********************************************************/
public class DataNode extends ReconfigurableBase
implements InterDatanodeProtocol, ClientDatanodeProtocol, FSConstants, PulseCheckable,
DataNodeMXBean {
public static final Log LOG = LogFactory.getLog(DataNode.class);
// Ensure HDFS configuration defaults are loaded before any Configuration
// object created by this class is read.
static{
  Configuration.addDefaultResource("hdfs-default.xml");
  Configuration.addDefaultResource("hdfs-site.xml");
}
public static final String DN_CLIENTTRACE_FORMAT =
"src: %s" + // src IP
", dest: %s" + // dst IP
", bytes: %s" + // byte count
", op: %s" + // operation
", cliID: %s" + // DFSClient id
", offset: %s" + // offset
", srvID: %s" + // DatanodeRegistration
", blockid: %s" + // block id
", duration: %s"; // duration time
public static final Log ClientTraceLog =
LogFactory.getLog(DataNode.class.getName() + ".clienttrace");
/**
 * Resolve a "host:port" target string into an {@link InetSocketAddress}.
 *
 * @deprecated Use {@link NetUtils#createSocketAddr(String)} instead.
 */
@Deprecated
public static InetSocketAddress createSocketAddr(String target
                                                 ) throws IOException {
  InetSocketAddress resolved = NetUtils.createSocketAddr(target);
  return resolved;
}
/** Interface to the on-disk block storage; null until startDataNode() runs. */
public FSDatasetInterface data = null;
//TODO this should be not used at all
private static InetSocketAddress nameNodeAddr;
public static int NAMESPACE_ID = 12345678;
// Main run flag; cleared by shutdown() and by fatal disk errors.
volatile boolean shouldRun = true;
boolean isAlive = false;
// Manages one offer-service thread per configured namenode.
protected NamespaceManager namespaceManager;
/** list of blocks being recovered */
private final Map<Block, Block> ongoingRecovery = new HashMap<Block, Block>();
// Number of block transfers in progress (reported in heartbeats).
AtomicInteger xmitsInProgress = new AtomicInteger();
// Guards against re-entrant shutdown() calls.
AtomicBoolean shuttingDown = new AtomicBoolean(false);
// Guards against concurrent checkDiskError() runs.
AtomicBoolean checkingDisk = new AtomicBoolean(false);
volatile long timeLastCheckDisk = 0;
// Minimum msec between full disk checks; see checkDiskError().
long minDiskCheckIntervalMsec;
Daemon dataXceiverServer = null;
// Thread group holding all data-transfer (xceiver) threads.
ThreadGroup threadGroup = null;
long blockReportInterval;
long deletedReportInterval;
long initialBlockReportDelay = BLOCKREPORT_INITIAL_DELAY * 1000L;
long heartBeatInterval;
DataStorage storage = null;
HttpServer infoServer = null;
DataNodeMetrics myMetrics;
// Address of this datanode's data transfer server; set in initDataXceiver().
protected InetSocketAddress selfAddr;
String machineName;
static String dnThreadName;
int socketTimeout;
int socketReadExtentionTimeout;
int socketWriteTimeout = 0;
int socketWriteExtentionTimeout = 0;
// Whether transferTo() may be used when serving reads.
boolean transferToAllowed = true;
boolean ignoreChecksumWhenRead = false;
int writePacketSize = 0;
boolean syncOnClose;
boolean supportAppends;
long heartbeatExpireInterval;
// heartbeatExpireInterval is how long namenode waits for datanode to report
/**
 * Testing hook that allows tests to delay the sending of blockReceived
 * RPCs to the namenode. This can help find bugs in append.
 */
int artificialBlockReceivedDelay = 0;
public DataBlockScannerSet blockScanner = null;
private static final String CONF_SERVLET_PATH = "/dnconf";
// Fallback RNG when SecureRandom is unavailable; see createNewStorageId().
private static final Random R = new Random();
// For InterDataNodeProtocol
public Server ipcServer;
// Pool used for inter-datanode block copy work.
private final ExecutorService blockCopyExecutor;
public static final int BLOCK_COPY_THREAD_POOL_SIZE = 10;
// Seconds to wait on blocking block-copy RPCs; set from conf in the ctor.
private final int blockCopyRPCWaitTime;
AbstractList<File> dataDirs;
Configuration conf;
private PulseChecker pulseChecker;
/**
 * Current system (wall-clock) time.  Single indirection point so callers
 * share one notion of "now".
 * @return current time in msec.
 */
static long now() {
  return System.currentTimeMillis();
}
/**
 * Create the DataNode given a configuration and an array of dataDirs.
 * 'dataDirs' is where the blocks are stored.
 *
 * @param conf datanode configuration
 * @param dataDirs local directories that hold block data
 * @throws IOException if startup fails; partially-acquired resources are
 *         released via shutdown() before rethrowing
 */
DataNode(Configuration conf,
         AbstractList<File> dataDirs) throws IOException {
  super(conf);
  supportAppends = conf.getBoolean("dfs.support.append", false);
  // TODO(pritam): Integrate this into a threadpool for all operations of the
  // datanode.
  blockCopyExecutor = Executors.newCachedThreadPool();
  // Time that the blocking version of RPC for copying block between
  // datanodes should wait for. Default is 5 minutes.
  blockCopyRPCWaitTime = conf.getInt("dfs.datanode.blkcopy.wait_time",
      5 * 60);
  try {
    startDataNode(this.getConf(), dataDirs);
  } catch (IOException ie) {
    // Startup failure is fatal for this instance: log at ERROR with the
    // full stack trace (was INFO + stringified trace), clean up, rethrow.
    LOG.error("Failed to start datanode", ie);
    shutdown();
    throw ie;
  }
}
/**
 * Initialize global settings for DN: configuration fields, MXBean, data
 * transfer server, HTTP info server, IPC server, and metrics.
 *
 * @param conf configuration to read settings from
 * @param dataDirs local block storage directories (retained in a field)
 * @throws IOException if any server fails to bind or start
 */
protected void initGlobalSetting(Configuration conf,
    AbstractList<File> dataDirs) throws IOException {
  this.dataDirs = dataDirs;
  this.conf = conf;
  storage = new DataStorage(this);
  // global DN settings
  initConfig(conf);
  registerMXBean();
  initDataXceiver(conf);
  startInfoServer(conf);
  initIpcServer(conf);
  // Metrics are created last: they need the storage ID from `storage`.
  myMetrics = new DataNodeMetrics(conf, storage.getStorageID());
}
/**
 * Initialize dataset and block scanner
 *
 * @param conf Configuration
 * @param dataDirs data directories
 * @param numOfNameSpaces number of name spaces
 * @throws IOException
 */
protected void initDataSetAndScanner(Configuration conf,
    AbstractList<File> dataDirs, int numOfNameSpaces) throws IOException {
  // The dataset must exist before the scanner that verifies its blocks.
  initFsDataSet(conf, dataDirs, numOfNameSpaces);
  initDataBlockScanner(conf);
}
/**
 * This method starts the data node with the specified conf.
 *
 * @param conf - the configuration
 * if conf's CONFIG_PROPERTY_SIMULATED property is set
 * then a simulated storage based data node is created.
 *
 * @param dataDirs - only for a non-simulated storage data node
 * @throws IOException
 */
void startDataNode(Configuration conf,
    AbstractList<File> dataDirs
    ) throws IOException {
  initGlobalSetting(conf, dataDirs);
  /* Initialize namespace manager */
  List<InetSocketAddress> nameNodeAddrs = DFSUtil.getNNServiceRpcAddresses(conf);
  //TODO this will be no longer valid, since we will have multiple namenodes
  // We might want to keep it and assign the first NN to it.
  // Legacy single-NN field: arbitrarily bound to the first configured NN.
  DataNode.nameNodeAddr = nameNodeAddrs.get(0);
  namespaceManager = new NamespaceManager(conf, nameNodeAddrs);
  initDataSetAndScanner(conf, dataDirs, nameNodeAddrs.size());
}
/**
 * Read datanode-level settings from conf into fields, and install the
 * service-level authorization policy if enabled.
 *
 * @param conf configuration to read
 * @throws IOException if the local hostname cannot be resolved via DNS
 */
private void initConfig(Configuration conf) throws IOException {
  if (conf.get("slave.host.name") != null) {
    machineName = conf.get("slave.host.name");
  }
  if (machineName == null) {
    // No explicit override: resolve via DNS on the configured interface.
    machineName = DNS.getDefaultHost(
        conf.get("dfs.datanode.dns.interface","default"),
        conf.get("dfs.datanode.dns.nameserver","default"));
  }
  // Allow configuration to delay block reports to find bugs
  artificialBlockReceivedDelay = conf.getInt(
      "dfs.datanode.artificialBlockReceivedDelay", 0);
  if (conf.getBoolean(
      ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
    PolicyProvider policyProvider = (PolicyProvider) (ReflectionUtils
        .newInstance(conf.getClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
            HDFSPolicyProvider.class, PolicyProvider.class), conf));
    SecurityUtil.setPolicy(new ConfiguredPolicy(conf, policyProvider));
  }
  this.socketTimeout = conf.getInt("dfs.socket.timeout",
      HdfsConstants.READ_TIMEOUT);
  this.socketReadExtentionTimeout = conf.getInt(
      HdfsConstants.DFS_DATANODE_READ_EXTENSION,
      HdfsConstants.READ_TIMEOUT_EXTENSION);
  this.socketWriteTimeout = conf.getInt("dfs.datanode.socket.write.timeout",
      HdfsConstants.WRITE_TIMEOUT);
  this.socketWriteExtentionTimeout = conf.getInt(
      HdfsConstants.DFS_DATANODE_WRITE_EXTENTSION,
      HdfsConstants.WRITE_TIMEOUT_EXTENSION);
  /* Based on results on different platforms, we might need set the default
   * to false on some of them. */
  this.transferToAllowed = conf.getBoolean("dfs.datanode.transferTo.allowed",
      true);
  // TODO: remove the global setting and change data protocol to support
  // per session setting for this value.
  this.ignoreChecksumWhenRead = conf.getBoolean("dfs.datanode.read.ignore.checksum",
      false);
  this.writePacketSize = conf.getInt("dfs.write.packet.size", 64*1024);
  this.deletedReportInterval =
      conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
  // Calculate the full block report interval
  int fullReportMagnifier = conf.getInt("dfs.fullblockreport.magnifier", 2);
  this.blockReportInterval = fullReportMagnifier * deletedReportInterval;
  this.heartBeatInterval = conf.getLong("dfs.heartbeat.interval", HEARTBEAT_INTERVAL) * 1000L;
  long heartbeatRecheckInterval = conf.getInt(
      "heartbeat.recheck.interval", 5 * 60 * 1000); // 5 minutes
  this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
      10 * heartBeatInterval;
  this.initialBlockReportDelay = conf.getLong("dfs.blockreport.initialDelay",
      BLOCKREPORT_INITIAL_DELAY) * 1000L;
  if (this.initialBlockReportDelay >= blockReportInterval) {
    this.initialBlockReportDelay = 0;
    LOG.info("dfs.blockreport.initialDelay is greater than "
        + "dfs.blockreport.intervalMsec."
        + " Setting initial delay to 0 msec:");
  }
  // do we need to sync block file contents to disk when blockfile is closed?
  this.syncOnClose = conf.getBoolean("dfs.datanode.synconclose", false);
  // NOTE(review): key below is spelled "datnode" (sic); existing deployments
  // may rely on the typo'd name, so it must not be "fixed" casually.
  this.minDiskCheckIntervalMsec = conf.getLong(
      "dfs.datnode.checkdisk.mininterval",
      FSConstants.MIN_INTERVAL_CHECK_DIR_MSEC);
}
/**
 * Used only for testing.
 *
 * @param name the new name for datanode registration.
 */
public void setRegistrationName(String name) {
  // Propagate the override to every per-namespace offer service.
  for (NamespaceService service : namespaceManager.getAllNamenodeThreads()) {
    ((NSOfferService) service).setRegistrationName(name);
  }
}
/**
 * Bind the data-transfer server socket and create (but do not start) the
 * DataXceiverServer daemon that accepts block read/write connections.
 *
 * @param conf supplies the bind address and listen queue size
 * @throws IOException if the server socket cannot be bound
 */
private void initDataXceiver(Configuration conf) throws IOException {
  String address =
      NetUtils.getServerAddress(conf,
          "dfs.datanode.bindAddress",
          "dfs.datanode.port",
          "dfs.datanode.address");
  InetSocketAddress socAddr = NetUtils.createSocketAddr(address);
  // find free port
  // A channel-backed socket is required when write timeouts are enabled.
  ServerSocket ss = (socketWriteTimeout > 0) ?
      ServerSocketChannel.open().socket() : new ServerSocket();
  Server.bind(ss, socAddr,
      conf.getInt("dfs.datanode.xceiver.listen.queue.size", 128));
  ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
  // adjust machine name with the actual port
  int tmpPort = ss.getLocalPort();
  selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(),
      tmpPort);
  LOG.info("Opened info server at " + tmpPort);
  this.threadGroup = new ThreadGroup("dataXceiverServer");
  this.dataXceiverServer = new Daemon(threadGroup,
      new DataXceiverServer(ss, conf, this));
  this.threadGroup.setDaemon(true); // auto destroy when empty
}
/**
 * Start the embedded HTTP(S) info server, registering the stream-file,
 * file-checksum, block-scanner-report and reconfiguration servlets.
 *
 * @param conf supplies bind addresses and SSL settings
 * @throws IOException if the server cannot be started
 */
private void startInfoServer(Configuration conf) throws IOException {
  String infoAddr =
      NetUtils.getServerAddress(conf,
          "dfs.datanode.info.bindAddress",
          "dfs.datanode.info.port",
          "dfs.datanode.http.address");
  InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
  String infoHost = infoSocAddr.getHostName();
  int tmpInfoPort = infoSocAddr.getPort();
  // Port 0 means "pick any free port".
  this.infoServer = new HttpServer("datanode", infoHost, tmpInfoPort,
      tmpInfoPort == 0, conf);
  if (conf.getBoolean("dfs.https.enable", false)) {
    boolean needClientAuth = conf.getBoolean("dfs.https.need.client.auth", false);
    InetSocketAddress secInfoSocAddr = NetUtils.createSocketAddr(conf.get(
        "dfs.datanode.https.address", infoHost + ":" + 0));
    // SSL keystore settings come from a separate resource file.
    Configuration sslConf = new Configuration(false);
    sslConf.addResource(conf.get("dfs.https.server.keystore.resource",
        "ssl-server.xml"));
    this.infoServer.addSslListener(secInfoSocAddr, sslConf, needClientAuth);
    // assume same ssl port for all datanodes
    InetSocketAddress datanodeSslPort = NetUtils.createSocketAddr(conf.get(
        "dfs.datanode.https.address", infoHost + ":" + 50475));
    this.infoServer.setAttribute("datanode.https.port", datanodeSslPort
        .getPort());
  }
  this.infoServer.addInternalServlet(null, "/streamFile/*", StreamFile.class);
  this.infoServer.addInternalServlet(null, "/getFileChecksum/*",
      FileChecksumServlets.GetServlet.class);
  this.infoServer.setAttribute("datanode", this);
  this.infoServer.addServlet(null, "/blockScannerReport",
      DataBlockScannerSet.Servlet.class);
  // Expose this DataNode for online reconfiguration at /dnconf.
  this.infoServer.setAttribute(ReconfigurationServlet.CONF_SERVLET_RECONFIGURABLE_PREFIX +
      CONF_SERVLET_PATH, DataNode.this);
  this.infoServer.addServlet("dnConf", CONF_SERVLET_PATH, ReconfigurationServlet.class);
  this.infoServer.start();
}
/**
 * Start the RPC server serving inter-datanode and client-datanode calls.
 *
 * @param conf supplies the IPC bind address and handler count
 * @throws IOException if the server cannot be created or bound
 */
private void initIpcServer(Configuration conf) throws IOException {
  //init ipc server
  String configuredAddr = conf.get("dfs.datanode.ipc.address");
  InetSocketAddress ipcAddr = NetUtils.createSocketAddr(configuredAddr);
  int handlerCount = conf.getInt("dfs.datanode.handler.count", 3);
  ipcServer = RPC.getServer(this, ipcAddr.getHostName(), ipcAddr.getPort(),
      handlerCount, false, conf);
  ipcServer.start();
}
/**
 * Creates either NIO or regular depending on socketWriteTimeout.
 */
protected Socket newSocket() throws IOException {
  if (socketWriteTimeout > 0) {
    // Channel-backed socket so writes can honor the timeout.
    return SocketChannel.open().socket();
  }
  return new Socket();
}
/** @return true if append support is enabled via "dfs.support.append". */
public boolean isSupportAppends() {
  return supportAppends;
}
/**
 * Create an RPC proxy for another datanode's {@link InterDatanodeProtocol}
 * endpoint.
 *
 * @param datanodeid identifies the remote datanode (host + IPC port)
 * @param conf used for login and socket factory selection
 * @param socketTimeout RPC socket timeout in msec
 * @return proxy bound to the remote datanode's IPC address
 * @throws IOException if the proxy cannot be created
 */
public static InterDatanodeProtocol createInterDataNodeProtocolProxy(
    DatanodeID datanodeid, Configuration conf, final int socketTimeout)
    throws IOException {
  InetSocketAddress addr = NetUtils.createSocketAddr(
      datanodeid.getHost() + ":" + datanodeid.getIpcPort());
  if (InterDatanodeProtocol.LOG.isDebugEnabled()) {
    // Log at debug to match the guard (previously logged at info, which
    // defeated the isDebugEnabled() check).
    InterDatanodeProtocol.LOG.debug("InterDatanodeProtocol addr=" + addr);
  }
  UserGroupInformation ugi;
  try {
    ugi = UserGroupInformation.login(conf);
  } catch (LoginException le) {
    // Preserve the original cause instead of discarding it.
    throw new RuntimeException("Couldn't login!", le);
  }
  return (InterDatanodeProtocol)RPC.getProxy(InterDatanodeProtocol.class,
      InterDatanodeProtocol.versionID, addr,
      ugi, conf,
      NetUtils.getDefaultSocketFactory(conf), socketTimeout);
}
/**
 * This method returns the address namenode uses to communicate with
 * datanodes. If this address is not configured the default NameNode
 * address is used, as it is running only one RPC server.
 * If it is running multiple servers this address cannot be used by clients!!
 * @param conf configuration to read the address from
 * @return the datanode-facing namenode address
 */
public static InetSocketAddress getNameNodeAddress(Configuration conf) {
  InetSocketAddress dnProtocolAddr = NameNode.getDNProtocolAddress(conf);
  // Fall back to the general client address when no DN-specific one is set.
  return (dnProtocolAddr != null) ? dnProtocolAddr : NameNode.getAddress(conf);
}
//TODO this should not be there -> it affects StreamFile class
/**
 * @return the static legacy namenode address (set in startDataNode() to the
 *         first configured namenode); of limited meaning with federation.
 */
public InetSocketAddress getNameNodeAddr() {
  return nameNodeAddr;
}
/**
 * Get namenode proxy corresponding to a namespace.
 * @param namespaceId namespace to look up
 * @return Namenode proxy corresponding to the namespace
 * @throws IOException if no namenode proxy is registered for the namespace
 */
public DatanodeProtocol getNSNamenode(int namespaceId) throws IOException {
  NamespaceService nsos = namespaceManager.get(namespaceId);
  if(nsos == null || nsos.getDatanodeProtocol() == null) {
    // Fixed typo in the error message ("namnode" -> "namenode").
    throw new IOException("cannot find a namenode proxy for namespaceId=" + namespaceId);
  }
  return nsos.getDatanodeProtocol();
}
/** @return the address the data transfer server is bound to. */
public InetSocketAddress getSelfAddr() {
  return selfAddr;
}
/** @return the data transfer port this datanode listens on. */
public int getPort() {
  return selfAddr.getPort();
}
/** @return this datanode's metrics object. */
DataNodeMetrics getMetrics() {
  return myMetrics;
}
/**
 * get datanode registration by namespace id
 * @param namespaceId namespace to look up
 * @return datanode registration object
 * @throws IOException if no offer service is registered for the namespace
 */
public DatanodeRegistration getDNRegistrationForNS(int namespaceId)
    throws IOException {
  NamespaceService service = namespaceManager.get(namespaceId);
  boolean missing = (service == null) || (service.getNsRegistration() == null);
  if (missing) {
    throw new IOException("cannot find NSOfferService for namespaceId="+namespaceId);
  }
  return service.getNsRegistration();
}
/**
 * Return the namenode's identifier.
 *
 * @return a fixed placeholder string; with multiple namenodes there is no
 *         single identifier to report here.
 */
public String getNamenode() {
  return "<namenode>";
}
/**
 * Replace the registration's storage ID with a freshly generated one.
 * @param dnReg registration to update (mutated in place)
 */
public static void setNewStorageID(DatanodeRegistration dnReg) {
  LOG.info("Datanode is " + dnReg);
  dnReg.storageID = createNewStorageId(dnReg.getPort());
}
/**
 * Generate a new storage ID of the form
 * "DS-randInt-ipaddr-port-currentTimeMillis".
 *
 * @param port the datanode's data transfer port, included in the ID
 * @return a freshly generated storage ID string
 */
public static String createNewStorageId(int port) {
  /* It is considered extremely rare for all these numbers to match
   * on a different machine accidentally for the following reasons:
   * a) SecureRandom(INT_MAX) is pretty much random (1 in 2 billion), and
   * b) Good chance ip address would be different, and
   * c) Even on the same machine, Datanode is designed to use different ports.
   * d) Good chance that these are started at different times.
   * For a conflict to occur all the 4 above have to match!
   * The format of this string can be changed anytime in future without
   * affecting its functionality.
   */
  String ip = "unknownIP";
  try {
    ip = DNS.getDefaultIP("default");
  } catch (UnknownHostException ignored) {
    LOG.warn("Could not find ip address of \"default\" interface.");
  }
  int rand = 0;
  try {
    rand = SecureRandom.getInstance("SHA1PRNG").nextInt(Integer.MAX_VALUE);
  } catch (NoSuchAlgorithmException e) {
    // Include the cause so the misconfiguration is diagnosable; fall back
    // to the (weaker) shared Random instance.
    LOG.warn("Could not use SecureRandom", e);
    rand = R.nextInt(Integer.MAX_VALUE);
  }
  return "DS-" + rand + "-"+ ip + "-" + port + "-" +
      System.currentTimeMillis();
}
/**
 * Shut down this instance of the datanode.
 * Returns only after shutdown is complete.
 * This method can only be called by the offerService thread.
 * Otherwise, deadlock might occur.
 */
public void shutdown() {
  // Re-entrancy guard: only the first caller performs the teardown.
  if (this.shuttingDown.getAndSet(true)) {
    // Already being shut down
    LOG.warn("DataNode.shutdown() was called while shutting down.");
    return;
  }
  if (infoServer != null) {
    try {
      infoServer.stop();
    } catch (Exception e) {
      LOG.warn("Exception shutting down DataNode", e);
    }
  }
  if (ipcServer != null) {
    ipcServer.stop();
  }
  this.shouldRun = false;
  if (dataXceiverServer != null) {
    ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
    this.dataXceiverServer.interrupt();
    // wait for all data receiver threads to exit
    if (this.threadGroup != null) {
      int retries = 0;
      while (true) {
        // Re-interrupt each pass: new xceiver threads may have appeared
        // since the previous interrupt.
        this.threadGroup.interrupt();
        LOG.info("Waiting for threadgroup to exit, active threads is " +
            this.threadGroup.activeCount());
        if (this.threadGroup.activeCount() == 0) {
          break;
        }
        try {
          // Give up after ~10 minutes (600 x 1s sleeps) and proceed.
          if (++retries > 600) {
            Thread[] activeThreads = new Thread[this.threadGroup.activeCount()];
            this.threadGroup.enumerate(activeThreads, true);
            LOG.info("Active Threads: " + Arrays.toString(activeThreads));
            LOG.warn("Waited for ThreadGroup to be empty for 10 minutes." +
                " SHUTTING DOWN NOW");
            break;
          }
          Thread.sleep(1000);
        } catch (InterruptedException e) {}
      }
    }
    // wait for dataXceiveServer to terminate
    try {
      this.dataXceiverServer.join();
    } catch (InterruptedException ie) {
    }
  }
  if (blockCopyExecutor != null && !blockCopyExecutor.isShutdown()) {
    blockCopyExecutor.shutdownNow();
  }
  if (namespaceManager != null) {
    namespaceManager.shutDownAll();
  }
  if (blockScanner != null) {
    blockScanner.shutdown();
  }
  if (storage != null) {
    try {
      // Release the file locks held on every storage directory.
      this.storage.unlockAll();
    } catch (IOException ie) {
    }
  }
  if (data != null) {
    data.shutdown();
  }
  if (myMetrics != null) {
    myMetrics.shutdown();
  }
  this.shutdownMXBean();
}
/** Check if there is no space in disk
 * @param e that caused this checkDiskError call
 * @throws DiskOutOfSpaceException if the exception indicates ENOSPC
 **/
protected void checkDiskError(Exception e ) throws IOException {
  // Interruption-related I/O failures are not disk errors; ignore them.
  if (e instanceof ClosedByInterruptException
      || e instanceof java.io.InterruptedIOException) {
    return;
  }
  LOG.warn("checkDiskError: exception: ", e);
  // Out-of-space is reported directly; scanning the dirs would not help.
  if (e.getMessage() != null &&
      e.getMessage().startsWith("No space left on device")) {
    throw new DiskOutOfSpaceException("No space left on device");
  } else {
    checkDiskError();
  }
}
/**
 * Check if there is a disk failure and if so, handle the error.
 *
 * At most one check runs at a time, and checks are rate-limited to at most
 * one per minDiskCheckIntervalMsec.
 *
 * @throws IOException propagated from disk-error handling
 **/
protected void checkDiskError( ) throws IOException{
  // We disallow concurrent disk checks as it doesn't help
  // but can significantly impact performance and reliability of
  // the system.
  //
  boolean setSuccess = checkingDisk.compareAndSet(false, true);
  if (!setSuccess) {
    LOG.info("checkDiskError is already running.");
    return;
  }
  try {
    // We don't check disks if it's not long since last check.
    //
    long curTime = System.currentTimeMillis();
    if (curTime - timeLastCheckDisk < minDiskCheckIntervalMsec) {
      // Clearer message (was: "checkDiskError finished within ... mses.")
      LOG.info("checkDiskError already ran within the last "
          + minDiskCheckIntervalMsec + " msec. Skipping this check.");
      return;
    }
    data.checkDataDir();
    timeLastCheckDisk = System.currentTimeMillis();
  } catch(DiskErrorException de) {
    handleDiskError(de.getMessage());
  } finally {
    // Always clear the flag so future checks are not blocked.
    checkingDisk.set(false);
  }
}
/**
 * React to a failed data directory: report the error to the namenode(s)
 * and, when no usable volumes remain, stop the datanode.
 *
 * NOTE(review): when enough volumes remain, this returns from inside the
 * loop after reporting to only the first namespace (block reports are
 * still scheduled for all) — looks intentional, but confirm.
 *
 * @param errMsgr human-readable description of the disk error
 * @throws IOException if a namenode proxy cannot be obtained
 */
private void handleDiskError(String errMsgr) throws IOException{
  boolean hasEnoughResource = data.hasEnoughResource();
  myMetrics.volumeFailures.inc();
  for(Integer namespaceId : namespaceManager.getAllNamespaces()){
    DatanodeProtocol nn = getNSNamenode(namespaceId);
    LOG.warn("DataNode.handleDiskError: Keep Running: " + hasEnoughResource);
    //if hasEnoughtResource = true - more volumes are available, so we don't want
    // to shutdown DN completely and don't want NN to remove it.
    int dp_error = DatanodeProtocol.DISK_ERROR;
    if(hasEnoughResource == false) {
      // DN will be shutdown and NN should remove it
      dp_error = DatanodeProtocol.FATAL_DISK_ERROR;
    }
    //inform NameNode
    try {
      nn.errorReport(getDNRegistrationForNS(namespaceId), dp_error, errMsgr);
    } catch(IOException ignored) {
      // best-effort notification; the namenode may be unreachable
    }
    if(hasEnoughResource) {
      for (NamespaceService nsos : namespaceManager.getAllNamenodeThreads()) {
        nsos.scheduleBlockReport(0);
      }
      return; // do not shutdown
    }
  }
  LOG.warn("DataNode is shutting down.\n" + errMsgr);
  shouldRun = false;
}
/**
 * Hot-add data volumes described by {@code confVolumes} ("dfs.data.dir"
 * syntax) without restarting the datanode.  Directories already in service
 * are left alone; serving directories absent from the new configuration are
 * only logged (removal is not supported in this version).
 *
 * @param confVolumes comma-separated list of data directories
 * @throws UnsupportedOperationException if the dataset is not an FSDataset
 * @throws Exception if a storage transition or namenode RPC fails
 */
private void refreshVolumes(String confVolumes) throws Exception {
  if( !(data instanceof FSDataset)) {
    throw new UnsupportedOperationException("Only FSDataset support refresh volumes operation");
  }
  // Dirs described by conf file
  Configuration conf = getConf();
  //temporary set dfs.data.dir for get storageDirs
  String oldVolumes = conf.get("dfs.data.dir");
  conf.set("dfs.data.dir", confVolumes);
  Collection<URI> dataDirs = getStorageDirs(conf);
  conf.set("dfs.data.dir", oldVolumes);
  ArrayList<File> newDirs = getDataDirsFromURIs(dataDirs);
  ArrayList<File> decomDirs = new ArrayList<File>();
  for (Iterator<StorageDirectory> storageIter = this.storage.dirIterator();
      storageIter.hasNext();) {
    StorageDirectory dir = storageIter.next();
    // Delete volumes not in service from DataStorage
    if (!((FSDataset)data).isValidVolume(dir.getCurrentDir())) {
      LOG.info("This dir is listed in conf, but not in service " + dir.getRoot());
      storageIter.remove();
      continue;
    }
    if (newDirs.contains(dir.getRoot())){
      // remove the dir already in-service in newDirs list
      LOG.info("This conf dir has already been in service " + dir.getRoot());
      newDirs.remove(dir.getRoot());
    } else {
      // add the dirs not described in conf files to decomDirs
      LOG.warn("The configuration does not contain serving dir " +
          dir.getRoot() + ", but we cannot remove it from serving volumes in current version." );
      decomDirs.add(dir.getRoot());
    }
  }
  if (newDirs.isEmpty()){
    LOG.info("All the configured dir is in service, and do not need refreshment.");
    return;
  }
  // Bring the genuinely-new directories online for every namespace.
  for (int namespaceId: namespaceManager.getAllNamespaces()) {
    // Load new volumes via DataStorage
    NamespaceInfo nsInfo = getNSNamenode(namespaceId).versionRequest();
    String nameserviceId = this.namespaceManager.get(namespaceId).getNameserviceId();
    Collection<StorageDirectory> newStorageDirectories =
        storage.recoverTransitionAdditionalRead(nsInfo, newDirs, getStartupOption(conf));
    storage.recoverTransitionRead(this, namespaceId, nsInfo, newDirs,
        getStartupOption(conf), nameserviceId);
    // add new volumes in FSDataSet
    ((FSDataset)data).addVolumes(conf, namespaceId,
        storage.getNameSpaceDataDir(namespaceId), newStorageDirectories);
  }
}
/** Number of concurrent xceivers per node. */
int getXceiverCount() {
  if (threadGroup == null) {
    return 0;
  }
  return threadGroup.activeCount();
}
/** Parse "dfs.data.dir" from conf into a collection of storage URIs. */
static Collection<URI> getStorageDirs(Configuration conf) {
  return Util.stringCollectionAsURIs(
      conf.getStringCollection("dfs.data.dir"));
}
/**
 * Convert file:// URIs into local directories, skipping URIs with
 * unsupported schemes and directories that fail the disk check.
 *
 * @param dataDirs candidate storage URIs
 * @return the usable local directories
 */
static ArrayList<File> getDataDirsFromURIs(Collection<URI> dataDirs) {
  ArrayList<File> usable = new ArrayList<File>();
  for (URI dirURI : dataDirs) {
    String scheme = dirURI.getScheme();
    if (!"file".equalsIgnoreCase(scheme)) {
      LOG.warn("Unsupported URI schema in " + dirURI + ". Ignoring ...");
      continue;
    }
    // drop any (illegal) authority in the URI for backwards compatibility
    File candidate = new File(dirURI.getPath());
    try {
      DiskChecker.checkDir(candidate);
      usable.add(candidate);
    } catch (IOException e) {
      LOG.warn("Invalid directory in dfs.data.dir: "
          + e.getMessage());
    }
  }
  return usable;
}
/**
* A thread per namenode to perform:
* <ul>
* <li> Pre-registration handshake with namenode</li>
* <li> Registration with namenode</li>
* <li> Send periodic heartbeats to the namenode</li>
* <li> Handle commands received from the datanode</li>
* </ul>
*/
class NSOfferService extends NamespaceService {
final InetSocketAddress nnAddr;
DatanodeRegistration nsRegistration;
NamespaceInfo nsInfo;
long lastBlockReport = 0;
private Thread nsThread;
private DatanodeProtocol nsNamenode;
int namespaceId;
String nameserviceId;
private long lastHeartbeat = 0;
private long lastDeletedReport = 0;
boolean resetBlockReportTime = true;
private volatile boolean initialized = false;
private final LinkedList<Block> receivedAndDeletedBlockList
= new LinkedList<Block>();
private int pendingReceivedRequests = 0;
private volatile boolean shouldServiceRun = true;
UpgradeManagerDatanode upgradeManager = null;
private ScheduledFuture keepAliveRun = null;
private ScheduledExecutorService keepAliveSender = null;
private boolean firstBlockReportSent = false;
volatile long lastBeingAlive = now();
/**
 * @param isa address of the namenode this offer service talks to
 * @param nameserviceId logical name-service id for that namenode
 */
NSOfferService(InetSocketAddress isa, String nameserviceId) {
  this.nsRegistration = new DatanodeRegistration(getMachineName());
  this.nnAddr = isa;
  this.nameserviceId = nameserviceId;
}
public DatanodeProtocol getDatanodeProtocol() {
return nsNamenode;
}
/**
* Used only for testing.
*
* @param name
* the new registration name for the datanode
*/
public void setRegistrationName(String name) {
this.nsRegistration.setName(name);
}
/**
* Main loop for each NS thread. Run until shutdown,
* forever calling remote NameNode functions.
*/
private void offerService() throws Exception {
LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" +
" Initial delay: " + initialBlockReportDelay + "msec");
LOG.info("using DELETEREPORT_INTERVAL of " + deletedReportInterval + "msec");
LOG.info("using HEARTBEAT_INTERVAL of " + heartBeatInterval + "msec");
LOG.info("using HEARTBEAT_EXPIRE_INTERVAL of " + heartbeatExpireInterval + "msec");
//
// Now loop for a long time....
//
while (shouldRun && shouldServiceRun) {
try {
long startTime = now();
//
// Every so often, send heartbeat or block-report
//
if (startTime - lastHeartbeat > heartBeatInterval) {
//
// All heartbeat messages include following info:
// -- Datanode name
// -- data transfer port
// -- Total capacity
// -- Bytes remaining
//
lastHeartbeat = startTime;
DatanodeCommand[] cmds = nsNamenode.sendHeartbeat(nsRegistration,
data.getCapacity(),
data.getDfsUsed(),
data.getRemaining(),
data.getNSUsed(namespaceId),
xmitsInProgress.get(),
getXceiverCount());
this.lastBeingAlive = now();
LOG.debug("Sent heartbeat at " + this.lastBeingAlive);
myMetrics.heartbeats.inc(now() - startTime);
//LOG.info("Just sent heartbeat, with name " + localName);
if (!processCommand(cmds))
continue;
}
// check if there are newly received blocks (pendingReceivedRequeste > 0
// or if the deletedReportInterval passed.
if (firstBlockReportSent && (pendingReceivedRequests > 0
|| (startTime - lastDeletedReport > deletedReportInterval))) {
Block[] receivedAndDeletedBlockArray = null;
int currentReceivedRequestsCounter = pendingReceivedRequests;
synchronized (receivedAndDeletedBlockList) {
lastDeletedReport = startTime;
int numBlocksReceivedAndDeleted = receivedAndDeletedBlockList
.size();
if (numBlocksReceivedAndDeleted > 0) {
receivedAndDeletedBlockArray = receivedAndDeletedBlockList
.toArray(new Block[numBlocksReceivedAndDeleted]);
}
}
if (receivedAndDeletedBlockArray != null) {
long rpcStartTime = 0;
if (LOG.isDebugEnabled()) {
rpcStartTime = System.nanoTime();
LOG.debug("sending blockReceivedAndDeleted "
+ receivedAndDeletedBlockArray.length + " blocks to " + nnAddr);
}
nsNamenode.blockReceivedAndDeleted(nsRegistration, receivedAndDeletedBlockArray);
if (LOG.isDebugEnabled()) {
LOG.debug("finshed blockReceivedAndDeleted to " + nnAddr
+ " time: " + (System.nanoTime() - rpcStartTime) + " ns");
}
synchronized (receivedAndDeletedBlockList) {
for (int i = 0; i < receivedAndDeletedBlockArray.length; i++) {
receivedAndDeletedBlockList
.remove(receivedAndDeletedBlockArray[i]);
}
pendingReceivedRequests-=currentReceivedRequestsCounter;
}
}
}
// send block report
if (startTime - lastBlockReport > blockReportInterval) {
//
// Send latest blockinfo report if timer has expired.
// Get back a list of local block(s) that are obsolete
// and can be safely GC'ed.
//
long brStartTime = now();
Block[] bReport = data.getBlockReport(namespaceId);
DatanodeCommand cmd = nsNamenode.blockReport(nsRegistration,
new BlockReport(BlockListAsLongs.convertToArrayLongs(bReport)));
firstBlockReportSent = true;
long brTime = now() - brStartTime;
myMetrics.blockReports.inc(brTime);
LOG.info("BlockReport of " + bReport.length +
" blocks got processed in " + brTime + " msecs");
//
// If we have sent the first block report, then wait a random
// time before we start the periodic block reports.
//
if (resetBlockReportTime) {
lastBlockReport = startTime - R.nextInt((int)(blockReportInterval));
resetBlockReportTime = false;
} else {
/* say the last block report was at 8:20:14. The current report
* should have started around 9:20:14 (default 1 hour interval).
* If current time is :
* 1) normal like 9:20:18, next report should be at 10:20:14
* 2) unexpected like 11:35:43, next report should be at 12:20:14
*/
lastBlockReport += (now() - lastBlockReport) /
blockReportInterval * blockReportInterval;
}
processCommand(cmd);
}
//
// There is no work to do; sleep until hearbeat timer elapses,
// or work arrives, and then iterate again.
//
long waitTime = heartBeatInterval - (System.currentTimeMillis() - lastHeartbeat);
synchronized(receivedAndDeletedBlockList) {
if (waitTime > 0 && pendingReceivedRequests == 0) {
try {
receivedAndDeletedBlockList.wait(waitTime);
} catch (InterruptedException ie) {
}
delayBeforeBlockReceived();
}
} // synchronized
} catch(RemoteException re) {
String reClass = re.getClassName();
if (UnregisteredDatanodeException.class.getName().equals(reClass) ||
DisallowedDatanodeException.class.getName().equals(reClass) ||
IncorrectVersionException.class.getName().equals(reClass)) {
LOG.warn("DataNode is shutting down: " +
StringUtils.stringifyException(re));
shouldRun = false;
shutdown();
return;
}
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
// NOTE: common case should be doing this instead of ignoring ie
Thread.currentThread().interrupt();
}
LOG.warn(StringUtils.stringifyException(re));
} catch (IOException e) {
LOG.warn(StringUtils.stringifyException(e));
}
} // while (shouldRun)
} // offerService
/**
* When a block has been received, we can delay some period of time before
* reporting it to the DN, for the purpose of testing. This simulates
* the actual latency of blockReceived on a real network (where the client
* may be closer to the NN than the DNs).
*/
private void delayBeforeBlockReceived() {
if (artificialBlockReceivedDelay > 0 && !receivedAndDeletedBlockList.isEmpty()) {
try {
long sleepFor = (long)R.nextInt(artificialBlockReceivedDelay);
LOG.debug("DataNode " + nsRegistration + " sleeping for " +
"artificial delay: " + sleepFor + " ms");
Thread.sleep(sleepFor);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
}
}
/**
* Process an array of datanode commands
*
* @param cmds an array of datanode commands
* @return true if further processing may be required or false otherwise.
*/
private boolean processCommand(DatanodeCommand[] cmds) {
if (cmds != null) {
for (DatanodeCommand cmd : cmds) {
try {
if (processCommand(cmd) == false) {
return false;
}
} catch (IOException ioe) {
LOG.warn("Error processing datanode Command", ioe);
}
}
}
return true;
}
/**
*
* @param cmd
* @return true if further processing may be required or false otherwise.
* @throws IOException
*/
private boolean processCommand(DatanodeCommand cmd) throws IOException {
if (cmd == null)
return true;
final BlockCommand bcmd = cmd instanceof BlockCommand? (BlockCommand)cmd: null;
boolean retValue = true;
long startTime = System.currentTimeMillis();
switch(cmd.getAction()) {
case DatanodeProtocol.DNA_TRANSFER:
// Send a copy of a block to another datanode
transferBlocks(namespaceId,
bcmd.getBlocks(), bcmd.getTargets());
myMetrics.blocksReplicated.inc(bcmd.getBlocks().length);
break;
case DatanodeProtocol.DNA_INVALIDATE:
//
// Some local block(s) are obsolete and can be
// safely garbage-collected.
//
Block toDelete[] = bcmd.getBlocks();
try {
if (blockScanner != null) {
blockScanner.deleteBlocks(namespaceId, toDelete);
}
data.invalidate(namespaceId, toDelete);
} catch(IOException e) {
checkDiskError();
throw e;
}
myMetrics.blocksRemoved.inc(toDelete.length);
break;
case DatanodeProtocol.DNA_SHUTDOWN:
// shut down the data node
shouldServiceRun = false;
retValue = false;
break;
case DatanodeProtocol.DNA_REGISTER:
// namenode requested a registration - at start or if NN lost contact
LOG.info("DatanodeCommand action: DNA_REGISTER");
if (shouldRun) {
register();
firstBlockReportSent = false;
}
break;
case DatanodeProtocol.DNA_FINALIZE:
storage.finalizedUpgrade(namespaceId);
break;
case UpgradeCommand.UC_ACTION_START_UPGRADE:
// start distributed upgrade here
processDistributedUpgradeCommand((UpgradeCommand)cmd);
break;
case DatanodeProtocol.DNA_RECOVERBLOCK:
recoverBlocks(namespaceId, bcmd.getBlocks(), bcmd.getTargets());
break;
default:
LOG.warn("Unknown DatanodeCommand action: " + cmd.getAction());
}
long endTime = System.currentTimeMillis();
if (endTime - startTime > 1000) {
LOG.info("processCommand() took " + (endTime - startTime)
+ " msec to process command " + cmd.getAction() + " from " + nnAddr);
} else if (LOG.isDebugEnabled()) {
LOG.debug("processCommand() took " + (endTime - startTime)
+ " msec to process command " + cmd.getAction() + " from " + nnAddr);
}
return retValue;
}
/**
* returns true if NS thread has completed initialization of storage
* and has registered with the corresponding namenode
* @return true if initialized
*/
@Override
public boolean initialized() {
return initialized;
}
@Override
public boolean isAlive() {
return shouldServiceRun && nsThread.isAlive();
}
@Override
public int getNamespaceId() {
return namespaceId;
}
@Override
public String getNameserviceId() {
return this.nameserviceId;
}
@Override
public InetSocketAddress getNNSocketAddress() {
return nnAddr;
}
void setNamespaceInfo(NamespaceInfo nsinfo) {
this.nsInfo = nsinfo;
this.namespaceId = nsinfo.getNamespaceID();
namespaceManager.addNamespace(this);
}
void setNameNode(DatanodeProtocol dnProtocol) {
nsNamenode = dnProtocol;
}
private NamespaceInfo handshake() throws IOException {
NamespaceInfo nsInfo = new NamespaceInfo();
while (shouldRun && shouldServiceRun) {
try {
nsInfo = nsNamenode.versionRequest();
break;
} catch(SocketTimeoutException e) { // namenode is busy
LOG.info("Problem connecting to server: " + nnAddr);
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {}
}
}
String errorMsg = null;
// verify build version
if( ! nsInfo.getBuildVersion().equals( Storage.getBuildVersion() )) {
errorMsg = "Incompatible build versions: namenode BV = "
+ nsInfo.getBuildVersion() + "; datanode BV = "
+ Storage.getBuildVersion();
LOG.warn( errorMsg );
try {
nsNamenode.errorReport( nsRegistration,
DatanodeProtocol.NOTIFY, errorMsg );
} catch( SocketTimeoutException e ) { // namenode is busy
LOG.info("Problem connecting to server: " + nnAddr.toString());
}
}
assert FSConstants.LAYOUT_VERSION == nsInfo.getLayoutVersion() :
"Data-node and name-node layout versions must be the same."
+ "Expected: "+ FSConstants.LAYOUT_VERSION + " actual "+ nsInfo.getLayoutVersion();
return nsInfo;
}
void setupNS(Configuration conf, AbstractList<File> dataDirs)
throws IOException {
// get NN proxy
DatanodeProtocol dnp =
(DatanodeProtocol)RPC.waitForProxy(DatanodeProtocol.class,
DatanodeProtocol.versionID, nnAddr, conf);
setNameNode(dnp);
// handshake with NN
NamespaceInfo nsInfo = handshake();
setNamespaceInfo(nsInfo);
synchronized(DataNode.this){
setupNSStorage();
}
nsRegistration.setIpcPort(ipcServer.getListenerAddress().getPort());
nsRegistration.setInfoPort(infoServer.getPort());
}
void setupNSStorage() throws IOException {
StartupOption startOpt = getStartupOption(conf);
assert startOpt != null : "Startup option must be set.";
boolean simulatedFSDataset =
conf.getBoolean("dfs.datanode.simulateddatastorage", false);
if (simulatedFSDataset) {
nsRegistration.setStorageID(storage.getStorageID()); //same as DN
nsRegistration.storageInfo.layoutVersion = FSConstants.LAYOUT_VERSION;
nsRegistration.storageInfo.namespaceID = nsInfo.namespaceID;
} else {
// read storage info, lock data dirs and transition fs state if necessary
// first do it at the top level dataDirs
// This is done only once when among all namespaces
storage
.recoverTransitionRead(DataNode.this, nsInfo, dataDirs, startOpt);
// Then do it for this namespace's directory
storage.recoverTransitionRead(DataNode.this, nsInfo.namespaceID,
nsInfo, dataDirs, startOpt, nameserviceId);
LOG.info("setting up storage: namespaceId="
+ namespaceId + ";lv=" + storage.layoutVersion + ";nsInfo="
+ nsInfo);
nsRegistration.setStorageInfo(
storage.getNStorage(nsInfo.namespaceID), storage.getStorageID());
data.initialize(storage);
}
data.addNamespace(namespaceId, storage.getNameSpaceDataDir(namespaceId), conf);
if (blockScanner != null) {
blockScanner.start();
blockScanner.addNamespace(namespaceId);
}
}
/**
* This methods arranges for the data node to send the block report at
* the next heartbeat.
*/
@Override
public void scheduleBlockReport(long delay) {
if (delay > 0) { // send BR after random delay
lastBlockReport = System.currentTimeMillis()
- ( blockReportInterval - R.nextInt((int)(delay)));
} else { // send at next heartbeat
lastBlockReport = lastHeartbeat - blockReportInterval;
}
resetBlockReportTime = true; // reset future BRs for randomness
}
/**
* This method control the occurrence of blockReceivedAndDeleted
* only use for testing
*/
@Override
public void scheduleBlockReceivedAndDeleted(long delay) {
if (delay > 0) {
lastDeletedReport = System.currentTimeMillis()
- deletedReportInterval + delay;
} else {
lastDeletedReport = 0;
}
}
@Override
public void reportBadBlocks(LocatedBlock[] blocks) throws IOException{
try {
nsNamenode.reportBadBlocks(blocks);
} catch (IOException e){
/* One common reason is that NameNode could be in safe mode.
* Should we keep on retrying in that case?
*/
LOG.warn("Failed to report bad block to namenode : " +
" Exception : " + StringUtils.stringifyException(e));
throw e;
}
}
/*
* Informing the name node could take a long long time! Should we wait
* till namenode is informed before responding with success to the
* client? For now we don't.
*/
@Override
public void notifyNamenodeReceivedBlock(Block block, String delHint) {
if (block == null ) {
throw new IllegalArgumentException("Block is null");
}
if (delHint != null && !delHint.isEmpty()) {
block = new ReceivedBlockInfo(block, delHint);
}
synchronized (receivedAndDeletedBlockList) {
receivedAndDeletedBlockList.add(block);
pendingReceivedRequests++;
receivedAndDeletedBlockList.notifyAll();
}
}
@Override
public void notifyNamenodeDeletedBlock(Block block) {
if (block == null) {
throw new IllegalArgumentException(block == null ? "Block is null"
: "delHint is null");
}
// mark it as a deleted block
DFSUtil.markAsDeleted(block);
synchronized (receivedAndDeletedBlockList) {
receivedAndDeletedBlockList.add(block);
}
}
//This must be called only by namespaceManager
@Override
public void start() {
if ((nsThread != null) && (nsThread.isAlive())) {
//Thread is started already
return;
}
nsThread = new Thread(this, dnThreadName);
nsThread.setDaemon(true); // needed for JUnit testing
nsThread.start();
}
@Override
//This must be called only by namespaceManager.
public void stop() {
shouldServiceRun = false;
if (keepAliveRun != null) {
keepAliveRun.cancel(false);
}
if (keepAliveSender != null) {
keepAliveSender.shutdownNow();
}
if (nsThread != null) {
nsThread.interrupt();
}
}
//This must be called only by namespaceManager
@Override
public void join() {
try {
if (nsThread != null) {
nsThread.join();
}
} catch (InterruptedException ie) { }
}
//Cleanup method to be called by current thread before exiting.
private void cleanUp() {
if(upgradeManager != null)
upgradeManager.shutdownUpgrade();
namespaceManager.remove(this);
if (keepAliveRun != null) {
keepAliveRun.cancel(false);
}
if (keepAliveSender != null) {
keepAliveSender.shutdownNow();
}
shouldServiceRun = false;
RPC.stopProxy(nsNamenode);
if (blockScanner != null) {
blockScanner.removeNamespace(this.getNamespaceId());
}
if (data != null) {
data.removeNamespace(this.getNamespaceId());
}
if (storage != null) {
storage.removeNamespaceStorage(this.getNamespaceId());
}
}
/**
* Register one namespace with the corresponding NameNode
* <p>
* The nsDatanode needs to register with the namenode on startup in order
* 1) to report which storage it is serving now and
* 2) to receive a registrationID
*
* issued by the namenode to recognize registered datanodes.
*
* @see FSNamesystem#registerDatanode(DatanodeRegistration)
* @throws IOException
*/
void register() throws IOException {
if (nsRegistration.getStorageID().equals("")) {
nsRegistration.storageID = createNewStorageId(nsRegistration.getPort());
}
while(shouldRun && shouldServiceRun) {
try {
// reset name to machineName. Mainly for web interface.
nsRegistration.setName(machineName + ":" + nsRegistration.getPort());
nsRegistration = nsNamenode.register(nsRegistration,
DataTransferProtocol.DATA_TRANSFER_VERSION);
break;
} catch(RemoteException re) {
String reClass = re.getClassName();
if (UnregisteredDatanodeException.class.getName().equals(reClass) ||
DisallowedDatanodeException.class.getName().equals(reClass) ||
IncorrectVersionException.class.getName().equals(reClass)) {
LOG.warn("DataNode is shutting down: " +
StringUtils.stringifyException(re));
break;
}
} catch(Exception e) { // namenode cannot be contacted
LOG.info("Problem connecting to server: " + nnAddr.toString() +
StringUtils.stringifyException(e));
}
try {
Thread.sleep(1000);
} catch (InterruptedException ie) {}
}
assert ("".equals(storage.getStorageID())
&& !"".equals(nsRegistration.getStorageID()))
|| storage.getStorageID().equals(nsRegistration.getStorageID()) :
"New storageID can be assigned only if data-node is not formatted";
if (storage.getStorageID().equals("")) {
storage.setStorageID(nsRegistration.getStorageID());
storage.writeAll();
LOG.info("New storage id " + nsRegistration.getStorageID()
+ " is assigned to data-node " + nsRegistration.getName());
}
if(! storage.getStorageID().equals(nsRegistration.getStorageID())) {
throw new IOException("Inconsistent storage IDs. Name-node returned "
+ nsRegistration.getStorageID()
+ ". Expecting " + storage.getStorageID());
}
sendBlocksBeingWrittenReport(nsNamenode, namespaceId, nsRegistration);
// random short delay - helps scatter the BR from all DNs
scheduleBlockReport(initialBlockReportDelay);
}
/**
* No matter what kind of exception we get, keep retrying to offerService().
* That's the loop that connects to the NameNode and provides basic DataNode
* functionality.
*
* Only stop when "shouldRun" or "shouldServiceRun" is turned off, which can
* happen either at shutdown or due to refreshNamenodes.
*/
@Override
public void run() {
LOG.info(nsRegistration + "In NSOfferService.run, data = " + data
+ ";ns=" + namespaceId);
try {
// init stuff
try {
// setup storage
setupNS(conf, dataDirs);
register();
KeepAliveHeartbeater keepAliveTask =
new KeepAliveHeartbeater(nsNamenode, nsRegistration, this);
keepAliveSender = Executors.newSingleThreadScheduledExecutor();
keepAliveRun = keepAliveSender.scheduleAtFixedRate(keepAliveTask, 0,
heartBeatInterval,
TimeUnit.MILLISECONDS);
} catch (IOException ioe) {
// Initial handshake, storage recovery or registration failed
// End NSOfferService thread
LOG.info("--------- " + StringUtils.stringifyException(ioe));
LOG.fatal(nsRegistration + " initialization failed for namespaceId "
+ namespaceId, ioe);
return;
}
initialized = true;
while (shouldRun && shouldServiceRun) {
try {
startDistributedUpgradeIfNeeded();
offerService();
} catch (Exception ex) {
LOG.error("Exception: " + StringUtils.stringifyException(ex));
if (shouldRun && shouldServiceRun) {
try {
Thread.sleep(5000);
} catch (InterruptedException ie) {
LOG.warn("Received exception: ", ie);
}
}
}
}
} catch (Throwable ex) {
LOG.warn("Unexpected exception " + StringUtils.stringifyException(ex));
} finally {
LOG.warn(nsRegistration + " ending namespace service for: "
+ namespaceId);
cleanUp();
}
}
private void processDistributedUpgradeCommand(UpgradeCommand comm
) throws IOException {
assert upgradeManager != null : "DataNode.upgradeManager is null.";
upgradeManager.processUpgradeCommand(comm);
}
@Override
public synchronized UpgradeManagerDatanode getUpgradeManager() {
if(upgradeManager == null)
upgradeManager =
new UpgradeManagerDatanode(DataNode.this, namespaceId);
return upgradeManager;
}
/**
* Start distributed upgrade if it should be initiated by the data-node.
*/
private void startDistributedUpgradeIfNeeded() throws IOException {
UpgradeManagerDatanode um = getUpgradeManager();
if(!um.getUpgradeState())
return;
um.setUpgradeState(false, um.getUpgradeVersion());
um.startUpgrade();
return;
}
/** Block synchronization */
@Override
public LocatedBlock syncBlock(
Block block, List<BlockRecord> syncList,
boolean closeFile, List<InterDatanodeProtocol> datanodeProxies,
long deadline
)
throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("block=" + block + ", (length=" + block.getNumBytes()
+ "), syncList=" + syncList + ", closeFile=" + closeFile);
}
//syncList.isEmpty() that all datanodes do not have the block
//so the block can be deleted.
if (syncList.isEmpty()) {
nsNamenode.commitBlockSynchronization(block, 0, 0, closeFile, true,
DatanodeID.EMPTY_ARRAY);
return null;
}
List<DatanodeID> successList = new ArrayList<DatanodeID>();
throwIfAfterTime(deadline);
long generationstamp = -1;
try {
generationstamp = nsNamenode.nextGenerationStamp(block, closeFile);
} catch (RemoteException e) {
if (e.unwrapRemoteException() instanceof BlockAlreadyCommittedException) {
throw new BlockAlreadyCommittedException(e);
} else {
throw e;
}
}
Block newblock = new Block(block.getBlockId(), block.getNumBytes(), generationstamp);
for(BlockRecord r : syncList) {
try {
throwIfAfterTime(deadline);
LOG.info("Updating block " + r + " to " + newblock);
r.datanode.updateBlock(namespaceId, r.info.getBlock(), newblock, closeFile);
successList.add(r.id);
} catch (BlockRecoveryTimeoutException e) {
throw e;
} catch (IOException e) {
InterDatanodeProtocol.LOG.warn("Failed to updateBlock (newblock="
+ newblock + ", datanode=" + r.id + ")", e);
}
}
LOG.info("Updated blocks on syncList for block " + block + " to " + newblock);
stopAllProxies(datanodeProxies);
if (!successList.isEmpty()) {
DatanodeID[] nlist = successList.toArray(new DatanodeID[successList.size()]);
throwIfAfterTime(deadline);
nsNamenode.commitBlockSynchronization(block,
newblock.getGenerationStamp(), newblock.getNumBytes(), closeFile, false,
nlist);
DatanodeInfo[] info = new DatanodeInfo[nlist.length];
for (int i = 0; i < nlist.length; i++) {
info[i] = new DatanodeInfo(nlist[i]);
}
return new LocatedBlock(newblock, info); // success
}
//failed
StringBuilder b = new StringBuilder();
for(BlockRecord r : syncList) {
b.append("\n " + r.id);
}
throw new IOException("Cannot recover " + block + ", none of these "
+ syncList.size() + " datanodes success {" + b + "\n}");
}
@Override
public DatanodeRegistration getNsRegistration() {
return nsRegistration;
}
}
/**
* Manages the NSOfferService objects for the data node.
* Creation, removal, starting, stopping, shutdown on NSOfferService
* objects must be done via APIs in this class.
*/
class NamespaceManager {
  /** Maps a namespace id to the service thread handling that namespace. */
  private final Map<Integer, NamespaceService> nsMapping =
      new HashMap<Integer, NamespaceService>();
  /** Maps a namenode address to the service thread talking to it. */
  protected final Map<InetSocketAddress, NamespaceService> nameNodeThreads =
      new HashMap<InetSocketAddress, NamespaceService>();
  //This lock is only used for refreshNamenodes method
  private final Object refreshNamenodesLock = new Object();

  NamespaceManager() {
  }

  /**
   * Creates one NSOfferService per namenode address. Nameservice ids are
   * paired positionally with the addresses; addresses beyond the number of
   * configured ids get a null id.
   */
  NamespaceManager(Configuration conf, List<InetSocketAddress> nameNodeAddrs)
      throws IOException {
    Collection<String> nameserviceIds = DFSUtil.getNameServiceIds(conf);
    Iterator<String> it = nameserviceIds.iterator();
    for(InetSocketAddress nnAddr : nameNodeAddrs){
      String nameserviceId = it.hasNext()? it.next(): null;
      NSOfferService nsos = new NSOfferService(nnAddr, nameserviceId);
      nameNodeThreads.put(nsos.getNNSocketAddress(), nsos);
    }
  }

  /**
   * @return true once every namespace service has completed its handshake
   * and registration with its namenode.
   */
  // Fix: synchronized like every other accessor of nameNodeThreads;
  // iterating a HashMap while refreshNamenodes/addNamespace mutates it is
  // undefined behavior. (Method name typo is kept for caller compatibility.)
  public synchronized boolean initailized() {
    for(NamespaceService nsos : nameNodeThreads.values()){
      if(!nsos.initialized()){
        return false;
      }
    }
    return true;
  }

  /** @return true if the service for the given namespace exists and is alive. */
  // Fix: synchronized for the same reason as initailized() — nsMapping is a
  // plain HashMap mutated under "this" elsewhere.
  public synchronized boolean isAlive(int namespaceId) {
    NamespaceService nsos = nsMapping.get(namespaceId);
    if(nsos == null){
      return false;
    }
    return nsos.isAlive();
  }

  /**
   * Registers a service under its namespace id once the handshake has
   * determined the id. The service must already be known by address.
   */
  synchronized void addNamespace(NamespaceService t) {
    if (nameNodeThreads.get(t.getNNSocketAddress()) == null) {
      throw new IllegalArgumentException(
          "Unknown NSOfferService thread for namenode address:"
          + t.getNNSocketAddress());
    }
    nsMapping.put(t.getNamespaceId(), t);
  }

  /**
   * Returns the array of NSOfferService objects.
   * Caution: The NSOfferService returned could be shutdown any time.
   */
  synchronized NamespaceService[] getAllNamenodeThreads() {
    NamespaceService[] nsosArray = new NamespaceService[nameNodeThreads.values()
        .size()];
    return nameNodeThreads.values().toArray(nsosArray);
  }

  synchronized NamespaceService get(int namespaceId) {
    return nsMapping.get(namespaceId);
  }

  synchronized NamespaceService get(InetSocketAddress nameNodeAddr) {
    return nameNodeThreads.get(nameNodeAddr);
  }

  /** Removes a service from both indexes. */
  public synchronized void remove(NamespaceService t) {
    nameNodeThreads.remove(t.getNNSocketAddress());
    nsMapping.remove(t.getNamespaceId());
  }

  synchronized Integer[] getAllNamespaces(){
    return nsMapping.keySet().toArray(
        new Integer[nsMapping.keySet().size()]);
  }

  /** Stops all services and waits for their threads to exit. */
  void shutDownAll() {
    // Snapshot first so we don't hold the manager lock during stop/join.
    NamespaceService[] nsosArray = this.getAllNamenodeThreads();
    for (NamespaceService nsos : nsosArray) {
      nsos.stop(); //interrupts the threads
    }
    //now join
    for (NamespaceService nsos : nsosArray) {
      nsos.join();
    }
  }

  void startAll() throws IOException {
    for (NamespaceService nsos : getAllNamenodeThreads()) {
      nsos.start();
    }
    isAlive = true;
  }

  void stopAll() {
    for (NamespaceService nsos : getAllNamenodeThreads()) {
      nsos.stop();
    }
  }

  void joinAll() throws InterruptedException {
    for (NamespaceService nsos : getAllNamenodeThreads()) {
      nsos.join();
    }
  }

  /**
   * Reconciles the set of running services with a new list of namenode
   * addresses: services for removed addresses are stopped, services for new
   * addresses are created and started. refreshNamenodesLock serializes
   * concurrent refreshes without blocking other manager operations.
   */
  void refreshNamenodes(List<InetSocketAddress> nameNodeAddrs, Configuration conf)
      throws IOException, InterruptedException{
    List<InetSocketAddress> toStart = new ArrayList<InetSocketAddress>();
    List<NamespaceService> toStop = new ArrayList<NamespaceService>();
    Collection<String> nameserviceIds = DFSUtil.getNameServiceIds(conf);
    List<String> toStartServiceIds = new ArrayList<String>();
    synchronized (refreshNamenodesLock) {
      synchronized (this) {
        // Services whose namenode address is no longer configured.
        for (InetSocketAddress nnAddr : nameNodeThreads.keySet()) {
          if (!nameNodeAddrs.contains(nnAddr)){
            toStop.add(nameNodeThreads.get(nnAddr));
          }
        }
        // New addresses, paired positionally with nameservice ids.
        Iterator<String> it = nameserviceIds.iterator();
        for (InetSocketAddress nnAddr : nameNodeAddrs) {
          String nameserviceId = it.hasNext()? it.next(): null;
          if (!nameNodeThreads.containsKey(nnAddr)) {
            toStart.add(nnAddr);
            toStartServiceIds.add(nameserviceId);
          }
        }
        it = toStartServiceIds.iterator();
        for (InetSocketAddress nnAddr : toStart) {
          NSOfferService nsos = new NSOfferService(nnAddr, it.next());
          nameNodeThreads.put(nsos.getNNSocketAddress(), nsos);
        }
        for (NamespaceService nsos : toStop) {
          remove(nsos);
        }
      }
    }
    // Stop/start outside the manager lock; stop() interrupts threads that
    // may themselves call back into synchronized manager methods.
    for (NamespaceService nsos : toStop) {
      nsos.stop();
    }
    startAll();
  }
}
/* ********************************************************************
Protocol when a client reads data from Datanode (Cur Ver: 9):
Client's Request :
=================
Processed in DataXceiver:
+----------------------------------------------+
| Common Header | 1 byte OP == OP_READ_BLOCK |
+----------------------------------------------+
Processed in readBlock() :
+-------------------------------------------------------------------------+
| 8 byte Block ID | 8 byte genstamp | 8 byte start offset | 8 byte length |
+-------------------------------------------------------------------------+
| vInt length | <DFSClient id> |
+-----------------------------------+
Client sends optional response only at the end of receiving data.
DataNode Response :
===================
In readBlock() :
If there is an error while initializing BlockSender :
+---------------------------+
| 2 byte OP_STATUS_ERROR | and connection will be closed.
+---------------------------+
Otherwise
+---------------------------+
| 2 byte OP_STATUS_SUCCESS |
+---------------------------+
Actual data, sent by BlockSender.sendBlock() :
ChecksumHeader :
+--------------------------------------------------+
| 1 byte CHECKSUM_TYPE | 4 byte BYTES_PER_CHECKSUM |
+--------------------------------------------------+
Followed by actual data in the form of PACKETS:
+------------------------------------+
| Sequence of data PACKETs .... |
+------------------------------------+
A "PACKET" is defined further below.
The client reads data until it receives a packet with
"LastPacketInBlock" set to true or with a zero length. If there is
no checksum error, it replies to DataNode with OP_STATUS_CHECKSUM_OK:
Client optional response at the end of data transmission :
+------------------------------+
| 2 byte OP_STATUS_CHECKSUM_OK |
+------------------------------+
PACKET : Contains a packet header, checksum and data. Amount of data
======== carried is set by BUFFER_SIZE.
+-----------------------------------------------------+
| 4 byte packet length (excluding packet header) |
+-----------------------------------------------------+
| 8 byte offset in the block | 8 byte sequence number |
+-----------------------------------------------------+
| 1 byte boolean set: isLastPacketInBlock | forceSync |
+-----------------------------------------------------+
| 4 byte Length of actual data |
+-----------------------------------------------------+
| x byte checksum data. x is defined below |
+-----------------------------------------------------+
| actual data ...... |
+-----------------------------------------------------+
x = (length of data + BYTES_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
CHECKSUM_SIZE
CHECKSUM_SIZE depends on CHECKSUM_TYPE (usually, 4 for CRC32)
The above packet format is used while writing data to DFS also.
Not all the fields might be used while reading.
************************************************************************ */
/** Header size for a packet: payload length, block offset, sequence
 * number, and one flags byte (see the packet diagram above). */
public static int PKT_HEADER_LEN = ( 4 + /* Packet payload length */
    8 + /* offset in block */
    8 + /* seqno */
    1 /* up to 8 boolean values field */);
// Bit masks for the 1-byte boolean flags field of a packet header.
// NOTE(review): these three statics are public and non-final; nothing in
// this file writes them, but making them final could break external
// writers — confirm before tightening.
public static byte isLastPacketInBlockMask = 0x01;
public static byte forceSyncMask = 0x02;
/**
* Used for transferring a block of data. This class
* sends a piece of data to another DataNode.
*/
class DataTransfer implements Callable<Boolean> {
  // Pipeline of destination datanodes; the connection is made to targets[0],
  // which forwards to the rest.
  DatanodeInfo targets[];
  // Source block to read locally.
  Block b;
  // Block identity to write on the destination (differs from b only for
  // cross-namespace copies).
  Block destinationBlock;
  DataNode datanode;
  private int srcNamespaceId;
  private int dstNamespaceId;

  /**
   * Connect to the first item in the target list. Pass along the
   * entire target list, the block, and the data.
   */
  public DataTransfer(int namespaceId, DatanodeInfo targets[], Block b, DataNode datanode) throws IOException {
    // the source and destination blocks are the same for block replication
    this(targets, namespaceId, b, namespaceId, b, datanode);
  }

  /**
   * Connect to the first item in the target list. Pass along the
   * entire target list, the block, and the data.
   */
  public DataTransfer(DatanodeInfo targets[], int srcNamespaceId, Block b,
      int dstNamespaceId, Block destinationBlock,
      DataNode datanode) throws IOException {
    this.targets = targets;
    this.b = b;
    this.destinationBlock = destinationBlock;
    this.datanode = datanode;
    this.srcNamespaceId = srcNamespaceId;
    this.dstNamespaceId = dstNamespaceId;
  }

  /**
   * Do the deed, write the bytes
   *
   * @return true on success; on failure the IOException is rethrown after a
   * local disk check
   */
  public Boolean call() throws Exception {
    // Track in-flight transfers; reported to the namenode in heartbeats.
    xmitsInProgress.getAndIncrement();
    Socket sock = null;
    DataOutputStream out = null;
    BlockSender blockSender = null;
    try {
      InetSocketAddress curTarget =
          NetUtils.createSocketAddr(targets[0].getName());
      sock = newSocket();
      NetUtils.connect(sock, curTarget, socketTimeout);
      // Timeouts scale with pipeline length since each hop adds latency.
      sock.setSoTimeout(targets.length * socketTimeout);
      long writeTimeout = socketWriteTimeout + socketWriteExtentionTimeout
          * (targets.length - 1);
      OutputStream baseStream = NetUtils.getOutputStream(sock, writeTimeout);
      out = new DataOutputStream(new BufferedOutputStream(baseStream,
          SMALL_BUFFER_SIZE));
      blockSender = new BlockSender(srcNamespaceId, b, 0, b.getNumBytes(),
          false, false, false, datanode);
      DatanodeInfo srcNode = new DatanodeInfo(getDNRegistrationForNS(srcNamespaceId));
      //
      // Header info
      //
      WriteBlockHeader header = new WriteBlockHeader(
          DataTransferProtocol.DATA_TRANSFER_VERSION, dstNamespaceId,
          destinationBlock.getBlockId(),
          destinationBlock.getGenerationStamp(), 0, false, true, srcNode,
          targets.length - 1, targets, "");
      header.writeVersionAndOpCode(out);
      header.write(out);
      // send data & checksum
      blockSender.sendBlock(out, baseStream, null);
      // no response necessary
      LOG.info(getDatanodeInfo() + ":Transmitted block " + b + " at " + srcNamespaceId + " to " + curTarget);
    } catch (IOException ie) {
      LOG.warn(getDatanodeInfo() + ":Failed to transfer " + b + " at " + srcNamespaceId + " to " + targets[0].getName()
          + " got " + StringUtils.stringifyException(ie));
      // check if there are any disk problem
      try{
        datanode.checkDiskError();
      } catch (IOException e) {
        LOG.warn("Error when checking disks : " + StringUtils.stringifyException(e));
        throw e;
      }
      throw ie;
    } finally {
      xmitsInProgress.getAndDecrement();
      IOUtils.closeStream(blockSender);
      IOUtils.closeStream(out);
      IOUtils.closeSocket(sock);
    }
    return true;
  }
}
/**
 * Initializes the {@link #data} dataset. The initialization is done only
 * once, when the handshake with the first namenode is completed.
 *
 * @param conf          configuration (consulted for simulated-storage mode)
 * @param dataDirs      validated data directories (unused in simulated mode)
 * @param numNamespaces number of namespaces this datanode serves
 * @throws IOException if the simulated dataset class cannot be loaded
 */
private synchronized void initFsDataSet(Configuration conf,
    AbstractList<File> dataDirs, int numNamespaces) throws IOException {
  if (data != null) { // Already initialized
    return;
  }

  // get version and id info from the name-node
  boolean simulatedFSDataset =
    conf.getBoolean("dfs.datanode.simulateddatastorage", false);

  if (simulatedFSDataset) {
    storage.createStorageID(selfAddr.getPort());
    // it would have been better to pass storage as a parameter to
    // constructor below - need to augment ReflectionUtils used below.
    conf.set("dfs.datanode.StorageId", storage.getStorageID());
    try {
      data = (FSDatasetInterface) ReflectionUtils.newInstance(
          Class.forName(
              "org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset"),
          conf);
    } catch (ClassNotFoundException e) {
      // Preserve the original exception as the cause instead of flattening
      // it into the message string, so callers see the full chain.
      throw new IOException("Could not load SimulatedFSDataset", e);
    }
  } else {
    data = new FSDataset(this, conf, numNamespaces);
  }
}
/**
 * Periodic task that sends a lightweight keepAlive RPC to the namenode and
 * records the time of the last successful contact on the namespace service.
 */
public static class KeepAliveHeartbeater implements Runnable {
  // All collaborators are fixed at construction time; mark them final.
  private final DatanodeProtocol namenode;           // RPC proxy to the namenode
  private final DatanodeRegistration dnRegistration; // identity sent with each keepAlive
  private final NamespaceService ns;                 // receives lastBeingAlive updates

  public KeepAliveHeartbeater(DatanodeProtocol namenode,
                              DatanodeRegistration dnRegistration,
                              NamespaceService ns) {
    this.namenode = namenode;
    this.dnRegistration = dnRegistration;
    this.ns = ns;
  }

  public void run() {
    try {
      namenode.keepAlive(dnRegistration);
      ns.lastBeingAlive = now();
      LOG.debug("Sent heartbeat at " + ns.lastBeingAlive);
    } catch (Throwable ex) {
      // Deliberately catch Throwable: an exception escaping a scheduled
      // task would cancel all future heartbeats.
      LOG.error("Error sending keepAlive to the namenode", ex);
    }
  }
}
/** Start a single datanode daemon and wait for it to finish.
 * If this thread is specifically interrupted, it will stop waiting.
 */
public void runDatanodeDaemon() throws IOException {
  // Bring up per-namespace offer services first, then the servers that
  // handle client traffic.
  namespaceManager.startAll();

  // start dataXceiveServer
  dataXceiverServer.start();
  ipcServer.start();
}
/** Static convenience wrapper around {@link #isDatanodeUp()}. */
public static boolean isDatanodeUp(DataNode dn) {
  return dn.isDatanodeUp();
}
/**
 * Report whether this datanode is up.
 *
 * @return true if any namespace service thread is alive
 */
public boolean isDatanodeUp() {
  NamespaceService[] services = namespaceManager.getAllNamenodeThreads();
  for (int i = 0; i < services.length; i++) {
    NamespaceService service = services[i];
    if (service != null && service.isAlive()) {
      return true;
    }
  }
  return false;
}
/**
 * Report whether any namespace service has heard from its namenode recently.
 *
 * @return true if some namespace thread heartbeated within the expiry window
 */
public boolean isDataNodeBeingAlive() {
  long expireCutoff = now() - heartbeatExpireInterval;
  for (NamespaceService service : namespaceManager.getAllNamenodeThreads()) {
    if (service == null) {
      continue;
    }
    if (service.lastBeingAlive >= expireCutoff) {
      return true;
    }
  }
  return false;
}
/**
 * @return true - if the data node is initialized: every namespace service
 *         thread exists, has completed initialization, and is alive
 */
public boolean isInitialized() {
  for (NamespaceService nsos : namespaceManager.getAllNamenodeThreads()) {
    // Null-guard for consistency with isDatanodeUp()/isDataNodeBeingAlive(),
    // which tolerate null entries; the previous code would have thrown a
    // NullPointerException here. A missing service means "not initialized".
    if (nsos == null || !nsos.initialized() || !nsos.isAlive()) {
      return false;
    }
  }
  return true;
}
/**
 * @param nameNodeAddr namenode address identifying the namespace service
 * @return true if the NSOfferService thread for the given namenode is initialized
 * @throws IOException when the NSOfferService is dead (no service registered
 *         for the address)
 */
public synchronized boolean initialized(InetSocketAddress nameNodeAddr) throws IOException{
  NamespaceService nsos = namespaceManager.get(nameNodeAddr);
  if (nsos == null) {
    throw new IOException("NSOfferService for namenode " +
        nameNodeAddr.getAddress() + " is dead.");
  }
  return nsos.initialized();
}
/** Instantiate a single datanode object. This must be run by invoking
 * {@link DataNode#runDatanodeDaemon(DataNode)} subsequently.
 *
 * NOTE: calls System.exit(-1) if the deprecated rack-identification
 * configuration is present.
 *
 * @param args command-line arguments (see printUsage/parseArguments)
 * @param conf configuration; a fresh Configuration is created when null
 * @return the DataNode instance, or null if the arguments are invalid
 */
public static DataNode instantiateDataNode(String args[],
    Configuration conf) throws IOException {
  if (conf == null)
    conf = new Configuration();
  if (!parseArguments(args, conf)) {
    printUsage();
    return null;
  }
  if (conf.get("dfs.network.script") != null) {
    LOG.error("This configuration for rack identification is not supported" +
        " anymore. RackID resolution is handled by the NameNode.");
    System.exit(-1);
  }
  String[] dataDirs = conf.getStrings("dfs.data.dir");
  dnThreadName = "DataNode: [" +
      StringUtils.arrayToString(dataDirs) + "]";
  return makeInstance(dataDirs, conf);
}
/** Instantiate &amp; Start a single datanode daemon and wait for it to finish.
 * If this thread is specifically interrupted, it will stop waiting.
 *
 * @param args command-line arguments
 * @param conf configuration, may be null
 * @return the started DataNode, or null if instantiation failed
 */
public static DataNode createDataNode(String args[], Configuration conf)
    throws IOException {
  DataNode dn = instantiateDataNode(args, conf);
  if (dn != null) {
    dn.runDatanodeDaemon();
  }
  return dn;
}
/**
 * Block until this datanode should stop: repeatedly join all namespace
 * service threads and exit once no namespace services remain.
 */
void join() {
  while (shouldRun) {
    try {
      namespaceManager.joinAll();
      NamespaceService[] namespaceServices =
          namespaceManager.getAllNamenodeThreads();
      // Simplified condition: the original
      // `a == null || (a != null && a.length == 0)` re-checked null redundantly.
      if (namespaceServices == null || namespaceServices.length == 0) {
        shouldRun = false;
        isAlive = false;
      }
      // Poll rather than block forever so a dying service set is noticed.
      Thread.sleep(2000);
    } catch (InterruptedException ex) {
      LOG.warn("Received exception in Datanode#join: " + ex);
    }
  }
}
/**
 * Make an instance of DataNode after ensuring that at least one of the
 * given data directories (and their parent directories, if necessary)
 * can be created.
 * @param dataDirs List of directories, where the new DataNode instance should
 * keep its files.
 * @param conf Configuration instance to use.
 * @return DataNode instance for given list of data dirs and conf, or null if
 * no directory from this directory list can be created.
 * @throws IOException
 */
public static DataNode makeInstance(String[] dataDirs, Configuration conf)
  throws IOException {
  ArrayList<File> usableDirs = new ArrayList<File>();
  for (String dirName : dataDirs) {
    File dir = new File(dirName);
    try {
      // Keep only directories that pass the disk check; bad ones are
      // logged and skipped.
      DiskChecker.checkDir(dir);
      usableDirs.add(dir);
    } catch(DiskErrorException e) {
      LOG.warn("Invalid directory in dfs.data.dir: " + e.getMessage());
    }
  }
  if (usableDirs.isEmpty()) {
    LOG.error("All directories in dfs.data.dir are invalid.");
    return null;
  }
  return new DataNode(conf, usableDirs);
}
@Override
public String toString() {
  // Assemble the diagnostic string piecewise; output is identical to the
  // previous concatenation form.
  StringBuilder sb = new StringBuilder("DataNode{");
  sb.append("data=").append(data);
  sb.append(", localName='").append(getDatanodeInfo()).append("'");
  sb.append(", xmitsInProgress=").append(xmitsInProgress.get());
  sb.append("}");
  return sb.toString();
}
/** Print command-line usage to stderr.
 * NOTE(review): parseArguments also accepts -regular and -D key=value,
 * which are not listed here — confirm whether the omission is intentional. */
private static void printUsage() {
  System.err.println("Usage: java DataNode");
  System.err.println("           [-rollback]");
}
/**
 * Parse and verify command line arguments and set configuration parameters.
 *
 * NOTE: calls System.exit(-1) on deprecated/invalid options rather than
 * returning false.
 *
 * @return false if passed arguments are incorrect
 */
private static boolean parseArguments(String args[],
                                      Configuration conf) {
  int argsLen = (args == null) ? 0 : args.length;
  StartupOption startOpt = StartupOption.REGULAR;
  for(int i=0; i < argsLen; i++) {
    String cmd = args[i];
    if ("-r".equalsIgnoreCase(cmd) || "--rack".equalsIgnoreCase(cmd)) {
      LOG.error("-r, --rack arguments are not supported anymore. RackID " +
          "resolution is handled by the NameNode.");
      System.exit(-1);
    } else if ("-rollback".equalsIgnoreCase(cmd)) {
      startOpt = StartupOption.ROLLBACK;
    } else if ("-regular".equalsIgnoreCase(cmd)) {
      startOpt = StartupOption.REGULAR;
    } else if ("-d".equalsIgnoreCase(cmd)) {
      // Case-insensitive match, so this accepts both -d and -D; the
      // following argument must be a key=value configuration override.
      ++i;
      if(i >= argsLen) {
        LOG.error("-D option requires following argument.");
        System.exit(-1);
      }
      String[] keyval = args[i].split("=", 2);
      if (keyval.length == 2) {
        conf.set(keyval[0], keyval[1]);
      } else {
        LOG.error("-D option invalid (expected =): " + args[i]);
        System.exit(-1);
      }
    } else
      // Unknown option: signal bad usage to the caller.
      return false;
  }
  setStartupOption(conf, startOpt);
  return true;
}
/** Record the chosen startup option in the configuration. */
private static void setStartupOption(Configuration conf, StartupOption opt) {
  conf.set("dfs.datanode.startup", opt.toString());
}
/** Read the startup option from the configuration; defaults to REGULAR. */
static StartupOption getStartupOption(Configuration conf) {
  return StartupOption.valueOf(conf.get("dfs.datanode.startup",
                                        StartupOption.REGULAR.toString()));
}
/**
 * This method is used for testing.
 * Examples are adding and deleting blocks directly.
 * The most common usage will be when the data node's storage is simulated.
 *
 * @return the fsdataset that stores the blocks
 */
public FSDatasetInterface getFSDataset() {
  return data;
}
/** Wait for the datanode to exit and clean up all its resources */
public void waitAndShutdown() {
  join();
  // make sure all other threads have exited even if
  // offerservice thread died abnormally
  shutdown();
}
/**
 * Entry point: create the datanode, run it until it stops, then shut down.
 * Exits the JVM with -1 on any startup failure.
 */
public static void main(String args[]) {
  try {
    StringUtils.startupShutdownMessage(DataNode.class, args, LOG);
    DataNode datanode = createDataNode(args, null);
    if (datanode != null) {
      datanode.waitAndShutdown();
    }
  } catch (Throwable e) {
    LOG.error(StringUtils.stringifyException(e));
    System.exit(-1);
  }
}
/**
 * Validate a block and, if it is sound, submit an asynchronous transfer of
 * it to the given targets. Invalid blocks are reported back to the
 * namenode instead of being sent.
 *
 * @param namespaceId namespace the block belongs to
 * @param block       block to replicate
 * @param xferTargets destination datanodes for the replication pipeline
 */
private void transferBlock(int namespaceId, Block block,
    DatanodeInfo xferTargets[]) throws IOException {
  DatanodeProtocol nn = getNSNamenode(namespaceId);
  DatanodeRegistration nsReg = getDNRegistrationForNS(namespaceId);

  if (!data.isValidBlock(namespaceId, block, true)) {
    // block does not exist or is under-construction
    String errStr = "Can't send invalid block " + block;
    LOG.info(errStr);
    nn.errorReport(nsReg, DatanodeProtocol.INVALID_BLOCK, errStr);
    return;
  }

  // Check if NN recorded length matches on-disk length
  long onDiskLength = data.getFinalizedBlockLength(namespaceId, block);
  if (block.getNumBytes() > onDiskLength) {
    // Shorter on-disk len indicates corruption so report NN the corrupt block
    nn.reportBadBlocks(new LocatedBlock[] { new LocatedBlock(block,
        new DatanodeInfo[] { new DatanodeInfo(nsReg) }) });
    LOG.info("Can't replicate block " + block + " because on-disk length "
        + onDiskLength + " is shorter than NameNode recorded length "
        + block.getNumBytes());
    return;
  }

  int numTargets = xferTargets.length;
  if (numTargets > 0) {
    if (LOG.isInfoEnabled()) {
      StringBuilder xfersBuilder = new StringBuilder();
      for (int i = 0; i < numTargets; i++) {
        xfersBuilder.append(xferTargets[i].getName());
        xfersBuilder.append(" ");
      }
      LOG.info(nsReg + " Starting thread to transfer block " + block + " to "
          + xfersBuilder);
    }

    // Fire-and-forget: failures surface via DataTransfer's own logging.
    blockCopyExecutor.submit(new DataTransfer(namespaceId, xferTargets, block, this));
  }
}
/**
 * Start a transfer for each block to its corresponding target set; a
 * failure on one block does not stop the remaining transfers.
 */
void transferBlocks(int namespaceId, Block blocks[],
    DatanodeInfo xferTargets[][]) {
  // blocks and xferTargets are parallel arrays, so iterate by index.
  for (int idx = 0; idx < blocks.length; idx++) {
    try {
      transferBlock(namespaceId, blocks[idx], xferTargets[idx]);
    } catch (IOException ioe) {
      LOG.warn("Failed to transfer block " + blocks[idx], ioe);
    }
  }
}
/**
 * Forward a received-block notification to the namespace's offer service.
 *
 * @param namespaceId namespace the block belongs to
 * @param block       block that was received (must not be null)
 * @param delHint     deletion hint forwarded to the namenode
 * @throws IOException if no usable offer service exists for the namespace
 */
protected void notifyNamenodeReceivedBlock(int namespaceId, Block block,
    String delHint) throws IOException {
  if (block == null) {
    throw new IllegalArgumentException("Block is null");
  }
  NamespaceService nsos = namespaceManager.get(namespaceId);
  boolean serviceUsable = (nsos != null && nsos.getDatanodeProtocol() != null);
  if (!serviceUsable) {
    throw new IOException("Cannot locate OfferService thread for namespace="
        + namespaceId);
  }
  nsos.notifyNamenodeReceivedBlock(block, delHint);
}
/**
 * Forward a deleted-block notification to the namespace's offer service.
 *
 * @param namespaceId namespace the block belongs to
 * @param block       block that was deleted (must not be null)
 * @throws IOException if no usable offer service exists for the namespace
 */
protected void notifyNamenodeDeletedBlock(int namespaceId, Block block)
    throws IOException {
  if (block == null) {
    throw new IllegalArgumentException("Block is null");
  }
  NamespaceService nsos = namespaceManager.get(namespaceId);
  boolean serviceUsable = (nsos != null && nsos.getDatanodeProtocol() != null);
  if (!serviceUsable) {
    throw new IOException("Cannot locate OfferService thread for namespace="
        + namespaceId);
  }
  nsos.notifyNamenodeDeletedBlock(block);
}
// InterDataNodeProtocol implementation
// THIS METHOD IS ONLY USED FOR UNIT TESTS
/** {@inheritDoc}
 *
 * Look up the stored block and return its metadata together with the last
 * scan time; validates on-disk metadata before returning.
 *
 * @return metadata info, or null if the block is not stored here
 */
public BlockMetaDataInfo getBlockMetaDataInfo(int namespaceId, Block block
    ) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("block=" + block);
  }
  Block stored = data.getStoredBlock(namespaceId, block.getBlockId());

  if (stored == null) {
    return null;
  }
  BlockMetaDataInfo info = new BlockMetaDataInfo(stored,
                               blockScanner.getLastScanTime(namespaceId, stored));
  if (LOG.isDebugEnabled()) {
    LOG.debug("getBlockMetaDataInfo successful block=" + stored +
              " length " + stored.getNumBytes() +
              " genstamp " + stored.getGenerationStamp());
  }

  // paranoia! verify that the contents of the stored block
  // matches the block file on disk.
  data.validateBlockMetadata(namespaceId, stored);
  return info;
}
/** Begin recovery of this datanode's replica; delegates to the dataset. */
@Override
public BlockRecoveryInfo startBlockRecovery(int namespaceId, Block block) throws IOException {
  return data.startBlockRecovery(namespaceId, block.getBlockId());
}
/**
 * Recover a batch of blocks on a background daemon thread. Each block is
 * recovered independently; a failure on one does not stop the rest.
 *
 * @return the started daemon, so callers/tests can join on it
 */
public Daemon recoverBlocks(final int namespaceId, final Block[] blocks, final DatanodeInfo[][] targets) {
  Daemon d = new Daemon(threadGroup, new Runnable() {
      /** Recover a list of blocks. It is run by the primary datanode. */
      public void run() {
        for(int i = 0; i < blocks.length; i++) {
          try {
            logRecoverBlock("NameNode", namespaceId, blocks[i], targets[i]);
            // keepLength=false, closeFile=true, no deadline (0)
            recoverBlock(namespaceId, blocks[i], false, targets[i], true, 0);
          } catch (IOException e) {
            LOG.warn("recoverBlocks FAILED, blocks[" + i + "]=" + blocks[i], e);
          }
        }
      }
    });
  d.start();
  return d;
}
/** {@inheritDoc}
 *
 * Replace the old block with the new one on disk; when finalize is set,
 * also finalize the replica and notify the namenode (lease recovery path).
 */
public void updateBlock(int namespaceId, Block oldblock, Block newblock, boolean finalize) throws IOException {
  LOG.info("namespaceId: " + namespaceId
      + ", oldblock=" + oldblock + "(length=" + oldblock.getNumBytes()
      + "), newblock=" + newblock + "(length=" + newblock.getNumBytes()
      + "), datanode=" + getDatanodeInfo());
  data.updateBlock(namespaceId, oldblock, newblock);
  if (finalize) {
    data.finalizeBlockIfNeeded(namespaceId, newblock);
    myMetrics.blocksWritten.inc();
    notifyNamenodeReceivedBlock(namespaceId, newblock, null);
    LOG.info("Received block " + newblock +
              " of size " + newblock.getNumBytes() +
              " as part of lease recovery.");
  }
}
/** {@inheritDoc}
 *
 * Inter-datanode callers skip the compatibility check; client callers
 * have their version validated first.
 */
public long getProtocolVersion(String protocol, long clientVersion
    ) throws IOException {
  if (InterDatanodeProtocol.class.getName().equals(protocol)) {
    return InterDatanodeProtocol.versionID;
  }
  if (ClientDatanodeProtocol.class.getName().equals(protocol)) {
    checkVersion(protocol, clientVersion, ClientDatanodeProtocol.versionID);
    return ClientDatanodeProtocol.versionID;
  }
  throw new IOException("Unknown protocol to " + getClass().getSimpleName()
      + ": " + protocol);
}
/** {@inheritDoc}
 * Old (pre-federation) client entry point: defaults to the first namespace. */
public BlockPathInfo getBlockPathInfo(Block block) throws IOException {
  return getBlockPathInfo(getAllNamespaces()[0], block);
}
/** Return the on-disk paths (block file and meta file) for a block. */
@Override
public BlockPathInfo getBlockPathInfo(int namespaceId, Block block) throws IOException {
  File datafile = data.getBlockFile(namespaceId, block);
  File metafile = FSDataset.getMetaFile(datafile, block);
  BlockPathInfo info = new BlockPathInfo(block, datafile.getAbsolutePath(),
                                         metafile.getAbsolutePath());
  if (LOG.isDebugEnabled()) {
    LOG.debug("getBlockPathInfo successful block=" + block +
              " blockfile " + datafile.getAbsolutePath() +
              " metafile " + metafile.getAbsolutePath());
  }
  return info;
}
/** Delegate protocol-signature resolution to the standard helper. */
public ProtocolSignature getProtocolSignature(String protocol,
    long clientVersion, int clientMethodsHash) throws IOException {
  return ProtocolSignature.getProtocolSignature(
      this, protocol, clientVersion, clientMethodsHash);
}
/**
 * Reject a client whose protocol version is older than the server's and
 * not declared compatible.
 *
 * @throws IOException (RPC.VersionIncompatible) on an incompatible client
 */
private void checkVersion(String protocol, long clientVersion,
    long serverVersion) throws IOException {
  // A client at or ahead of the server version is always acceptable.
  if (serverVersion <= clientVersion) {
    return;
  }
  if (ProtocolCompatible.isCompatibleClientDatanodeProtocol(
      clientVersion, serverVersion)) {
    return;
  }
  throw new RPC.VersionIncompatible(protocol, clientVersion, serverVersion);
}
/** A convenient class used in lease recovery */
static class BlockRecord {
  final DatanodeID id;                  // datanode holding the replica
  final InterDatanodeProtocol datanode; // proxy (or local object) for that node
  final BlockRecoveryInfo info;         // replica state reported by that node

  BlockRecord(DatanodeID id, InterDatanodeProtocol datanode,
      BlockRecoveryInfo info) {
    this.id = id;
    this.datanode = datanode;
    this.info = info;
  }

  /** {@inheritDoc} */
  @Override // added: the comment claimed inheritance but the annotation was missing
  public String toString() {
    return "BlockRecord(info=" + info + " node=" + id + ")";
  }
}
/** Thrown when a block recovery exceeds the client-supplied deadline. */
static public class BlockRecoveryTimeoutException extends IOException {
  /**
   * Serialization version id (IOException is Serializable).
   */
  private static final long serialVersionUID = 7887035511587861524L;

  public BlockRecoveryTimeoutException (String msg) {
    super (msg);
  }
}
/**
 * Throw a {@link BlockRecoveryTimeoutException} if the recovery deadline
 * has passed.
 *
 * @param timeoutTime absolute deadline in milliseconds since the epoch;
 *                    a value &lt;= 0 means "no deadline"
 * @throws IOException if the current time is past the deadline
 */
static public void throwIfAfterTime(long timeoutTime) throws IOException {
  if (timeoutTime > 0 && System.currentTimeMillis() > timeoutTime) {
    // Fixed grammar in the error message ("have" -> "has").
    throw new BlockRecoveryTimeoutException("The client has timed out.");
  }
}
/** Recover a block
 * @param keepLength if true, will only recover replicas that have the same length
 * as the block passed in. Otherwise, will calculate the minimum length of the
 * replicas and truncate the rest to that length.
 **/
private LocatedBlock recoverBlock(int namespaceId, Block block, boolean keepLength,
    DatanodeID[] datanodeids, boolean closeFile, long deadline) throws IOException {

  // If the block is already being recovered, then skip recovering it.
  // This can happen if the namenode and client start recovering the same
  // file at the same time.
  synchronized (ongoingRecovery) {
    // Wildcard generation stamp so any in-flight recovery of this block id
    // matches, regardless of its stamp.
    Block tmp = new Block();
    tmp.set(block.getBlockId(), block.getNumBytes(), GenerationStamp.WILDCARD_STAMP);
    if (ongoingRecovery.get(tmp) != null) {
      String msg = "Block " + block + " is already being recovered, " +
                   " ignoring this request to recover it.";
      LOG.info(msg);
      throw new IOException(msg);
    }
    ongoingRecovery.put(block, block);
  }
  try {
    int errorCount = 0;

    // Number of "replicasBeingWritten" in 0.21 parlance - these are replicas
    // on DNs that are still alive from when the write was happening
    int rbwCount = 0;
    // Number of "replicasWaitingRecovery" in 0.21 parlance - these replicas
    // have survived a DN restart, and thus might be truncated (eg if the
    // DN died because of a machine power failure, and when the ext3 journal
    // replayed, it truncated the file
    int rwrCount = 0;

    List<BlockRecord> blockRecords = new ArrayList<BlockRecord>();
    List<InterDatanodeProtocol> datanodeProxies =
      new ArrayList<InterDatanodeProtocol>();
    //check generation stamps
    for(DatanodeID id : datanodeids) {
      try {
        InterDatanodeProtocol datanode;
        if (getDNRegistrationForNS(namespaceId).equals(id)) {
          // This datanode itself holds a replica: no RPC proxy needed.
          LOG.info("Skipping IDNPP creation for local id " + id
              + " when recovering " + block);
          datanode = this;
        } else {
          LOG.info("Creating IDNPP for non-local id " + id + " (dnReg="
              + getDNRegistrationForNS(namespaceId) + ") when recovering "
              + block);
          datanode = DataNode.createInterDataNodeProtocolProxy(
              id, getConf(), socketTimeout);
          datanodeProxies.add(datanode);
        }
        // Respect the client deadline before each remote call.
        throwIfAfterTime(deadline);
        BlockRecoveryInfo info = datanode.startBlockRecovery(namespaceId, block);
        if (info == null) {
          LOG.info("No block metadata found for block " + block + " on datanode "
              + id);
          continue;
        }

        if (info.getBlock().getGenerationStamp() < block.getGenerationStamp()) {
          LOG.info("Only old generation stamp " + info.getBlock().getGenerationStamp()
              + " found on datanode " + id + " (needed block=" +
              block + ")");
          continue;
        }

        blockRecords.add(new BlockRecord(id, datanode, info));

        if (info.wasRecoveredOnStartup()) {
          rwrCount++;
        } else {
          rbwCount++;
        }
      } catch (BlockRecoveryTimeoutException e) {
        // Deadline expired: abort the whole recovery, don't just count it.
        throw e;
      } catch (IOException e) {
        ++errorCount;
        InterDatanodeProtocol.LOG.warn(
            "Failed to getBlockMetaDataInfo for block (=" + block
            + ") from datanode (=" + id + ")", e);
      }
    }

    // If we *only* have replicas from post-DN-restart, then we should
    // include them in determining length. Otherwise they might cause us
    // to truncate too short.
    boolean shouldRecoverRwrs = (rbwCount == 0);

    List<BlockRecord> syncList = new ArrayList<BlockRecord>();
    long minlength = Long.MAX_VALUE;

    for (BlockRecord record : blockRecords) {
      BlockRecoveryInfo info = record.info;
      assert (info != null && info.getBlock().getGenerationStamp() >= block.getGenerationStamp());
      if (!shouldRecoverRwrs && info.wasRecoveredOnStartup()) {
        LOG.info("Not recovering replica " + record + " since it was recovered on "
            + "startup and we have better replicas");
        continue;
      }
      if (keepLength) {
        if (info.getBlock().getNumBytes() == block.getNumBytes()) {
          syncList.add(record);
        }
      } else {
        syncList.add(record);
        if (info.getBlock().getNumBytes() < minlength) {
          minlength = info.getBlock().getNumBytes();
        }
      }
    }

    if (syncList.isEmpty() && errorCount > 0) {
      stopAllProxies(datanodeProxies);
      throw new IOException("All datanodes failed: block=" + block
          + ", datanodeids=" + Arrays.asList(datanodeids));
    }
    if (!keepLength) {
      // Truncate every surviving replica to the shortest observed length.
      block.setNumBytes(minlength);
    }
    return syncBlock(namespaceId, block, syncList, closeFile,
        datanodeProxies, deadline);
  } finally {
    synchronized (ongoingRecovery) {
      ongoingRecovery.remove(block);
    }
  }
}
/** Stop every inter-datanode RPC proxy accumulated during a recovery. */
protected void stopAllProxies(List<InterDatanodeProtocol> datanodeProxies) {
  // safe to stop proxies now
  Iterator<InterDatanodeProtocol> it = datanodeProxies.iterator();
  while (it.hasNext()) {
    stopDatanodeProxy(it.next());
  }
}
/** Close the given handle if it is an RPC proxy; local objects are left alone. */
private void stopDatanodeProxy(InterDatanodeProtocol datanode) {
  boolean isRpcProxy = Proxy.isProxyClass(datanode.getClass());
  if (isRpcProxy) {
    RPC.stopProxy(datanode);
  }
}
/** Block synchronization: delegated to the namespace's offer service. */
private LocatedBlock syncBlock(int namespaceId, Block block,
    List<BlockRecord> syncList, boolean closeFile,
    List<InterDatanodeProtocol> datanodeProxies, long deadline)
    throws IOException {
  return namespaceManager.get(namespaceId).syncBlock(block, syncList,
      closeFile, datanodeProxies, deadline);
}
// ClientDataNodeProtocol implementation
/** {@inheritDoc} */
public LocatedBlock recoverBlock(Block block, boolean keepLength,
    DatanodeInfo[] targets) throws IOException {
  // old client: use default namespace
  return recoverBlock(getAllNamespaces()[0], block, keepLength, targets);
}
/** Client-initiated recovery with an absolute deadline (closeFile=false). */
@Override
public LocatedBlock recoverBlock(int namespaceId, Block block,
    boolean keepLength, DatanodeInfo[] targets, long deadline)
    throws IOException {
  logRecoverBlock("Client", namespaceId, block, targets);
  return recoverBlock(namespaceId, block, keepLength, targets, false,
      deadline);
}
/** Client-initiated recovery without a deadline (deadline=0, closeFile=false). */
@Override
public LocatedBlock recoverBlock(int namespaceId, Block block,
    boolean keepLength, DatanodeInfo[] targets) throws IOException {
  logRecoverBlock("Client", namespaceId, block, targets);
  return recoverBlock(namespaceId, block, keepLength, targets, false, 0);
}
/** {@inheritDoc}
 *
 * Old (pre-federation) client entry point: defaults to the first
 * namespace, mirroring {@link #getBlockPathInfo(Block)}.
 */
public Block getBlockInfo(Block block) throws IOException {
  // Bug fix: the previous code passed DataNode.PKT_HEADER_LEN (a packet
  // header length constant) as the namespace id, which is not a namespace.
  return getBlockInfo(getAllNamespaces()[0], block);
}
/** Look up the stored block by id within the given namespace. */
@Override
public Block getBlockInfo(int namespaceId, Block block) throws IOException {
  return data.getStoredBlock(namespaceId, block.getBlockId());
}
/**
 * Copy a block whose file already exists on this host into the destination
 * namespace. Submitted asynchronously; the task's result is not awaited.
 */
@Override
public void copyBlockLocal(String srcFileSystem,
    int srcNamespaceId, Block srcBlock,
    int dstNamespaceId, Block dstBlock, String srcBlockFilePath)
    throws IOException {
  File srcBlockFile = new File(srcBlockFilePath);
  if (!srcBlockFile.exists()) {
    throw new FileNotFoundException("File " + srcBlockFilePath
        + " could not be found");
  }
  // crossDatanode=true: the source file belongs to another datanode
  // instance on this host.
  blockCopyExecutor.submit(new LocalBlockCopy(srcFileSystem,
        srcNamespaceId, srcBlock,
        dstNamespaceId, dstBlock, true, srcBlockFile));
}
/** Non-federated overload: delegates with async=true. */
@Override
public void copyBlock(Block srcBlock, Block destinationBlock,
    DatanodeInfo target) throws IOException {
  copyBlock(srcBlock, destinationBlock, target, true);
}
/** Non-federated overload: unsupported on federated clusters by design. */
@Override
public void copyBlock(Block srcBlock, Block destinationBlock,
    DatanodeInfo target, boolean async) throws IOException {
  throw new IOException(
      "Please upgrade your fastcopy tool to work with federated " +
      "HDFS clusters.");
}
/** Namespace-aware overload: delegates with async=true. */
@Override
public void copyBlock(int srcNamespaceId, Block srcBlock, int dstNamespaceId,
    Block destinationBlock, DatanodeInfo target)
    throws IOException {
  copyBlock(srcNamespaceId, srcBlock,
      dstNamespaceId, destinationBlock, target, true);
}
/**
 * Copy a finalized block to the target datanode, choosing the cheapest
 * mechanism: same-process local copy, cross-datanode same-host copy, or a
 * network transfer. When async is false, waits for completion and
 * propagates any failure to the caller.
 */
@Override
public void copyBlock(int srcNamespaceId, Block srcBlock, int dstNamespaceId,
    Block destinationBlock, DatanodeInfo target, boolean async)
    throws IOException {
  if (!data.isValidBlock(srcNamespaceId, srcBlock, true)) {
    // block does not exist or is under-construction
    String errStr = "copyBlock: Can't send invalid block " + srcBlock
        + " at " + srcNamespaceId;
    LOG.info(errStr);
    throw new IOException(errStr);
  }

  // Check if specified length matches on-disk length
  long onDiskLength = data.getFinalizedBlockLength(srcNamespaceId, srcBlock);
  if (srcBlock.getNumBytes() > onDiskLength) {
    // Shorter on-disk len indicates corruption so report NN the corrupt block
    String msg = "copyBlock: Can't replicate block " + srcBlock
        + " at " + srcNamespaceId
        + " because on-disk length " + onDiskLength
        + " is shorter than provided length " + srcBlock.getNumBytes();
    LOG.info(msg);
    throw new IOException(msg);
  }

  LOG.info(getDatanodeInfo() + " copyBlock: Starting thread to transfer: " +
      "srcNamespaceId: " + srcNamespaceId + " block: " +
      srcBlock + " to " + target.getName());
  DatanodeInfo[] targets = new DatanodeInfo[1];
  targets[0] = target;

  // Use IP Address and port number to determine locality. Relying on the
  // DatanodeID of both the target machine and the local machine to
  // determine locality. This guarantees uniformity in comparison.
  String targetMachine = target.getHost();
  int targetPort = target.getPort();

  DatanodeRegistration dnRegistration = getDNRegistrationForNS(srcNamespaceId);
  int localPort = dnRegistration.getPort();
  String localMachine = dnRegistration.getHost();

  Future<Boolean> result;
  // If the target datanode is our datanode itself, then perform local copy.
  if (targetMachine.equals(localMachine) && targetPort == localPort) {
    LOG.info("Performing local block copy since source and "
        + "destination datanodes are same for  block "
        + srcBlock.getBlockName());
    result = blockCopyExecutor.submit(new LocalBlockCopy(srcNamespaceId,
        srcBlock, dstNamespaceId, destinationBlock));
  } else if (targetMachine.equals(localMachine)) {
    // Same host, different datanode process: hand the file path across.
    LOG.info("Performing cross datanode local block copy since source " +
        "and destination hosts are same for block "
        + srcBlock.getBlockName());
    result = blockCopyExecutor.submit(new CrossDatanodeLocalBlockCopy(
        srcNamespaceId, srcBlock, dstNamespaceId, destinationBlock, target));
  } else {
    result = blockCopyExecutor.submit(new DataTransfer(targets, srcNamespaceId, srcBlock,
        dstNamespaceId, destinationBlock, this));
  }

  // If this is not an async request, wait for the task to complete, if the
  // task fails this will throw an exception and will be propogated to the
  // client.
  if (!async) {
    try {
      // Wait up to blockCopyRPCWaitTime seconds for the copy to finish.
      result.get(this.blockCopyRPCWaitTime, TimeUnit.SECONDS);
    } catch (Exception e) {
      LOG.error(e);
      throw new IOException(e);
    }
  }
}
/**
 * Log who requested a block recovery, for which block, and to which targets.
 *
 * @param who         originator of the request (e.g. "Client" or "NameNode")
 * @param namespaceId namespace the block belongs to
 * @param block       block being recovered
 * @param targets     recovery participants; must contain at least one entry
 */
private static void logRecoverBlock(String who, int namespaceId,
    Block block, DatanodeID[] targets) {
  StringBuilder msg = new StringBuilder(targets[0].getName());
  for (int i = 1; i < targets.length; i++) {
    // Chain appends rather than concatenating inside append(), which would
    // allocate a throwaway intermediate String per target.
    msg.append(", ").append(targets[i].getName());
  }
  LOG.info(who + " calls recoverBlock(namespace_id =" + namespaceId +
      " block=" + block
      + ", targets=[" + msg + "])");
}
/**
 * Copies a block to a different datanode process on the same host by
 * passing the local file path over the inter-datanode protocol, avoiding a
 * network data transfer.
 */
class CrossDatanodeLocalBlockCopy implements Callable<Boolean> {
  private final int srcNamespaceId;
  private final Block srcBlock;
  private final int dstNamespaceId;
  private Block dstBlock;
  private final DatanodeInfo target;     // the co-located datanode to copy to
  private final String srcFileSystem;    // resolved here so the remote side knows the volume

  public CrossDatanodeLocalBlockCopy(int srcNamespaceId, Block srcBlock,
      int dstNamespaceId, Block dstBlock,
      DatanodeInfo target) throws IOException {
    this.srcNamespaceId = srcNamespaceId;
    this.srcBlock = srcBlock;
    this.dstNamespaceId = dstNamespaceId;
    this.dstBlock = dstBlock;
    this.target = target;
    this.srcFileSystem = data.getFileSystemForBlock(srcNamespaceId, srcBlock);
  }

  public Boolean call() throws Exception {
    InterDatanodeProtocol remoteDatanode = null;
    try {
      File srcBlockFile = data.getBlockFile(srcNamespaceId, srcBlock);
      remoteDatanode = DataNode
        .createInterDataNodeProtocolProxy(target, getConf(), socketTimeout);
      // The remote datanode reads the file directly from the shared host disk.
      remoteDatanode.copyBlockLocal(srcFileSystem, srcNamespaceId, srcBlock,
          dstNamespaceId, dstBlock,
          srcBlockFile.getAbsolutePath());
    } catch (IOException e) {
      LOG.warn("Cross datanode local block copy failed", e);
      throw e;
    } finally {
      // Always tear down the RPC proxy, success or failure.
      if (remoteDatanode != null) {
        stopDatanodeProxy(remoteDatanode);
      }
    }
    return true;
  }
}
/**
 * Copies a block between namespaces within this host: either inside this
 * datanode, or (crossDatanode) using a source file handed over by another
 * datanode process on the same machine. On success the namenode is notified
 * and the new replica is queued for block scanning.
 */
class LocalBlockCopy implements Callable<Boolean> {
  private final Block srcBlock;
  private final Block dstBlock;
  private final int srcNamespaceId;
  private final int dstNamespaceId;
  // Whether or not this copy is a copy across two datanodes on the same host.
  private final boolean crossDatanode;
  private final File srcBlockFile;      // non-null only for cross-datanode copies
  private final String srcFileSystem;   // volume the source block lives on

  public LocalBlockCopy(int srcNamespaceId, Block srcBlock,
      int dstNamespaceId, Block dstBlock) throws IOException {
    this(null, srcNamespaceId, srcBlock, dstNamespaceId, dstBlock, false, null);
  }

  public LocalBlockCopy(String srcFileSystem,
      int srcNamespaceId, Block srcBlock,
      int dstNamespaceId, Block dstBlock,
      boolean crossDatanode, File srcBlockFile) throws IOException {
    this.srcBlock = srcBlock;
    this.dstBlock = dstBlock;
    this.srcNamespaceId = srcNamespaceId;
    this.dstNamespaceId = dstNamespaceId;
    this.crossDatanode = crossDatanode;
    this.srcBlockFile = srcBlockFile;
    // Resolve the source volume lazily only when not supplied by the caller.
    this.srcFileSystem = (srcFileSystem != null) ? srcFileSystem :
      data.getFileSystemForBlock(srcNamespaceId, srcBlock);
  }

  public Boolean call() throws Exception {
    try {
      if (crossDatanode) {
        data.copyBlockLocal(srcFileSystem, srcBlockFile,
            srcNamespaceId, srcBlock, dstNamespaceId, dstBlock);
      } else {
        data.copyBlockLocal(srcFileSystem,
            data.getBlockFile(srcNamespaceId, srcBlock),
            srcNamespaceId, srcBlock, dstNamespaceId, dstBlock);
      }
      dstBlock.setNumBytes(srcBlock.getNumBytes());
      notifyNamenodeReceivedBlock(dstNamespaceId, dstBlock, null);
      blockScanner.addBlock(dstNamespaceId, dstBlock);
    } catch (Exception e) {
      LOG.warn("Local block copy for src : " + srcBlock.getBlockName()
          + ", dst : " + dstBlock.getBlockName() + " failed", e);
      throw e;
    }
    return true;
  }
}
/**
 * Forward bad-block reports to the offer service of the given namespace.
 *
 * @throws IOException if no offer service exists for the namespace
 */
public void reportBadBlocks(int namespaceId, LocatedBlock[] blocks)
    throws IOException {
  NamespaceService service = namespaceManager.get(namespaceId);
  if (service == null) {
    throw new IOException("cannot locate OfferService thread for namespace=" + namespaceId);
  }
  service.reportBadBlocks(blocks);
}
/** Return the upgrade manager for the namespace, or null if it has no service. */
public UpgradeManagerDatanode getUpgradeManager(int namespaceId) {
  NamespaceService nsos = namespaceManager.get(namespaceId);
  if (nsos == null) {
    return null;
  }
  return nsos.getUpgradeManager();
}
/**
 * Complete a distributed upgrade for every namespace this datanode serves.
 * NOTE(review): assumes namespaceManager.get(id) is non-null for every id
 * returned by getAllNamespaces() — would NPE otherwise; confirm invariant.
 */
public void completeUpgrade() throws IOException{
  for(int namespaceId : namespaceManager.getAllNamespaces()){
    UpgradeManagerDatanode manager = namespaceManager.get(namespaceId).getUpgradeManager();
    manager.completeUpgrade();
  }
}
/**
 * See {@link DataBlockScanner}
 *
 * Initializes the periodic block scanner once; scanning is skipped (with a
 * logged reason) when disabled by configuration or when the dataset is not
 * a real FSDataset.
 */
private synchronized void initDataBlockScanner(Configuration conf) {
  if (blockScanner != null) {
    return;
  }
  //initialize periodic block scanner
  String reason = null;
  if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) {
    reason = "verification is turned off by configuration";
  } else if ( !(data instanceof FSDataset) ) {
    // Fixed typo in the logged reason ("verifcation" -> "verification").
    reason = "verification is supported only with FSDataset";
  }
  if ( reason == null ) {
    blockScanner = new DataBlockScannerSet(this, (FSDataset)data, conf);
  } else {
    LOG.info("Periodic Block Verification is disabled because " +
             reason + ".");
  }
}
/**
 * Get host:port with host set to Datanode host and port set to the
 * port {@link DataXceiver} is serving.
 * @return host:port string
 */
public String getMachineName() {
  return machineName + ":" + selfAddr.getPort();
}
/** Return the creation time of the storage for the given namespace. */
public long getCTime(int namespaceId) {
  return storage.getNStorage(namespaceId).getCTime();
}
/** Return this datanode's storage identifier. */
public String getStorageID() {
  return storage.getStorageID();
}
/**
 * Get DataNode info - used primarily for logging
 */
public String getDatanodeInfo() {
  return machineName + ":" + selfAddr.getPort()
    + "; storageID= " + storage.getStorageID();
}
/**
 * Return true if the given namespace is alive.
 * @param namespaceId
 * @return true if the namespace is alive, false otherwise
 */
public boolean isNamespaceAlive(int namespaceId) {
  return namespaceManager.isAlive(namespaceId);
}
/**
 * Return true if the given namespace is alive.
 * @param addr namenode address identifying the namespace
 * @return true if the namespace is alive, false otherwise (including when
 *         no service is registered for the address)
 */
public boolean isNamespaceAlive(InetSocketAddress addr) {
  // Null-guard: namespaceManager.get(addr) may return null for an unknown
  // address (see initialized(InetSocketAddress)); the previous code would
  // have thrown a NullPointerException.
  NamespaceService nsos = namespaceManager.get(addr);
  return nsos != null && nsos.isAlive();
}
/** Return the ids of all namespaces this datanode serves. */
public Integer[] getAllNamespaces(){
  return namespaceManager.getAllNamespaces();
}
/** Return all namespace service threads managed by this datanode. */
public NamespaceService[] getAllNamespaceServices() {
  return namespaceManager.getAllNamenodeThreads();
}
/**
 * This method makes data node to send block report
 */
public void scheduleNSBlockReport(long delay) {
  NamespaceService[] services = namespaceManager.getAllNamenodeThreads();
  for (NamespaceService service : services) {
    service.scheduleBlockReport(delay);
  }
}
/**
 * Schedules a blockReceivedAndDelete report to every namenode served by
 * this datanode.
 *
 * @param delay report delay passed through to each namespace service
 */
public void scheduleNSBlockReceivedAndDeleted(long delay) {
NamespaceService[] services = namespaceManager.getAllNamenodeThreads();
for (int i = 0; i < services.length; i++) {
  services[i].scheduleBlockReceivedAndDeleted(delay);
}
}
/**
 * Re-reads the namenode service RPC addresses from the given configuration
 * and refreshes the set of namespaces this datanode serves.
 *
 * @param conf configuration to read namenode addresses from
 * @throws IOException if the refresh is interrupted
 */
public void refreshNamenodes(Configuration conf) throws IOException {
LOG.info("refresh namenodes");
try {
  List<InetSocketAddress> nameNodeAddrs = DFSUtil.getNNServiceRpcAddresses(conf);
  namespaceManager.refreshNamenodes(nameNodeAddrs, conf);
} catch (InterruptedException e) {
  // Restore the interrupt status, and wrap the exception itself rather
  // than e.getCause(): InterruptedException usually has a null cause, so
  // the old code produced an IOException with no useful stack trace.
  Thread.currentThread().interrupt();
  throw new IOException(e);
}
}
//ClientDataNodeProtocol implementation
/* {@inheritDoc} */
/**
 * Refreshes every name node served by this datanode, reloading addresses
 * from a freshly constructed Configuration.
 */
public void refreshNamenodes() throws IOException {
Configuration freshConf = new Configuration();
conf = freshConf;
refreshNamenodes(freshConf);
}
/**
 * {@inheritDoc}
 *
 * Only "dfs.data.dir" may be reconfigured at runtime; any other property
 * is rejected with a ReconfigurationException.
 */
@Override
public void reconfigurePropertyImpl(String property, String newVal)
  throws ReconfigurationException {
// Guard clause: reject everything except the single supported property.
if (!property.equals("dfs.data.dir")) {
  throw new ReconfigurationException(property, newVal,
      getConf().get(property));
}
try {
  LOG.info("Reconfigure " + property + " to " + newVal);
  this.refreshVolumes(newVal);
} catch (Exception e) {
  throw new ReconfigurationException(property,
      newVal, getConf().get(property), e);
}
}
/**
 * {@inheritDoc}
 *
 * @return the properties this datanode supports reconfiguring at runtime;
 *         currently only "dfs.data.dir"
 */
@Override
public List<String> getReconfigurableProperties() {
return Arrays.asList("dfs.data.dir");
}
//@Override PulseCheckable
/** Liveness probe: the datanode must be up AND its worker be alive. */
public Boolean isAlive() {
// Early return preserves the original short-circuit: the second check
// only runs when the datanode is up.
if (!isDatanodeUp()) {
  return Boolean.FALSE;
}
return isDataNodeBeingAlive();
}
// JMX name this DataNode is registered under; null until registerMXBean()
// runs, checked by shutdownMXBean() before unregistering.
private ObjectName datanodeMXBeanName;
/**
 * Register DataNodeMXBean: creates the pulse checker for this DataNode and
 * registers this instance with JMX under "DataNode"/"DataNodeInfo",
 * remembering the resulting ObjectName for later unregistration.
 */
private void registerMXBean() {
this.pulseChecker = PulseChecker.create(this, "DataNode");
datanodeMXBeanName = MBeanUtil.registerMBean("DataNode", "DataNodeInfo", this);
}
/**
 * Tears down what registerMXBean() set up: unregisters the JMX MBean and
 * shuts down the pulse checker. Safe to call even if registration never
 * happened (both fields are null-checked).
 */
private void shutdownMXBean() {
if (datanodeMXBeanName != null) {
MBeanUtil.unregisterMBean(datanodeMXBeanName);
}
if (pulseChecker != null) {
pulseChecker.shutdown();
}
}
@Override // DataNodeMXBean
/** Returns the Hadoop version string reported by {@link VersionInfo}. */
public String getVersion() {
String version = VersionInfo.getVersion();
return version;
}
@Override // DataNodeMXBean
/** Returns the IPC server's listener port as a decimal string. */
public String getRpcPort(){
int rpcPort = this.ipcServer.getListenerAddress().getPort();
return String.valueOf(rpcPort);
}
@Override // DataNodeMXBean
/** Returns the HTTP info server's port as a decimal string. */
public String getHttpPort(){
int httpPort = this.infoServer.getPort();
return String.valueOf(httpPort);
}
/**
 * Returned information is a JSON representation of a map with
 * name node host name as the key and the namespace id as the value.
 * (The code stores {@code ns.getNamespaceId()}, not a block pool id.)
 * Only namespace services that have completed initialization are included.
 */
@Override // DataNodeMXBean
public String getNamenodeAddresses() {
final Map<String, Integer> info = new HashMap<String, Integer>();
for (NamespaceService ns : namespaceManager.getAllNamenodeThreads()) {
if (ns != null && ns.initialized()) {
info.put(ns.getNNSocketAddress().getHostName(), ns.getNamespaceId());
}
}
return JSON.toString(info);
}
/**
 * Returned information is a JSON representation of a map with
 * volume name as the key and value is a map of volume attribute
 * keys ("usedSpace", "freeSpace", "reservedSpace") to its values.
 * Returns the literal string "ERROR" if volume stats cannot be read.
 */
@Override // DataNodeMXBean
public String getVolumeInfo() {
final Map<String, Object> report = new HashMap<String, Object>();
try {
  FSVolume[] volumes = ((FSDataset)this.data).volumes.getVolumes();
  for (int i = 0; i < volumes.length; i++) {
    FSVolume vol = volumes[i];
    final Map<String, Object> stats = new HashMap<String, Object>();
    stats.put("usedSpace", vol.getDfsUsed());
    stats.put("freeSpace", vol.getAvailable());
    stats.put("reservedSpace", vol.getReserved());
    report.put(vol.getDir().toString(), stats);
  }
  return JSON.toString(report);
} catch (IOException e) {
  LOG.info("Cannot get volume info.", e);
  return "ERROR";
}
}
@Override // DataNodeMXBean
/**
 * Returns a comma-separated list of the nameservice ids of every
 * initialized namespace service; a namespace with no nameservice id
 * (non-federation deployment) is reported as "NONFEDERATION".
 */
public String getServiceIds() {
// Use a StringBuilder instead of repeated String concatenation, which was
// O(n^2) in the number of namespaces. Output is byte-identical.
StringBuilder nameserviceIds = new StringBuilder();
for (NamespaceService ns : namespaceManager.getAllNamenodeThreads()) {
  if (ns != null && ns.initialized()) {
    String nameserviceId = ns.getNameserviceId();
    if (nameserviceIds.length() > 0) {
      nameserviceIds.append(',');
    }
    if (nameserviceId == null) {
      // Non-federation version, should be only one namespace
      nameserviceId = "NONFEDERATION";
    }
    nameserviceIds.append(nameserviceId);
  }
}
return nameserviceIds.toString();
}
/**
 * Sends a 'Blocks Being Written' report to the given node. Does nothing
 * when there are no blocks being written for the namespace.
 *
 * @param node the node to send the report to
 * @param namespaceId namespace whose in-progress blocks are reported
 * @param nsRegistration this datanode's registration with that namenode
 * @throws IOException if the RPC to the node fails
 */
public void sendBlocksBeingWrittenReport(DatanodeProtocol node,
    int namespaceId, DatanodeRegistration nsRegistration) throws IOException {
Block[] blocks = data.getBlocksBeingWrittenReport(namespaceId);
// Guard clause: nothing to report.
if (blocks == null || blocks.length == 0) {
  return;
}
long[] blocksAsLong = BlockListAsLongs.convertToArrayLongs(blocks);
node.blocksBeingWrittenReport(nsRegistration, new BlockReport(blocksAsLong));
}
}