package org.apache.hadoop.raid;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.zip.CRC32;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.RaidDFSUtil;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.PlacementMonitor.BlockInfo;
/**
 * Checks that the per-job temporary parity directories used by
 * {@link DistRaid} jobs are removed once the jobs are no longer running,
 * both when the jobs run to completion and when they are killed.
 *
 * Each test starts its own mini DFS and map/reduce clusters and a
 * distributed RaidNode, and tears them down again in a finally block.
 */
public class TestTempDirectoryCleanUp extends TestCase {
  final static String TEST_DIR = new File(System.getProperty("test.build.data",
      "build/contrib/raid/test/data")).getAbsolutePath();
  final static String CONFIG_FILE = new File(TEST_DIR,
      "test-raid.xml").getAbsolutePath();
  final static long RELOAD_INTERVAL = 1000;
  final static Log LOG = LogFactory.getLog(
      "org.apache.hadoop.raid.TestTempDirectoryCleanUp");
  final static Random rand = new Random();

  /** Upper bound, in milliseconds, shared by all waits within one test. */
  private static final long MAX_WAITTIME = 300000L;
  /**
   * Number of monitored jobs to wait for before inspecting temp directories.
   * Presumably one per concrete policy written by initializeConfig() --
   * confirm against JobMonitor.
   */
  private static final int EXPECTED_JOBS = 3;

  Configuration conf;
  String namenode = null;
  String hftp = null;
  MiniDFSCluster dfs = null;
  MiniMRCluster mr = null;
  FileSystem fileSys = null;
  String jobTrackerName = null;

  /**
   * Create the mapreduce and dfs clusters and populate {@link #conf} with
   * the settings shared by the tests.
   *
   * @param local if true the RaidNode class is set to LocalRaidNode,
   *              otherwise to DistRaidNode
   * @throws Exception if either cluster fails to start
   */
  private void createClusters(boolean local) throws Exception {
    if (System.getProperty("hadoop.log.dir") == null) {
      String base = new File(".").getAbsolutePath();
      System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
    }

    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();
    conf.set("raid.config.file", CONFIG_FILE);
    conf.setBoolean("raid.config.reload", true);
    conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
    Utils.loadTestCodecs(conf, 3, 10, 1, 5, "/raid", "/raidrs", false, false);
    conf.setLong("raid.policy.rescan.interval", 5 * 1000L);

    // the RaidNode does the raiding inline (instead of submitting to
    // map/reduce) only when the local implementation is selected
    if (local) {
      conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    } else {
      conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
    }

    // use local block fixer
    conf.set("raid.blockfix.classname",
        "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
    conf.set("dfs.block.replicator.classname",
        "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid");
    conf.set("raid.server.address", "localhost:0");

    // create a dfs and map-reduce cluster
    final int taskTrackers = 4;
    // Because BlockPlacementPolicyRaid only allows one replica in each rack,
    // spread 6 nodes into 6 racks to make sure chooseTarget function could pick
    // more than one node.
    String[] racks = {"/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6"};
    dfs = new MiniDFSCluster(conf, 6, true, racks);
    dfs.waitActive();
    fileSys = dfs.getFileSystem();
    namenode = fileSys.getUri().toString();
    mr = new MiniMRCluster(taskTrackers, namenode, 3);
    jobTrackerName = "localhost:" + mr.getJobTrackerPort();
    hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();

    FileSystem.setDefaultUri(conf, namenode);
    conf.set("mapred.job.tracker", jobTrackerName);
  }

  /**
   * Create the source files for the three policy directories through
   * {@link TestRaidNode#createTestFiles}.
   *
   * @throws IOException if file creation fails
   */
  private void createTestFiles() throws IOException {
    TestRaidNode.createTestFiles(fileSys, "/user/dhruba/raidtest/",
        "/raid/user/dhruba/raidtest", 5, 7);
    TestRaidNode.createTestFiles(fileSys, "/user/dhruba/raidtest2/",
        "/raid/user/dhruba/raidtest2", 5, 7);
    TestRaidNode.createTestFiles(fileSys, "/user/dhruba/raidtest3/",
        "/raidrs/user/dhruba/raidtest3", 1, 10);
    LOG.info("Created test files");
  }

  /**
   * Write the raid policy file to {@link #CONFIG_FILE} and build the
   * configuration handed to the RaidNode.
   *
   * @return a copy of {@link #conf} with the placement monitor and job
   *         monitor settings overridden for the test
   * @throws IOException if the policy file cannot be written
   */
  private Configuration initializeConfig() throws IOException {
    short targetReplication = 2;
    short metaReplication = 2;
    short rstargetReplication = 1;
    short rsmetaReplication = 1;

    // Initialize Raid Policy config
    ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
    cb.addPolicy("policy1", "/user/dhruba/raidtest",
        targetReplication, metaReplication);
    cb.addAbstractPolicy("abstractPolicy", targetReplication,
        metaReplication, "xor");
    cb.addPolicy("policy2", "/user/dhruba/raidtest2", "abstractPolicy");
    cb.addPolicy("policy3", "/user/dhruba/raidtest3",
        rstargetReplication, rsmetaReplication, "rs");
    cb.persist();

    // Initialize Raidnode config
    Configuration localConf = new Configuration(conf);
    // Avoid block mover to move blocks
    localConf.setInt(PlacementMonitor.BLOCK_MOVE_QUEUE_LENGTH_KEY, 0);
    localConf.setInt(PlacementMonitor.NUM_MOVING_THREADS_KEY, 1);
    localConf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 3000L);
    return localConf;
  }

  /**
   * Stop the clusters created earlier. The dfs cluster is shut down even
   * when shutting down the map/reduce cluster throws.
   */
  private void stopClusters() throws Exception {
    try {
      if (mr != null) {
        mr.shutdown();
        mr = null;
      }
    } finally {
      if (dfs != null) {
        dfs.shutdown();
        dfs = null;
      }
    }
  }

  /**
   * Return if the temp directories exist.
   *
   * @param job the job whose temp directories are looked up
   * @return true if the temp directory of at least one codec exists for
   *         the job's id
   * @throws IOException if the file system cannot be queried
   */
  private boolean checkTempDirectories(DistRaid job)
      throws IOException {
    boolean doesExist = false;
    // Every codec is checked (no early exit) so that the state of each
    // temp directory ends up in the log.
    for (Codec codec : Codec.getCodecs()) {
      Path tmpdir = new Path(codec.tmpParityDirectory, job.getJobID());
      FileSystem fs = tmpdir.getFileSystem(job.getConf());
      if (fs.exists(tmpdir)) {
        LOG.info("Temp directory " + tmpdir + " exists");
        doesExist = true;
      } else {
        LOG.info("Temp directory " + tmpdir + " does not exist");
      }
    }
    return doesExist;
  }

  /**
   * Block until a temp directory of the job shows up, failing the test if
   * that does not happen before the deadline.
   *
   * @param job      the job to watch
   * @param deadline absolute time in milliseconds after which to give up
   */
  private void waitForTempDirectories(DistRaid job, long deadline)
      throws Exception {
    while (!checkTempDirectories(job)) {
      assertTrue("Timed out waiting for temp directory creation for " +
          job.getJobID(), System.currentTimeMillis() < deadline);
      Thread.sleep(500);
      LOG.info("Waiting for temp directory creation for " +
          job.getJobID());
    }
  }

  /**
   * Shared body of both tests: start the clusters and a distributed
   * RaidNode, wait for the raid jobs and their temp directories, optionally
   * kill the jobs, wait until no job is running and then assert that the
   * temp directories are gone.
   *
   * @param testName name of the calling test, used in log messages
   * @param killJobs if true every job is killed as soon as its temp
   *                 directory has been seen
   */
  private void runTempDirCleanUpTest(String testName, boolean killJobs)
      throws Exception {
    LOG.info("Test " + testName + " started.");
    RaidNode cnode = null;
    try {
      // Cluster start-up is inside the try so that a partial start-up is
      // still torn down by the finally block.
      createClusters(false);
      createTestFiles();
      Configuration localConf = initializeConfig();
      cnode = RaidNode.createRaidNode(null, localConf);
      final long deadline = System.currentTimeMillis() + MAX_WAITTIME;
      assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
      DistRaidNode dcnode = (DistRaidNode) cnode;

      while (dcnode.jobMonitor.jobsMonitored() < EXPECTED_JOBS &&
             System.currentTimeMillis() < deadline) {
        Thread.sleep(1000);
      }
      assertTrue("Timed out waiting for " + EXPECTED_JOBS +
          " raid jobs to be monitored",
          dcnode.jobMonitor.jobsMonitored() >= EXPECTED_JOBS);

      List<DistRaid> jobs = dcnode.jobMonitor.getRunningJobs();
      if (killJobs) {
        LOG.info("Wait temp directory creation and kill all the jobs");
      }
      for (DistRaid job : jobs) {
        waitForTempDirectories(job, deadline);
        if (killJobs) {
          LOG.info("Kill job " + job.getJobID());
          job.killJob();
        }
      }

      while (dcnode.jobMonitor.runningJobsCount() > 0 &&
             System.currentTimeMillis() < deadline) {
        LOG.info("Waiting for zero running jobs: " +
            dcnode.jobMonitor.runningJobsCount());
        Thread.sleep(1000);
      }
      // Without this check a timeout would surface below as a misleading
      // "should be deleted" failure.
      assertFalse("Timed out waiting for zero running jobs",
          dcnode.jobMonitor.runningJobsCount() > 0);

      for (DistRaid job : jobs) {
        assertEquals("Temp directory for job " + job.getJobID() +
            " should be deleted.", false, checkTempDirectories(job));
      }
      LOG.info("Test " + testName + " successful.");
    } catch (Exception e) {
      LOG.info(testName + " Exception ", e);
      throw e;
    } finally {
      // The clusters must be stopped even if stopping the RaidNode throws.
      try {
        if (cnode != null) {
          cnode.stop();
          cnode.join();
        }
      } finally {
        stopClusters();
      }
    }
    LOG.info("Test " + testName + " completed.");
  }

  /**
   * Test if the temporary directory is cleaned up when the job is complete
   */
  public void testTempDirCleanUpWhenJobComplete() throws Exception {
    runTempDirCleanUpTest("testTempDirCleanUpWhenJobComplete", false);
  }

  /**
   * Test if the temporary directory is cleaned up when the job is killed
   */
  public void testTempDirCleanUpWhenJobIsKilled() throws Exception {
    runTempDirCleanUpTest("testTempDirCleanUpWhenJobIsKilled", true);
  }
}