package com.alimama.mdrill.editlog.write;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Comparator;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.alimama.mdrill.editlog.defined.HdfsConstants;
import com.alimama.mdrill.editlog.defined.JournalManager;
import com.alimama.mdrill.editlog.defined.NNStorage;
import com.alimama.mdrill.editlog.defined.StorageDirectory;
import com.alimama.mdrill.editlog.read.EditLogFileInputStream;
import com.alimama.mdrill.editlog.read.EditLogInputStream;
import com.alimama.mdrill.editlog.read.FSEditLogValidate.EditLogValidation;
import com.alimama.mdrill.editlog.util.FileUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.ComparisonChain;
public class FileJournalManager implements JournalManager {
private void purgeLog(EditLogFile log) {
LOG.info("Purging old edit log " + log.getFile().toString());
deleteOrWarn(log.getFs(), log.getFile());
}
private void deleteOrWarn(FileSystem fs, Path file) {
try {
if (!fs.delete(file, true)) {
LOG.warn("Could not delete " + file);
}
} catch (IOException e) {
LOG.warn("Could not delete " + file);
}
}
public static String EDITS_FILE_NAME = "mlog";
public static String EDITS_INPROGRESS_FILE_NAME = "mlog_inprocess";
private static final Log LOG = LogFactory.getLog(FileJournalManager.class);
private final StorageDirectory sd;
private int outputBufferCapacity = 1024*1024;
private static final Pattern EDITS_REGEX = Pattern.compile(EDITS_FILE_NAME + "_(\\d+)-(\\d+)");
private static final Pattern EDITS_INPROGRESS_REGEX = Pattern.compile(EDITS_INPROGRESS_FILE_NAME+ "_(\\d+)");
private Path currentInProgress = null;
public FileJournalManager(StorageDirectory sd) {
this.sd = sd;
}
@Override
public void close() throws IOException {
}
@Override
synchronized public EditLogOutputStream startLogSegment(long txid)
throws IOException {
try {
currentInProgress = NNStorage.getInProgressEditsFile(sd, txid);
EditLogOutputStream stm = new EditLogFileOutputStream(sd.getFileSystem(), currentInProgress, outputBufferCapacity);
return stm;
} catch (IOException e) {
LOG.warn("Unable to start log segment " + txid +
" at " + currentInProgress + ": " +
e.getLocalizedMessage());
throw e;
}
}
@Override
synchronized public void finalizeLogSegment(long firstTxId, long lastTxId)
throws IOException {
Path inprogressFile = NNStorage.getInProgressEditsFile(sd, firstTxId);
Path dstFile = NNStorage.getFinalizedEditsFile(sd, firstTxId, lastTxId);
LOG.info("Finalizing edits file " + inprogressFile + " -> " + dstFile);
Preconditions.checkState(!sd.getFileSystem().exists(dstFile), "Can't finalize edits file " + inprogressFile + " since finalized file " + "already exists");
if (!sd.getFileSystem().rename(inprogressFile, dstFile)) {
throw new IllegalStateException("Unable to finalize edits file " + inprogressFile);
}
if (inprogressFile.equals(currentInProgress)) {
currentInProgress = null;
}
}
@VisibleForTesting
public StorageDirectory getStorageDirectory() {
return sd;
}
@Override
synchronized public void setOutputBufferCapacity(int size) {
this.outputBufferCapacity = size;
}
@Override
public void purgeLogsOlderThan(long minTxIdToKeep)
throws IOException {
LOG.info("Purging logs older than " + minTxIdToKeep);
FileStatus[] files = FileUtil.listFiles(sd.getFileSystem(),sd.getCurrentDir());
List<EditLogFile> editLogs = FileJournalManager.matchEditLogs(sd.getFileSystem(),files);
for (EditLogFile log : editLogs) {
if (!log.isInProgress()&&log.getFirstTxId() < minTxIdToKeep &&
log.getLastTxId() < minTxIdToKeep) {
purgeLog(log);
}
}
}
public static List<EditLogFile> matchEditLogs(FileSystem fs,Path logDir) throws IOException {
return matchEditLogs(fs,FileUtil.listFiles(fs,logDir));
}
static List<EditLogFile> matchEditLogs(FileSystem fs,FileStatus[] filesInStorage) {
List<EditLogFile> ret = Lists.newArrayList();
for (FileStatus f : filesInStorage) {
String name = f.getPath().getName();
// Check for edits
Matcher editsMatch = EDITS_REGEX.matcher(name);
if (editsMatch.matches()) {
try {
long startTxId = Long.valueOf(editsMatch.group(1));
long endTxId = Long.valueOf(editsMatch.group(2));
ret.add(new EditLogFile(fs,f.getPath(), startTxId, endTxId));
} catch (NumberFormatException nfe) {
LOG.error("Edits file " + f + " has improperly formatted " +
"transaction ID");
// skip
}
}
// Check for in-progress edits
Matcher inProgressEditsMatch = EDITS_INPROGRESS_REGEX.matcher(name);
if (inProgressEditsMatch.matches()) {
try {
long startTxId = Long.valueOf(inProgressEditsMatch.group(1));
ret.add(new EditLogFile(fs,f.getPath(), startTxId, HdfsConstants.INVALID_TXID, true));
} catch (NumberFormatException nfe) {
LOG.error("In-progress edits file " + f + " has improperly " + "formatted transaction ID");
// skip
}
}
}
return ret;
}
@Override
synchronized public void selectInputStreams(
Collection<EditLogInputStream> streams, long fromTxId,
boolean inProgressOk, boolean forReading) throws IOException {
List<EditLogFile> elfs = matchEditLogs(sd.getFileSystem(),sd.getCurrentDir());
LOG.debug(this + ": selecting input streams starting at " + fromTxId +
(inProgressOk ? " (inProgress ok) " : " (excluding inProgress) ") +
"from among " + elfs.size() + " candidate file(s)");
addStreamsToCollectionFromFiles(elfs, streams, fromTxId, inProgressOk);
}
static void addStreamsToCollectionFromFiles(Collection<EditLogFile> elfs,
Collection<EditLogInputStream> streams, long fromTxId, boolean inProgressOk) {
for (EditLogFile elf : elfs) {
if (elf.isInProgress()) {
if (!inProgressOk) {
LOG.debug("passing over " + elf + " because it is in progress " +
"and we are ignoring in-progress logs.");
continue;
}
try {
elf.validateLog();
} catch (IOException e) {
LOG.error("got IOException while trying to validate header of " +
elf + ". Skipping.", e);
continue;
}
}
if (elf.lastTxId < fromTxId) {
assert elf.lastTxId != HdfsConstants.INVALID_TXID;
LOG.debug("passing over " + elf + " because it ends at " +
elf.lastTxId + ", but we only care about transactions " +
"as new as " + fromTxId);
continue;
}
EditLogFileInputStream elfis = new EditLogFileInputStream(elf.getFs(),elf.getFile(),
elf.getFirstTxId(), elf.getLastTxId(), elf.isInProgress());
LOG.debug("selecting edit log stream " + elf);
streams.add(elfis);
}
}
@Override
synchronized public void recoverUnfinalizedSegments() throws IOException {
Path currentDir = sd.getCurrentDir();
LOG.info("Recovering unfinalized segments in " + currentDir);
List<EditLogFile> allLogFiles = matchEditLogs(sd.getFileSystem(),currentDir);
for (EditLogFile elf : allLogFiles) {
if (elf.getFile().equals(currentInProgress)) {
continue;
}
if (elf.isInProgress()) {
try{
if (elf.getLength() == 0) {
LOG.info("Deleting zero-length edit log file " + elf);
if (!elf.delete()) {
throw new IOException("Unable to delete file " + elf.getFile());
}
continue;
}
}catch(Throwable e){
continue;
}
elf.validateLog();
if (elf.hasCorruptHeader()) {
elf.moveAsideCorruptFile();
throw new CorruptionException("In-progress edit log file is corrupt: "
+ elf);
}
if (elf.getLastTxId() == HdfsConstants.INVALID_TXID) {
LOG.info("Moving aside edit log file that seems to have zero " +
"transactions " + elf);
elf.moveAsideEmptyFile();
continue;
}
finalizeLogSegment(elf.getFirstTxId(), elf.getLastTxId());
}
}
}
public List<EditLogFile> getLogFiles(long fromTxId) throws IOException {
Path currentDir = sd.getCurrentDir();
List<EditLogFile> allLogFiles = matchEditLogs(sd.getFileSystem(),currentDir);
List<EditLogFile> logFiles = Lists.newArrayList();
for (EditLogFile elf : allLogFiles) {
if (fromTxId <= elf.getFirstTxId() ||
elf.containsTxId(fromTxId)) {
logFiles.add(elf);
}
}
Collections.sort(logFiles, EditLogFile.COMPARE_BY_START_TXID);
return logFiles;
}
@Override
public String toString() {
return String.format("FileJournalManager(root=%s)", sd.getCurrentDir());
}
public static class EditLogFile {
private Path file;
private FileSystem fs;
private final long firstTxId;
private long lastTxId;
private boolean hasCorruptHeader = false;
private final boolean isInProgress;
final static Comparator<EditLogFile> COMPARE_BY_START_TXID
= new Comparator<EditLogFile>() {
@Override
public int compare(EditLogFile a, EditLogFile b) {
return ComparisonChain.start()
.compare(a.getFirstTxId(), b.getFirstTxId())
.compare(a.getLastTxId(), b.getLastTxId())
.result();
}
};
EditLogFile(FileSystem fs,Path file,
long firstTxId, long lastTxId) {
this(fs,file, firstTxId, lastTxId, false);
assert (lastTxId != HdfsConstants.INVALID_TXID)
&& (lastTxId >= firstTxId);
}
EditLogFile(FileSystem fs,Path file, long firstTxId,
long lastTxId, boolean isInProgress) {
assert (lastTxId == HdfsConstants.INVALID_TXID && isInProgress)
|| (lastTxId != HdfsConstants.INVALID_TXID && lastTxId >= firstTxId);
assert (firstTxId > 0) || (firstTxId == HdfsConstants.INVALID_TXID);
assert file != null;
Preconditions.checkArgument(!isInProgress ||
lastTxId == HdfsConstants.INVALID_TXID);
this.firstTxId = firstTxId;
this.lastTxId = lastTxId;
this.file = file;
this.fs=fs;
this.isInProgress = isInProgress;
}
public long getFirstTxId() {
return firstTxId;
}
public long getLastTxId() {
return lastTxId;
}
boolean containsTxId(long txId) {
return firstTxId <= txId && txId <= lastTxId;
}
public void validateLog() throws IOException {
EditLogValidation val = EditLogFileInputStream.validateEditLog(fs,file);
this.lastTxId = val.getEndTxId();
this.hasCorruptHeader = val.hasCorruptHeader();
}
public boolean isInProgress() {
return isInProgress;
}
public Path getFile() {
return file;
}
public boolean delete() throws IOException
{
return fs.delete(this.file,true);
}
public long getLength() throws IOException
{
return this.fs.getFileStatus(this.file).getLen();
}
public FileSystem getFs() {
return fs;
}
boolean hasCorruptHeader() {
return hasCorruptHeader;
}
void moveAsideCorruptFile() throws IOException {
assert hasCorruptHeader;
renameSelf(".corrupt");
}
public void moveAsideEmptyFile() throws IOException {
assert lastTxId == HdfsConstants.INVALID_TXID;
renameSelf(".empty");
}
private void renameSelf(String newSuffix) throws IOException {
Path src = file;
Path dst = new Path(src.getParent(), src.getName() + newSuffix);
boolean success = fs.rename(src,dst);
if (!success) {
throw new IOException(
"Couldn't rename log " + src + " to " + dst);
}
file = dst;
}
@Override
public String toString() {
return String.format("EditLogFile(file=%s,first=%019d,last=%019d,"
+"inProgress=%b,hasCorruptHeader=%b)",
file.toString(), firstTxId, lastTxId,
isInProgress(), hasCorruptHeader);
}
}
}