package com.alimama.mdrill.hdfsDirectory;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.zip.CRC32;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.BufferedIndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.LinkFSDirectory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.cache.Cache;
import org.apache.lucene.util.cache.SimpleLRUCache;
import com.alimama.mdrill.buffer.BlockBufferInput;
import com.alimama.mdrill.buffer.CacheKeyBuffer;
public class FileSystemDirectory extends Directory {
/** Logger shared by this class and its nested input/output classes. */
private static final Log logger = LogFactory.getLog(FileSystemDirectory.class);
/** File system against which every path used by this directory is resolved. */
private final FileSystem fs;
/** Path of the directory that holds the index files. */
public final Path directory;
/** Buffer size taken from the "io.file.buffer.size" configuration entry (4096 when unset). */
private final int ioFileBufferSize;
/** When true, copy() into this directory records a link to the source file instead of copying it. */
private boolean isAllowMove=false;
/** When true, openInput() hands files whose name contains "frq" to BlockBufferInput. */
private boolean isUsedBlockBuffer=false;
/** Per-instance, synchronized LRU cache (capacity 64) of already computed cache-key strings. */
private Cache<CacheKeyBuffer, String> CACHE_BUFFER = Cache.synchronizedCache(new SimpleLRUCache<CacheKeyBuffer, String>(64));
/**
 * Returns the cache key describing the current content of this directory.
 *
 * The result is memoized in CACHE_BUFFER under a lookup key that contains a
 * ten-minute time slot (currentTimeMillis / 600000), so a fresh listing is
 * taken at most once per slot. If the listing fails, the error is logged
 * and the key is derived from a null file list.
 *
 * @return the cache key string, never recomputed within the same time slot
 *         while it stays in the LRU cache
 */
public synchronized String getCacheKey() {
    final long timeSlot = System.currentTimeMillis() / 600000L;
    final CacheKeyBuffer lookup = new CacheKeyBuffer(new String[0], this.dir_uuid, timeSlot, this.getP());
    final String cached = CACHE_BUFFER.get(lookup);
    if (cached != null) {
        return cached;
    }

    String[] names = null;
    try {
        names = this.listAll();
    } catch (Throwable e) {
        logger.error("listAll", e);
    }

    final String computed = getCacheKey(names);
    CACHE_BUFFER.put(lookup, computed);
    return computed;
}
/**
 * Formatter for the human-readable modification stamp at the end of a cache key.
 * SimpleDateFormat is not thread-safe and this instance is shared by every
 * FileSystemDirectory, so it must only be used while holding its own monitor.
 */
private static final SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmss");

/**
 * Builds (or returns the memoized) cache key for the given file list.
 *
 * The key is the concatenation, separated by "_", of: this class name, the
 * directory path, the number of files, a CRC32 over the file names, the sum
 * of the file lengths, the newest modification time in milliseconds and the
 * same time formatted as yyyyMMddHHmmss. Files whose status cannot be read
 * are logged and contribute only their name to the key.
 *
 * @param filelist names of the files to describe; may be null, in which case
 *                 the file count is omitted and sizes/times stay 0
 * @return the cache key string
 */
public synchronized String getCacheKey(String[] filelist) {
    CacheKeyBuffer lookup = new CacheKeyBuffer(filelist, this.dir_uuid,
            System.currentTimeMillis() / 600000L, this.getP());
    String cached = CACHE_BUFFER.get(lookup);
    if (cached != null) {
        return cached;
    }

    StringBuilder key = new StringBuilder();
    key.append(this.getClass().getName()).append("_");
    key.append(this.directory.toString()).append("_");

    CRC32 crc32 = new CRC32();
    crc32.update(0);
    long totalSize = 0;
    long newestModification = 0;
    if (filelist != null) {
        key.append(filelist.length).append("_");
        for (String name : filelist) {
            crc32.update(name.getBytes());
            FileStatus status = null;
            try {
                // fileStatus() follows links, so linked files report their real size/time
                status = this.fileStatus(name);
            } catch (Throwable e) {
                logger.error("fileStatus", e);
            }
            if (status != null) {
                totalSize += status.getLen();
                newestModification = Math.max(newestModification, status.getModificationTime());
            }
        }
    }

    key.append(crc32.getValue()).append("_");
    key.append(totalSize).append("_");
    key.append(newestModification).append("_");

    // The method-level lock is per instance; the formatter is static, so it
    // needs its own lock to stay safe across different directory instances.
    String stamp;
    synchronized (format) {
        stamp = format.format(new Date(newestModification));
    }
    key.append(stamp);

    String result = key.toString();
    CACHE_BUFFER.put(lookup, result);
    return result;
}
/**
 * @return true when openInput() is allowed to wrap "frq" files in a BlockBufferInput
 */
public boolean isUsedBlockBuffer() {
    return this.isUsedBlockBuffer;
}
/**
 * Enables or disables the block-buffer wrapping performed by openInput().
 *
 * @param isUsedBlockBuffer new value of the flag
 */
public void setUsedBlockBuffer(boolean isUsedBlockBuffer) {
    this.isUsedBlockBuffer = isUsedBlockBuffer;
}
/**
 * Maps a file name of this directory to the path of a file stored elsewhere
 * that stands in for it. Filled by addLinks(); looked up before the
 * directory's own path in fileLength(), fileStatus() and openInput().
 */
private HashMap<String,Path> links=new HashMap<String,Path>();
/**
 * @return true when copy() may register a link in this directory instead of copying bytes
 */
public boolean isAllowMove() {
    return this.isAllowMove;
}
/**
 * Sets the flag reported by isAllowMove() (note the differing setter name).
 *
 * @param isAllowMove true to let copy() link files into this directory
 */
public void setAllowLinks(boolean isAllowMove) {
    this.isAllowMove = isAllowMove;
}
/**
 * Copies the file src of this directory to dest in the directory to.
 *
 * When the target is a FileSystemDirectory or a LinkFSDirectory that allows
 * moves, no bytes are copied: the target merely records, under the name of
 * dest, the path of the source file. Every other case is delegated to the
 * superclass implementation.
 *
 * @param to   target directory
 * @param src  name of the file in this directory
 * @param dest name the file gets in the target directory
 * @throws IOException if the superclass copy fails
 */
public void copy(Directory to, String src, String dest) throws IOException {
    if (to instanceof FileSystemDirectory && ((FileSystemDirectory) to).isAllowMove()) {
        FileSystemDirectory target = (FileSystemDirectory) to;
        Path linkPath = new Path(target.directory, dest);
        Path sourcePath = new Path(directory, src);
        logger.info("used links dest:"+getShortPath(linkPath)+",src:"+getShortPath(sourcePath));
        target.addLinks(linkPath.getName(), sourcePath);
        return;
    }

    if (to instanceof LinkFSDirectory && ((LinkFSDirectory) to).isAllowMove()) {
        LinkFSDirectory target = (LinkFSDirectory) to;
        File linkFile = new File(target.directory, dest);
        Path sourcePath = new Path(directory, src);
        target.addhdfsLinks(linkFile.getName(), sourcePath);
        return;
    }

    super.copy(to, src, dest);
}
/**
 * Shortens a path to "parentName/fileName" for log output.
 *
 * @param p path to shorten; its parent must not be null
 * @return the name of the parent, a slash and the name of the path itself
 */
private String getShortPath(Path p) {
    return p.getParent().getName() + "/" + p.getName();
}
/**
 * Registers p as the file that backs name in this directory, replacing any
 * link previously stored under the same name.
 *
 * @param name file name as seen through this directory
 * @param p    path of the file that actually holds the data
 */
public void addLinks(String name, Path p) {
    links.put(name, p);
}
/**
 * Opens (and optionally creates) a directory on the given file system.
 *
 * @param fs        file system holding the directory
 * @param directory path of the directory
 * @param create    when true, create() is run first: it makes the directory
 *                  if missing and deletes the index files already in it
 * @param conf      configuration supplying "io.file.buffer.size" (default 4096)
 * @throws IOException if the path is missing or is not a directory, or if
 *                     create() fails
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create,
        Configuration conf) throws IOException {
    try {
        setLockFactory(new SingleInstanceLockFactory());
    } catch (IOException ignored) {
        // Cannot happen
    }

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean directoryPresent;
    try {
        FileStatus status = fs.getFileStatus(directory);
        directoryPresent = status != null && status.isDir();
    } catch (IOException ignored) {
        // a status that cannot be read is treated like a missing directory
        directoryPresent = false;
    }
    if (!directoryPresent) {
        throw new IOException(directory + " is not a directory");
    }
}
/**
 * Prepares an empty index directory: creates the directory when it does not
 * exist, verifies it really is a directory, forgets all links and deletes
 * every file accepted by LuceneIndexFileNameFilter.
 *
 * @throws IOException if the path is not a directory or a file cannot be deleted
 */
private void create() throws IOException {
    if (!fs.exists(directory)) {
        fs.mkdirs(directory);
    }

    boolean directoryPresent;
    try {
        FileStatus status = fs.getFileStatus(directory);
        directoryPresent = status != null && status.isDir();
    } catch (IOException ignored) {
        // a status that cannot be read is treated like a missing directory
        directoryPresent = false;
    }
    if (!directoryPresent) {
        throw new IOException(directory + " is not a directory");
    }

    // clear old index files
    FileStatus[] stale = fs.listStatus(directory,
            LuceneIndexFileNameFilter.getFilter());
    this.links.clear();
    for (FileStatus entry : stale) {
        if (!fs.delete(entry.getPath(), true)) {
            throw new IOException("Cannot delete index file " + entry.getPath());
        }
    }
}
/**
 * Lists the index files of this directory: first the files stored in the
 * directory that pass LuceneIndexFileNameFilter, then the names of all
 * registered links.
 *
 * @return the file names; a name present both physically and as a link
 *         appears twice
 * @throws IOException if the directory cannot be listed
 */
public String[] list() throws IOException {
    FileStatus[] stored = fs.listStatus(directory,
            LuceneIndexFileNameFilter.getFilter());
    ArrayList<String> names = new ArrayList<String>();
    for (FileStatus entry : stored) {
        names.add(entry.getPath().getName());
    }
    names.addAll(this.links.keySet());
    return names.toArray(new String[names.size()]);
}
/**
 * Tells whether name is available through this directory, either as a file
 * stored in it or as a registered link.
 *
 * @param name file name to look up
 * @return true if the file exists physically or a link is registered for it
 * @throws IOException if the file system check fails
 */
public boolean fileExists(String name) throws IOException {
    // the physical check runs first, exactly like the short-circuit "||" it replaces
    if (fs.exists(new Path(directory, name))) {
        return true;
    }
    return this.links.containsKey(name);
}
/**
 * Not supported by this directory.
 *
 * @param name ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
public long fileModified(String name) {
    throw new UnsupportedOperationException();
}
/**
 * Not supported by this directory.
 *
 * @param name ignored
 * @throws UnsupportedOperationException always
 */
public void touchFile(String name) {
    throw new UnsupportedOperationException();
}
/**
 * Returns the length of a file, following a registered link if there is one.
 *
 * @param name file name as seen through this directory
 * @return length in bytes reported by the file system
 * @throws IOException if the file status cannot be read
 */
public long fileLength(String name) throws IOException {
    Path linked = this.links.get(name);
    Path target = (linked != null) ? linked : new Path(directory, name);
    return fs.getFileStatus(target).getLen();
}
/**
 * Fetches the file status of a file, following a registered link if there is one.
 *
 * @param name file name as seen through this directory
 * @return status of the linked file, or of the file inside the directory
 * @throws IOException if the file status cannot be read
 */
private FileStatus fileStatus(String name) throws IOException {
    Path linked = this.links.get(name);
    Path target = (linked != null) ? linked : new Path(directory, name);
    return fs.getFileStatus(target);
}
/**
 * Removes a file from this directory.
 *
 * A registered link is forgotten; the file it points to is left alone. If
 * the name existed only as a link there is nothing stored under the
 * directory, so no file system delete is attempted (previously this case
 * always ended in an IOException).
 *
 * @param name file name to delete
 * @throws IOException if the file stored in the directory cannot be deleted
 * @see org.apache.lucene.store.Directory#deleteFile(java.lang.String)
 */
public void deleteFile(String name) throws IOException {
    Path linked = this.links.remove(name);
    Path file = new Path(directory, name);
    if (linked != null && !fs.exists(file)) {
        // link-only entry: dropping the link is the whole deletion
        return;
    }
    // two-argument form replaces the deprecated delete(Path), which is recursive too
    if (!fs.delete(file, true)) {
        throw new IOException("Cannot delete index file " + name);
    }
}
/**
 * Renames a file of this directory.
 *
 * A registered link is moved to the new name. If the name existed only as a
 * link, no file is stored under the directory and the file system rename is
 * skipped. Otherwise the stored file is renamed and a failed rename is
 * reported instead of being silently ignored.
 *
 * @param from current file name
 * @param to   new file name
 * @throws IOException if the file system refuses the rename
 * @see org.apache.lucene.store.Directory#renameFile(java.lang.String,
 *      java.lang.String)
 */
public void renameFile(String from, String to) throws IOException {
    Path source = new Path(directory, from);
    Path linked = this.links.remove(from);
    if (linked != null) {
        this.links.put(to, linked);
        if (!fs.exists(source)) {
            // link-only entry: moving the link is the whole rename
            return;
        }
    }
    if (!fs.rename(source, new Path(directory, to))) {
        throw new IOException("Cannot rename index file " + from + " to " + to);
    }
}
/**
 * Creates a new, empty file in this directory and returns a stream writing
 * to it. A link registered under the same name is dropped and an existing
 * file is deleted first.
 *
 * @param name name of the file to create
 * @return output writing to the new file
 * @throws IOException if an existing file cannot be removed or the file
 *                     cannot be created
 * @see org.apache.lucene.store.Directory#createOutput(java.lang.String)
 */
public IndexOutput createOutput(String name) throws IOException {
    Path target = new Path(directory, name);
    this.links.remove(name);
    if (fs.exists(target)) {
        // delete the existing one if applicable
        boolean removed = fs.delete(target, true);
        if (!removed) {
            throw new IOException("Cannot overwrite index file " + target);
        }
    }
    return new FileSystemIndexOutput(target, ioFileBufferSize);
}
/**
 * Opens a file for reading with the buffer size configured for this directory.
 *
 * @param name name of the file to open
 * @return input reading the file
 * @throws IOException if the file cannot be opened
 * @see org.apache.lucene.store.Directory#openInput(java.lang.String)
 */
public IndexInput openInput(String name) throws IOException {
    final int defaultBufferSize = ioFileBufferSize;
    return openInput(name, defaultBufferSize);
}
/**
 * Opens a file for reading. A registered link takes precedence over a file
 * stored in the directory. When block buffering is enabled and the name
 * contains "frq", the input is handed to BlockBufferInput.MaybeInstance.
 *
 * @param name       name of the file to open
 * @param bufferSize buffer size passed to the underlying input
 * @return input reading the file
 * @throws IOException if the file cannot be opened
 * @see org.apache.lucene.store.Directory#openInput(java.lang.String, int)
 */
public IndexInput openInput(String name, int bufferSize) throws IOException {
    Path linked = this.links.get(name);
    Path source;
    if (linked != null) {
        logger.info("openInput->usedlinks:"+name+"@"+getShortPath(linked));
        source = linked;
    } else {
        source = new Path(directory, name);
    }

    FileSystemIndexInput input = new FileSystemIndexInput(source, bufferSize);
    boolean blockBuffered = this.isUsedBlockBuffer() && name.indexOf("frq") >= 0;
    if (!blockBuffered) {
        return input;
    }
    return BlockBufferInput.MaybeInstance(input, this, name, this.getP());
}
// /*
// * (non-Javadoc)
// *
// * @see org.apache.lucene.store.Directory#makeLock(java.lang.String)
// */
// public Lock makeLock(final String name) {
// return new Lock() {
// public boolean obtain() {
// return true;
// }
//
// public void release() {
// }
//
// public boolean isLocked() {
// throw new UnsupportedOperationException();
// }
//
// public String toString() {
// return "Lock@" + new Path(directory, name);
// }
// };
// }
/**
 * Does nothing: the file system was handed in by the caller and is
 * deliberately left open.
 *
 * @throws IOException never thrown by this implementation
 * @see org.apache.lucene.store.Directory#close()
 */
public void close() throws IOException {
    // do not close the file system
}
/**
 * @return the runtime class name, "@" and the directory path
 * @see java.lang.Object#toString()
 */
public String toString() {
    String className = this.getClass().getName();
    return className + "@" + directory;
}
/**
 * Buffered input reading one file through a Descriptor. Clones share the
 * descriptor of the instance they were cloned from; all access to the
 * descriptor happens while holding its monitor.
 */
private class FileSystemIndexInput extends BufferedIndexInput {
    private final Path filePath; // for debugging
    private final Descriptor descriptor;
    /** Cached file length; negative until it has been read successfully. */
    private long length=-1;
    private boolean isOpen;
    private boolean isClone;

    /**
     * @param path             file to read
     * @param ioFileBufferSize buffer size passed to the descriptor
     * @throws IOException if the descriptor cannot be created
     */
    public FileSystemIndexInput(final Path path, final int ioFileBufferSize)
            throws IOException {
        filePath = path;
        descriptor = new Descriptor(fs, path, ioFileBufferSize);
        isOpen = true;
    }

    /**
     * Fills b[offset .. offset+len) from the current file pointer, seeking
     * the shared stream first when its position differs. Afterwards the
     * time spent reading and the time including the wait for the lock are
     * reported to the descriptor.
     *
     * @throws IOException on read failure or when the file ends early
     */
    protected void readInternal(byte[] b, int offset, int len)
            throws IOException {
        final long requestedAt = System.currentTimeMillis();
        long lockedAt = 0L;
        synchronized (descriptor) {
            lockedAt = System.currentTimeMillis();
            final long wanted = getFilePointer();
            if (wanted != descriptor.Positon()) {
                descriptor.Stream().seek(wanted);
                descriptor.setPositon(wanted);
            }
            int done = 0;
            do {
                final int count = descriptor.Stream().read(b, offset + done, len - done);
                if (count == -1) {
                    throw new IOException("Read past EOF");
                }
                descriptor.addPositon(count);
                done += count;
            } while (done < len);
        }
        final long finishedAt = System.currentTimeMillis();
        final long readMillis = finishedAt - lockedAt;
        final long totalMillis = finishedAt - requestedAt;
        synchronized (descriptor) {
            descriptor.Stat(readMillis, totalMillis, len);
        }
    }

    /**
     * Closes the descriptor. Clones never close it; closing the original
     * twice is an error.
     *
     * @throws IOException if already closed or the descriptor fails to close
     */
    public void close() throws IOException {
        if (isClone) {
            return;
        }
        if (!isOpen) {
            throw new IOException("Index file " + filePath + " already closed");
        }
        synchronized (descriptor) {
            descriptor.close();
        }
        isOpen = false;
    }

    /**
     * @return the file length, fetched from the file system on first use;
     *         stays negative (and is retried on the next call) when the
     *         status cannot be read
     */
    public long length() {
        if (length >= 0) {
            return length;
        }
        try {
            length = fs.getFileStatus(this.filePath).getLen();
        } catch (IOException e) {
            logger.error("getFileStatus "+filePath.getName()+" timetaken ",e);
        }
        return length;
    }

    /** Closes the file if the owner forgot to; clones are skipped. */
    protected void finalize() throws IOException {
        if (isClone || !isOpen) {
            return;
        }
        close(); // close the file
    }

    /** @return a copy flagged as clone, sharing this input's descriptor */
    public Object clone() {
        FileSystemIndexInput copy = (FileSystemIndexInput) super.clone();
        copy.isClone = true;
        return copy;
    }

    /** Nothing to do here: readInternal() seeks the stream when needed. */
    @Override
    protected void seekInternal(long pos) throws IOException {
    }
}
/**
 * Buffered output writing one file through an FSDataOutputStream. The file
 * is created (overwriting any existing one) when the output is constructed.
 * Seeking is not supported.
 */
private class FileSystemIndexOutput extends BufferedIndexOutput {
    private final Path filePath; // for debugging
    private final FSDataOutputStream out;
    private boolean isOpen;

    /**
     * @param path             file to create
     * @param ioFileBufferSize buffer size passed to the file system
     * @throws IOException if the file cannot be created
     */
    public FileSystemIndexOutput(Path path, int ioFileBufferSize)
            throws IOException {
        filePath = path;
        out = fs.create(path, true, ioFileBufferSize);
        isOpen = true;
    }

    /** Writes size bytes of b, starting at offset, to the underlying stream. */
    public void flushBuffer(byte[] b, int offset, int size)
            throws IOException {
        out.write(b, offset, size);
    }

    /**
     * Flushes the buffer and closes the underlying stream. The stream is
     * closed even when the flush fails, so a write error no longer leaks
     * the open file.
     *
     * @throws IOException if already closed, or if flushing or closing fails
     */
    public void close() throws IOException {
        if (!isOpen) {
            throw new IOException("Index file " + filePath
                    + " already closed");
        }
        // mark closed up front so finalize() does not retry a failed close
        isOpen = false;
        try {
            super.close();
        } finally {
            out.close();
        }
    }

    /**
     * @throws RuntimeException always; this output is append-only
     */
    @Override
    public void seek(long pos) {
        throw new RuntimeException("not allowed");
    }

    /** @return the current position of the underlying stream */
    public long length() throws IOException {
        return out.getPos();
    }

    /** Closes the file if the owner forgot to. */
    protected void finalize() throws IOException {
        if (isOpen) {
            close(); // close the file
        }
    }
}
/**
 * Same listing as list(): stored index files followed by link names.
 *
 * @return the file names available through this directory
 * @throws IOException if the directory cannot be listed
 */
@Override
public String[] listAll() throws IOException {
    final String[] names = this.list();
    return names;
}
}