package org.apache.blur.manager.writer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.Timer;
import java.util.TimerTask;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.manager.BlurPartitioner;
import org.apache.blur.server.IndexSearcherClosable;
import org.apache.blur.server.ShardContext;
import org.apache.blur.server.TableContext;
import org.apache.blur.store.hdfs.HdfsDirectory;
import org.apache.blur.utils.BlurConstants;
import org.apache.blur.utils.BlurUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Text;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
/**
 * Periodic task that looks in a shard's HDFS directory for sub directories
 * whose names end in {@code .commit} and imports the first valid one into the
 * {@link BlurIndex} of that shard.
 *
 * <p>
 * A directory moves through the following names while it is handled:
 * <ul>
 * <li>{@code .commit} - waiting to be imported,</li>
 * <li>{@code .inuse} - claimed by this importer and handed to the index,</li>
 * <li>{@code .badindex} - claimed, but it does not contain a Lucene index,</li>
 * <li>{@code .badrowids} - the import was rolled back.</li>
 * </ul>
 *
 * <p>
 * The instance schedules itself on its own daemon {@link Timer} from the
 * constructor; {@link #close()} cancels that timer. Note that {@link #run()}
 * only catches {@link IOException}; any unchecked exception propagates to the
 * timer thread.
 */
public class IndexImporter extends TimerTask implements Closeable {

  private static final String BADROWIDS = ".badrowids";
  private static final String COMMIT = ".commit";
  private static final String INUSE = ".inuse";
  private static final String BADINDEX = ".badindex";

  /** Static lock shared by every importer: only one import runs in the process at a time. */
  private static final Lock _globalLock = new ReentrantReadWriteLock().writeLock();
  private final static Log LOG = LogFactory.getLog(IndexImporter.class);

  private final BlurIndex _blurIndex;
  private final ShardContext _shardContext;
  private final Timer _timer;
  private final String _table;
  private final String _shard;
  private final AtomicBoolean _running = new AtomicBoolean();
  /** Minimum time in milliseconds between two calls to {@link #cleanupOldDirs()} from {@link #run()}. */
  private final long _cleanupDelay;
  /** Wall clock time in milliseconds of the last cleanup attempt; only touched inside {@link #run()}. */
  private long _lastCleanup;

  /**
   * Creates the importer and schedules it at a fixed delay on a new daemon
   * timer.
   *
   * @param blurIndex
   *          the index that receives the imported data.
   * @param shardContext
   *          the shard whose HDFS directory is scanned.
   * @param refreshUnit
   *          the unit of {@code refreshAmount}.
   * @param refreshAmount
   *          both the initial delay and the period between scans.
   */
  public IndexImporter(BlurIndex blurIndex, ShardContext shardContext, TimeUnit refreshUnit, long refreshAmount) {
    _blurIndex = blurIndex;
    _shardContext = shardContext;
    _table = _shardContext.getTableContext().getTable();
    _shard = _shardContext.getShard();
    _cleanupDelay = TimeUnit.MINUTES.toMillis(10);
    _timer = new Timer("IndexImporter [" + _shard + "/" + _table + "]", true);
    _running.set(true);
    // Schedule last: the timer thread must never be handed a task whose
    // fields have not all been assigned yet.
    long period = refreshUnit.toMillis(refreshAmount);
    _timer.schedule(this, period, period);
  }

  /**
   * Stops the importer. Only the first call cancels and purges the timer;
   * later calls do nothing.
   */
  @Override
  public void close() throws IOException {
    // compareAndSet makes the "was running" check and the state change one
    // atomic step, so concurrent callers cannot both cancel the timer.
    if (_running.compareAndSet(true, false)) {
      _timer.cancel();
      _timer.purge();
    }
  }

  /**
   * Performs one scan: runs the directory cleanup when it is due, then claims
   * commit directories in sorted order until one valid index has been handed
   * to the {@link BlurIndex}. At most one directory is imported per call.
   */
  @Override
  public void run() {
    // Only allow one import to occur in the process at a time.
    _globalLock.lock();
    try {
      cleanupIfDue();
      Path path = _shardContext.getHdfsDirPath();
      Configuration configuration = _shardContext.getTableContext().getConfiguration();
      try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        SortedSet<FileStatus> commitDirs = listCommitDirs(fileSystem, path);
        if (commitDirs == null) {
          // Closed or interrupted while waiting for the listing.
          return;
        }
        for (FileStatus fileStatus : commitDirs) {
          Path file = fileStatus.getPath();
          if (!fileStatus.isDir() || !file.getName().endsWith(COMMIT)) {
            continue;
          }
          // rename to inuse, if good continue else rename to badindex
          Path inuse = new Path(file.getParent(), rename(file.getName(), INUSE));
          if (!fileSystem.rename(file, inuse)) {
            LOG.fatal("Could not rename [{0}] to inuse dir.", file);
            continue;
          }
          HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, inuse);
          if (DirectoryReader.indexExists(hdfsDirectory)) {
            IndexAction indexAction = getIndexAction(hdfsDirectory, fileSystem);
            _blurIndex.process(indexAction);
            return;
          }
          Path badindex = new Path(file.getParent(), rename(file.getName(), BADINDEX));
          if (fileSystem.rename(inuse, badindex)) {
            LOG.error("Directory found at [{0}] is not a vaild index, renaming to [{1}].", inuse, badindex);
          } else {
            LOG.fatal("Directory found at [{0}] is not a vaild index, could not rename to [{1}].", inuse,
                badindex);
          }
        }
      } catch (IOException e) {
        // NOTE(review): placeholders changed from {1}/{2} to {0}/{1}, assuming
        // the Throwable is not counted as a format argument - confirm against Log.
        LOG.error("Unknown error while trying to refresh imports on [{0}/{1}].", e, _shard, _table);
      }
    } finally {
      _globalLock.unlock();
    }
  }

  /**
   * Calls {@link #cleanupOldDirs()} when more than {@code _cleanupDelay}
   * milliseconds have passed since the previous attempt. A failed attempt is
   * logged and still counts as an attempt.
   */
  private void cleanupIfDue() {
    if (_lastCleanup + _cleanupDelay >= System.currentTimeMillis()) {
      return;
    }
    try {
      cleanupOldDirs();
    } catch (IOException e) {
      // NOTE(review): placeholders changed from {1}/{2} to {0}/{1}, assuming
      // the Throwable is not counted as a format argument - confirm against Log.
      LOG.error("Unknown error while trying to clean old directories on [{0}/{1}].", e, _shard, _table);
    }
    _lastCleanup = System.currentTimeMillis();
  }

  /**
   * Lists the entries of {@code path} whose names end in {@code .commit},
   * retrying every 100 ms while the listing fails with
   * {@link FileNotFoundException}.
   *
   * @return the sorted entries, or {@code null} when the importer was closed
   *         or the thread was interrupted before a listing succeeded.
   */
  private SortedSet<FileStatus> listCommitDirs(FileSystem fileSystem, Path path) throws IOException {
    PathFilter commitFilter = suffixFilter(COMMIT);
    while (_running.get()) {
      try {
        return sort(fileSystem.listStatus(path, commitFilter));
      } catch (FileNotFoundException e) {
        LOG.warn("File not found error, retrying.");
      }
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        // Keep the interrupt visible to whoever owns this thread.
        Thread.currentThread().interrupt();
        return null;
      }
    }
    return null;
  }

  /** Creates a filter that accepts non-null paths whose name ends with {@code suffix}. */
  private static PathFilter suffixFilter(final String suffix) {
    return new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return path != null && path.getName().endsWith(suffix);
      }
    };
  }

  /**
   * Replaces everything from the last {@code '.'} of {@code name} with
   * {@code newSuffix}. A name without a {@code '.'} gets the suffix appended.
   */
  private String rename(String name, String newSuffix) {
    int lastIndexOf = name.lastIndexOf('.');
    if (lastIndexOf < 0) {
      // Without this guard substring(0, -1) would throw.
      return name + newSuffix;
    }
    return name.substring(0, lastIndexOf) + newSuffix;
  }

  /**
   * Builds the action that imports {@code directory} into the index.
   *
   * @param directory
   *          the claimed ({@code .inuse}) directory holding the index to add.
   * @param fileSystem
   *          used to rename the directory to {@code .badrowids} after a
   *          rollback.
   */
  private IndexAction getIndexAction(final HdfsDirectory directory, final FileSystem fileSystem) {
    return new IndexAction() {
      @Override
      public void performMutate(IndexSearcherClosable searcher, IndexWriter writer) throws IOException {
        LOG.info("About to import [{0}] into [{1}/{2}]", directory, _shard, _table);
        // Row deletes are only issued when the current index contains documents.
        boolean emitDeletes = searcher.getIndexReader().numDocs() != 0;
        applyDeletes(directory, writer, _shard, emitDeletes);
        LOG.info("Add index [{0}] [{1}/{2}]", directory, _shard, _table);
        writer.addIndexes(directory);
        LOG.info("Removing delete markers [{0}] on [{1}/{2}]", directory, _shard, _table);
        writer.deleteDocuments(new Term(BlurConstants.DELETE_MARKER, BlurConstants.DELETE_MARKER_VALUE));
        LOG.info("Finishing import [{0}], commiting on [{1}/{2}]", directory, _shard, _table);
      }

      @Override
      public void doPreCommit(IndexSearcherClosable indexSearcher, IndexWriter writer) throws IOException {
      }

      @Override
      public void doPostCommit(IndexWriter writer) throws IOException {
        LOG.info("Import complete on [{0}/{1}]", _shard, _table);
      }

      @Override
      public void doPreRollback(IndexWriter writer) throws IOException {
        LOG.info("Starting rollback on [{0}/{1}]", _shard, _table);
      }

      @Override
      public void doPostRollback(IndexWriter writer) throws IOException {
        LOG.info("Finished rollback on [{0}/{1}]", _shard, _table);
        Path path = directory.getPath();
        Path badRowIds = new Path(path.getParent(), rename(path.getName(), BADROWIDS));
        if (!fileSystem.rename(path, badRowIds)) {
          // The directory keeps its old name, so make the failure visible.
          LOG.error("Could not rename [{0}] to [{1}] after rollback.", path, badRowIds);
        }
      }
    };
  }

  /** Copies the array into a sorted set using the natural ordering of {@link FileStatus}. */
  private SortedSet<FileStatus> sort(FileStatus[] listStatus) {
    SortedSet<FileStatus> result = new TreeSet<FileStatus>();
    for (FileStatus fileStatus : listStatus) {
      result.add(fileStatus);
    }
    return result;
  }

  /**
   * Walks every row id term of the index in {@code directory}, verifies that it
   * partitions to {@code shard} and, when requested, deletes that row id from
   * {@code indexWriter}.
   *
   * @param directory
   *          the index about to be imported.
   * @param indexWriter
   *          the writer that receives the delete requests.
   * @param shard
   *          the name of the shard the row ids have to belong to.
   * @param emitDeletes
   *          when {@code false} the row ids are only validated.
   * @throws IOException
   *           if a row id partitions to a different shard, or on a read error.
   */
  private void applyDeletes(Directory directory, IndexWriter indexWriter, String shard, boolean emitDeletes)
      throws IOException {
    DirectoryReader reader = DirectoryReader.open(directory);
    try {
      LOG.info("Applying deletes in reader [{0}]", reader);
      CompositeReaderContext compositeReaderContext = reader.getContext();
      List<AtomicReaderContext> leaves = compositeReaderContext.leaves();
      BlurPartitioner blurPartitioner = new BlurPartitioner();
      Text key = new Text();
      int numberOfShards = _shardContext.getTableContext().getDescriptor().getShardCount();
      int shardId = BlurUtil.getShardIndex(shard);
      for (AtomicReaderContext context : leaves) {
        AtomicReader atomicReader = context.reader();
        Fields fields = atomicReader.fields();
        if (fields == null) {
          // A segment without postings has no row ids to check.
          continue;
        }
        Terms terms = fields.terms(BlurConstants.ROW_ID);
        if (terms == null) {
          continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        BytesRef ref = null;
        while ((ref = termsEnum.next()) != null) {
          key.set(ref.bytes, ref.offset, ref.length);
          int partition = blurPartitioner.getPartition(key, null, numberOfShards);
          if (shardId != partition) {
            throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
                + "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
          }
          if (emitDeletes) {
            // Deep copy, so the term does not share the bytes referenced by ref.
            indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(ref)));
          }
        }
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Deletes every {@code .inuse} directory of the shard that is not referenced
   * by any link file (name ending in {@link HdfsDirectory#LNK}) found directly
   * in the shard directory.
   *
   * @throws IOException
   *           if listing, reading a link file or deleting fails.
   */
  public void cleanupOldDirs() throws IOException {
    Path hdfsDirPath = _shardContext.getHdfsDirPath();
    TableContext tableContext = _shardContext.getTableContext();
    Configuration configuration = tableContext.getConfiguration();
    FileSystem fileSystem = hdfsDirPath.getFileSystem(configuration);

    FileStatus[] inuseSubDirs = fileSystem.listStatus(hdfsDirPath, suffixFilter(INUSE));
    // Start with every inuse directory as a delete candidate ...
    Set<Path> inuseDirs = toSet(inuseSubDirs);
    Map<Path, Path> inuseFileToDir = toMap(fileSystem, inuseDirs);

    FileStatus[] linkFiles = fileSystem.listStatus(hdfsDirPath, suffixFilter(HdfsDirectory.LNK));
    // ... and drop each directory that still has a file referenced by a link.
    for (FileStatus status : linkFiles) {
      // presumably resolves the link file to the path it points at - verify in HdfsDirectory
      Path realPath = HdfsDirectory.readRealPathDataFromSymlinkPath(fileSystem, status.getPath());
      if (realPath == null) {
        continue;
      }
      Path inuseDir = inuseFileToDir.get(realPath);
      if (inuseDir != null) {
        // The sorted collections used here reject null, so a link that points
        // outside of every inuse directory must be skipped, not removed.
        inuseDirs.remove(inuseDir);
      }
    }
    for (Path p : inuseDirs) {
      LOG.info("Deleteing path [{0}] no longer in use.", p);
      fileSystem.delete(p, true);
    }
  }

  /** Maps every direct child of each directory in {@code inuseDirs} to that directory. */
  private Map<Path, Path> toMap(FileSystem fileSystem, Set<Path> inuseDirs) throws IOException {
    Map<Path, Path> result = new TreeMap<Path, Path>();
    for (Path p : inuseDirs) {
      if (!fileSystem.isFile(p)) {
        FileStatus[] listStatus = fileSystem.listStatus(p);
        for (FileStatus status : listStatus) {
          result.put(status.getPath(), p);
        }
      }
    }
    return result;
  }

  /** Collects the paths of {@code dirs} into a sorted, modifiable set. */
  private Set<Path> toSet(FileStatus[] dirs) {
    Set<Path> result = new TreeSet<Path>();
    for (FileStatus status : dirs) {
      result.add(status.getPath());
    }
    return result;
  }
}