// Source listing: org.apache.blur.manager.writer.BlurNRTIndex (including inner class Committer)

package org.apache.blur.manager.writer;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.blur.lucene.LuceneVersionConstant.LUCENE_VERSION;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.blur.analysis.FieldManager;
import org.apache.blur.index.ExitableReader;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.lucene.store.refcounter.DirectoryReferenceCounter;
import org.apache.blur.lucene.store.refcounter.DirectoryReferenceFileGC;
import org.apache.blur.lucene.store.refcounter.IndexInputCloser;
import org.apache.blur.lucene.warmup.TraceableDirectory;
import org.apache.blur.server.IndexSearcherClosable;
import org.apache.blur.server.IndexSearcherClosableNRT;
import org.apache.blur.server.ShardContext;
import org.apache.blur.server.TableContext;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.Row;
import org.apache.blur.utils.BlurUtil;
import org.apache.blur.utils.SimpleTimer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.BlurIndexWriter;
import org.apache.lucene.index.BlurIndexWriter.LockOwnerException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NRTManager;
import org.apache.lucene.search.NRTManager.TrackingIndexWriter;
import org.apache.lucene.search.NRTManagerReopenThread;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.store.Directory;

public class BlurNRTIndex extends BlurIndex {

  private static final Log LOG = LogFactory.getLog(BlurNRTIndex.class);
  private static final boolean APPLY_ALL_DELETES = true;
  private static final String SNAPSHOTS_FOLDER_NAME = "snapshots";
  private static final String SNAPSHOTS_TMPFILE_EXTENSION = ".tmp";

  private final AtomicReference<NRTManager> _nrtManagerRef = new AtomicReference<NRTManager>();
  private final AtomicBoolean _isClosed = new AtomicBoolean();
  private final BlurIndexWriter _writer;
  private final Thread _committer;
  private final SearcherFactory _searcherFactory;
  private final Directory _directory;
  private final NRTManagerReopenThread _refresher;
  private final TableContext _tableContext;
  private final ShardContext _shardContext;
  private final TransactionRecorder _recorder;
  private final TrackingIndexWriter _trackingWriter;
  private final IndexImporter _indexImporter;
  // This lock is used during a import of data from the file system. For example
  // after a mapreduce program.
  private final ReadWriteLock _lock = new ReentrantReadWriteLock();
  private long _lastRefresh = 0;

  public BlurNRTIndex(ShardContext shardContext, SharedMergeScheduler mergeScheduler, IndexInputCloser closer,
      Directory directory, DirectoryReferenceFileGC gc, final ExecutorService searchExecutor) throws IOException {
    _tableContext = shardContext.getTableContext();
    _directory = directory;
    _shardContext = shardContext;
   
    FieldManager fieldManager = _tableContext.getFieldManager();
    Analyzer analyzer = fieldManager.getAnalyzerForIndex();
    IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    conf.setWriteLockTimeout(TimeUnit.MINUTES.toMillis(5));
    conf.setSimilarity(_tableContext.getSimilarity());
   
    SnapshotDeletionPolicy sdp;
    if (snapshotsDirectoryExists()) {
      // load existing snapshots
      sdp = new SnapshotDeletionPolicy(_tableContext.getIndexDeletionPolicy(), loadExistingSnapshots());
    } else {
      sdp = new SnapshotDeletionPolicy(_tableContext.getIndexDeletionPolicy());
    }
    conf.setIndexDeletionPolicy(sdp);
    conf.setMergedSegmentWarmer(new FieldBasedWarmer(shardContext, _isClosed));

    TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    conf.setMergeScheduler(mergeScheduler.getMergeScheduler());

    DirectoryReferenceCounter referenceCounter = new DirectoryReferenceCounter(directory, gc, closer);
    // This directory allows for warm up by adding tracing ability.
    TraceableDirectory dir = new TraceableDirectory(referenceCounter);
   
    SimpleTimer simpleTimer = new SimpleTimer();
    simpleTimer.start("writerOpen");
    _writer = new BlurIndexWriter(dir, conf, true);
    simpleTimer.stop("writerOpen");
    simpleTimer.start("nrtSetup");
    _recorder = new TransactionRecorder(shardContext);
    _recorder.replay(_writer);

    _searcherFactory = new SearcherFactory() {
      @Override
      public IndexSearcher newSearcher(IndexReader reader) throws IOException {
        return new IndexSearcherClosableNRT(reader, searchExecutor, _nrtManagerRef, _directory);
      }
    };

    _trackingWriter = new TrackingIndexWriter(_writer);
    _indexImporter = new IndexImporter(_trackingWriter, _lock, _shardContext, TimeUnit.SECONDS, 10);
    _nrtManagerRef.set(new NRTManager(_trackingWriter, _searcherFactory, APPLY_ALL_DELETES));
    // start commiter

    _committer = new Thread(new Committer());
    _committer.setDaemon(true);
    _committer.setName("Commit Thread [" + _tableContext.getTable() + "/" + shardContext.getShard() + "]");
    _committer.start();

    // start refresher
    double targetMinStaleSec = _tableContext.getTimeBetweenRefreshs() / 1000.0;
    _refresher = new NRTManagerReopenThread(getNRTManager(), targetMinStaleSec * 10, targetMinStaleSec);
    _refresher.setName("Refresh Thread [" + _tableContext.getTable() + "/" + shardContext.getShard() + "]");
    _refresher.setDaemon(true);
    _refresher.start();
    simpleTimer.stop("nrtSetup");
    simpleTimer.log(LOG);
  }

  /**
   * The snapshots directory contains a file per snapshot.
   * Name of the file is the snapshot name and it stores the segments filename
   *
   * @return Map<String, String>
   * @throws IOException
   */
  private Map<String, String> loadExistingSnapshots() throws IOException {
    Map<String, String> snapshots = new HashMap<String, String>();
   
    FileSystem fileSystem = getFileSystem();
    FileStatus[] status = fileSystem.listStatus(getSnapshotsDirectoryPath());
   
    for (int i=0;i<status.length;i++){
      FileStatus fileStatus = status[i];
      String snapshotName = fileStatus.getPath().getName();
      // cleanup all tmp files
      if (snapshotName.endsWith(SNAPSHOTS_TMPFILE_EXTENSION)) {
        fileSystem.delete(fileStatus.getPath(), true);
        continue;
      }
      BufferedReader br=new BufferedReader(new InputStreamReader(fileSystem.open(fileStatus.getPath())));
      String segmentsFilename=br.readLine();
      if (segmentsFilename != null){
        snapshots.put(snapshotName, segmentsFilename);
      }
    }
    return snapshots;
  }
 
  private boolean snapshotsDirectoryExists() throws IOException{
    Path shardHdfsDirPath = _shardContext.getHdfsDirPath();
    FileSystem fileSystem = getFileSystem();
    Path shardSnapshotsDirPath = new Path(shardHdfsDirPath, SNAPSHOTS_FOLDER_NAME);
    if (fileSystem.exists(shardSnapshotsDirPath)) {
      return true;
    }
    return false;
  }
 
  @Override
  public void replaceRow(boolean waitToBeVisible, boolean wal, Row row) throws IOException {
    _lock.readLock().lock();
    try {
      List<Record> records = row.records;
      if (records == null || records.isEmpty()) {
        deleteRow(waitToBeVisible, wal, row.id);
        return;
      }
      long generation = _recorder.replaceRow(wal, row, _trackingWriter);
      waitToBeVisible(waitToBeVisible, generation);
    } finally {
      _lock.readLock().unlock();
    }
  }

  @Override
  public void deleteRow(boolean waitToBeVisible, boolean wal, String rowId) throws IOException {
    _lock.readLock().lock();
    try {
      long generation = _recorder.deleteRow(wal, rowId, _trackingWriter);
      waitToBeVisible(waitToBeVisible, generation);
    } finally {
      _lock.readLock().unlock();
    }
  }

  /**
   * The method fetches a reference to the IndexSearcher, the caller is
   * responsible for calling close on the searcher.
   */
  @Override
  public IndexSearcherClosable getIndexReader() throws IOException {
    return resetRunning((IndexSearcherClosable) getNRTManager().acquire());
  }

  private IndexSearcherClosable resetRunning(IndexSearcherClosable indexSearcherClosable) {
    IndexReader indexReader = indexSearcherClosable.getIndexReader();
    if (indexReader instanceof ExitableReader) {
      ExitableReader er = (ExitableReader) indexReader;
      er.getRunning().set(true);
    }
    return indexSearcherClosable;
  }

  private NRTManager getNRTManager() {
    return _nrtManagerRef.get();
  }

  @Override
  public void close() throws IOException {
    // @TODO make sure that locks are cleaned up.
    if (!_isClosed.get()) {
      _isClosed.set(true);
      _indexImporter.close();
      _committer.interrupt();
      _refresher.close();
      try {
        _recorder.close();
        _writer.close(false);
        getNRTManager().close();
      } finally {
        _directory.close();
      }
    }
  }

  @Override
  public void refresh() throws IOException {
    getNRTManager().maybeRefresh();
    _lastRefresh = System.currentTimeMillis();
  }

  @Override
  public AtomicBoolean isClosed() {
    return _isClosed;
  }

  @Override
  public void optimize(int numberOfSegmentsPerShard) throws IOException {
    _writer.forceMerge(numberOfSegmentsPerShard);
  }

  private void waitToBeVisible(boolean waitToBeVisible, long generation) throws IOException {
    if (needsRefresh()) {
      refresh();
    }
    if (waitToBeVisible && getNRTManager().getCurrentSearchingGen() < generation) {
      getNRTManager().waitForGeneration(generation);
    }
  }

  private boolean needsRefresh() {
    if (_lastRefresh + _tableContext.getTimeBetweenRefreshs() < System.currentTimeMillis()) {
      return true;
    }
    return false;
  }

  class Committer implements Runnable {
    @Override
    public void run() {
      synchronized (this) {
        while (!_isClosed.get()) {
          try {
            LOG.debug("Committing of [{0}/{1}].", _tableContext.getTable(), _shardContext.getShard());
            _recorder.commit(_writer);
          } catch (CorruptIndexException e) {
            LOG.error("Curruption Error during commit of [{0}/{1}].", e, _tableContext.getTable(),
                _shardContext.getShard());
          } catch (LockOwnerException e) {
            LOG.info("This shard server no longer owns the lock on [{0}/{1}], closing.", _tableContext.getTable(),
                _shardContext.getShard());
            try {
              close();
            } catch (IOException ex) {
              LOG.error("Unknown error while trying to close [{0}/{1}]", _tableContext.getTable(),
                  _shardContext.getShard());
            }
            return;
          } catch (IOException e) {
            LOG.error("IO Error during commit of [{0}/{1}].", e, _tableContext.getTable(), _shardContext.getShard());
          }
          try {
            wait(_tableContext.getTimeBetweenCommits());
          } catch (InterruptedException e) {
            if (_isClosed.get()) {
              return;
            }
            LOG.error("Unknown error with committer thread [{0}/{1}].", e, _tableContext.getTable(),
                _shardContext.getShard());
          }
        }
      }
    }
  }

  @Override
  public void createSnapshot(String name) throws IOException {
    SnapshotDeletionPolicy snapshotter = getSnapshotter();
    Map<String, String> existingSnapshots = snapshotter.getSnapshots();
    if (existingSnapshots.containsKey(name)) {
      LOG.error("A Snapshot already exists with the same name [{0}] on [{1}/{2}].",
          name, _tableContext.getTable(), _shardContext.getShard());
      throw new IOException("A Snapshot already exists with the same name [" + name + "] on " +
        "["+_tableContext.getTable()+"/"+_shardContext.getShard()+"].");
    }
    _writer.commit();
    IndexCommit indexCommit = snapshotter.snapshot(name);
   
    /*
     * Persist the snapshots info into a tmp file under the snapshots sub-folder
     * and once writing is finished, close the writer.
     * Now rename the tmp file to an actual snapshots file.
     * This make the file write an atomic operation
     *
     * The name of the file is the snapshot name and its contents specify the segments file name
     */
    String segmentsFilename = indexCommit.getSegmentsFileName();
    FileSystem fileSystem = getFileSystem();
    Path shardSnapshotsDirPath = getSnapshotsDirectoryPath();
    BlurUtil.createPath(fileSystem, shardSnapshotsDirPath);
    Path newTmpSnapshotFile = new Path(shardSnapshotsDirPath, name + SNAPSHOTS_TMPFILE_EXTENSION);
    BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fileSystem.create(newTmpSnapshotFile,true)));
    br.write(segmentsFilename);
    br.close();
   
    // now rename the tmp file
    Path newSnapshotFile = new Path(shardSnapshotsDirPath, name);
    fileSystem.rename(newTmpSnapshotFile, newSnapshotFile);
   
    LOG.info("Snapshot [{0}] created successfully on [{1}/{2}].",
        name, _tableContext.getTable(), _shardContext.getShard());
  }

  @Override
  public void removeSnapshot(String name) throws IOException {
    SnapshotDeletionPolicy snapshotter = getSnapshotter();
    Map<String, String> existingSnapshots = snapshotter.getSnapshots();
    if (existingSnapshots.containsKey(name)) {
      snapshotter.release(name);
     
      // now delete the snapshot file stored in the snapshots directory under the shard
      Path snapshotFilePath = new Path(getSnapshotsDirectoryPath(), name);
      getFileSystem().delete(snapshotFilePath, true);
      
      LOG.info("Snapshot [{0}] removed successfully from [{1}/{2}].",
          name, _tableContext.getTable(), _shardContext.getShard());
    } else {
      LOG.error("No Snapshot exists with the name [{0}] on  [{1}/{2}].",
          name, _tableContext.getTable(), _shardContext.getShard());
      throw new IOException("No Snapshot exists with the name [" + name + "] on " +
        "["+_tableContext.getTable()+"/"+_shardContext.getShard()+"].");
    }
  }

  @Override
  public List<String> getSnapshots() throws IOException {
    SnapshotDeletionPolicy snapshotter = getSnapshotter();
    Map<String, String> existingSnapshots = snapshotter.getSnapshots();
    return new ArrayList<String>(existingSnapshots.keySet());
  }

  /**
   * Fetches the snapshotter from the LiveIndexWriterConfig of IndexWriter
   *
   * @return SnapshotDeletionPolicy
   * @throws IOException
   */
  private SnapshotDeletionPolicy getSnapshotter() throws IOException{
    IndexDeletionPolicy idp = _writer.getConfig().getIndexDeletionPolicy();
    if (idp instanceof SnapshotDeletionPolicy) {
      SnapshotDeletionPolicy snapshotter = (SnapshotDeletionPolicy)idp;
      return snapshotter;
    } else {
      LOG.error("The index deletion policy for [{0}/{1}] does not support snapshots.",
          _tableContext.getTable(), _shardContext.getShard());
      throw new IOException("The index deletion policy for ["+_tableContext.getTable()+"/"+_shardContext.getShard()+"]" +
          " does not support snapshots.");
    }
  }
 
  public Path getSnapshotsDirectoryPath() throws IOException {
    Path shardHdfsDirPath = _shardContext.getHdfsDirPath();
    return new Path(shardHdfsDirPath, SNAPSHOTS_FOLDER_NAME);
  }
 
  private FileSystem getFileSystem() throws IOException{
    Path shardHdfsDirPath = _shardContext.getHdfsDirPath();
    Configuration configuration = _shardContext.getTableContext().getConfiguration();
    return shardHdfsDirPath.getFileSystem(configuration);
  }
}
// End of source listing. Site boilerplate retained below as comments so the
// file remains valid Java:
// Related Classes of org.apache.blur.manager.writer.BlurNRTIndex$Committer
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark
// of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.