Package com.linkedin.databus.core

Source Code of com.linkedin.databus.core.ConcurrentAppendableCompositeFileInputStream$InputStreamEnumerator$ParserLag

package com.linkedin.databus.core;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.log4j.Logger;

import com.linkedin.databus.core.ConcurrentAppendableCompositeFileInputStream.InputStreamEnumerator.ParserLag;
import com.linkedin.databus.core.TrailFileNotifier.TrailFileListener;
import com.linkedin.databus.core.TrailFileNotifier.TrailFileManager;
import com.linkedin.databus.core.util.NamedThreadFactory;
import com.linkedin.databus.core.util.RateMonitor;
import com.linkedin.databus.monitoring.mbean.GGParserStatistics;
import com.linkedin.databus2.core.DatabusException;

/**
* A composite InputStream that sequentially reads from an ordered and filtered list of
* files found inside the input directory and takes care of reading files that are concurrently appended
* in a defined order.
*
* <pre>
*           __________________________________________________
*        1 |                                                  | 1
*  ______________             _____________           ______ ______           _______________
* |              | 1       1 |             | 1     1 |             |1      *  |              |
* | CompositeFIS | --------->| SequenceFIS |-------->| Enumerator  |--------> |  SingleFIS   |
* |______________|           |_____________|         |_____________|          |______________|
*         |1
*         |
*         |
*         |
*         |
*         |1
* _________________
* |               |
* |    TrailFile  |
* |     Notifier  |
* |_______________|
*
*
* CompositeFIS : ConcurrentAppendableCompositeFileInputStream
* SingleFIS : ConcurrentAppendableSingleFileInputStream
* </pre>
*/
public class ConcurrentAppendableCompositeFileInputStream
  extends InputStream
  implements TrailFileListener
{
  /** Fully-qualified class name; used as the logger name. */
  public static final String MODULE = ConcurrentAppendableCompositeFileInputStream.class.getName();
  /** Logger shared by this class and its nested classes. */
  public static final Logger LOG = Logger.getLogger(MODULE);

  /** Directory under which files need to be streamed. */
  private final File _dir;

  /** Underlying sequence input stream; stays null until initializeStream() succeeds. */
  private SequenceInputStream _seqStream = null;

  /** Agent to poll for new files, order them, find deltas and notify this class. */
  private final TrailFileNotifier _trailFileLocator;

  /** Client-specified ranker and filter callback. */
  private final TrailFileManager _trailFileManager;

  /**
   * Low-watermark file. Initially the start file passed to the constructor (or null);
   * afterwards the most recent file accepted by onNewTrailFile().
   */
  private File _latestFile;

  /** Offset in the start file from which streaming has to start. */
  private final long _initialFileOffset;

  /**
   * Number of read calls made on this stream.
   * NOTE(review): incremented with ++ on a volatile, which is not atomic - presumably only one
   * reader thread is expected; confirm against callers.
   */
  private volatile long _numReadCalls = 0;

  /** Number of read calls that returned something other than EOF. */
  private volatile long _numReadCallsWithData = 0;

  /**
   * Enumerator which provides the never-ending stream of files
   * and implements the interface needed for SequenceInputStream.
   * Stays null until initializeStream() is called.
   */
  private InputStreamEnumerator _streamEnumerator = null;

  /**
   * Lazy initialization is done on streams. This flag is used to denote if initialization is done.
   */
  private boolean _initDone = false;

  /**
   * Rate monitor to track read throughput. It is started and immediately suspended in the
   * constructor, then resumed only for the duration of each read.
   */
  private final RateMonitor _rateMonitor = new RateMonitor("ConcurrentAppendableCompositeFileInputStream");

  /** Whether files in the directory are treated as static (not written to concurrently). */
  private final boolean _staticStream;

  /**
   * Set once closeStream() has completed.
   * NOTE(review): not volatile although closeStream() is reachable from the onError() listener
   * callback - confirm which thread invokes that callback.
   */
  private boolean _closed = false;

  /** True until the first trail file notification arrives; only that file uses the initial offset. */
  private boolean _firstTrailFile = true;

  /** Executor for the periodic stats-updating task. */
  private ScheduledThreadPoolExecutor _executor;
  /** Initial delay and delay between stats updates, in milliseconds. */
  private final long STATS_COLLECTION_PERIOD = 5000; //5 seconds

  /** Optional statistics sink; null disables stats reporting. */
  private GGParserStatistics _ggParserStats = null;
  public void setGGParserStats(GGParserStatistics st) {
    _ggParserStats = st;
  }

  /**
   *
   * Construct a trail directory input stream.
   * @param dir : directory from which trail files will be picked
   * @param filter : Filter and Sorted callback provided to filter and rank the files by trail order
   * @param staticStream : Are files in the directory be treated like static files. Parser should set this to false
   */
  public ConcurrentAppendableCompositeFileInputStream(String dir,
                              TrailFileManager filter,
                                                      boolean staticStream)
                      throws IOException
  {
    this(dir,null,-1L,filter,staticStream);
  }

  @Override
  public String toString()
  {
    return "ConcurrentAppendableCompositeFileInputStream{" +
        "_dir=" + _dir +
        ", _seqStream=" + _seqStream +
        ", _trailFileLocator=" + _trailFileLocator +
        ", _trailFileManager=" + _trailFileManager +
        ", _latestFile=" + _latestFile +
        ", _initialFileOffset=" + _initialFileOffset +
        ", _numReadCalls=" + _numReadCalls +
        ", _numReadCallsWithData=" + _numReadCallsWithData +
        ", _streamEnumerator=" + _streamEnumerator +
        ", _initDone=" + _initDone +
        ", _rateMonitor=" + _rateMonitor +
        ", _staticStream=" + _staticStream +
        ", _closed=" + _closed +
        ", _firstTrailFile=" + _firstTrailFile +
        '}';
  }

  /**
   *
   * Construct a trail directory input stream.
   * @param dir : directory from which trail files will be picked
   * @param startFile : current file from which data has to be recorded.
   * @param offset : Offset to the startFile from which streaming should start. If offset <= 0, then the entire file is read...
   * @param filter : Filter and Sorted callback provided to filter and rank the files by trail order
   * @param staticStream : Are files in the directory be treated like static files. Parser should set this to false
   * @throws IOException
   */
  public ConcurrentAppendableCompositeFileInputStream(String dir,
                                          String startFile,
                                          long offset,
                                          TrailFileManager filter,
                                          boolean staticStream)
          throws IOException
  {
    validateDir(dir);

    _dir = new File(dir);
    _trailFileManager = filter;
    _latestFile = (startFile == null) ? null : new File(_dir.getAbsolutePath() + "/" + startFile);
    _initialFileOffset = offset;
    _staticStream = staticStream;

    _trailFileLocator = new TrailFileNotifier(_dir, _trailFileManager, _latestFile, 10, this);
    _rateMonitor.start();
    _rateMonitor.suspend();

    // start timer to collect stats
    _executor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("StatsTime for " + dir, true)); // add directory to the thread name
    _executor.scheduleWithFixedDelay( new Runnable() {
      @Override
      public void run() {
        updateParserStats();
      }
    }, STATS_COLLECTION_PERIOD, STATS_COLLECTION_PERIOD, TimeUnit.MILLISECONDS);

  }

  /**
   * Validate directory
   * @param dir
   * @throws IOException
   */
  private void validateDir(String dir)
      throws IOException
  {
    File d = new File(dir);

    if (! d.isDirectory())
      throw new IOException("Path (" + dir +") does not exist or is not a directory !!");
  }


  /**
   * Sets up the sequence stream from the trail files located.
   * @throws IOException
   */
  public void initializeStream()
      throws IOException
  {
    _streamEnumerator = new InputStreamEnumerator(_staticStream, _trailFileManager, _ggParserStats);
    boolean success = _trailFileLocator.fetchOneTime();

    if ( ! success )
      return ;

    _seqStream = new SequenceInputStream(_streamEnumerator);

    if ( ! _staticStream)
      _trailFileLocator.start(); // Note: TrailFileLocator needs to be started only after seqStream is constructed.
    _initDone = true;
  }


  /**
   * @return the rate monitor that is resumed around every read on this stream
   */
  public RateMonitor getRateMonitor()
  {
    return _rateMonitor;
  }

  public enum ReadCall
  {
    READ_ONE_BYTE,
    READ_BULK,
    READ_BULK_WITH_OFFSETS
  };

  private int readData(byte[] b, int off, int len, ReadCall readCall) throws IOException
  {
    // Initialize stream first time
    if ( ! _initDone)
    {
      initializeStream();
    }
    int ret = ConcurrentAppendableSingleFileInputStream.EOF;
    int numBytesRead = 0;

    _rateMonitor.resume();
    if ( null != _seqStream)
    {
      switch(readCall)
      {
        case READ_BULK: ret = _seqStream.read(b);
                        numBytesRead = ret;
                        break;
        case READ_BULK_WITH_OFFSETS: ret = _seqStream.read(b, off, len);
                                     numBytesRead = ret;
                                     break;
        case READ_ONE_BYTE: ret = _seqStream.read();
                                  numBytesRead = 1;
      }
    }

    if (ret != ConcurrentAppendableSingleFileInputStream.EOF)
    {
      _numReadCallsWithData++;
      _rateMonitor.ticks(numBytesRead);

      if(_ggParserStats != null)
        _ggParserStats.addBytesParsed(_rateMonitor.getNumTicks());
    }
    _rateMonitor.suspend();

    _numReadCalls++;
    return ret;
  }

  /**
   * Reads a single byte by delegating to readData() with READ_ONE_BYTE.
   * The buffer/offset/length arguments passed along are placeholders that path never uses.
   */
  @Override
  public int read() throws IOException
  {
    return readData(null, -1, -1, ReadCall.READ_ONE_BYTE);
  }

  /**
   * Reads into the whole of {@code b} by delegating to readData() with READ_BULK.
   * The offset/length arguments passed along are placeholders that path never uses.
   */
  @Override
  public int read(byte[] b) throws IOException
  {
    return readData(b, -1, -1, ReadCall.READ_BULK);
  }

  /**
   * Reads up to {@code len} bytes into {@code b} starting at {@code off} by delegating to
   * readData() with READ_BULK_WITH_OFFSETS.
   */
  @Override
  public int read(byte[] b, int off, int len) throws IOException
  {
    return readData(b, off, len, ReadCall.READ_BULK_WITH_OFFSETS);
  }

  @Override
  public void close() throws IOException
  {
    if(_executor != null)
      _executor.shutdownNow();

    if ( (null != _trailFileLocator) && (_trailFileLocator.isAlive()))
      _trailFileLocator.shutdown();

    closeStream();
  }

  private void closeStream() throws IOException
  {
    if (_closed)
    return;

    LOG.info("Closing Stream !!");

    if(_streamEnumerator != null)
      _streamEnumerator.close();

    if (null != _seqStream)
      _seqStream.close();

    LOG.info("Stream closed !!");
    _closed = true;
  }

  @Override
  public void onNewTrailFile(File file) throws DatabusException
  {
    long offset = 0;

    if ( _firstTrailFile)
    {
      offset = _initialFileOffset;
      // Defensive Check to ensure we are getting the file we are expecting
      if ((null != _latestFile) && (! file.equals(_latestFile)) )
      {
        throw new DatabusException("Expected to get " + _latestFile + " but got " + file);
      }
      _firstTrailFile = false;
    } else {
      // Defensive Check to ensure we are not seeing old files.
      if (!_trailFileManager.isNextFileInSequence(_latestFile, file))
      {
        throw new DatabusException("Expected to get next file to " + _latestFile + " but got " + file);
      }
    }
    _latestFile = file;
    _streamEnumerator.enqueueNewTrailFile(new FilePosition(file, offset));
  }

  @Override
  public void onError(Throwable ex)
  {
    LOG.error("Closing the inputStream because of error", ex);

    try {
      closeStream();
    } catch (IOException e) {
      LOG.error("Unable to close the inputStream", e);
    }
  }

  /**
   * @return the notifier created in the constructor to locate trail files for this stream
   */
  protected TrailFileNotifier getTrailFileNotifier()
  {
    return _trailFileLocator;
  }

  /**
   * @return number of read calls that have completed without throwing
   */
  public long getNumReadCalls()
  {
    return _numReadCalls;
  }

  /**
   * @return number of read calls that returned something other than EOF
   */
  public long getNumReadCallsWithData()
  {
    return _numReadCallsWithData;
  }

  public File getCurrentFile()
  {
    ConcurrentAppendableSingleFileInputStream s = _streamEnumerator.getCurrStream();

    if ( null == s)
      return null;

    return s.getFile();
  }

  public long getCurrentPosition()
  {
    ConcurrentAppendableSingleFileInputStream s = _streamEnumerator.getCurrStream();

    if ( null == s)
      return -1;

    return s.getCurrPosition();
  }

  /**
   * @return true once the enumerator and sequence stream have been closed by closeStream()
   */
  public boolean isClosed()
  {
    return _closed;
  }

  /**
   * Stream which returns EOF for any single-byte read. The enumerator uses instances of it as
   * its very first stream and as the stream handed out once it has been closed (on shutdown).
   */
  public static class EOFStream
    extends InputStream
  {
    /**
     * @return always ConcurrentAppendableSingleFileInputStream.EOF
     */
    @Override
    public int read() throws IOException {
      return ConcurrentAppendableSingleFileInputStream.EOF;
    }

  }

  private static class FilePosition
  {
  @Override
    public String toString()
    {
      return "FilePosition [file=" + file + ", offset=" + offset + "]";
    }

    private final File file;
  private final long offset;

  public FilePosition(File file, long offset) {
    super();
    this.file = file;
    this.offset = offset;
  }

  public File getFile() {
    return file;
  }

  public long getOffset() {
    return offset;
  }
  }


  /**
   *
   * Enumerator for sequencing input-stream from trail files used by SequenceInputStream.
   * This enumerator never gets exhausted (hasMoreElements() always true (unless closed explicitly) and nextElement() blocks if no more files !!)
   *
   */
  public static class InputStreamEnumerator
    implements Enumeration<InputStream>
  {
    /** Stream over the file currently being read; null until the first file is opened. */
    private ConcurrentAppendableSingleFileInputStream _currStream = null;
    /** Trail files that have been announced but not opened yet, in arrival order. */
    private final Queue<FilePosition> _newTrailFiles = new ConcurrentLinkedQueue<FilePosition>();
    /** Whether the files are treated as static (no concurrent appends). */
    private final boolean _isStaticStream;

    /** Guards the queue/stream hand-over between producer and consumer. */
    private final Lock _lock = new ReentrantLock();
    /** Signalled when a file is enqueued or the enumerator is closed. */
    private final Condition _notEmpty = _lock.newCondition();

    /** True until nextElement() has been called once; that first call returns _beginStream. */
    private boolean _firstStream = true;
    /** Set by close(); once set, no more files are accepted or handed out. */
    private volatile boolean _closed = false;

    /** Placeholder returned by the very first nextElement() call. */
    private final InputStream _beginStream = new EOFStream();
    /** Placeholder returned by nextElement() when the enumerator is already closed. */
    private final InputStream _endStream = new EOFStream();

    /** Cache of file sizes keyed by absolute path; used by calculateLag(). */
    private final Map<String, Long> _inputFileSizes = Collections.synchronizedMap(new HashMap<String, Long>());
    /** Cache of file modification times keyed by absolute path; used by calculateLag(). */
    private final Map<String, Long> _inputFileModTimes = Collections.synchronizedMap(new HashMap<String, Long>());

    /** Used to compare file names when de-duplicating stats updates. */
    private final TrailFileManager _trailFileFilter;
    /** Optional statistics sink; null disables stats reporting. */
    private final GGParserStatistics _ggParserStats;

    // to avoid updates for the same file
    private File _mostRecentParsedFile;
    private File _mostRecentFileAdded = null;
    // NOTE(review): not referenced anywhere in this class - candidate for removal.
    private long _mostRecentFileAddedSize=0;


    public InputStreamEnumerator(boolean staticStream, TrailFileManager trailFileFilter, GGParserStatistics ggParserStats)
        throws IOException
    {
      _isStaticStream = staticStream;
      _trailFileFilter = trailFileFilter;
      _ggParserStats = ggParserStats;
    }

    /**
     * describes the lag between the file currently being processed and latest file available
     * bytesLag - between position in the currentStream file and end of the latest trail file
     * timeLag - timeStamp_of_the_current_file - timeStamp_of_the_latest_file
     * filesLag - number of files between current file and the latest file (including latest, excluding current)
     */
    public static class ParserLag {
      private final long _bytesLag;
      private final long _tsBegin;
      private final long _tsEnd;
      private final int _filesLag;
      public ParserLag(long byteLag, long tsBegin, long tsEnd, int fileLag) {
        _bytesLag = byteLag;
        _tsBegin = tsBegin;
        _tsEnd = tsEnd;
        _filesLag = fileLag;
      }

      public long getBytesLag() { return _bytesLag; }
      public long getTSBegin() { return _tsBegin; }
      public long getTSEnd() { return _tsEnd; }
      public int getFilesLag() { return _filesLag; }

      @Override
      public String toString() { return "LAG:bytes="+_bytesLag+";files="+_filesLag+"; tsBegin="+_tsBegin + ";tsEnd=" + _tsEnd; }
    }

    /**
     *
     * @return number of bytes/files/seconds between current position and the end of latest file
     */
    public ParserLag calculateLag() {
      File trailFile = null;
      int totalBytes = 0;
      int totalFiles = 0;
      long modTSFirst = 0, modTSLast = 0; // mod times of the files in the queue
      Long size=null;
      Long modTime;

      _lock.lock();
      try {
        // check the current file being parsed (it is not in the queue anymore)
        if(_currStream != null) {
          File fc = _currStream.getFile();
          size = _inputFileSizes.get(fc.getAbsolutePath());
          if(size == null || _newTrailFiles.size()==0) { //if no new files, this file may have been appended
            size = Long.valueOf(fc.length());
            _inputFileSizes.put(fc.getAbsolutePath(), size);
          }
          // adjust the offset
          totalBytes += size - _currStream.getCurrPosition();

          // get the mod time
          modTime = _inputFileModTimes.get(fc.getAbsolutePath());
          if(modTime == null || _newTrailFiles.size()==0) { //if no new files, this file may have been appended
            modTime = Long.valueOf(fc.lastModified());
            _inputFileModTimes.put(fc.getAbsolutePath(), modTime);
          }
          modTSFirst = modTime;
          modTSLast = modTime;  //only for the current file
        }


        // add-up all the files in the queue
        for(FilePosition fp : _newTrailFiles) {
          trailFile = fp.getFile();
          size = _inputFileSizes.get(trailFile.getAbsolutePath()); // cache values to avoid extra disk op
          if(size == null) {
            size = Long.valueOf(trailFile.length());
            _inputFileSizes.put(trailFile.getAbsolutePath(), size);
          }
          totalBytes += size.longValue();
          totalFiles ++;

          // mod time - we just want it for the first file
          if(modTSFirst == 0) {
            modTime = _inputFileModTimes.get(trailFile.getAbsolutePath());
            if(modTime == null) {
              modTime = Long.valueOf(trailFile.lastModified());
              _inputFileModTimes.put(trailFile.getAbsolutePath(), modTime);
            }
            modTSFirst = modTime;
          }
        }
        // Since last file may be appended - we need to re-check it every time
        // 'trailFile' is the file reference from the loop above.
        // if it is not null - this is the file that may be appended
        if(trailFile != null) {
          // get current size, totalBytes already includes its size from the previous loop and it may be incorrect
          totalBytes -= size.longValue(); // undo prev size of this file and get a new one
          size = Long.valueOf(trailFile.length());
          _inputFileSizes.remove(trailFile.getAbsolutePath()); // since file may be appended do not cache its size
          totalBytes += size.longValue();

          // get current modTime
          modTime = Long.valueOf(trailFile.lastModified());
          _inputFileModTimes.remove(trailFile.getAbsolutePath());

          modTSLast = modTime;
        }
      } finally {
        _lock.unlock();
      }
      return new ParserLag(totalBytes, modTSFirst, modTSLast, totalFiles);
    }

    @Override
    public String toString()
    {
      StringBuffer toString = new StringBuffer("InputStreamEnumerator{" +
          "_currStream=" + _currStream +
          ", numberOfNewTrailFiles=" + _newTrailFiles.size() +
          ", _isStaticStream=" + _isStaticStream +
          ", _lock=" + _lock +
          ", _notEmpty=" + _notEmpty +
          ", _firstStream=" + _firstStream +
          ", _closed=" + _closed +
          ", _beginStream=" + _beginStream +
          ", _endStream=" + _endStream);

      if(LOG.isDebugEnabled())
        toString.append(", _newTrailFiles =  " + _newTrailFiles);

      return toString.append('}').toString();
    }

    @Override
    public  boolean hasMoreElements()
    {
      _lock.lock();
      try
      {
        if ( _closed )
          return false;

        // Always return true for non-static stream unless closed.
        if ( !_isStaticStream)
          return true;

        return !_newTrailFiles.isEmpty();
      } finally {
        _lock.unlock();
      }
    }

    @Override
    public  InputStream nextElement()
    {
      if(LOG.isDebugEnabled())
        LOG.debug("Moving to next InputStream !!");
      InputStream stream = null;
      _lock.lock();
      try
      {
        if ( _firstStream)
        {
          /**
           *  First Stream has to be set because SequenceInputStream constructor calls nextElement() directly.
           *  Without this, the the thread could block if no files available at that time.
           */
          stream = _beginStream;
          _firstStream = false;
          return stream;
        }

        if ( _closed )
          return _endStream;

        while ( (_newTrailFiles.isEmpty()) && (!_closed))
          _notEmpty.await();

        if ( _closed )
          return null;

        if(_currStream != null) {
          LOG.info("about to switch to the next file from " + _currStream.getFile());

          // file is done. remove its info from the caches
          _inputFileSizes.remove(_currStream.getFile().getAbsolutePath());
          _inputFileModTimes.remove(_currStream.getFile().getAbsolutePath());

        }

        FilePosition newFilePos = _newTrailFiles.poll();

        try {
          if(_isStaticStream || (!_newTrailFiles.isEmpty()))
            _currStream = ConcurrentAppendableSingleFileInputStream.createStaticFileInputStream(
                                               newFilePos.getFile().getAbsolutePath(), newFilePos.getOffset());
          else
            _currStream =  ConcurrentAppendableSingleFileInputStream.createAppendingFileInputStream(
                                               newFilePos.getFile().getAbsolutePath(), newFilePos.getOffset(),100);

          stream = _currStream;

          // update the stats
          parserFileStarted(newFilePos.getFile());

        } catch (Exception e) {
          LOG.error("Unable to construct stream for trail filePos (" + newFilePos + ")",e);
          throw new RuntimeException("Unable to construct stream for trail filePos (" + newFilePos + ")",e);
        }
      } catch (InterruptedException ie ) {
        LOG.error("Got interrupted while waiting for new trail files !!", ie);
      } finally {
        _lock.unlock();
      }
      return stream;
    }

    /**
     * Called by TrailFileNotifier thread if it finds new file in the order defined by the FileNameRanker object.
     * @param file
     */
    public void enqueueNewTrailFile(FilePosition filePos)
    {
      _lock.lock();

      try
      {
        if (LOG.isDebugEnabled())
          LOG.debug("Enqueueing new trail file Position: " + filePos);

        if ( _closed)
        {
          LOG.error("Trying to insert new filePos (" + filePos + ") when in closed state. Skipping !!");
          return;
        }

        // for metrics collection
        parserAddNewFile(filePos.getFile());

        if ( _newTrailFiles.isEmpty())
        {
          if ( null != _currStream)
            _currStream.appendDone();
        }
        _newTrailFiles.offer(filePos);
        _notEmpty.signal();
      } finally {
        _lock.unlock();
      }
    }

    /**
     * Marks the enumerator as closed and signals one thread waiting on the not-empty condition
     * so that a reader blocked in nextElement() can return.
     */
    public void close()
    {
      _lock.lock();
      try
      {
        _closed = true;
        _notEmpty.signal(); //to wakeup thread waiting on this
      } finally {
        _lock.unlock();
      }
    }

    /**
     * @return the stream over the file currently being read, or null if no file has been
     *         opened yet; read without taking the lock
     */
    public ConcurrentAppendableSingleFileInputStream getCurrStream() {
      return _currStream;
    }


    /**
     * new trail file has been added.
     * @param f
     */
    public void parserAddNewFile(File f)
    {
      if(_mostRecentFileAdded != null && _trailFileFilter.compareFileName(f, _mostRecentFileAdded) <= 0) {
        LOG.warn("adding file that has been added before=" + f + ". ignoring.");
        return;
      }
      if(_ggParserStats != null)
        _ggParserStats.addNewFile(1); //update stats

    }

    /**
     * parsing of next file started
     * @param f
     */
    public void parserFileStarted(File f)
    {
      if(_mostRecentParsedFile != null && _trailFileFilter.compareFileName(f, _mostRecentParsedFile) <= 0) {
        LOG.warn("skipping update for the file("+f+") because it has been recorded already. most recent file=" + _mostRecentParsedFile);
        return// avoid updates for the same file
      }

      if(_ggParserStats != null)
        _ggParserStats.addParsedFile(1);

      _mostRecentParsedFile = f;
    }
  }


  private void updateParserStats() {
    if(_streamEnumerator == null || (_ggParserStats == null))
      return;


    ParserLag pLag = _streamEnumerator.calculateLag();
    if(LOG.isDebugEnabled())
      LOG.debug("running updateParserStats. lag=" + pLag);

    _ggParserStats.setBytesLag(pLag.getBytesLag());
    _ggParserStats.setFilesLag(pLag.getFilesLag());
    _ggParserStats.setModTimeLag(pLag.getTSBegin(), pLag.getTSEnd());
  }

}
TOP

Related Classes of com.linkedin.databus.core.ConcurrentAppendableCompositeFileInputStream$InputStreamEnumerator$ParserLag

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.