Package co.cask.cdap.data.stream

Source Code of co.cask.cdap.data.stream.StreamDataFileIndex

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.common.io.BinaryDecoder;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
import com.google.common.io.Closeables;
import com.google.common.io.InputSupplier;
import com.google.common.primitives.Longs;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.longs.LongListIterator;
import it.unimi.dsi.fastutil.longs.LongLists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;

/**
* This class is for loading stream index file and lookup of the index.
*/
final class StreamDataFileIndex {

  private static final Logger LOG = LoggerFactory.getLogger(StreamDataFileIndex.class);

  private static final byte[] INDEX_MAGIC_HEADER = {'I', '1'};

  // Parallel array list for holding timestamps and corresponding positions in the index.
  private final LongList timestamps;
  private final LongList positions;

  /**
   * Constructs with the given input.
   *
   * @param indexInputSupplier Provides {@link InputStream} for reading the index.
   */
  StreamDataFileIndex(InputSupplier<? extends InputStream> indexInputSupplier) {
    LongList timestamps;
    LongList positions;

    // Load the whole index into memory.
    try {
      Map.Entry<LongList, LongList> index;
      InputStream indexInput = indexInputSupplier.getInput();
      try {
        index = loadIndex(indexInput);
      } finally {
        Closeables.closeQuietly(indexInput);
      }
      timestamps = LongLists.unmodifiable(index.getKey());
      positions = LongLists.unmodifiable(index.getValue());
    } catch (IOException e) {
      LOG.error("Failed to load stream index. Default to empty index.", e);
      timestamps = LongLists.EMPTY_LIST;
      positions = LongLists.EMPTY_LIST;
    }
    this.timestamps = timestamps;
    this.positions = positions;
  }

  /**
   * Finds the largest event file position recorded in the index that has timestamp smaller than or equal to the given
   * timestamp.
   *
   * @param timestamp Stream event timestamp to search for.
   * @return The file position or {@code -1} if no record satisfied the requirement can be found.
   */
  long floorPositionByTime(long timestamp) {
    if (timestamps.isEmpty()) {
      return -1;
    }

    // Binary search for a timestamp that is larger than or equals to the given timestamp.
    int idx = binarySearch(timestamps, timestamp);
    if (idx >= 0) {
      return positions.getLong(idx);
    }

    // Return the position that has smaller timestamp than the one to search for.
    // If every timestamp in the index is larger than the given one, return -1.
    return idx == -1 ? -1 : positions.getLong(-idx - 2);
  }

  /**
   * Finds the largest event file position recorded in the index that is smaller than or equal to a given offset.
   *
   * @param offset an arbitrary file offset.
   * @return largest event file position that is smaller than or equal to the given offset.
   *
   */
  long floorPosition(long offset) {
    if (positions.isEmpty()) {
      return 0L;
    }

    int idx = binarySearch(positions, offset);
    if (idx >= 0) {
      return offset;
    }

    return idx == -1 ? 0 : positions.getLong(-idx - 2);
  }

  /**
   * Returns a {@link StreamDataFileIndexIterator} for iterating over all (timestamp, position) pairs.
   */
  StreamDataFileIndexIterator indexIterator() {
    final LongListIterator timestampIter = timestamps.iterator();
    final LongListIterator positionIter = positions.iterator();

    return new StreamDataFileIndexIterator() {

      private long timestamp;
      private long position;

      @Override
      public boolean nextIndexEntry() {
        if (timestampIter.hasNext() && positionIter.hasNext()) {
          timestamp = timestampIter.nextLong();
          position = positionIter.nextLong();
          return true;
        }
        return false;
      }

      @Override
      public long currentTimestamp() {
        return timestamp;
      }

      @Override
      public long currentPosition() {
        return position;
      }
    };
  }

  /**
   * Same contract as {@link Collections#binarySearch(java.util.List, Object)}, except that it works on LongList.
   */
  private int binarySearch(LongList list, long target) {
    // Binary search for a value that is larger than or equals to the given target
    int low = 0;
    int high = list.size() - 1;

    while (low <= high) {
      int mid = ((high - low) >> 1) + low;
      long value = list.getLong(mid);

      if (value < target) {
        low = mid + 1;
      } else if (value > target) {
        high = mid - 1;
      } else {
        // Found, returns the index
        return mid;
      }
    }

    return -(low + 1);
  }

  private Map.Entry<LongList, LongList> loadIndex(InputStream input) throws IOException {
    byte[] magic = new byte[INDEX_MAGIC_HEADER.length];
    ByteStreams.readFully(input, magic);

    if (!Arrays.equals(magic, INDEX_MAGIC_HEADER)) {
      throw new IOException("Unsupported index file format. Expected magic bytes as 'I' '1'");
    }

    // Decode the properties map. In current version, it is not used.
    StreamUtils.decodeMap(new BinaryDecoder(input));

    // Read in all index (timestamp, position pairs).
    LongList timestamps = new LongArrayList(1000);
    LongList positions = new LongArrayList(1000);
    byte[] buf = new byte[Longs.BYTES * 2];

    while (ByteStreams.read(input, buf, 0, buf.length) == buf.length) {
      timestamps.add(Bytes.toLong(buf, 0));
      positions.add(Bytes.toLong(buf, Longs.BYTES));
    }

    return Maps.immutableEntry(timestamps, positions);
  }
}
TOP

Related Classes of co.cask.cdap.data.stream.StreamDataFileIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.