// Package co.cask.cdap.data.stream
//
// Source code of co.cask.cdap.data.stream.StreamDataFileReader$DefaultPositionStreamEvent

/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.api.stream.StreamEventData;
import co.cask.cdap.common.io.BinaryDecoder;
import co.cask.cdap.common.io.Decoder;
import co.cask.cdap.common.io.SeekableInputStream;
import co.cask.cdap.common.stream.StreamEventDataCodec;
import co.cask.cdap.data.file.FileReader;
import co.cask.cdap.data.file.ReadFilter;
import co.cask.cdap.internal.io.Schema;
import co.cask.cdap.internal.io.SchemaTypeAdapter;
import com.google.common.base.Stopwatch;
import com.google.common.io.ByteStreams;
import com.google.common.io.InputSupplier;
import com.google.gson.JsonSyntaxException;
import com.google.gson.stream.JsonReader;

import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.annotation.concurrent.NotThreadSafe;

/**
* Class for reading data files written by {@link StreamDataFileWriter}.
*
* @see StreamDataFileWriter
*/
@NotThreadSafe
public final class StreamDataFileReader implements FileReader<PositionStreamEvent, Long> {

  // Magic bytes expected at the very beginning of an event file; checked by readHeader().
  private static final byte[] MAGIC_HEADER = {'E', '1'};

  // Supplies the seekable stream holding the events; asked for a stream each time doOpen() runs.
  private final InputSupplier<? extends SeekableInputStream> eventInputSupplier;
  // Supplies the index stream; null when the reader was created without an index.
  private final InputSupplier<? extends InputStream> indexInputSupplier;
  // Event timestamp to start reading from; only consulted by init() when offset is not positive.
  private final long startTime;
  // Arbitrary file offset to start reading from; takes precedence over startTime when > 0.
  private final long offset;
  // Created lazily by getIndex(); stays null when there is no index supplier.
  private StreamDataFileIndex index;
  // Currently opened event stream; null before the first open and after a failed read closes it.
  private SeekableInputStream eventInput;
  // File position recorded after each read; returned by getPosition() and used to seek when (re)opening.
  private long position;
  // Reusable buffer (8 bytes, allocated in the constructor) for reading block timestamps.
  private byte[] timestampBuffer;
  // Timestamp of the data block being read; a negative value means a timestamp has to be read next.
  private long timestamp;
  // Bytes remaining in the data block being read; a negative value means the block length has to be read next.
  private int length;
  // Set by close(); read() throws once this is true.
  private boolean closed;
  // Set by nextStreamEvent() when a timestamp of -1 is read, which marks the end of the file.
  private boolean eof;
  // Decoder wrapped around eventInput; recreated by doOpen() on every open.
  private Decoder decoder;

  /**
   * Opens a new {@link StreamDataFileReader} with the given inputs.
   *
   * @param eventInputSupplier An {@link InputSupplier} for providing the stream to read events.
   * @return A new instance of {@link StreamDataFileReader}.
   */
  public static StreamDataFileReader create(InputSupplier<? extends SeekableInputStream> eventInputSupplier) {
    return new StreamDataFileReader(eventInputSupplier, null, 0L, 0L);
  }

  /**
   * Opens a new {@link StreamDataFileReader} with the given inputs that starts reading events that are
   * written at or after the given timestamp.
   *
   * @param eventInputSupplier An {@link InputSupplier} for providing the stream to read events.
   * @param indexInputSupplier An {@link InputSupplier} for providing the stream to read event index.
   * @param startTime Timestamp in milliseconds for the event time to start reading with.
   * @return A new instance of {@link StreamDataFileReader}.
   */
  public static StreamDataFileReader createByStartTime(InputSupplier<? extends SeekableInputStream> eventInputSupplier,
                                                       InputSupplier<? extends InputStream> indexInputSupplier,
                                                       long startTime) {
    return new StreamDataFileReader(eventInputSupplier, indexInputSupplier, startTime, 0L);
  }

  /**
   * Opens a new {@link StreamDataFileReader} with the given inputs, which starts reading events at a the smallest
   * event position that is larger than or equal to the given offset.
   *
   * @param eventInputSupplier An {@link InputSupplier} for providing the stream to read events.
   * @param indexInputSupplier An {@link InputSupplier} for providing the stream to read event index.
   * @param offset An arbitrary event file offset.
   * @return A new instance of {@link StreamDataFileReader}.
   */
  public static StreamDataFileReader createWithOffset(InputSupplier<? extends SeekableInputStream> eventInputSupplier,
                                                      InputSupplier<? extends InputStream> indexInputSupplier,
                                                      long offset) {
    return new StreamDataFileReader(eventInputSupplier, indexInputSupplier, 0L, offset);
  }

  private StreamDataFileReader(InputSupplier<? extends SeekableInputStream> eventInputSupplier,
                               InputSupplier<? extends InputStream> indexInputSupplier,
                               long startTime, long offset) {
    this.eventInputSupplier = eventInputSupplier;
    this.indexInputSupplier = indexInputSupplier;
    this.startTime = startTime;
    this.offset = offset;
    this.timestampBuffer = new byte[8];
    this.timestamp = -1L;
    this.length = -1;
  }

  @Override
  public Long getPosition() {
    return position;
  }

  /**
   * Opens this reader to prepare for consumption. Calling this method is optional as the
   * {@link #read(java.util.Collection, int, long, java.util.concurrent.TimeUnit, co.cask.cdap.data.file.ReadFilter)}
   * method would do the initialization if this method hasn't been called.
   *
   * @throws IOException If there is error initializing.
   */
  @Override
  public void initialize() throws IOException {
    try {
      if (eventInput == null) {
        doOpen();
      }
    } catch (IOException e) {
      if (!(e instanceof EOFException || e instanceof FileNotFoundException)) {
        throw e;
      }
      // It's ok if the file doesn't exists or EOF. As that's the tailing behavior.
    }
  }

  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    try {
      if (eventInput != null) {
        eventInput.close();
      }
    } finally {
      closed = true;
    }
  }

  @Override
  public int read(Collection<? super PositionStreamEvent> events, int maxEvents,
                  long timeout, TimeUnit unit) throws IOException, InterruptedException {
    return read(events, maxEvents, timeout, unit, ReadFilter.ALWAYS_ACCEPT);
  }

  /**
   * Reads up to {@code maxEvents} events into {@code events}, opening the event stream first if needed.
   *
   * An {@link EOFException} or {@link FileNotFoundException} while reading is not treated as a failure:
   * the current stream is closed and, if {@code timeout} is positive, the read is retried after sleeping
   * for the interval computed by {@link #computeSleepNano(long, TimeUnit)}, until the timeout has elapsed.
   * Any other {@link IOException} closes this reader and is rethrown.
   *
   * @param events collection that receives the events read
   * @param maxEvents maximum number of events to add to {@code events}
   * @param timeout maximum time to keep retrying; a value {@code <= 0} disables retrying
   * @param unit unit of {@code timeout}
   * @param readFilter filter passed to {@link #nextStreamEvent(ReadFilter)} for each read
   * @return the number of events added, or {@code -1} if none was added and the end of file was reached
   * @throws IOException if the reader is already closed, or on an I/O error other than EOF / file not found
   * @throws InterruptedException if interrupted while sleeping between retries
   */
  @Override
  public int read(Collection<? super PositionStreamEvent> events, int maxEvents,
                  long timeout, TimeUnit unit, ReadFilter readFilter) throws IOException, InterruptedException {
    if (closed) {
      throw new IOException("Reader already closed.");
    }

    int eventCount = 0;
    long sleepNano = computeSleepNano(timeout, unit);
    try {
      // Measures the total time spent in this call, to enforce the timeout across retries.
      Stopwatch stopwatch = new Stopwatch();
      stopwatch.start();

      // Keep reading events until max events.
      while (!eof && eventCount < maxEvents) {
        try {
          // The stream is null on the first call and after a previous failure closed it.
          if (eventInput == null) {
            doOpen();
          }

          PositionStreamEvent event = nextStreamEvent(readFilter);
          if (event != null) {
            events.add(event);
            eventCount++;
          } else if (eof) {
            // End of file reached: leave without updating the position.
            break;
          }

          // Remember how far the file has been consumed, so a reopen can seek back here.
          position = eventInput.getPos();

        } catch (IOException e) {
          // Drop the current stream; the next iteration (or the next call) reopens it at 'position'.
          if (eventInput != null) {
            eventInput.close();
            eventInput = null;
          }

          // Only EOF and missing file are retried; anything else goes to the outer catch.
          if (!(e instanceof EOFException || e instanceof FileNotFoundException)) {
            throw e;
          }

          // If end of stream file or no timeout is allowed, break the loop.
          if (eof || timeout <= 0) {
            break;
          }

          // Don't sleep if the timeout has already elapsed.
          if (stopwatch.elapsedTime(unit) > timeout) {
            break;
          }

          TimeUnit.NANOSECONDS.sleep(sleepNano);

          // Re-check after sleeping, so no further read attempt is made once the timeout has passed.
          if (stopwatch.elapsedTime(unit) > timeout) {
            break;
          }
        }
      }

      return (eventCount == 0 && eof) ? -1 : eventCount;

    } catch (IOException e) {
      // Unrecoverable I/O failure: close this reader before propagating.
      close();
      throw e;
    }
  }

  /**
   * Returns the index for the stream data or {@code null} if index is absent.
   */
  StreamDataFileIndex getIndex() {
    if (index == null && indexInputSupplier != null) {
      index = new StreamDataFileIndex(indexInputSupplier);
    }
    return index;
  }

  /**
   * Opens and initialize this reader.
   */
  private void doOpen() throws IOException {
    try {
      eventInput = eventInputSupplier.getInput();
      decoder = new BinaryDecoder(eventInput);

      if (position <= 0) {
        init();
      }
      eventInput.seek(position);
    } catch (IOException e) {
      position = 0;
      if (eventInput != null) {
        eventInput.close();
        eventInput = null;
      }
      throw e;
    }
  }

  private long computeSleepNano(long timeout, TimeUnit unit) {
    long sleepNano = TimeUnit.NANOSECONDS.convert(timeout, unit) / 10;
    return sleepNano <= 0 ? 1 : sleepNano;
  }

  private void init() throws IOException {
    readHeader();

    // If it is constructed with an arbitrary offset, need to find an event position
    if (offset > 0) {
      initByOffset();
    } else if (startTime > 0) {
      initByTime(startTime);
    }
  }

  private void readHeader() throws IOException {
    // Read the header of the event file
    // First 2 bytes should be 'E' '1'
    byte[] magic = new byte[MAGIC_HEADER.length];
    ByteStreams.readFully(eventInput, magic);

    if (!Arrays.equals(magic, MAGIC_HEADER)) {
      throw new IOException("Unsupported stream file format. Expected magic bytes as 'E' '1'");
    }

    // Read the properties map.
    Map<String, String> properties = StreamUtils.decodeMap(new BinaryDecoder(eventInput));
    verifySchema(properties.get("stream.schema"));

    position = eventInput.getPos();
  }

  private void initByOffset() throws IOException {
    // If index is provided, lookup the position smaller but closest to the offset.
    StreamDataFileIndex index = getIndex();
    long pos = index == null ? 0 : index.floorPosition(offset);
    if (pos > 0) {
      eventInput.seek(pos);
    }

    skipUntil(new SkipCondition() {
      @Override
      public boolean apply(long position, long timestamp) {
        return position >= offset;
      }
    });
  }

  private void initByTime(final long time) throws IOException {
    // If index is provided, lookup the index find the offset closest to start time.
    // If no offset is found, starts from the beginning of the events
    StreamDataFileIndex index = getIndex();
    long offset = index == null ? 0 : index.floorPositionByTime(time);
    if (offset > 0) {
      eventInput.seek(offset);
    }

    skipUntil(new SkipCondition() {
      @Override
      public boolean apply(long position, long timestamp) {
        return timestamp >= time;
      }
    });
  }

  /**
   * Skips events until the given condition is true.
   *
   * Works in two phases, starting from the current stream position. The first phase jumps from data block
   * to data block, using each block's length, until the condition holds for a block (or a timestamp of -1
   * is read); this yields a range between the start of the last skipped block ({@code position}) and the
   * start of the block that stopped the scan ({@code positionBound}). The second phase goes back to
   * {@code position} and skips events one by one until the condition holds or the bound is reached.
   * On return, the {@code position} field holds the position found.
   *
   * @param condition decides, from a position and a timestamp, where skipping stops
   * @throws IOException on I/O errors other than {@link EOFException}, which is swallowed
   */
  private void skipUntil(SkipCondition condition) throws IOException {
    long positionBound = position = eventInput.getPos();

    try {
      // Phase 1: scan whole data blocks.
      while (!eof) {
        // Start of the block about to be examined.
        positionBound = eventInput.getPos();

        // Read timestamp
        long timestamp = readTimestamp();

        // If EOF or condition match, upper bound found. Break the loop.
        if (timestamp == -1L || condition.apply(positionBound, timestamp)) {
          break;
        }

        int len = readLength();
        // This block is being skipped; it becomes the lower bound for the second phase.
        position = positionBound;

        // Jump to next timestamp
        eventInput.seek(eventInput.getPos() + len);
      }

      if (eof) {
        position = positionBound;
        return;
      }

      // search for the exact StreamData position within the bound.
      eventInput.seek(position);
      while (position < positionBound) {
        // Note: 'timestamp' here is the reader's field; the local of the first loop is out of scope.
        if (timestamp < 0) {
          timestamp = readTimestamp();
        }
        if (condition.apply(position, timestamp)) {
          break;
        }
        // Skip exactly one event, then record where the next one starts.
        nextStreamEvent(ReadFilter.ALWAYS_REJECT_OFFSET);
        position = eventInput.getPos();
      }
    } catch (IOException e) {
      // It's ok if hitting EOF, meaning it's could be a live stream file or closed by a dead stream handler.
      if (!(e instanceof EOFException)) {
        throw e;
      }
    }
  }

  private void verifySchema(String schemaStr) throws IOException {
    if (schemaStr == null) {
      throw new IOException("Missing 'stream.schema' property.");
    }

    try {
      Schema schema = new SchemaTypeAdapter().read(new JsonReader(new StringReader(schemaStr)));
      if (!StreamEventDataCodec.STREAM_DATA_SCHEMA.equals(schema)) {
        throw new IOException("Unsupported schema " + schemaStr);
      }

    } catch (JsonSyntaxException e) {
      throw new IOException("Invalid schema.", e);
    }
  }

  private long readTimestamp() throws IOException {
    ByteStreams.readFully(eventInput, timestampBuffer);
    return Bytes.toLong(timestampBuffer);
  }

  private int readLength() throws IOException {
    try {
      return decoder.readInt();
    } catch (IOException e) {
      // If failed to read data block length, reset the timestamp as well,
      // since the position hasn't been updated yet, and is still pointing to timestamp position.
      timestamp = -1L;
      throw e;
    }
  }

  private StreamEventData readStreamData() throws IOException {
    return StreamEventDataCodec.decode(decoder);
  }

  /**
   * Skips over the next stream event data in the decoder, by delegating to
   * {@code StreamEventDataCodec.skip}.
   *
   * @throws IOException if skipping fails
   */
  private void skipStreamData() throws IOException {
    StreamEventDataCodec.skip(decoder);
  }

  /**
   * Reads or skips a {@link StreamEvent}.
   *
   * State is carried across calls in the {@code timestamp} and {@code length} fields: a negative
   * {@code timestamp} means the next thing to read is a block timestamp, and a negative {@code length}
   * means the block length hasn't been read yet. Both are reset to -1 once a block is fully consumed.
   * Reading a timestamp of -1 sets the {@code eof} flag.
   *
   * @param filter to determine to accept or skip a stream event by offset
   *               and accept or skip a stream event block by timestamp.
   * @return The next StreamEvent or {@code null} if the event is rejected by the filter or reached EOF.
   * @throws IOException if reading fails; this includes an {@link EOFException} when a rejected block
   *                     cannot be skipped completely
   */
  private PositionStreamEvent nextStreamEvent(ReadFilter filter) throws IOException {
    // Data block is <timestamp> <length> <stream_data>+
    PositionStreamEvent event = null;
    boolean done = false;

    // Loops until exactly one event has been read or skipped, or EOF is detected.
    while (!done) {
      boolean acceptTimestamp = true;
      // A negative timestamp means a new data block starts here.
      if (timestamp < 0) {
        timestamp = readTimestamp();
        if (timestamp >= 0) {
          // See if this timestamp is accepted by the filter
          filter.reset();
          acceptTimestamp = filter.acceptTimestamp(timestamp);
          if (!acceptTimestamp) {
            // If not accepted, try to get a hint.
            long nextTimestamp = filter.getNextTimestampHint();

            // If have hint, re-init this reader with the hinted timestamp.
            // The hint must be > timestamp of current block, as stream file can only read forward.
            if (nextTimestamp > timestamp) {
              timestamp = -1L;
              initByTime(nextTimestamp);
              continue;
            }
          }
        }
      }

      // Timestamp == -1 indicate that's the end of file.
      // NOTE(review): a negative timestamp other than -1 is neither treated as EOF nor shown to the
      // filter; it falls through to the block handling below. Confirm the writer never produces one.
      if (timestamp == -1L) {
        eof = true;
        break;
      }

      // True only on the first pass over a block, i.e. when its length is read in this iteration.
      boolean isReadBlockLength = length < 0;
      if (isReadBlockLength) {
        length = readLength();
      }

      if (isReadBlockLength && !acceptTimestamp) {
        // If able to read block length, but the timestamp filter return false without providing hint,
        // just skip this timestamp block.
        long bytesSkipped = eventInput.skip(length);
        boolean skippedExpected = (bytesSkipped == length);
        timestamp = -1L;
        length = -1;

        if (skippedExpected) {
          continue;
        } else {
          // Fewer bytes skipped than the block length is reported to the caller as EOF.
          throw new EOFException();
        }
      }

      if (length > 0) {
        long startPos = eventInput.getPos();

        try {
          if (filter.acceptOffset(startPos)) {
            event = new DefaultPositionStreamEvent(readStreamData(), timestamp, startPos);
          } else {
            skipStreamData();
          }
        } catch (IOException e) {
          // If failed to read first event in the data block, reset the timestamp and length to -1
          // This is because position hasn't been updated yet and retry will start from the timestamp position.
          if (isReadBlockLength) {
            timestamp = -1L;
            length = -1;
          }
          throw e;
        }
        long endPos = eventInput.getPos();
        done = true;
        // Account for the bytes of the event just consumed.
        length -= (int) (endPos - startPos);
      }
      // Block fully consumed: the next iteration or call starts with a new timestamp.
      if (length == 0) {
        timestamp = -1L;
        length = -1;
      }
    }

    return event;
  }


  private static final class DefaultPositionStreamEvent implements PositionStreamEvent {

    private final StreamEventData delegate;
    private final long timestamp;
    private final long position;

    private DefaultPositionStreamEvent(StreamEventData delegate, long timestamp, long position) {
      this.delegate = delegate;
      this.timestamp = timestamp;
      this.position = position;
    }

    @Override
    public long getTimestamp() {
      return timestamp;
    }

    @Override
    public ByteBuffer getBody() {
      return delegate.getBody();
    }

    @Override
    public Map<String, String> getHeaders() {
      return delegate.getHeaders();
    }

    @Override
    public long getStart() {
      return position;
    }
  }

  /**
   * Condition evaluated by {@link #skipUntil(SkipCondition)} to decide where skipping stops.
   */
  private interface SkipCondition {
    /**
     * @param position a position in the event file
     * @param timestamp the timestamp associated with that position
     * @return {@code true} if skipping should stop at the given position
     */
    boolean apply(long position, long timestamp);
  }
}
// TOP
//
// Related Classes of co.cask.cdap.data.stream.StreamDataFileReader$DefaultPositionStreamEvent
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.