Package org.apache.avro.file

Source Code of org.apache.avro.file.DataFileReader$SeekableBufferedInput$PositionFilter

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.file;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.BinaryDecoder;

/** Read files written by {@link DataFileWriter}.
* @see DataFileWriter
*/
public class DataFileReader<D> {

  private Schema schema;
  private DatumReader<D> reader;
  private SeekableBufferedInput in;
  private Decoder vin;

  private Map<String,byte[]> meta = new HashMap<String,byte[]>();

  private long count;                           // # entries in file
  private long blockCount;                      // # entries in block
  private byte[] sync = new byte[DataFileWriter.SYNC_SIZE];
  private byte[] syncBuffer = new byte[DataFileWriter.SYNC_SIZE];

  /** Construct a reader for a file. */
  public DataFileReader(SeekableInput sin, DatumReader<D> reader)
    throws IOException {
    this.in = new SeekableBufferedInput(sin);

    byte[] magic = new byte[4];
    in.read(magic);
    if (!Arrays.equals(DataFileWriter.MAGIC, magic))
      throw new IOException("Not a data file.");

    long length = in.length();
    in.seek(length-4);
    int footerSize=(in.read()<<24)+(in.read()<<16)+(in.read()<<8)+in.read();
    in.seek(length-footerSize);
    this.vin = new BinaryDecoder(in);
    long l = vin.readMapStart();
    if (l > 0) {
      do {
        for (long i = 0; i < l; i++) {
          String key = vin.readString(null).toString();
          ByteBuffer value = vin.readBytes(null);
          byte[] bb = new byte[value.remaining()];
          value.get(bb);
          meta.put(key, bb);
        }
      } while ((l = vin.mapNext()) != 0);
    }

    this.sync = getMeta("sync");
    this.count = getMetaLong("count");
    this.schema = Schema.parse(getMetaString("schema"));
    this.reader = reader;

    reader.setSchema(schema);

    in.seek(DataFileWriter.MAGIC.length);         // seek to start
  }

  /** Return the value of a metadata property. */
  public synchronized byte[] getMeta(String key) {
    return meta.get(key);
  }
  /** Return the value of a metadata property. */
  public synchronized String getMetaString(String key) {
    try {
      return new String(getMeta(key), "UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
  }
  /** Return the value of a metadata property. */
  public synchronized long getMetaLong(String key) {
    return Long.parseLong(getMetaString(key));
  }

  /** Return the next datum in the file. */
  public synchronized D next(D reuse) throws IOException {
    while (blockCount == 0) {                     // at start of block

      if (in.tell() == in.length())               // at eof
        return null;

      skipSync();                                 // skip a sync

      blockCount = vin.readLong();                // read blockCount
        
      if (blockCount == DataFileWriter.FOOTER_BLOCK) {
        in.seek(vin.readLong()+in.tell());        // skip a footer
        blockCount = 0;
      }
    }
    blockCount--;
    return reader.read(reuse, vin);
  }

  private void skipSync() throws IOException {
    vin.readFixed(syncBuffer);
    if (!Arrays.equals(syncBuffer, sync))
      throw new IOException("Invalid sync!");
  }

  /** Move to the specified synchronization point, as returned by {@link
   * DataFileWriter#sync()}. */
  public synchronized void seek(long position) throws IOException {
    in.seek(position);
    blockCount = 0;
  }

  /** Move to the next synchronization point after a position. */
  public synchronized void sync(long position) throws IOException {
    if (in.tell()+DataFileWriter.SYNC_SIZE >= in.length()) {
      in.seek(in.length());
      return;
    }
    in.seek(position);
    vin.readFixed(syncBuffer);
    for (int i = 0; in.tell() < in.length(); i++) {
      int j = 0;
      for (; j < sync.length; j++) {
        if (sync[j] != syncBuffer[(i+j)%sync.length])
          break;
      }
      if (j == sync.length) {                     // position before sync
        in.seek(in.tell() - DataFileWriter.SYNC_SIZE);
        return;
      }
      syncBuffer[i%sync.length] = (byte)in.read();
    }
  }

  /** Close this reader. */
  public synchronized void close() throws IOException {
    in.close();
  }

  private class SeekableBufferedInput extends BufferedInputStream {
    private long position;                        // end of buffer
    private long length;                          // file length

    private class PositionFilter extends InputStream {
      private SeekableInput in;
      public PositionFilter(SeekableInput in) throws IOException {
        this.in = in;
      }
      public int read() { throw new UnsupportedOperationException(); }
      public int read(byte[] b, int off, int len) throws IOException {
        int value = in.read(b, off, len);
        if (value > 0) position += value;         // update on read
        return value;
      }
    }

    public SeekableBufferedInput(SeekableInput in) throws IOException {
      super(null);
      this.in = new PositionFilter(in);
      this.length = in.length();
    }

    public void seek(long p) throws IOException {
      if (p < 0) throw new IOException("Illegal seek: "+p);
      long start = position - count;
      if (p >= start && p < position) {            // in buffer
        this.pos = (int)(p - start);
      } else {                                     // not in buffer
        this.pos = 0;
        this.count = 0;
        ((PositionFilter)in).in.seek(p);
        this.position = p;
      }
    }

    public long tell() { return position-(count-pos); }
    public long length() throws IOException { return length; }

    public int read() throws IOException {        // optimized implementation
      if (pos >= count) return super.read();
      return buf[pos++] & 0xff;
    }

    public long skip(long skip) throws IOException { // optimized implementation
      if (skip > count-pos)
        return super.skip(skip);
      pos += (int)skip;
      return skip;
    }
  }

}
TOP

Related Classes of org.apache.avro.file.DataFileReader$SeekableBufferedInput$PositionFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.