
Source Code of org.apache.hadoop.hdfs.BlockReader

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;

import static org.apache.hadoop.hdfs.protocol.DataTransferProtocol.Status.CHECKSUM_OK;
import static org.apache.hadoop.hdfs.protocol.DataTransferProtocol.Status.ERROR_ACCESS_TOKEN;
import static org.apache.hadoop.hdfs.protocol.DataTransferProtocol.Status.SUCCESS;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.Socket;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.security.BlockAccessToken;
import org.apache.hadoop.hdfs.security.InvalidAccessTokenException;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.DataChecksum;

/** This is a wrapper around a connection to a datanode,
* and understands checksums, offsets, etc.
*/
@InterfaceAudience.Private
public class BlockReader extends FSInputChecker {

  Socket dnSock; // for now used only to send checksumOk.
  private DataInputStream in;
  private DataChecksum checksum;

  /** offset in block of the last chunk received */
  private long lastChunkOffset = -1;
  private long lastChunkLen = -1;
  private long lastSeqNo = -1;

  /** offset in block where reader wants to actually read */
  private long startOffset;

  /** offset in block of first chunk - may be less than startOffset
      if startOffset is not chunk-aligned */
  private final long firstChunkOffset;

  private int bytesPerChecksum;
  private int checksumSize;

  /**
   * The total number of bytes we need to transfer from the DN.
   * This is the amount that the user has requested plus some padding
   * at the beginning so that the read can begin on a chunk boundary.
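   * For example, with 512-byte chunks, a read at startOffset 1000 begins
   * at firstChunkOffset 512, so bytesNeededToFinish = bytesToRead + 488.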
   */
  private final long bytesNeededToFinish;

  /** set once the trailing empty packet marking end-of-read is consumed */
  private boolean gotEOS = false;

  /** scratch buffer for discarding chunk-alignment padding on first read */
  byte[] skipBuf = null;
  /** checksum bytes of the current packet, as received from the datanode */
  ByteBuffer checksumBytes = null;
  /** number of unread data bytes remaining in the current packet */
  int dataLeft = 0;
 
  /* FSInputChecker interface */
 
  /* Same interface as java.io.InputStream#read(),
   * used by DFSInputStream#read().
   * This violates one rule when there is a checksum error:
   * "a read should not modify the user buffer before a successful read",
   * because it first reads the data into the user buffer and then
   * verifies the checksum.
   */
  @Override
  public synchronized int read(byte[] buf, int off, int len)
                               throws IOException {
   
    // This has to be set here, *before* the skip, since we can
    // hit EOS during the skip, in the case that our entire read
    // is smaller than the checksum chunk.
    boolean eosBefore = gotEOS;

    //for the first read, skip the extra bytes at the front.
    if (lastChunkLen < 0 && startOffset > firstChunkOffset && len > 0) {
      // Skip these bytes. But don't call this.skip()!
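      // e.g., with 512-byte chunks, startOffset=1000 gives
      // firstChunkOffset=512 and toSkip=488.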
      int toSkip = (int)(startOffset - firstChunkOffset);
      if ( skipBuf == null ) {
        skipBuf = new byte[bytesPerChecksum];
      }
      if ( super.read(skipBuf, 0, toSkip) != toSkip ) {
        // should never happen
        throw new IOException("Could not skip required number of bytes");
      }
    }
   
    int nRead = super.read(buf, off, len);
   
    // if EOS was reached during this read and checksum verification
    // is enabled:
    if (gotEOS && !eosBefore && nRead >= 0 && needChecksum()) {
      //checksum is verified and there are no errors.
      checksumOk(dnSock);
    }
    return nRead;
  }

  @Override
  public synchronized long skip(long n) throws IOException {
    /* How can we make sure we don't throw a ChecksumException, at least
     * in the majority of cases? This implementation can throw one. */
    if ( skipBuf == null ) {
      skipBuf = new byte[bytesPerChecksum];
    }

    long nSkipped = 0;
    while ( nSkipped < n ) {
      int toSkip = (int)Math.min(n-nSkipped, skipBuf.length);
      int ret = read(skipBuf, 0, toSkip);
      if ( ret <= 0 ) {
        return nSkipped;
      }
      nSkipped += ret;
    }
    return nSkipped;
  }

  @Override
  public int read() throws IOException {
    throw new IOException("read() is not expected to be invoked. " +
                          "Use read(buf, off, len) instead.");
  }
 
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    /* Checksum errors are handled outside the BlockReader.
     * DFSInputStream does not always call 'seekToNewSource'. In the
     * case of pread(), it just tries a different replica without seeking.
     */
    return false;
  }
 
  @Override
  public void seek(long pos) throws IOException {
    throw new IOException("Seek() is not supported in BlockInputChecker");
  }

  @Override
  protected long getChunkPosition(long pos) {
    throw new RuntimeException("getChunkPosition() is not supported, " +
                               "since seek is not required");
  }
 
  /**
   * Makes sure that checksumBytes has enough capacity
   * and limit is set to the number of checksum bytes needed
   * to be read.
   */
  private void adjustChecksumBytes(int dataLen) {
    int requiredSize =
      ((dataLen + bytesPerChecksum - 1)/bytesPerChecksum)*checksumSize;
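    // e.g., dataLen=1000, bytesPerChecksum=512, checksumSize=4:
    // ceil(1000/512) = 2 chunks -> requiredSize = 8 bytes.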
    if (checksumBytes == null || requiredSize > checksumBytes.capacity()) {
      checksumBytes =  ByteBuffer.wrap(new byte[requiredSize]);
    } else {
      checksumBytes.clear();
    }
    checksumBytes.limit(requiredSize);
  }
 
  @Override
  protected synchronized int readChunk(long pos, byte[] buf, int offset,
                                       int len, byte[] checksumBuf)
                                       throws IOException {
    // Read one chunk.
    if ( gotEOS ) {
      // Already hit EOF
      return -1;
    }
   
    // Read one DATA_CHUNK.
    long chunkOffset = lastChunkOffset;
    if ( lastChunkLen > 0 ) {
      chunkOffset += lastChunkLen;
    }
   
    // pos is relative to the start of the first chunk of the read.
    // chunkOffset is relative to the start of the block.
    // This makes sure that the read passed in from FSInputChecker is
    // for the same chunk we expect to be reading from the DN.
    if ( (pos + firstChunkOffset) != chunkOffset ) {
      throw new IOException("Mismatch in pos : " + pos + " + " +
                            firstChunkOffset + " != " + chunkOffset);
    }

    // Read next packet if the previous packet has been read completely.
    if (dataLeft <= 0) {
      //Read packet headers.
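      // Wire layout of each packet, as consumed below: packetLen (int),
      // offsetInBlock (long), seqno (long), lastPacketInBlock (boolean),
      // dataLen (int), then the checksums, then the chunk data itself.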
      int packetLen = in.readInt();
      long offsetInBlock = in.readLong();
      long seqno = in.readLong();
      boolean lastPacketInBlock = in.readBoolean();
   
      if (LOG.isDebugEnabled()) {
        LOG.debug("DFSClient readChunk got seqno " + seqno +
                  " offsetInBlock " + offsetInBlock +
                  " lastPacketInBlock " + lastPacketInBlock +
                  " packetLen " + packetLen);
      }
     
      int dataLen = in.readInt();
   
      // Sanity check the lengths
      if ( ( dataLen <= 0 && !lastPacketInBlock ) ||
           ( dataLen != 0 && lastPacketInBlock) ||
           (seqno != (lastSeqNo + 1)) ) {
           throw new IOException("BlockReader: error in packet header" +
                                 "(chunkOffset : " + chunkOffset +
                                 ", dataLen : " + dataLen +
                                 ", seqno : " + seqno +
                                 " (last: " + lastSeqNo + "))");
      }
     
      lastSeqNo = seqno;
      dataLeft = dataLen;
      adjustChecksumBytes(dataLen);
      if (dataLen > 0) {
        IOUtils.readFully(in, checksumBytes.array(), 0,
                          checksumBytes.limit());
      }
    }

    // Sanity checks
    assert len >= bytesPerChecksum;
    assert checksum != null;
    assert checksumSize == 0 || (checksumBuf.length % checksumSize == 0);


    int checksumsToRead, bytesToRead;

    if (checksumSize > 0) {

      // How many chunks left in our stream - this is a ceiling
      // since we may have a partial chunk at the end of the file
      int chunksLeft = (dataLeft - 1) / bytesPerChecksum + 1;

      // How many chunks we can fit in databuffer
      //  - note this is a floor since we always read full chunks
      int chunksCanFit = Math.min(len / bytesPerChecksum,
                                  checksumBuf.length / checksumSize);

      // How many chunks should we read
      checksumsToRead = Math.min(chunksLeft, chunksCanFit);
      // How many bytes should we actually read
      bytesToRead = Math.min(
        checksumsToRead * bytesPerChecksum, // full chunks
        dataLeft); // in case we have a partial
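      // e.g., dataLeft=700, len=4096, checksumBuf.length=32,
      // bytesPerChecksum=512, checksumSize=4: chunksLeft=2, chunksCanFit=8,
      // so checksumsToRead=2 and bytesToRead=min(1024, 700)=700.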
    } else {
      // no checksum
      bytesToRead = Math.min(dataLeft, len);
      checksumsToRead = 0;
    }

    if ( bytesToRead > 0 ) {
      // Assert we have enough space
      assert bytesToRead <= len;
      assert checksumBytes.remaining() >= checksumSize * checksumsToRead;
      assert checksumBuf.length >= checksumSize * checksumsToRead;
      IOUtils.readFully(in, buf, offset, bytesToRead);
      checksumBytes.get(checksumBuf, 0, checksumSize * checksumsToRead);
    }

    dataLeft -= bytesToRead;
    assert dataLeft >= 0;

    lastChunkOffset = chunkOffset;
    lastChunkLen = bytesToRead;

    // If there's no data left in the current packet after satisfying
    // this read, and we have satisfied the client read, we expect
    // an empty packet header from the DN to signify this.
    // Note that pos + bytesToRead may in fact be greater than
    // bytesNeededToFinish, since the DN sends the entire last chunk.
    if (dataLeft == 0 &&
        pos + bytesToRead >= bytesNeededToFinish) {

      // Read header
      int packetLen = in.readInt();
      long offsetInBlock = in.readLong();
      long seqno = in.readLong();
      boolean lastPacketInBlock = in.readBoolean();
      int dataLen = in.readInt();

      if (!lastPacketInBlock ||
          dataLen != 0) {
        throw new IOException("Expected empty end-of-read packet! Header: " +
                              "(packetLen : " + packetLen +
                              ", offsetInBlock : " + offsetInBlock +
                              ", seqno : " + seqno +
                              ", lastInBlock : " + lastPacketInBlock +
                              ", dataLen : " + dataLen);
      }

      gotEOS = true;
    }

    if ( bytesToRead == 0 ) {
      return -1;
    }

    return bytesToRead;
  }
 
  private BlockReader( String file, long blockId, DataInputStream in,
                       DataChecksum checksum, boolean verifyChecksum,
                       long startOffset, long firstChunkOffset,
                       long bytesToRead,
                       Socket dnSock ) {
    super(new Path("/blk_" + blockId + ":of:" + file)/*too non path-like?*/,
          1, verifyChecksum,
          checksum.getChecksumSize() > 0? checksum : null,
          checksum.getBytesPerChecksum(),
          checksum.getChecksumSize());
   
    this.dnSock = dnSock;
    this.in = in;
    this.checksum = checksum;
    this.startOffset = Math.max( startOffset, 0 );

    // The total number of bytes that we need to transfer from the DN is
    // the amount that the user wants (bytesToRead), plus the padding at
    // the beginning in order to chunk-align. Note that the DN may elect
    // to send more than this amount if the read ends mid-chunk.
    this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

    this.firstChunkOffset = firstChunkOffset;
    lastChunkOffset = firstChunkOffset;
    lastChunkLen = -1;

    bytesPerChecksum = this.checksum.getBytesPerChecksum();
    checksumSize = this.checksum.getChecksumSize();
  }

  public static BlockReader newBlockReader(Socket sock, String file,
                                     long blockId,
                                     BlockAccessToken accessToken,
                                     long genStamp,
                                     long startOffset, long len,
                                     int bufferSize) throws IOException {
    return newBlockReader(sock, file, blockId, accessToken, genStamp,
                          startOffset, len, bufferSize, true);
  }

  /**
   * Creates a new BlockReader that reads len bytes of the given block,
   * starting at startOffset, over an already-connected datanode socket,
   * optionally verifying checksums.
   */
  public static BlockReader newBlockReader( Socket sock, String file, long blockId,
                                     BlockAccessToken accessToken,
                                     long genStamp,
                                     long startOffset, long len,
                                     int bufferSize, boolean verifyChecksum)
                                     throws IOException {
    return newBlockReader(sock, file, blockId, accessToken, genStamp, startOffset,
                          len, bufferSize, verifyChecksum, "");
  }

  public static BlockReader newBlockReader( Socket sock, String file,
                                     long blockId,
                                     BlockAccessToken accessToken,
                                     long genStamp,
                                     long startOffset, long len,
                                     int bufferSize, boolean verifyChecksum,
                                     String clientName)
                                     throws IOException {
    // in and out will be closed when sock is closed (by the caller)
    DataTransferProtocol.Sender.opReadBlock(
        new DataOutputStream(new BufferedOutputStream(
            NetUtils.getOutputStream(sock,HdfsConstants.WRITE_TIMEOUT))),
        blockId, genStamp, startOffset, len, clientName, accessToken);
   
    //
    // Get bytes in block, set streams
    //

    DataInputStream in = new DataInputStream(
        new BufferedInputStream(NetUtils.getInputStream(sock),
                                bufferSize));
   
    DataTransferProtocol.Status status = DataTransferProtocol.Status.read(in);
    if (status != SUCCESS) {
      if (status == ERROR_ACCESS_TOKEN) {
        throw new InvalidAccessTokenException(
            "Got access token error for OP_READ_BLOCK, self="
                + sock.getLocalSocketAddress() + ", remote="
                + sock.getRemoteSocketAddress() + ", for file " + file
                + ", for block " + blockId + "_" + genStamp);
      } else {
        throw new IOException("Got error for OP_READ_BLOCK, self="
            + sock.getLocalSocketAddress() + ", remote="
            + sock.getRemoteSocketAddress() + ", for file " + file
            + ", for block " + blockId + "_" + genStamp);
      }
    }
    DataChecksum checksum = DataChecksum.newDataChecksum( in );
    //Warning when we get CHECKSUM_NULL?
   
    // Read the first chunk offset.
    long firstChunkOffset = in.readLong();
   
    if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
        firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
      throw new IOException("BlockReader: error in first chunk offset (" +
                            firstChunkOffset + ") startOffset is " +
                            startOffset + " for file " + file);
    }

    return new BlockReader( file, blockId, in, checksum, verifyChecksum,
                            startOffset, firstChunkOffset, len,
                            sock );
  }

  @Override
  public synchronized void close() throws IOException {
    startOffset = -1;
    checksum = null;
    // in will be closed when its Socket is closed.
  }
 
  /** Similar to readFully(), but only reads as much as is available,
   * and exposes the protected FSInputChecker#readFully() to callers.
   */
  public int readAll(byte[] buf, int offset, int len) throws IOException {
    return readFully(this, buf, offset, len);
  }
 
  /* When the reader reaches the end of a block and there are no checksum
   * errors, we send OP_STATUS_CHECKSUM_OK to the datanode to inform it
   * that the checksum was verified and there was no error.
   */
  void checksumOk(Socket sock) {
    try {
      OutputStream out = NetUtils.getOutputStream(sock, HdfsConstants.WRITE_TIMEOUT);
      CHECKSUM_OK.writeOutputStream(out);
      out.flush();
    } catch (IOException e) {
      // it is OK not to be able to send this.
      LOG.debug("Could not write to datanode " + sock.getInetAddress() +
                ": " + e.getMessage());
    }
  }
}
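
Usage sketch (not part of the original source): a minimal, hypothetical caller that connects to a datanode, creates a BlockReader via newBlockReader, and drains one block with readAll. The host, port, block id, generation stamp, access token, and block length below are placeholder values for illustration; in a real deployment they come from the NameNode via APIs outside this class.

import java.net.InetSocketAddress;
import java.net.Socket;

import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.security.BlockAccessToken;
import org.apache.hadoop.hdfs.server.common.HdfsConstants;

public class BlockReaderExample {
  public static void main(String[] args) throws Exception {
    // Placeholder block metadata; in practice these come from the NameNode.
    long blockId = 1L;                         // hypothetical block id
    long genStamp = 1L;                        // hypothetical generation stamp
    long blockLen = 1024L;                     // hypothetical block length
    // Assumes the Writable no-arg constructor; a real token is obtained
    // from the NameNode.
    BlockAccessToken accessToken = new BlockAccessToken();

    Socket sock = new Socket();
    sock.connect(new InetSocketAddress("datanode.example.com", 50010),
                 HdfsConstants.READ_TIMEOUT);
    try {
      BlockReader reader = BlockReader.newBlockReader(
          sock, "/user/foo/data.txt",          // file name (error messages)
          blockId, accessToken, genStamp,      // block identity and token
          0L, blockLen,                        // read the whole block
          4096, true);                         // buffer size; verify checksums
      byte[] buf = new byte[(int) blockLen];
      int nRead = reader.readAll(buf, 0, buf.length);
      System.out.println("read " + nRead + " bytes");
      reader.close();
    } finally {
      sock.close();
    }
  }
}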