/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;
import java.net.SocketException;
import java.nio.channels.FileChannel;
import java.util.Arrays;

import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.SocketOutputStream;
import org.apache.hadoop.util.ChecksumUtil;
import org.apache.hadoop.util.CrcConcat;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;

/**
* Read from blocks with separate checksum files.
* Block file name:
* blk_(blockId)
*
* Checksum file name:
* blk_(blockId)_(generation_stamp).meta
*
* The on disk file format is:
* Data file keeps just data in the block:
*
*  +---------------+
*  |               |
*  |     Data      |
*  |      .        |
*  |      .        |
*  |      .        |
*  |      .        |
*  |      .        |
*  |      .        |
*  |               |
*  +---------------+
*  Checksum file:
*  +----------------------+
*  |   Checksum Header    |
*  +----------------------+
*  | Checksum for Chunk 1 |
*  +----------------------+
*  | Checksum for Chunk 2 |
*  +----------------------+
*  |          .           |
*  |          .           |
*  |          .           |
*  +----------------------+
*  |  Checksum for last   |
*  |   Chunk (Partial)    |
*  +----------------------+
*
*/
public class BlockWithChecksumFileReader extends DatanodeBlockReader {
  private InputStreamWithChecksumFactory streamFactory;
  private DataInputStream checksumIn; // checksum datastream
  private BlockDataFile.Reader blockDataFileReader;
  boolean useTransferTo = false;
  MemoizedBlock memoizedBlock;

  BlockWithChecksumFileReader(int namespaceId, Block block,
      boolean isFinalized,  boolean ignoreChecksum,
      boolean verifyChecksum, boolean corruptChecksumOk,
      InputStreamWithChecksumFactory streamFactory) throws IOException {
    super(namespaceId, block, isFinalized, ignoreChecksum, verifyChecksum,
        corruptChecksumOk);
    this.streamFactory = streamFactory;
    this.checksumIn = streamFactory.getChecksumStream();
    this.block = block;
  }
 
  @Override
  public void fadviseStream(int advise, long offset, long len)
      throws IOException {
    blockDataFileReader.posixFadviseIfPossible(offset, len, advise);
  }

  private void initializeNullChecksum() {
    checksumIn = null;
    // This only decides the buffer size. Use BUFFER_SIZE?
    checksum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL,
        16 * 1024);
  }

  public DataChecksum getChecksumToSend(long blockLength) throws IOException {
    if (!corruptChecksumOk || checksumIn != null) {

      // read and handle the common header here. For now just a version
      try {
        BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
        short version = header.getVersion();

        if (version != FSDataset.FORMAT_VERSION_NON_INLINECHECKSUM) {
          LOG.warn("Wrong version (" + version + ") for metadata file for "
              + block + " ignoring ...");
        }
        checksum = header.getChecksum();
      } catch (IOException ioe) {
        if (blockLength == 0) {
          initializeNullChecksum();
        } else {
          throw ioe;
        }
      }
    } else {
      LOG.warn("Could not find metadata file for " + block);
      initializeNullChecksum();
    }
    super.getChecksumInfo(blockLength);
    return checksum;

  }

  public void initialize(long offset, long blockLength)
      throws IOException {
    // seek to the right offsets
    if (offset > 0) {
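      // Each bytesPerChecksum bytes of block data is covered by one
      // checksumSize-byte checksum in the meta file, so starting at 'offset'
      // means skipping the checksums of all complete chunks before it.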
      long checksumSkip = (offset / bytesPerChecksum) * checksumSize;
      // note: the block data file reader is given an explicit offset on every
      // read, so only the checksum stream needs to be skipped here
      if (checksumSkip > 0) {
        // Should we use seek() for checksum file as well?
        IOUtils.skipFully(checksumIn, checksumSkip);
      }
    }

    blockDataFileReader = streamFactory.getBlockDataFileReader();
    memoizedBlock = new MemoizedBlock(blockLength, streamFactory, block);
  }

  public boolean prepareTransferTo() throws IOException {
    useTransferTo = true;
    return useTransferTo;
  }

  @Override
  public void sendChunks(OutputStream out, byte[] buf, long offset,
      int checksumOff, int numChunks, int len, BlockCrcUpdater crcUpdater,
      int packetVersion) throws IOException {
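    // In the CHECKSUM_FIRST packet layout, buf carries the checksums of all
    // numChunks chunks starting at checksumOff, immediately followed by the
    // chunk data starting at checksumOff + numChunks * checksumSize.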
    if (packetVersion != DataTransferProtocol.PACKET_VERSION_CHECKSUM_FIRST) {
      throw new IOException("packet version " + packetVersion
          + " is not supported by non-inline checksum blocks.");
    }

    int checksumLen = numChunks * checksumSize;

    if (checksumSize > 0 && checksumIn != null) {
      try {
        checksumIn.readFully(buf, checksumOff, checksumLen);
        if (dnData != null) {
          dnData.recordReadChunkCheckSumTime();
        }
        if (crcUpdater != null) {
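          // Fold each chunk checksum just read from the meta file into the
          // running block-level CRC, using the length of the data chunk it
          // covers (the last chunk may be partial).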
          long tempOffset = offset;
          long remain = len;
          for (int i = 0; i < checksumLen; i += checksumSize) {
            long chunkSize = (remain > bytesPerChecksum) ? bytesPerChecksum
                : remain;
            crcUpdater.updateBlockCrc(tempOffset, (int) chunkSize,
                DataChecksum.getIntFromBytes(buf, checksumOff + i));
            remain -= chunkSize;
          }
        }
      } catch (IOException e) {
        LOG.warn(" Could not read or failed to veirfy checksum for data"
            + " at offset " + offset + " for block " + block + " got : "
            + StringUtils.stringifyException(e));
        IOUtils.closeStream(checksumIn);
        checksumIn = null;
        if (corruptChecksumOk) {
          if (checksumOff < checksumLen) {
            // Just fill the array with zeros.
            Arrays.fill(buf, checksumOff, checksumLen, (byte) 0);
            if (dnData != null) {
              dnData.recordReadChunkCheckSumTime();
            }
          }
        } else {
          throw e;
        }
      }
    }

    int dataOff = checksumOff + checksumLen;
   
    if (!useTransferTo) {
      // normal transfer
      blockDataFileReader.readFully(buf, dataOff, len, offset, true);
     
      if (dnData != null) {
        dnData.recordReadChunkDataTime();
      }

      if (verifyChecksum) {
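        // Recompute the checksum of each chunk of data just read and compare
        // it against the corresponding checksum bytes from the meta file.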
        int dOff = dataOff;
        int cOff = checksumOff;
        int dLeft = len;

        for (int i = 0; i < numChunks; i++) {
          checksum.reset();
          int dLen = Math.min(dLeft, bytesPerChecksum);
          checksum.update(buf, dOff, dLen);
          if (!checksum.compare(buf, cOff)) {
            throw new ChecksumException("Checksum failed at "
                + (offset + len - dLeft), len);
          }
          dLeft -= dLen;
          dOff += dLen;
          cOff += checksumSize;
        }
       
        if (dnData != null) {
          dnData.recordVerifyCheckSumTime();
        }
      }

      // only recompute checksum if we can't trust the meta data due to
      // concurrent writes
      if (memoizedBlock.hasBlockChanged(len, offset)) {
        ChecksumUtil.updateChunkChecksum(buf, checksumOff, dataOff, len,
            checksum);
        if (dnData != null) {
          dnData.recordUpdateChunkCheckSumTime();
        }
      }

      try {
        out.write(buf, 0, dataOff + len);
        if (dnData != null) {
          dnData.recordSendChunkToClientTime();
        }
      } catch (IOException e) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("IOException when reading block " + block + " offset " + offset, e);
        }
        throw BlockSender.ioeToSocketException(e);
      }
    } else {
      try {
        // use transferTo(). Checks on out and blockIn are already done.
        SocketOutputStream sockOut = (SocketOutputStream) out;

        if (memoizedBlock.hasBlockChanged(len, offset)) {
          blockDataFileReader.readFully(buf, dataOff, len, offset, true);
          if (dnData != null) {
            dnData.recordReadChunkDataTime();
          }

          ChecksumUtil.updateChunkChecksum(buf, checksumOff, dataOff, len,
              checksum);
          if (dnData != null) {
            dnData.recordUpdateChunkCheckSumTime();
          }
         
          sockOut.write(buf, 0, dataOff + len);
          if (dnData != null) {
            dnData.recordSendChunkToClientTime();
          }
        } else {
          // first write the packet
          sockOut.write(buf, 0, dataOff);
          // no need to flush since we know out is not a buffered stream
          blockDataFileReader.transferToSocketFully(sockOut, offset, len);
          if (dnData != null) {
            dnData.recordTransferChunkToClientTime();
          }
        }

      } catch (IOException e) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("IOException when reading block " + block + " offset "
              + offset, e);
        }
        /*
         * exception while writing to the client (well, with transferTo(), it
         * could also be while reading from the local file).
         */
        throw BlockSender.ioeToSocketException(e);
      }
    }
  }
 
  @Override
  public int getPreferredPacketVersion() {
    return DataTransferProtocol.PACKET_VERSION_CHECKSUM_FIRST;
  }

  public void close() throws IOException {
    IOException ioe = null;

    // close checksum file
    if (checksumIn != null) {
      try {
        checksumIn.close();
      } catch (IOException e) {
        ioe = e;
      }
      checksumIn = null;
    }
    // throw IOException if there is any
    if (ioe != null) {
      throw ioe;
    }
  }

  /**
   * helper class used to track if a block's meta data is verifiable or not
   */
  class MemoizedBlock {
    // visible block length
    private long blockLength;
    private final Block block;
    private final InputStreamWithChecksumFactory isf;

    private MemoizedBlock(long blockLength,
        InputStreamWithChecksumFactory isf, Block block) {
      this.blockLength = blockLength;
      this.isf = isf;
      this.block = block;
    }

    // logic: if we are starting or ending on a partial chunk and the block
    // has more data than we were told at construction, the block has 'changed'
    // in a way that we care about (ie, we can't trust crc data)
    boolean hasBlockChanged(long dataLen, long offset) throws IOException {
      if (isFinalized) {
        // A block that was finalized when this reader was opened but shows an
        // unmatched size at close time is treated as an error case. This may
        // give false positives for the append() case; we accept that
        // trade-off to avoid false negatives and return false here, so data
        // integrity is still guaranteed by checksum checking.
        return false;
      }

      // How we detect whether the file has changed depends on whether we are
      // using transferTo() (and hence FileChannels) or normal buffered reads.
      if (useTransferTo) {
        long currentLength = blockDataFileReader.size();

        return (offset % bytesPerChecksum != 0 || dataLen
            % bytesPerChecksum != 0)
            && currentLength > blockLength;

      } else {
        FSDatasetInterface ds = null;
        if (isf instanceof DatanodeBlockReader.BlockInputStreamFactory) {
          ds = ((DatanodeBlockReader.BlockInputStreamFactory) isf).getDataset();
        }

        // offset is the offset into the block
        return (offset % bytesPerChecksum != 0 || dataLen % bytesPerChecksum != 0)
            && ds != null
            && ds.getOnDiskLength(namespaceId, block) > blockLength;
      }
    }
  }
 
  public static interface InputStreamWithChecksumFactory extends
      BlockSender.InputStreamFactory {
    public InputStream createStream(long offset) throws IOException;
    public DataInputStream getChecksumStream() throws IOException;
  }

  /** Find the metadata file for the specified block file.
   * Return the generation stamp from the name of the metafile.
   */
  static long getGenerationStampFromSeperateChecksumFile(String[] listdir, String blockName) {
    for (int j = 0; j < listdir.length; j++) {
      String path = listdir[j];
      if (!path.startsWith(blockName)) {
        continue;
      }
      String[] vals = StringUtils.split(path, '_');
      if (vals.length != 3) {     // blk, blkid, genstamp.meta
        continue;
      }
      String[] str = StringUtils.split(vals[2], '.');
      if (str.length != 2) {
        continue;
      }
      return Long.parseLong(str[0]);
    }
    DataNode.LOG.warn("Block " + blockName +
                      " does not have a metafile!");
    return Block.GRANDFATHER_GENERATION_STAMP;
  }
 

  /**
   * Parse the generation stamp out of the name of the meta file, given the
   * block file and its meta file.
   * @param blockFile the block data file
   * @param metaFile the metadata (checksum) file of the block
   * @return the generation stamp encoded in the meta file name
   * @throws IOException if the generation stamp cannot be parsed
   */
  static long parseGenerationStampInMetaFile(File blockFile, File metaFile
      ) throws IOException {
    String metaname = metaFile.getName();
    String gs = metaname.substring(blockFile.getName().length() + 1,
        metaname.length() - FSDataset.METADATA_EXTENSION.length());
    try {
      return Long.parseLong(gs);
    } catch(NumberFormatException nfe) {
      throw (IOException)new IOException("blockFile=" + blockFile
          + ", metaFile=" + metaFile).initCause(nfe);
    }
  }

  /**
   * This class provides the input stream and length of the metadata
   * of a block
   *
   */
  static class MetaDataInputStream extends FilterInputStream {
    MetaDataInputStream(InputStream stream, long len) {
      super(stream);
      length = len;
    }
    private long length;
   
    public long getLength() {
      return length;
    }
  }
 
  static protected File getMetaFile(FSDatasetInterface dataset, int namespaceId,
      Block b) throws IOException {
    return BlockWithChecksumFileWriter.getMetaFile(dataset.getBlockFile(namespaceId, b), b);
  }
 
  /**
   * Does the meta file exist for this block?
   * @param namespaceId - parent namespace id
   * @param b - the block
   * @return true if the meta file for the specified block exists
   * @throws IOException
   */
  static public boolean metaFileExists(FSDatasetInterface dataset, int namespaceId, Block b) throws IOException {
    return getMetaFile(dataset, namespaceId, b).exists();
  }

  /**
   * Returns metaData of block b as an input stream (and its length)
   * @param namespaceId - parent namespace id
   * @param b - the block
   * @return the metadata input stream;
   * @throws IOException
   */
  static public MetaDataInputStream getMetaDataInputStream(
      FSDatasetInterface dataset, int namespace, Block b) throws IOException {
    File checksumFile = getMetaFile(dataset, namespace, b);
    return new MetaDataInputStream(new FileInputStream(checksumFile),
                                                    checksumFile.length());
  }
 
  static byte[] getMetaData(FSDatasetInterface dataset, int namespaceId,
      Block block) throws IOException {
    MetaDataInputStream checksumIn = null;
    try {
      checksumIn = getMetaDataInputStream(dataset, namespaceId, block);

      long fileSize = checksumIn.getLength();

      if (fileSize >= 1L << 31 || fileSize <= 0) {
        throw new IOException("Unexpected size for checksumFile of block"
            + block);
      }

      byte[] buf = new byte[(int) fileSize];
      IOUtils.readFully(checksumIn, buf, 0, buf.length);
      return buf;
    } finally {
      IOUtils.closeStream(checksumIn);
    }
  }
 
  /**
   * Calculate the CRC checksum of the whole block, implemented by
   * concatenating the checksums of all the chunks.
   *
   * @param datanode the DataNode that serves the block
   * @param ri replica information, used for the visible length of the block
   * @param namespaceId parent namespace id
   * @param block the block whose CRC is calculated
   * @return the CRC32 of the whole visible block
   * @throws IOException
   */
  static public int getBlockCrc(DataNode datanode, ReplicaToRead ri,
      int namespaceId, Block block) throws IOException {
    InputStream rawStreamIn = null;
    DataInputStream streamIn = null;
    try {
      int bytesPerCRC;
      int checksumSize;

      long crcPerBlock;

      rawStreamIn = BlockWithChecksumFileReader.getMetaDataInputStream(
          datanode.data, namespaceId, block);
      streamIn = new DataInputStream(new BufferedInputStream(rawStreamIn,
          FSConstants.BUFFER_SIZE));

      final BlockMetadataHeader header = BlockMetadataHeader
          .readHeader(streamIn);
      final DataChecksum checksum = header.getChecksum();
      if (checksum.getChecksumType() != DataChecksum.CHECKSUM_CRC32) {
        throw new IOException("File Checksum now is only supported for CRC32");
      }

      bytesPerCRC = checksum.getBytesPerChecksum();
      checksumSize = checksum.getChecksumSize();
      crcPerBlock = (((BlockWithChecksumFileReader.MetaDataInputStream) rawStreamIn)
          .getLength() - BlockMetadataHeader.getHeaderSize()) / checksumSize;

      int blockCrc = 0;
      byte[] buffer = new byte[checksumSize];
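      // The whole-block CRC starts as the first chunk's CRC; each following
      // chunk's CRC is folded in with CrcConcat.concatCrc using the length of
      // that chunk (only the last chunk may be partial).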
      for (int i = 0; i < crcPerBlock; i++) {
        IOUtils.readFully(streamIn, buffer, 0, buffer.length);
        int intChecksum = ((buffer[0] & 0xff) << 24)
            | ((buffer[1] & 0xff) << 16) | ((buffer[2] & 0xff) << 8)
            | ((buffer[3] & 0xff));

        if (i == 0) {
          blockCrc = intChecksum;
        } else {
          int chunkLength;
          if (i != crcPerBlock - 1 || ri.getBytesVisible() % bytesPerCRC == 0) {
            chunkLength = bytesPerCRC;
          } else {
            chunkLength = (int) ri.getBytesVisible() % bytesPerCRC;
          }
          blockCrc = CrcConcat.concatCrc(blockCrc, intChecksum, chunkLength);
        }
      }
      return blockCrc;
    } finally {
      if (streamIn != null) {
        IOUtils.closeStream(streamIn);
      }
      if (rawStreamIn != null) {
        IOUtils.closeStream(rawStreamIn);
      }
    }
  }
 
  static long readBlockAccelerator(Socket s, File dataFile, Block block,
      long startOffset, long length, DataNode datanode) throws IOException {
    File checksumFile = BlockWithChecksumFileWriter.getMetaFile(dataFile, block);
    FileInputStream datain = new FileInputStream(dataFile);
    FileInputStream metain = new FileInputStream(checksumFile);
    FileChannel dch = datain.getChannel();
    FileChannel mch = metain.getChannel();

    // read in type of crc and bytes-per-checksum from metadata file
    int versionSize = 2; // the first two bytes in the meta file are the version
    byte[] cksumHeader = new byte[versionSize + DataChecksum.HEADER_LEN];
    int numread = metain.read(cksumHeader);
    if (numread != versionSize + DataChecksum.HEADER_LEN) {
      String msg = "readBlockAccelerator: metafile header should be atleast " +
                   (versionSize + DataChecksum.HEADER_LEN) + " bytes " +
                   " but could read only " + numread + " bytes.";
      LOG.warn(msg);
      throw new IOException(msg);
    }
    DataChecksum ckHdr = DataChecksum.newDataChecksum(cksumHeader, versionSize);

    int type = ckHdr.getChecksumType();
    int bytesPerChecksum =  ckHdr.getBytesPerChecksum();
    long cheaderSize = DataChecksum.getChecksumHeaderSize();

    // align the startOffset with the previous bytesPerChecksum boundary.
    long delta = startOffset % bytesPerChecksum;
    startOffset -= delta;
    length += delta;

    // align the length to encompass the entire last checksum chunk
    delta = length % bytesPerChecksum;
    if (delta != 0) {
      delta = bytesPerChecksum - delta;
      length += delta;
    }
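    // Example: with bytesPerChecksum = 512, a request for startOffset = 700,
    // length = 100 is widened to startOffset = 512, length = 512, so that only
    // whole chunks (and their checksums) are transferred.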
   
    // find the offset in the metafile
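    // Meta file layout: a 2-byte version, then the DataChecksum header, then
    // one checksumSize-byte checksum per chunk, so chunk k's checksum starts
    // at versionSize + cheaderSize + k * checksumSize.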
    long startChunkNumber = startOffset / bytesPerChecksum;
    long numChunks = length / bytesPerChecksum;
    long checksumSize = ckHdr.getChecksumSize();
    long startMetaOffset = versionSize + cheaderSize + startChunkNumber * checksumSize;
    long metaLength = numChunks * checksumSize;

    // get a connection back to the client
    SocketOutputStream out = new SocketOutputStream(s, datanode.socketWriteTimeout);

    try {

      // write out the checksum type and bytesperchecksum to client
      // skip the first two bytes that describe the version
      long val = mch.transferTo(versionSize, cheaderSize, out);
      if (val != cheaderSize) {
        String msg = "readBlockAccelerator for block  " + block +
                     " at offset " + 0 +
                     " but could not transfer checksum header.";
        LOG.warn(msg);
        throw new IOException(msg);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("readBlockAccelerator metaOffset "  +  startMetaOffset +
                  " mlength " +  metaLength);
      }
      // write out the checksums back to the client
      val = mch.transferTo(startMetaOffset, metaLength, out);
      if (val != metaLength) {
        String msg = "readBlockAccelerator for block  " + block +
                     " at offset " + startMetaOffset +
                     " but could not transfer checksums of size " +
                     metaLength + ". Transferred only " + val;
        LOG.warn(msg);
        throw new IOException(msg);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("readBlockAccelerator dataOffset " + startOffset +
                  " length  "  + length);
      }
      // send data block back to client
      long read = dch.transferTo(startOffset, length, out);
      if (read != length) {
        String msg = "readBlockAccelerator for block  " + block +
                     " at offset " + startOffset +
                     " but block size is only " + length +
                     " and could transfer only " + read;
        LOG.warn(msg);
        throw new IOException(msg);
      }
      return read;
    } catch ( SocketException ignored ) {
      // It's OK for the remote side to close the connection at any time.
      datanode.myMetrics.blocksRead.inc();
      return -1;
    } catch ( IOException ioe ) {
      /* What exactly should we do here?
       * Earlier version shutdown() datanode if there is disk error.
       */
      LOG.warn(datanode.getDatanodeInfo() +
          ":readBlockAccelerator:Got exception while serving " +
          block + " to " + s.getInetAddress() + ":\n" +
          StringUtils.stringifyException(ioe));
      throw ioe;
    } finally {
      IOUtils.closeStream(out);
      IOUtils.closeStream(datain);
      IOUtils.closeStream(metain);
    }
   
  }
 
  public static boolean isMetaFilename(String name) {
    return name.startsWith(Block.BLOCK_FILE_PREFIX)
        && name.endsWith(Block.METADATA_EXTENSION);
  }
 
  /**
   * Returns an array of two longs: the first one is the block id, and the
   * second one is the generation stamp. The method works under the assumption
   * that the meta file name has the following format:
   * "blk_<blkid>_<genstamp>.meta"
   */
  public static long[] parseMetafileName(String path) {
    String[] groundSeparated = StringUtils.split(path, '_');
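    // e.g. "blk_4567_1234.meta" splits into { "blk", "4567", "1234.meta" }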
    if (groundSeparated.length != 3) { // blk, blkid, genstamp.meta
      throw new IllegalArgumentException("Not a valid meta file name");
    }
    String[] dotSeparated = StringUtils.split(groundSeparated[2], '.');
    if (dotSeparated.length != 2) {
      throw new IllegalArgumentException("Not a valid meta file name");
    }
    return new long[] { Long.parseLong(groundSeparated[1]),
        Long.parseLong(dotSeparated[0]) };
  }
}