Source Code of org.apache.hadoop.hdfs.server.datanode.BlockWithChecksumFileWriter

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.FileChannel;
import java.util.Arrays;
import java.util.zip.Checksum;

import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.datanode.BlockDataFile.RandomAccessor;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.CrcConcat;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.util.NativeCrc32;


/**
* Writes data into the block file and checksums into a separate checksum file.
*
* The on disk file format is: 
* Data file:  
*  +---------------+  
*  |               |  
*  |     Data      |  
*  |      .        |  
*  |      .        |  
*  |      .        |  
*  |      .        |  
*  |      .        |  
*  |      .        |  
*  |               |  
*  +---------------+  
*     
*  Checksum file: 
*  +----------------------+  
*  |   Checksum Header    |  
*  +----------------------+   
*  | Checksum for Chunk 1 |   
*  +----------------------+   
*  | Checksum for Chunk 2 |   
*  +----------------------+   
*  |          .           |   
*  |          .           |   
*  |          .           |   
*  +----------------------+   
*  |  Checksum for last   |   
*  |   Chunk (Partial)    |   
*  +----------------------+   
*/
public class BlockWithChecksumFileWriter extends DatanodeBlockWriter {
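  /*
   * Illustrative layout arithmetic (not part of the original source; assumes a
   * 4-byte CRC-32 per chunk and bytesPerChecksum = 512): a 1000-byte block
   * spans two chunks, so its checksum file holds the BlockMetadataHeader
   * followed by 2 * 4 = 8 bytes of checksums. In general the checksum file
   * length is getHeaderSize() + ceil(blockLen / bytesPerChecksum) * checksumSize.
   */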
  final private BlockDataFile blockDataFile;
  protected BlockDataFile.Writer blockDataWriter = null;
 
  File metafile;

  protected DataOutputStream checksumOut = null; // to crc file at local disk
  protected OutputStream cout = null; // output stream for checksum file

  public BlockWithChecksumFileWriter(BlockDataFile blockDataFile, File metafile) {
    this.blockDataFile = blockDataFile;
    this.metafile = metafile;
  }

  public void initializeStreams(int bytesPerChecksum, int checksumSize,
      Block block, String inAddr, int namespaceId, DataNode datanode)
      throws FileNotFoundException, IOException {
    if (this.blockDataWriter == null) {
      blockDataWriter = blockDataFile.getWriter(-1);
    }
    if (this.cout == null) {
      this.cout = new FileOutputStream(
          new RandomAccessFile(metafile, "rw").getFD());
    }
    checksumOut = new DataOutputStream(new BufferedOutputStream(cout,
        FSConstants.SMALL_BUFFER_SIZE));

    setParameters(bytesPerChecksum, checksumSize, block, inAddr, namespaceId,
        datanode);
  }

  @Override
  public void fadviseStream(int advise, long offset, long len)
      throws IOException {
    fadviseStream(advise, offset, len, false);
  }

  @Override
  public void fadviseStream(int advise, long offset, long len, boolean sync)
      throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("posix_fadvise with advise : " + advise + " for : "
          + blockDataFile.getFile());
    }
    blockDataWriter.posixFadviseIfPossible(offset, len, advise, sync);
  }

  @Override
  public void writeHeader(DataChecksum checksum) throws IOException {
    BlockMetadataHeader.writeHeader(checksumOut, checksum);
  }

  @Override
  public void writePacket(byte pktBuf[], int len, int dataOff,
      int pktBufStartOff, int numChunks, int packetVersion) throws IOException {
    if (packetVersion != DataTransferProtocol.PACKET_VERSION_CHECKSUM_FIRST) {
      throw new IOException(
          "non-inline checksum doesn't support packet version " + packetVersion);
    }
    if (len == 0) {
      return;
    }
   
    // finally, write the data to disk:
    blockDataWriter.write(pktBuf, dataOff, len);

    boolean lastChunkStartsFromChunkStart = false;
    if (firstChunkOffset > 0) {
      // The packet doesn't start at the beginning of a chunk, so the checksum
      // of the pre-existing partial chunk has to be concatenated with the
      // checksum of the new piece.
      int crcPart2 = DataChecksum.getIntFromBytes(pktBuf, pktBufStartOff);
      partialCrcInt = CrcConcat.concatCrc(partialCrcInt, crcPart2,
          Math.min(len, bytesPerChecksum - firstChunkOffset));
      byte[] tempBuf = new byte[4];
      DataChecksum.writeIntToBuf(partialCrcInt, tempBuf, 0);
      checksumOut.write(tempBuf);
      if (numChunks > 1) {
        // write the other chunks' checksums.
        checksumOut.write(pktBuf, pktBufStartOff + checksumSize, (numChunks - 1)
            * checksumSize);
        lastChunkStartsFromChunkStart = true;
      }
    } else {
      checksumOut.write(pktBuf, pktBufStartOff, numChunks * checksumSize);
      lastChunkStartsFromChunkStart = true;
    }
    firstChunkOffset = (firstChunkOffset + len) % bytesPerChecksum;
    if (firstChunkOffset > 0 && lastChunkStartsFromChunkStart) {
      // The last chunk is partial but starts on a chunk boundary; remember its
      // checksum so the next packet can extend it.
      partialCrcInt = DataChecksum.getIntFromBytes(pktBuf, pktBufStartOff
          + (numChunks - 1) * checksumSize);
    }
  }
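
  /*
   * Illustrative example of the checksum concatenation above (not part of the
   * original source; assumes bytesPerChecksum = 512 and a 4-byte CRC): if a
   * previous packet left a 200-byte partial chunk, firstChunkOffset is 200 and
   * partialCrcInt holds the CRC of those 200 bytes. The incoming packet's
   * first chunk CRC (crcPart2) then covers only min(len, 512 - 200) bytes of
   * new data, so the two CRCs are merged with CrcConcat.concatCrc before the
   * combined value is written to the checksum file.
   */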

  /**
   * Retrieves the offset in the block at which the next write will write
   * data.
   */
  public long getChannelPosition() throws IOException {
    return blockDataWriter.getChannelPosition();
  }
 
  private long getChecksumOffset(long offsetInBlock) {
    return BlockMetadataHeader.getHeaderSize() + offsetInBlock
        / bytesPerChecksum * checksumSize;
  }
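
  /*
   * Worked example for getChecksumOffset (illustrative only; assumes a 4-byte
   * CRC, bytesPerChecksum = 512 and the usual 7-byte metadata header):
   * offsetInBlock = 1536 comes after three full 512-byte chunks, so the
   * checksum offset is 7 + (1536 / 512) * 4 = 19 bytes into the meta file.
   */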

  @Override
  public void setPosAndRecomputeChecksumIfNeeded(long offsetInBlock, DataChecksum checksum) throws IOException {
    firstChunkOffset = (int) (offsetInBlock % bytesPerChecksum);

    if (getChannelPosition() == offsetInBlock) {
      if (firstChunkOffset > 0) {
        // Partial block, need to seek checksum stream back.
        setChecksumOffset(getChecksumOffset(offsetInBlock));
      }
      return; // nothing to do
    }
    long offsetInChecksum = getChecksumOffset(offsetInBlock);

    if (blockDataWriter != null) {
      blockDataWriter.flush();
    }
    if (checksumOut != null) {
      checksumOut.flush();
    }

    // If this is a partial chunk, then read in pre-existing checksum
    if (offsetInBlock % bytesPerChecksum != 0) {
      LOG.info("setBlockPosition trying to set position to " + offsetInBlock
          + " for block " + block
          + " which is not a multiple of bytesPerChecksum " + bytesPerChecksum);
      computePartialChunkCrc(offsetInBlock, offsetInChecksum, bytesPerChecksum, checksum);
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Changing block file offset of block " + block + " from "
          + getChannelPosition() + " to " + offsetInBlock
          + " meta file offset to " + offsetInChecksum);
    }

    // set the position of the block file
    setChannelPosition(offsetInBlock, offsetInChecksum);
  }

  /**
   * Sets the offset in the block at which the next write will write data.
   */
  public void setChannelPosition(long dataOffset, long ckOffset)
      throws IOException {
    long channelSize = blockDataWriter.getChannelSize();
    if (channelSize < dataOffset) {
      String fileName;
      if (datanode.data instanceof FSDataset) {
        FSDataset fsDataset = (FSDataset) datanode.data;
        fileName = fsDataset.getDatanodeBlockInfo(namespaceId, block)
            .getBlockDataFile().getTmpFile(namespaceId, block).toString();
      } else {
        fileName = "unknown";
      }
      String msg = "Trying to change block file offset of block " + block
          + " file " + fileName + " to " + dataOffset
          + " but actual size of file is " + blockDataWriter.getChannelSize();
      throw new IOException(msg);
    }
    if (dataOffset > channelSize) {
      throw new IOException("Set position over the end of the data file.");
    }
    if (dataOffset % bytesPerChecksum != 0 && channelSize != dataOffset) {
      DFSClient.LOG.warn("Non-inline Checksum Block " + block
          + " channel size " + channelSize + " but data starts from "
          + dataOffset);
    }
    blockDataWriter.position(dataOffset);

    setChecksumOffset(ckOffset);
  }
 
  private void setChecksumOffset(long ckOffset) throws IOException {
    FileOutputStream file = (FileOutputStream) cout;
    if (ckOffset > file.getChannel().size()) {
      throw new IOException("Set position over the end of the checksum file.");
    }
    file.getChannel().position(ckOffset);
  }

  /**
   * Reads in the partial chunk and computes the checksum of the pre-existing
   * data in that partial chunk.
   */
  private void computePartialChunkCrc(long blkoff, long ckoff,
      int bytesPerChecksum, DataChecksum checksum) throws IOException {

    // find offset of the beginning of partial chunk.
    //
    int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
    int checksumSize = checksum.getChecksumSize();
    blkoff = blkoff - sizePartialChunk;
    LOG.info("computePartialChunkCrc sizePartialChunk " + sizePartialChunk
        + " block " + block + " offset in block " + blkoff
        + " offset in metafile " + ckoff);

    // create an input stream from the block file
    // and read in partial crc chunk into temporary buffer
    //
    byte[] buf = new byte[sizePartialChunk];
    byte[] crcbuf = new byte[checksumSize];
    FileInputStream dataIn = null, metaIn = null;
   
    try {

      DatanodeBlockInfo info = datanode.data.getDatanodeBlockInfo(namespaceId,
          block);
      if (info == null) {
        throw new IOException("Block " + block
            + " does not exist in volumeMap.");
      }
      File blockFile = info.getDataFileToRead();
      if (blockFile == null) {
        blockFile = info.getBlockDataFile().getTmpFile(namespaceId, block);
      }
      RandomAccessFile blockInFile = new RandomAccessFile(blockFile, "r");
     
      if (blkoff > 0) {
        blockInFile.seek(blkoff);
      }
      File metaFile = getMetaFile(blockFile, block);
      RandomAccessFile metaInFile = new RandomAccessFile(metaFile, "r");
      if (ckoff > 0) {
        metaInFile.seek(ckoff);
      }
      dataIn = new FileInputStream(blockInFile.getFD());
      metaIn = new FileInputStream(metaInFile.getFD());

      IOUtils.readFully(dataIn, buf, 0, sizePartialChunk);

      // open meta file and read in crc value computed earlier
      IOUtils.readFully(metaIn, crcbuf, 0, crcbuf.length);
    } finally {
      if (dataIn != null) {
        dataIn.close();
      }
      if (metaIn != null) {
        metaIn.close();
      }
    }

    // compute crc of partial chunk from data read in the block file.
    Checksum partialCrc = new NativeCrc32();
    partialCrc.update(buf, 0, sizePartialChunk);
    LOG.info("Read in partial CRC chunk from disk for block " + block);

    // paranoia! verify that the pre-computed crc matches what we
    // recalculated just now
    if (partialCrc.getValue() != FSInputChecker.checksum2long(crcbuf)) {
      String msg = "Partial CRC " + partialCrc.getValue()
          + " does not match value computed the "
          + " last time file was closed "
          + FSInputChecker.checksum2long(crcbuf);
      throw new IOException(msg);
    }
    // LOG.debug("Partial CRC matches 0x" +
    // Long.toHexString(partialCrc.getValue()));
   
    partialCrcInt = (int) partialCrc.getValue();
  }
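
  /*
   * Illustrative scenario for computePartialChunkCrc (not part of the original
   * source; assumes bytesPerChecksum = 512 and a 4-byte CRC): appending at
   * offsetInBlock = 700 gives sizePartialChunk = 700 % 512 = 188, so the 188
   * pre-existing bytes starting at blkoff = 512 are re-read, their CRC is
   * recomputed and checked against the stored value at ckoff, and the result
   * is kept in partialCrcInt for writePacket to extend.
   */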

  /**
   * Flush the data and checksum data out to the streams. Call sync to make
   * sure the data is actually written to disk.
   *
   * @throws IOException
   */
  @Override
  public void flush(boolean forceSync)
      throws IOException {
    if (checksumOut != null) {
      checksumOut.flush();
      if (forceSync && (cout instanceof FileOutputStream)) {
        ((FileOutputStream) cout).getChannel().force(true);
      }
    }
    if (blockDataWriter != null) {
      blockDataWriter.flush();
      if (forceSync) {
        blockDataWriter.force(true);
      }
    }
  }

  @Override
  public void fileRangeSync(long lastBytesToSync, int flags) throws IOException {
    if (cout instanceof FileOutputStream && lastBytesToSync > 0) {
      FileChannel fc = ((FileOutputStream) cout).getChannel();
      long pos = fc.position();
      long startOffset = pos - lastBytesToSync;
      if (startOffset < 0) {
        startOffset = 0;
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("file_range_sync " + block + " channel position " + pos
            + " offset " + startOffset);
      }
      blockDataWriter.syncFileRangeIfPossible(startOffset, pos
          - startOffset, flags);
    }
  }
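
  /*
   * Illustrative use of fileRangeSync (not part of the original source): after
   * the channel position has advanced to 4 MB, calling it with
   * lastBytesToSync = 1 MB hands only the trailing [3 MB, 4 MB) range of the
   * block file to syncFileRangeIfPossible (sync_file_range, where available).
   */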
 
  public void truncateBlock(long oldBlockFileLen, long newlen)
      throws IOException {
    if (newlen == 0) {
      // Special case for truncating to 0 length, since there's no previous
      // chunk.
      RandomAccessor ra = blockDataFile.getRandomAccessor();
      try {
        // truncate blockFile
        ra.setLength(newlen);
      } finally {
        ra.close();
      }
      // update metaFile
      RandomAccessFile metaRAF = new RandomAccessFile(metafile, "rw");
      try {
        metaRAF.setLength(BlockMetadataHeader.getHeaderSize());
      } finally {
        metaRAF.close();
      }
      return;
    }
    DataChecksum dcs = BlockMetadataHeader.readHeader(metafile).getChecksum();
    int checksumsize = dcs.getChecksumSize();
    int bpc = dcs.getBytesPerChecksum();
    long newChunkCount = (newlen - 1) / bpc + 1;
    long newmetalen = BlockMetadataHeader.getHeaderSize() + newChunkCount
        * checksumsize;
    long lastchunkoffset = (newChunkCount - 1) * bpc;
    int lastchunksize = (int) (newlen - lastchunkoffset);
    byte[] b = new byte[Math.max(lastchunksize, checksumsize)];

    RandomAccessor ra = blockDataFile.getRandomAccessor();
    try {
      // truncate blockFile
      ra.setLength(newlen);

      // read last chunk
      ra.seek(lastchunkoffset);
      ra.readFully(b, 0, lastchunksize);
    } finally {
      ra.close();
    }

    // compute checksum
    dcs.update(b, 0, lastchunksize);
    dcs.writeValue(b, 0, false);

    // update metaFile
    RandomAccessFile metaRAF = new RandomAccessFile(metafile, "rw");
    try {
      metaRAF.setLength(newmetalen);
      metaRAF.seek(newmetalen - checksumsize);
      metaRAF.write(b, 0, checksumsize);
    } finally {
      metaRAF.close();
    }
  }
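
  /*
   * Worked truncation arithmetic (illustrative only; assumes
   * bytesPerChecksum = 512 and a 4-byte CRC): truncating to newlen = 1000
   * gives newChunkCount = (1000 - 1) / 512 + 1 = 2, lastchunkoffset = 512 and
   * lastchunksize = 488; the last 488 bytes are re-read, their CRC is
   * recomputed, and the meta file is cut to header + 2 * 4 bytes with the
   * final checksum rewritten in place.
   */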

  @Override
  public void close() throws IOException {
    close(0);
  }

  public void close(int fadvise) throws IOException {
    IOException ioe = null;

    // close checksum file
    try {
      if (checksumOut != null) {
        try {
          checksumOut.flush();
          if (datanode.syncOnClose && (cout instanceof FileOutputStream)) {
            ((FileOutputStream) cout).getChannel().force(true);
          }
        } finally {
          checksumOut.close();         
          checksumOut = null;
        }
      }
    } catch (IOException e) {
      ioe = e;
    }
    // close block file
    try {
      if (blockDataWriter != null) {
        try {
          blockDataWriter.flush();
          if (datanode.syncOnClose) {
            blockDataWriter.force(true);
          }
          if (fadvise != 0) {
            fadviseStream(fadvise, 0, 0, true);
          }
        } finally {
          blockDataWriter.close();
          blockDataWriter = null;
        }
      }
    } catch (IOException e) {
      ioe = e;
    }

    // disk check
    // We don't check disk for ClosedChannelException as close() can be
    // called twice and it is possible that out.close() throws.
    // No need to check or recheck disk then.
    //
    if (ioe != null) {
      if (!(ioe instanceof ClosedChannelException)) {
        datanode.checkDiskError(ioe);
      }
      throw ioe;
    }
  }

  static String getMetaFileName(String blockFileName, long genStamp) {
    return blockFileName + "_" + genStamp + FSDataset.METADATA_EXTENSION;
  }

  public static File getMetaFile(File f , Block b) {
    return new File(getMetaFileName(f.getAbsolutePath(),
                                    b.getGenerationStamp()));
  }
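
  /*
   * Naming example (illustrative; assumes FSDataset.METADATA_EXTENSION is
   * ".meta"): for a block file named blk_1073741825 with generation stamp
   * 1001, the corresponding meta file is blk_1073741825_1001.meta in the same
   * directory.
   */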

  /** Find the corresponding metadata file for a given block file */
  public static File findMetaFile(final File blockFile) throws IOException {
    return findMetaFile(blockFile, false);
  }

  static File findMetaFile(final File blockFile, boolean missingOk)
    throws IOException {
    final String prefix = blockFile.getName() + "_";
    final File parent = blockFile.getParentFile();
    File[] matches = parent.listFiles(new FilenameFilter() {
      public boolean accept(File dir, String name) {
        return dir.equals(parent)
            && name.startsWith(prefix) && name.endsWith(FSDataset.METADATA_EXTENSION);
      }
    });

    if (matches == null || matches.length == 0) {
      if (missingOk) {
        return null;
      } else {
        throw new IOException("Meta file not found, blockFile=" + blockFile);
      }
    }
    else if (matches.length > 1) {
      throw new IOException("Found more than one meta files: "
          + Arrays.asList(matches));
    }
    return matches[0];
  }

}