/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;

import java.io.File;
import java.io.FileWriter;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Random;
import org.junit.Test;
import org.junit.After;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.hdfs.DFSUtil;

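/**
 * Tests RaidShell -fsck against a MiniDFS cluster.
 *
 * Test geometry: each test file has FILE_BLOCKS = 6 blocks arranged in
 * two stripes of STRIPE_BLOCKS = 3 blocks, protected by the xor codec
 * (one parity block per stripe). A stripe therefore survives the loss of
 * any single block, data or parity; losing two blocks in the same stripe
 * makes the file irrecoverable. RaidShell.getCorruptCount() reports the
 * number of irrecoverably corrupt files, which each test below checks
 * against the blocks it removed.
 */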
public class TestRaidShellFsck {
  final static Log LOG =
    LogFactory.getLog("org.apache.hadoop.raid.TestRaidShellFsck");
  final static String TEST_DIR =
    new File(System.
             getProperty("test.build.data", "build/contrib/raid/test/data")).
    getAbsolutePath();
  final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").
    getAbsolutePath();
  final static long RELOAD_INTERVAL = 1000;
  final static int NUM_DATANODES = 4;
  final static int STRIPE_BLOCKS = 3; // number of blocks per stripe
  final static int FILE_BLOCKS = 6; // number of blocks per test file
  final static short REPL = 1; // replication factor before raiding
  final static long BLOCK_SIZE = 8192L; // block size in bytes
  final static String DIR_PATH = "/user/pkling/raidtest";
  final static Path FILE_PATH0 =
    new Path("/user/pkling/raidtest/raidfsck.test");
  final static Path FILE_PATH1 =
    new Path("/user/pkling/raidtest/raidfsck2.test");
  final static Path RAID_PATH = new Path("/raid/user/pkling/raidtest");
  final static String HAR_NAME = "raidtest_raid.har";
  final static String RAID_DIR = "/raid";

  Configuration conf = null;
  Configuration raidConf = null;
  Configuration clientConf = null;
  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  RaidNode rnode = null;
 
 
  RaidShell shell = null;
  String[] args = null;


  /**
   * creates a MiniDFS cluster with two raided files in it
   */
  private void setUp(boolean doHar) throws IOException, ClassNotFoundException {
    final int timeBeforeHar;
    if (doHar) {
      timeBeforeHar = 0;
    } else {
      timeBeforeHar = -1;
    }


    new File(TEST_DIR).mkdirs(); // Make sure data directory exists
    conf = new Configuration();

    Utils.loadTestCodecs(conf, 3, 1, 3, "/raid", "/raidrs");
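    // the loadTestCodecs arguments are assumed (per this module's Utils) to
    // be (conf, stripeLength, xorParityLength, rsParityLength, xorDir,
    // rsDir): 3-block stripes matching STRIPE_BLOCKS, with xor parity under
    // /raid and rs parity under /raidrs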

    conf.set("raid.config.file", CONFIG_FILE);
    conf.setBoolean("raid.config.reload", true);
    conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);

    // scan all policies once every 5 seconds
    conf.setLong("raid.policy.rescan.interval", 5000);

    // do not use map-reduce cluster for Raiding
    conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    // use local block fixer
    conf.set("raid.blockfix.classname",
             "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");

    conf.set("raid.server.address", "localhost:0");

    conf.setInt("dfs.corruptfilesreturned.max", 500);

    conf.setBoolean("dfs.permissions", false);

    cluster = new MiniDFSCluster(conf, NUM_DATANODES, true, null);
    cluster.waitActive();
    dfs = (DistributedFileSystem) cluster.getFileSystem();
    String namenode = dfs.getUri().toString();

    FileSystem.setDefaultUri(conf, namenode);

    FileWriter fileWriter = new FileWriter(CONFIG_FILE);
    fileWriter.write("<?xml version=\"1.0\"?>\n");
    String str =
      "<configuration> " +
      "    <policy name = \"RaidTest1\"> " +
      "      <srcPath prefix=\"" + DIR_PATH + "\"/> " +
      "      <codecId>xor</codecId> " +
      "      <destPath> " + RAID_DIR + " </destPath> " +
      "      <property> " +
      "        <name>targetReplication</name> " +
      "        <value>1</value> " +
      "        <description>after RAIDing, decrease the replication " +
      "factor of a file to this value.</description> " +
      "      </property> " +
      "      <property> " +
      "        <name>metaReplication</name> " +
      "        <value>1</value> " +
      "        <description> replication factor of parity file</description> " +
      "      </property> " +
      "      <property> " +
      "        <name>modTimePeriod</name> " +
      "        <value>2000</value> " +
      "        <description>time (milliseconds) after a file is modified " +
      "to make it a candidate for RAIDing</description> " +
      "      </property> ";
  
    if (timeBeforeHar >= 0) {
      str +=
        "      <property> " +
        "        <name>time_before_har</name> " +
        "        <value>" + timeBeforeHar + "</value> " +
        "        <description> amount of time waited before har'ing parity " +
        "files</description> " +
        "     </property> ";
    }

    str +=
      "    </policy>" +
      "</configuration>";

    fileWriter.write(str);
    fileWriter.close();

    createTestFile(FILE_PATH0);
    createTestFile(FILE_PATH1);
   
    Path[] filePaths = { FILE_PATH0, FILE_PATH1 };
    raidTestFiles(RAID_PATH, filePaths, doHar);

    clientConf = new Configuration(raidConf);
    clientConf.set("fs.hdfs.impl",
                   "org.apache.hadoop.hdfs.DistributedRaidFileSystem");
    clientConf.set("fs.raid.underlyingfs.impl",
                   "org.apache.hadoop.hdfs.DistributedFileSystem");

    // prepare shell and arguments
    shell = new RaidShell(clientConf);
    args = new String[2];
    args[0] = "-fsck";
    args[1] = DIR_PATH;
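    // equivalent to invoking RaidShell from the command line with
    // "-fsck /user/pkling/raidtest"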

  }
 
  /**
   * creates a test file consisting of random data
   */
  private void createTestFile(Path filePath) throws IOException {
    Random rand = new Random();
    FSDataOutputStream stm = dfs.create(filePath, true,
                                        conf.getInt("io.file.buffer.size",
                                                    4096), REPL, BLOCK_SIZE);

    final byte[] b = new byte[(int) BLOCK_SIZE];
    for (int i = 0; i < FILE_BLOCKS; i++) {
      rand.nextBytes(b);
      stm.write(b);
    }
    stm.close();
    LOG.info("test file created");

  }

  /**
   * raids the test files and waits until the parity files exist
   */
  private void raidTestFiles(Path raidPath, Path[] filePaths, boolean doHar)
    throws IOException, ClassNotFoundException {
    // create RaidNode
    raidConf = new Configuration(conf);
    raidConf.setInt(RaidNode.RAID_PARITY_HAR_THRESHOLD_DAYS_KEY, 0);
    raidConf.setInt("raid.blockfix.interval", 1000);
    // the RaidNode does the raiding inline (instead of submitting to MR node)
    raidConf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
    rnode = RaidNode.createRaidNode(null, raidConf);

    for (Path filePath: filePaths) {
      long waitStart = System.currentTimeMillis();
      boolean raided = false;
     
      Path parityFilePath = new Path(RAID_DIR,
                                     filePath.toString().substring(1));
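      // the parity path mirrors the source path under /raid, e.g.
      // /raid/user/pkling/raidtest/raidfsck.test for FILE_PATH0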

      while (!raided) {
        try {
          FileStatus[] listPaths = dfs.listStatus(raidPath);
          if (listPaths != null) {
            if (doHar) {
              // case with HAR
              for (FileStatus f: listPaths) {
                if (f.getPath().toString().endsWith(".har")) {
                  // check if the parity file is in the index
                  final Path indexPath = new Path(f.getPath(), "_index");
                  final FileStatus indexFileStatus =
                    dfs.getFileStatus(indexPath);
                  final HarIndex harIndex =
                    new HarIndex(dfs.open(indexPath), indexFileStatus.getLen());
                  final HarIndex.IndexEntry indexEntry =
                    harIndex.findEntryByFileName(parityFilePath.toString());
                  if (indexEntry != null) {
                    LOG.info("raid file " + parityFilePath.toString() +
                             " found in Har archive: " +
                             f.getPath().toString() +
                             " ts=" + indexEntry.mtime);
                    raided = true;
                    break;
                  }
                }
              }
             
            } else {
              // case without HAR
              for (FileStatus f : listPaths) {
                Path found = new Path(f.getPath().toUri().getPath());
                if (parityFilePath.equals(found)) {
                  LOG.info("raid file found: " + f.getPath().toString());
                  raided = true;
                  break;
                }
              }
            }
          }
        } catch (FileNotFoundException ignore) {
        }
        if (!raided) {
          if (System.currentTimeMillis() > waitStart + 40000L) {
            LOG.error("parity file not created after 40s");
            throw new IOException("parity file not HARed after 40s");
          } else {
            try {
              Thread.sleep(1000);
            } catch (InterruptedException ignore) {
            }
          }
        }
      }
    }

    rnode.stop();
    rnode.join();
    rnode = null;
    LOG.info("test file raided");   
  }

  /**
   * sleeps for up to 20s until the number of corrupt files
   * in the file system is equal to the number specified
   */
  public static void waitUntilCorruptFileCount(DistributedFileSystem dfs,
                                               int corruptFiles)
    throws IOException, InterruptedException {
    long waitStart = System.currentTimeMillis();
    while (DFSUtil.getCorruptFiles(dfs).length != corruptFiles &&
        System.currentTimeMillis() < waitStart + 20000L) {
      Thread.sleep(1000);
    }
    assertEquals("expected " + corruptFiles + " corrupt files",
        corruptFiles, DFSUtil.getCorruptFiles(dfs).length);
  }

  /**
   * removes a specified block from MiniDFS storage and reports it as corrupt
   */
  private void removeAndReportBlock(DistributedFileSystem blockDfs,
                                    Path filePath,
                                    LocatedBlock block)
    throws IOException {
    TestRaidDfs.corruptBlock(filePath, block.getBlock(),
        NUM_DATANODES, true, cluster);
  
    // report deleted block to the name node
    LocatedBlock[] toReport = { block };
    blockDfs.getClient().namenode.reportBadBlocks(toReport);

  }


  /**
   * removes a file block in the specified stripe
   */
  private void removeFileBlock(Path filePath, int stripe, int blockInStripe)
    throws IOException {
    LocatedBlocks fileBlocks = dfs.getClient().namenode.
      getBlockLocations(filePath.toString(), 0, FILE_BLOCKS * BLOCK_SIZE);
    if (fileBlocks.locatedBlockCount() != FILE_BLOCKS) {
      throw new IOException("expected " + FILE_BLOCKS +
                            " file blocks but found " +
                            fileBlocks.locatedBlockCount());
    }
    if (blockInStripe >= STRIPE_BLOCKS) {
      throw new IOException("blockInStripe is " + blockInStripe +
                            " but must be smaller than " + STRIPE_BLOCKS);
    }
    LocatedBlock block = fileBlocks.get(stripe * STRIPE_BLOCKS + blockInStripe);
    removeAndReportBlock(dfs, filePath, block);
    LOG.info("removed file " + filePath.toString() + " block " +
             stripe * STRIPE_BLOCKS + " in stripe " + stripe);
  }

  /**
   * removes a parity block in the specified stripe
   */
  private void removeParityBlock(Path filePath, int stripe) throws IOException {
    // find parity file
    ParityFilePair ppair =
        ParityFilePair.getParityFile(Codec.getCodec("xor"), filePath, conf);
    String parityPathStr = ppair.getPath().toUri().getPath();
    LOG.info("parity path: " + parityPathStr);
    FileSystem parityFS = ppair.getFileSystem();
    if (!(parityFS instanceof DistributedFileSystem)) {
      throw new IOException("parity file is not on distributed file system");
    }
    DistributedFileSystem parityDFS = (DistributedFileSystem) parityFS;

   
    // now corrupt the block corresponding to the stripe selected
    FileStatus parityFileStatus =
      parityDFS.getFileStatus(new Path(parityPathStr));
    long parityBlockSize = parityFileStatus.getBlockSize();
    long parityFileLength = parityFileStatus.getLen();
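    // number of blocks in the parity file, rounding up if the last
    // block is partial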
    long parityFileLengthInBlocks = (parityFileLength / parityBlockSize) +
      (((parityFileLength % parityBlockSize) == 0) ? 0L : 1L);
    if (parityFileLengthInBlocks <= stripe) {
      throw new IOException("selected stripe " + stripe +
                            " but parity file only has " +
                            parityFileLengthInBlocks + " blocks");
    }
    if (parityBlockSize != BLOCK_SIZE) {
      throw new IOException("file block size is " + BLOCK_SIZE +
                            " but parity file block size is " +
                            parityBlockSize);
    }
    LocatedBlocks parityFileBlocks = parityDFS.getClient().namenode.
      getBlockLocations(parityPathStr, 0, parityFileLength);
    if (parityFileBlocks.locatedBlockCount() != parityFileLengthInBlocks) {
      throw new IOException("expected " + parityFileLengthInBlocks +
                            " parity file blocks but got " +
                            parityFileBlocks.locatedBlockCount() +
                            " blocks");
    }
    LocatedBlock parityFileBlock = parityFileBlocks.get(stripe);
    removeAndReportBlock(parityDFS, new Path(parityPathStr), parityFileBlock);
    LOG.info("removed parity file block/stripe " + stripe +
             " for " + filePath.toString());

  }

  /**
   * removes a block from the har part file
   */
  private void removeHarParityBlock(int block) throws IOException {
    Path harPath = new Path(RAID_PATH, HAR_NAME);
    FileStatus [] listPaths = dfs.listStatus(harPath);
   
    boolean deleted = false;
   
    for (FileStatus f: listPaths) {
      if (f.getPath().getName().startsWith("part-")) {
        final Path partPath = new Path(f.getPath().toUri().getPath());
        final LocatedBlocks partBlocks  = dfs.getClient().namenode.
          getBlockLocations(partPath.toString(),
                            0,
                            f.getLen());
       
        if (partBlocks.locatedBlockCount() <= block) {
          throw new IOException("invalid har block " + block);
        }

        final LocatedBlock partBlock = partBlocks.get(block);
        removeAndReportBlock(dfs, partPath, partBlock);
        LOG.info("removed block " + block + "/" +
                 partBlocks.locatedBlockCount() +
                 " of file " + partPath.toString() +
                 " block size " + partBlock.getBlockSize());
        deleted = true;
        break;
      }
    }

    if (!deleted) {
      throw new IOException("cannot find part file in " + harPath.toString());
    }
  }

  /**
   * checks fsck with no missing blocks
   */
  @Test
  public void testClean() throws Exception {
    LOG.info("testClean");
    setUp(false);
    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertTrue("fsck should return 0, but returns " +
               Integer.toString(result), result == 0);
  }


  /**
   * checks fsck with a missing file block but no missing parity block
   */
  @Test
  public void testFileBlockMissing() throws Exception {
    LOG.info("testFileBlockMissing");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 0, 0);
    waitUntilCorruptFileCount(dfs, 1);
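    // the missing data block can be rebuilt from the two surviving data
    // blocks of stripe 0 plus the stripe's xor parity block, so fsck
    // should report no irrecoverably corrupt files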

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with a missing parity block but no missing file block
   */
  @Test
  public void testParityBlockMissing() throws Exception {
    LOG.info("testParityBlockMissing");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeParityBlock(FILE_PATH0, 0);
    waitUntilCorruptFileCount(dfs, 1);

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with a missing file block and a missing parity block
   * in different stripes
   */
  @Test
  public void testFileBlockAndParityBlockMissingInDifferentStripes()
    throws Exception {
    LOG.info("testFileBlockAndParityBlockMissingInDifferentStripes");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 0, 0);
    waitUntilCorruptFileCount(dfs, 1);
    removeParityBlock(FILE_PATH0, 1);
    waitUntilCorruptFileCount(dfs, 2);

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with a missing file block and a missing parity block
   * in the same stripe
   */
  @Test
  public void testFileBlockAndParityBlockMissingInSameStripe()
    throws Exception {
    LOG.info("testFileBlockAndParityBlockMissingInSameStripe");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeParityBlock(FILE_PATH0, 1);
    waitUntilCorruptFileCount(dfs, 1);
    removeFileBlock(FILE_PATH0, 1, 0);
    waitUntilCorruptFileCount(dfs, 2);
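    // stripe 1 has lost both a data block and its parity block, so it
    // cannot be reconstructed and the file counts as irrecoverably corrupt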

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 1", 1, result);
  }

  /**
   * checks fsck with two missing file blocks in the same stripe
   */
  @Test
  public void test2FileBlocksMissingInSameStripe()
    throws Exception {
    LOG.info("test2FileBlocksMissingInSameStripe");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 1, 1);
    waitUntilCorruptFileCount(dfs, 1);
    removeFileBlock(FILE_PATH0, 1, 0);
    waitUntilCorruptFileCount(dfs, 1);
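    // two of the three data blocks in stripe 1 are gone; a single xor
    // parity block can recover only one of them, so the file is corrupt
    // (HDFS still counts one corrupt file, since both missing blocks
    // belong to the same file)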

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 1", 1, result);
  }

  /**
   * checks fsck with two missing file blocks in different stripes
   */
  @Test
  public void test2FileBlocksMissingInDifferentStripes()
    throws Exception {
    LOG.info("test2FileBlocksMissingInDifferentStripes");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 1, 1);
    waitUntilCorruptFileCount(dfs, 1);
    removeFileBlock(FILE_PATH0, 0, 0);
    waitUntilCorruptFileCount(dfs, 1);
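    // one missing block per stripe is within what xor parity can repair,
    // so fsck should find nothing irrecoverable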

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with a file block missing (HAR)
   * uses 2 files to verify the HAR offset logic in RaidShell fsck
   * both files have one corrupt block; the parity blocks are clean
   *
   * parity blocks in har (file.stripe):
   * +-----+ +-----+ +-----+  +-----+
   * | 0.0 | | 0.1 | | 1.0 |  | 1.1 |
   * +-----+ +-----+ +-----+  +-----+
   *  0       1       2        3
   *
   */
  @Test
  public void testFileBlockMissingHar()
    throws Exception {
    LOG.info("testFileBlockMissingHar");
    setUp(true);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 1, 1);
    removeFileBlock(FILE_PATH1, 1, 1);
    waitUntilCorruptFileCount(dfs, 2);

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }

  /**
   * checks fsck with file blocks and a har parity block missing
   * uses 2 files to verify the HAR offset logic in RaidShell fsck
   *
   * parity blocks in har (file.stripe):
   * +-----+ +-----+ +-----+  +-----+
   * | 0.0 | | 0.1 | | 1.0 |  | 1.1 |
   * +-----+ +-----+ +-----+  +-----+
   *  0       1       2        3
   *
   * corrupt file 0, stripe 0 file block 0
   * corrupt file 0, stripe 1 file block 0
   * corrupt file 1, stripe 0 file block 0
   * corrupt file 1, stripe 1 file block 0
   * corrupt har block 2
   * only file 1 should be corrupt
   */
  @Test
  public void testFileBlockAndParityBlockMissingHar()
    throws Exception {
    LOG.info("testFileBlockAndParityBlockMissingHar2");
    setUp(true);
    waitUntilCorruptFileCount(dfs, 0);
    removeFileBlock(FILE_PATH0, 0, 0);
    removeFileBlock(FILE_PATH0, 1, 0);
    removeFileBlock(FILE_PATH1, 0, 0);
    removeFileBlock(FILE_PATH1, 1, 0);
    removeHarParityBlock(2);
    waitUntilCorruptFileCount(dfs, 3);
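    // per the diagram above, har block 2 holds the parity for file 1,
    // stripe 0, so file 1 has lost both data and parity in that stripe
    // and is irrecoverable; file 0 lost only one data block per stripe
    // and can still be rebuilt. (HDFS reports 3 corrupt files: the two
    // source files and the har part file.)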

    ToolRunner.run(shell, args);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 1", 1, result);
  }

  /**
   * checks that fsck does not report a corrupt file outside
   * the specified path
   */
  @Test
  public void testPathFilter()
    throws Exception {
    LOG.info("testPathFilter");
    setUp(false);
    waitUntilCorruptFileCount(dfs, 0);
    removeParityBlock(FILE_PATH0, 1);
    waitUntilCorruptFileCount(dfs, 1);
    removeFileBlock(FILE_PATH0, 1, 0);
    waitUntilCorruptFileCount(dfs, 2);

    String[] otherArgs = new String[2];
    otherArgs[0] = "-fsck";
    otherArgs[1] = "/user/pkling/other";
    ToolRunner.run(shell, otherArgs);
    int result = shell.getCorruptCount();

    assertEquals("fsck should return 0", 0, result);
  }


  @After
  public void tearDown() throws Exception {
    if (rnode != null) {
      rnode.stop();
      rnode.join();
      rnode = null;
    }

    if (cluster != null) {
      cluster.shutdown();
      cluster = null;
    }
   
    dfs = null;

    LOG.info("Test cluster shut down");
  }
 

}