Package org.apache.hadoop.hbase.master

Source Code of org.apache.hadoop.hbase.master.TestRollingRestart

/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.List;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/**
* Tests the restarting of everything as done during rolling restarts.
*/
@Category(LargeTests.class)
public class  TestRollingRestart {
  private static final Log LOG = LogFactory.getLog(TestRollingRestart.class);

  @Test (timeout=500000)
  public void testBasicRollingRestart() throws Exception {

    // Start a cluster with 2 masters and 4 regionservers
    final int NUM_MASTERS = 2;
    final int NUM_RS = 3;
    final int NUM_REGIONS_TO_CREATE = 20;

    int expectedNumRS = 3;

    // Start the cluster
    log("Starting cluster");
    Configuration conf = HBaseConfiguration.create();
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Waiting for active/ready master");
    cluster.waitForActiveAndReadyMaster();
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "testRollingRestart",
        null);
    HMaster master = cluster.getMaster();

    // Create a table with regions
    TableName table = TableName.valueOf("tableRestart");
    byte [] family = Bytes.toBytes("family");
    log("Creating table with " + NUM_REGIONS_TO_CREATE + " regions");
    HTable ht = TEST_UTIL.createTable(table, family);
    int numRegions = TEST_UTIL.createMultiRegions(conf, ht, family,
        NUM_REGIONS_TO_CREATE);
    numRegions += 1; // catalogs
    log("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    log("Disabling table\n");
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    log("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    log("Verifying only catalog and namespace regions are assigned\n");
    if (regions.size() != 2) {
      for (String oregion : regions) log("Region still online: " + oregion);
    }
    assertEquals(2, regions.size());
    log("Enabling table\n");
    TEST_UTIL.getHBaseAdmin().enableTable(table);
    log("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    log("Verifying there are " + numRegions + " assigned on cluster\n");
    regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());

    // Add a new regionserver
    log("Adding a fourth RS");
    RegionServerThread restarted = cluster.startRegionServer();
    expectedNumRS++;
    restarted.waitForServerOnline();
    log("Additional RS is online");
    log("Waiting for no more RIT");
    blockUntilNoRIT(zkw, master);
    log("Verifying there are " + numRegions + " assigned on cluster");
    assertRegionsAssigned(cluster, regions);
    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());

    // Master Restarts
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    MasterThread activeMaster = null;
    MasterThread backupMaster = null;
    assertEquals(2, masterThreads.size());
    if (masterThreads.get(0).getMaster().isActiveMaster()) {
      activeMaster = masterThreads.get(0);
      backupMaster = masterThreads.get(1);
    } else {
      activeMaster = masterThreads.get(1);
      backupMaster = masterThreads.get(0);
    }

    // Bring down the backup master
    log("Stopping backup master\n\n");
    backupMaster.getMaster().stop("Stop of backup during rolling restart");
    cluster.hbaseCluster.waitOnMaster(backupMaster);

    // Bring down the primary master
    log("Stopping primary master\n\n");
    activeMaster.getMaster().stop("Stop of active during rolling restart");
    cluster.hbaseCluster.waitOnMaster(activeMaster);

    // Start primary master
    log("Restarting primary master\n\n");
    activeMaster = cluster.startMaster();
    cluster.waitForActiveAndReadyMaster();
    master = activeMaster.getMaster();

    // Start backup master
    log("Restarting backup master\n\n");
    backupMaster = cluster.startMaster();

    assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());

    // RegionServer Restarts

    // Bring them down, one at a time, waiting between each to complete
    List<RegionServerThread> regionServers =
      cluster.getLiveRegionServerThreads();
    int num = 1;
    int total = regionServers.size();
    for (RegionServerThread rst : regionServers) {
      ServerName serverName = rst.getRegionServer().getServerName();
      log("Stopping region server " + num + " of " + total + " [ " +
          serverName + "]");
      rst.getRegionServer().stop("Stopping RS during rolling restart");
      cluster.hbaseCluster.waitOnRegionServer(rst);
      log("Waiting for RS shutdown to be handled by master");
      waitForRSShutdownToStartAndFinish(activeMaster, serverName);
      log("RS shutdown done, waiting for no more RIT");
      blockUntilNoRIT(zkw, master);
      log("Verifying there are " + numRegions + " assigned on cluster");
      assertRegionsAssigned(cluster, regions);
      expectedNumRS--;
      assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
      log("Restarting region server " + num + " of " + total);
      restarted = cluster.startRegionServer();
      restarted.waitForServerOnline();
      expectedNumRS++;
      log("Region server " + num + " is back online");
      log("Waiting for no more RIT");
      blockUntilNoRIT(zkw, master);
      log("Verifying there are " + numRegions + " assigned on cluster");
      assertRegionsAssigned(cluster, regions);
      assertEquals(expectedNumRS, cluster.getRegionServerThreads().size());
      num++;
    }
    Thread.sleep(1000);
    assertRegionsAssigned(cluster, regions);

    // TODO: Bring random 3 of 4 RS down at the same time

    ht.close();
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }

  private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master)
  throws KeeperException, InterruptedException {
    ZKAssign.blockUntilNoRIT(zkw);
    master.assignmentManager.waitUntilNoRegionsInTransition(60000);
  }

  private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster,
      ServerName serverName) throws InterruptedException {
    ServerManager sm = activeMaster.getMaster().getServerManager();
    // First wait for it to be in dead list
    while (!sm.getDeadServers().isDeadServer(serverName)) {
      log("Waiting for [" + serverName + "] to be listed as dead in master");
      Thread.sleep(1);
    }
    log("Server [" + serverName + "] marked as dead, waiting for it to " +
        "finish dead processing");
    while (sm.areDeadServersInProgress()) {
      log("Server [" + serverName + "] still being processed, waiting");
      Thread.sleep(100);
    }
    log("Server [" + serverName + "] done with server shutdown processing");
  }

  private void log(String msg) {
    LOG.debug("\n\nTRR: " + msg + "\n");
  }

  private int getNumberOfOnlineRegions(MiniHBaseCluster cluster) {
    int numFound = 0;
    for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
      numFound += rst.getRegionServer().getNumberOfOnlineRegions();
    }
    for (MasterThread mt : cluster.getMasterThreads()) {
      numFound += mt.getMaster().getNumberOfOnlineRegions();
    }
    return numFound;
  }
 
  private void assertRegionsAssigned(MiniHBaseCluster cluster,
      Set<String> expectedRegions) throws IOException {
    int numFound = getNumberOfOnlineRegions(cluster);
    if (expectedRegions.size() > numFound) {
      log("Expected to find " + expectedRegions.size() + " but only found"
          + " " + numFound);
      NavigableSet<String> foundRegions =
        HBaseTestingUtility.getAllOnlineRegions(cluster);
      for (String region : expectedRegions) {
        if (!foundRegions.contains(region)) {
          log("Missing region: " + region);
        }
      }
      assertEquals(expectedRegions.size(), numFound);
    } else if (expectedRegions.size() < numFound) {
      int doubled = numFound - expectedRegions.size();
      log("Expected to find " + expectedRegions.size() + " but found"
          + " " + numFound + " (" + doubled + " double assignments?)");
      NavigableSet<String> doubleRegions = getDoubleAssignedRegions(cluster);
      for (String region : doubleRegions) {
        log("Region is double assigned: " + region);
      }
      assertEquals(expectedRegions.size(), numFound);
    } else {
      log("Success!  Found expected number of " + numFound + " regions");
    }
  }

  private NavigableSet<String> getDoubleAssignedRegions(
      MiniHBaseCluster cluster) throws IOException {
    NavigableSet<String> online = new TreeSet<String>();
    NavigableSet<String> doubled = new TreeSet<String>();
    for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
      for (HRegionInfo region : ProtobufUtil.getOnlineRegions(
          rst.getRegionServer().getRSRpcServices())) {
        if(!online.add(region.getRegionNameAsString())) {
          doubled.add(region.getRegionNameAsString());
        }
      }
    }
    return doubled;
  }


}
TOP

Related Classes of org.apache.hadoop.hbase.master.TestRollingRestart

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.