Examples of MonitoredTask


Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

  private long replayRecoveredEdits(final Path edits,
      Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter)
    throws IOException {
    String msg = "Replaying edits from " + edits;
    LOG.info(msg);
    MonitoredTask status = TaskMonitor.get().createStatus(msg);

    status.setStatus("Opening logs");
    HLog.Reader reader = null;
    try {
      reader = HLog.getReader(this.fs, edits, conf);
      long currentEditSeqId = -1;
      long firstSeqIdInLog = -1;
      long skippedEdits = 0;
      long editsCount = 0;
      long intervalEdits = 0;
      HLog.Entry entry;
      Store store = null;
      boolean reported_once = false;

      try {
        // How many edits seen before we check elapsed time
        int interval = this.conf.getInt("hbase.hstore.report.interval.edits",
            2000);
        // How often to send a progress report (default 1/2 master timeout)
        int period = this.conf.getInt("hbase.hstore.report.period",
            this.conf.getInt("hbase.master.assignment.timeoutmonitor.timeout",
                180000) / 2);
        long lastReport = EnvironmentEdgeManager.currentTimeMillis();

        while ((entry = reader.next()) != null) {
          HLogKey key = entry.getKey();
          WALEdit val = entry.getEdit();

          if (reporter != null) {
            intervalEdits += val.size();
            if (intervalEdits >= interval) {
              // Number of edits interval reached
              intervalEdits = 0;
              long cur = EnvironmentEdgeManager.currentTimeMillis();
              if (lastReport + period <= cur) {
                status.setStatus("Replaying edits..." +
                    " skipped=" + skippedEdits +
                    " edits=" + editsCount);
                // Timeout reached
                if (!reporter.progress()) {
                  msg = "Progressable reporter failed, stopping replay";
                  LOG.warn(msg);
                  status.abort(msg);
                  throw new IOException(msg);
                }
                reported_once = true;
                lastReport = cur;
              }
            }
          }

          // Run the coprocessor pre-WAL-restore hook. The hook fires once per
          // WALEdit rather than once per KeyValue.
          if (coprocessorHost != null) {
            status.setStatus("Running pre-WAL-restore hook in coprocessors");
            if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
              // if bypass this log entry, ignore it ...
              continue;
            }
          }

          if (firstSeqIdInLog == -1) {
            firstSeqIdInLog = key.getLogSeqNum();
          }
          boolean flush = false;
          for (KeyValue kv: val.getKeyValues()) {
            // Check this edit is for me. Also, guard against writing the special
            // METACOLUMN info such as HBASE::CACHEFLUSH entries
            if (kv.matchingFamily(HLog.METAFAMILY) ||
                !Bytes.equals(key.getEncodedRegionName(), this.regionInfo.getEncodedNameAsBytes())) {
              skippedEdits++;
              continue;
            }
            // Figure which store the edit is meant for.
            if (store == null || !kv.matchingFamily(store.getFamily().getName())) {
              store = this.stores.get(kv.getFamily());
            }
            if (store == null) {
              // This should never happen.  Perhaps schema was changed between
              // crash and redeploy?
              LOG.warn("No family for " + kv);
              skippedEdits++;
              continue;
            }
            // Now, figure if we should skip this edit.
            if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily()
                .getName())) {
              skippedEdits++;
              continue;
            }
            currentEditSeqId = key.getLogSeqNum();
            // Once we are over the limit, restoreEdit will keep returning true to
            // flush -- but don't flush until we've played all the kvs that make up
            // the WALEdit.
            flush = restoreEdit(store, kv);
            editsCount++;
          }
          if (flush) internalFlushcache(null, currentEditSeqId, status);

          if (coprocessorHost != null) {
            coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
          }
        }
      } catch (EOFException eof) {
        Path p = HLog.moveAsideBadEditsFile(fs, edits);
        msg = "Encountered EOF. Most likely due to Master failure during " +
            "log spliting, so we have this data in another edit.  " +
            "Continuing, but renaming " + edits + " as " + p;
        LOG.warn(msg, eof);
        status.abort(msg);
      } catch (IOException ioe) {
        // If the IOE resulted from bad file format,
        // then this problem is idempotent and retrying won't help
        if (ioe.getCause() instanceof ParseException) {
          Path p = HLog.moveAsideBadEditsFile(fs, edits);
          msg = "File corruption encountered!  " +
              "Continuing, but renaming " + edits + " as " + p;
          LOG.warn(msg, ioe);
          status.setStatus(msg);
        } else {
          status.abort(StringUtils.stringifyException(ioe));
          // other IO errors may be transient (bad network connection,
          // checksum exception on one datanode, etc).  throw & retry
          throw ioe;
        }
      }
      if (reporter != null && !reported_once) {
        reporter.progress();
      }
      msg = "Applied " + editsCount + ", skipped " + skippedEdits +
        ", firstSequenceidInLog=" + firstSeqIdInLog +
        ", maxSequenceidInLog=" + currentEditSeqId + ", path=" + edits;
      status.markComplete(msg);
      LOG.debug(msg);
      return currentEditSeqId;
    } finally {
      status.cleanup();
      if (reader != null) {
        reader.close();
      }
    }
  }
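
The replay method above illustrates the MonitoredTask lifecycle that recurs throughout these examples: create a status via TaskMonitor.get().createStatus(), narrate progress with setStatus(), finish with markComplete() on success or abort() on failure, and always release the task with cleanup() in a finally block. A minimal standalone sketch of that pattern, where doWork() is a hypothetical stand-in for the real operation:

  import java.io.IOException;

  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
  import org.apache.hadoop.hbase.monitoring.TaskMonitor;

  public class MonitoredTaskLifecycleSketch {
    public void runTracked() throws IOException {
      MonitoredTask status = TaskMonitor.get().createStatus("Running tracked operation");
      try {
        status.setStatus("Preparing");
        doWork();                          // hypothetical unit of work
        status.setStatus("Finishing");
        doWork();
        status.markComplete("Operation finished");
      } catch (IOException ioe) {
        // abort() records the failure message on the task before rethrowing,
        // mirroring the error paths in the examples on this page
        status.abort("Operation failed: " + ioe.getMessage());
        throw ioe;
      } finally {
        // cleanup() runs unconditionally, matching the finally blocks above
        status.cleanup();
      }
    }

    // Hypothetical stand-in for the real work being monitored.
    private void doWork() throws IOException {
    }
  }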

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

        writer.append(new HLog.Entry(new HLogKey(regionName, tableName,
            i, time, HConstants.DEFAULT_CLUSTER_ID), edit));

        writer.close();
      }
      MonitoredTask status = TaskMonitor.get().createStatus(method);
      Map<byte[], Long> maxSeqIdInStores = new TreeMap<byte[], Long>(
          Bytes.BYTES_COMPARATOR);
      for (Store store : region.getStores().values()) {
        maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(),
            minSeqId - 1);
      }
      long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status);

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

        writer.append(new HLog.Entry(new HLogKey(regionName, tableName,
            i, time, HConstants.DEFAULT_CLUSTER_ID), edit));

        writer.close();
      }
      long recoverSeqId = 1030;
      MonitoredTask status = TaskMonitor.get().createStatus(method);
      Map<byte[], Long> maxSeqIdInStores = new TreeMap<byte[], Long>(
          Bytes.BYTES_COMPARATOR);
      for (Store store : region.getStores().values()) {
        maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(),
            recoverSeqId - 1);
      }
      long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status);

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

   * <li>Stop services and perform cleanup once stopped
   * </ol>
   */
  @Override
  public void run() {
    MonitoredTask startupStatus =
      TaskMonitor.get().createStatus("Master startup");
    startupStatus.setDescription("Master startup");
    masterStartTime = System.currentTimeMillis();
    try {
      this.registeredZKListenersBeforeRecovery = this.zooKeeper.getListeners();

      // Put up info server.
      int port = this.conf.getInt("hbase.master.info.port", 60010);
      if (port >= 0) {
        String a = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
        this.infoServer = new InfoServer(MASTER, a, port, false, this.conf);
        this.infoServer.addServlet("status", "/master-status", MasterStatusServlet.class);
        this.infoServer.addServlet("dump", "/dump", MasterDumpServlet.class);
        this.infoServer.setAttribute(MASTER, this);
        this.infoServer.start();
      }

      /*
       * Block on becoming the active master.
       *
       * We race with other masters to write our address into ZooKeeper.  If we
       * succeed, we are the primary/active master and finish initialization.
       *
       * If we do not succeed, there is another active master and we should
       * now wait until it dies to try and become the next active master.  If we
       * do not succeed on our first attempt, this is no longer a cluster startup.
       */
      becomeActiveMaster(startupStatus);

      // We are either the active master or we were asked to shutdown
      if (!this.stopped) {
        finishInitialization(startupStatus, false);
        loop();
      }
    } catch (Throwable t) {
      // HBASE-5680: Likely hadoop23 vs hadoop 20.x/1.x incompatibility
      if (t instanceof NoClassDefFoundError &&
          t.getMessage().contains("org/apache/hadoop/hdfs/protocol/FSConstants$SafeModeAction")) {
          // improved error message for this special case
          abort("HBase is having a problem with its Hadoop jars.  You may need to "
              + "recompile HBase against Hadoop version "
              +  org.apache.hadoop.util.VersionInfo.getVersion()
              + " or change your hadoop jars to start properly", t);
      } else {
        abort("Unhandled exception. Starting shutdown.", t);
      }
    } finally {
      startupStatus.cleanup();

      stopChores();
      // Wait for all the remaining region servers to report in IFF we were
      // running a cluster shutdown AND we were NOT aborting.
      if (!this.abort && this.serverManager != null &&

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

    this.zooKeeper.reconnectAfterExpiration();

    Callable<Boolean> callable = new Callable<Boolean> () {
      public Boolean call() throws InterruptedException,
          IOException, KeeperException {
        MonitoredTask status =
          TaskMonitor.get().createStatus("Recovering expired ZK session");
        try {
          if (!becomeActiveMaster(status)) {
            return Boolean.FALSE;
          }
          serverManager.disableSSHForRoot();
          serverShutdownHandlerEnabled = false;
          initialized = false;
          finishInitialization(status, true);
          return Boolean.TRUE;
        } finally {
          status.cleanup();
        }
      }
    };

    long timeout =

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

    ActiveMasterManager activeMasterManager =
      dummyMaster.getActiveMasterManager();
    assertFalse(activeMasterManager.clusterHasActiveMaster.get());

    // First test becoming the active master uninterrupted
    MonitoredTask status = Mockito.mock(MonitoredTask.class);
    clusterStatusTracker.setClusterUp();

    activeMasterManager.blockUntilBecomingActiveMaster(status);
    assertTrue(activeMasterManager.clusterHasActiveMaster.get());
    assertMaster(zk, master);
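
Because MonitoredTask is an interface, tests like the one above can substitute a Mockito mock for a real task and verify the interactions afterwards. A minimal sketch of that approach, with a hypothetical doStep() standing in for the code under test:

  import static org.mockito.Mockito.atLeastOnce;
  import static org.mockito.Mockito.mock;
  import static org.mockito.Mockito.verify;

  import org.apache.hadoop.hbase.monitoring.MonitoredTask;

  public class MonitoredTaskMockSketch {
    // Hypothetical code under test that reports its progress.
    static void doStep(MonitoredTask status) {
      status.setStatus("working");
    }

    public void testProgressIsReported() {
      MonitoredTask status = mock(MonitoredTask.class);
      doStep(status);
      // Assert that the code under test updated the task status.
      verify(status, atLeastOnce()).setStatus("working");
    }
  }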

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

   * @throws IOException If there was an error while splitting any log file
   * @return cumulative size of the logfiles split
   */
  public long splitLogDistributed(final List<Path> logDirs, PathFilter filter)
      throws IOException {
    MonitoredTask status = TaskMonitor.get().createStatus(
          "Doing distributed log split in " + logDirs);
    FileStatus[] logfiles = getFileList(logDirs, filter);
    status.setStatus("Checking directory contents...");
    LOG.debug("Scheduling batch of logs to split");
    tot_mgr_log_split_batch_start.incrementAndGet();
    LOG.info("started splitting logs in " + logDirs);
    long t = EnvironmentEdgeManager.currentTimeMillis();
    long totalSize = 0;
    TaskBatch batch = new TaskBatch();
    for (FileStatus lf : logfiles) {
      // TODO If the log file is still being written to - which is most likely
      // the case for the last log file - then its length will show up here
      // as zero. The size of such a file can only be retrieved after
      // recover-lease is done. totalSize will be under in most cases and the
      // metrics that it drives will also be under-reported.
      totalSize += lf.getLen();
      if (!enqueueSplitTask(lf.getPath().toString(), batch)) {
        throw new IOException("duplicate log split scheduled for "
            + lf.getPath());
      }
    }
    waitForSplittingCompletion(batch, status);
    if (batch.done != batch.installed) {
      batch.isDead = true;
      tot_mgr_log_split_batch_err.incrementAndGet();
      LOG.warn("error while splitting logs in " + logDirs +
      " installed = " + batch.installed + " but only " + batch.done + " done");
      String msg = "error or interrupted while splitting logs in "
        + logDirs + " Task = " + batch;
      status.abort(msg);
      throw new IOException(msg);
    }
    for (Path logDir : logDirs) {
      status.setStatus("Cleaning up log directory...");
      try {
        if (fs.exists(logDir) && !HBaseFileSystem.deleteFileFromFileSystem(fs, logDir)) {
          LOG.warn("Unable to delete log src dir. Ignoring. " + logDir);
        }
      } catch (IOException ioe) {
        FileStatus[] files = fs.listStatus(logDir);
        if (files != null && files.length > 0) {
          LOG.warn("returning success without actually splitting and " +
              "deleting all the log files in path " + logDir);
        } else {
          LOG.warn("Unable to delete log src dir. Ignoring. " + logDir, ioe);
        }
      }
      tot_mgr_log_split_batch_success.incrementAndGet();
    }
    String msg = "finished splitting (more than or equal to) " + totalSize +
        " bytes in " + batch.installed + " log files in " + logDirs + " in " +
        (EnvironmentEdgeManager.currentTimeMillis() - t) + "ms";
    status.markComplete(msg);
    LOG.info(msg);
    return totalSize;
  }

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

        writer.append(new HLog.Entry(new HLogKey(regionName, tableName, i, time,
            HConstants.DEFAULT_CLUSTER_ID), edit));

        writer.close();
      }
      MonitoredTask status = TaskMonitor.get().createStatus(method);
      Map<byte[], Long> maxSeqIdInStores = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
      for (Store store : region.getStores().values()) {
        maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), minSeqId - 1);
      }
      long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status);

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

            HConstants.DEFAULT_CLUSTER_ID), edit));

        writer.close();
      }
      long recoverSeqId = 1030;
      MonitoredTask status = TaskMonitor.get().createStatus(method);
      Map<byte[], Long> maxSeqIdInStores = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
      for (Store store : region.getStores().values()) {
        maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), recoverSeqId - 1);
      }
      long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status);

Examples of org.apache.hadoop.hbase.monitoring.MonitoredTask

   */
  public boolean close(final boolean abort) throws IOException {
    // Only allow one thread to close at a time. Serialize them so dual
    // threads attempting to close will run up against each other.
    MonitoredTask status = TaskMonitor.get().createStatus(
        "Closing entityGroup " + this + (abort ? " due to abort" : ""));

    status.setStatus("Waiting for close lock");
    try {
      synchronized (closeLock) {
        return doClose(abort, status);
      }
    } finally {
      status.cleanup();
    }
  }