Package org.apache.accumulo.server.gc

Source Code of org.apache.accumulo.server.gc.SimpleGarbageCollector

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.server.gc;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.Constants;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.IsolatedScanner;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.impl.HdfsZooInstance;
import org.apache.accumulo.core.client.impl.ScannerImpl;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.KeyExtent;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.gc.thrift.GCMonitorService;
import org.apache.accumulo.core.gc.thrift.GCStatus;
import org.apache.accumulo.core.gc.thrift.GcCycleStats;
import org.apache.accumulo.core.gc.thrift.GCMonitorService.Iface;
import org.apache.accumulo.core.security.thrift.AuthInfo;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.accumulo.core.util.ServerServices;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.accumulo.core.util.ServerServices.Service;
import org.apache.accumulo.core.zookeeper.ZooLock;
import org.apache.accumulo.core.zookeeper.ZooUtil;
import org.apache.accumulo.core.zookeeper.ZooLock.LockLossReason;
import org.apache.accumulo.core.zookeeper.ZooLock.LockWatcher;
import org.apache.accumulo.server.Accumulo;
import org.apache.accumulo.server.master.state.tables.TableManager;
import org.apache.accumulo.server.master.state.tables.TableState;
import org.apache.accumulo.server.security.SecurityConstants;
import org.apache.accumulo.server.util.Halt;
import org.apache.accumulo.server.util.OfflineMetadataScanner;
import org.apache.accumulo.server.util.TServerUtils;
import org.apache.accumulo.server.util.TabletIterator;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Logger;
import org.apache.thrift.transport.TServerTransport;
import org.apache.zookeeper.KeeperException;

import cloudtrace.instrument.CountSampler;
import cloudtrace.instrument.Sampler;
import cloudtrace.instrument.Span;
import cloudtrace.instrument.Trace;
import cloudtrace.instrument.thrift.TraceWrap;
import cloudtrace.thrift.TInfo;

public class SimpleGarbageCollector implements Iface {
  private static final Text EMPTY_TEXT = new Text();
 
  // how much of the JVM's available memory should it use gathering candidates
  private static final float CANDIDATE_MEMORY_PERCENTAGE = 0.75f;
  private boolean candidateMemExceeded;
 
  private static final Logger log = Logger.getLogger(SimpleGarbageCollector.class);
 
  private Instance instance;
  private AuthInfo credentials;
  private long gcStartDelay, gcDelay;
  private boolean checkForBulkProcessingFiles;
  private FileSystem fs;
  private Option optSafeMode, optOffline, optVerboseMode, optAddress;
  private boolean safemode, offline, verbose;
  private String address;
  private CommandLine commandLine;
  private ZooLock lock;
  private Key continueKey = null;
 
  private GCStatus status = new GCStatus(new GcCycleStats(), new GcCycleStats(), new GcCycleStats(), new GcCycleStats());
 
  private int numDeleteThreads;
 
  public static void main(String[] args) throws UnknownHostException, IOException {
    Accumulo.init("gc");
    SimpleGarbageCollector gc = new SimpleGarbageCollector(args);
    gc.run();
  }
 
  public SimpleGarbageCollector(String[] args) throws UnknownHostException {
    Options opts = new Options();
    optVerboseMode = new Option("v", "verbose", false, "extra information will get printed to stdout also");
    optSafeMode = new Option("s", "safemode", false, "safe mode will not delete files");
    optOffline = new Option("o", "offline", false,
        "offline mode will run once and check data files directly; this is dangerous if accumulo is running or not shut down properly");
    optAddress = new Option("a", "address", true, "specify our local address");
    opts.addOption(optVerboseMode);
    opts.addOption(optSafeMode);
    opts.addOption(optOffline);
    opts.addOption(optAddress);
   
    try {
      fs = FileSystem.get(CachedConfiguration.getInstance());
      commandLine = new BasicParser().parse(opts, args);
      if (commandLine.getArgs().length != 0)
        throw new ParseException("Extraneous arguments");
     
      safemode = commandLine.hasOption(optSafeMode.getOpt());
      offline = commandLine.hasOption(optOffline.getOpt());
      verbose = commandLine.hasOption(optVerboseMode.getOpt());
      address = commandLine.getOptionValue(optAddress.getOpt());
    } catch (ParseException e) {
      String str = "Can't parse the command line options";
      log.fatal(str, e);
      throw new IllegalArgumentException(str, e);
    } catch (IOException e) {
      String str = "Can't get default file system";
      log.fatal(str, e);
      throw new IllegalStateException(str, e);
    }
   
    instance = HdfsZooInstance.getInstance();
    credentials = SecurityConstants.systemCredentials;
   
    gcStartDelay = AccumuloConfiguration.getSystemConfiguration().getTimeInMillis(Property.GC_CYCLE_START);
    gcDelay = AccumuloConfiguration.getSystemConfiguration().getTimeInMillis(Property.GC_CYCLE_DELAY);
    numDeleteThreads = AccumuloConfiguration.getSystemConfiguration().getCount(Property.GC_DELETE_THREADS);
    log.info("start delay: " + (offline ? 0 + " sec (offline)" : gcStartDelay + " milliseconds"));
    log.info("time delay: " + gcDelay + " milliseconds");
    log.info("safemode: " + safemode);
    log.info("offline: " + offline);
    log.info("verbose: " + verbose);
    log.info("memory threshold: " + CANDIDATE_MEMORY_PERCENTAGE + " of " + Runtime.getRuntime().maxMemory() + " bytes");
    log.info("delete threads: " + numDeleteThreads);
    Accumulo.enableTracing(address, "gc");
  }
 
  private void run() {
    long tStart, tStop;
   
    // Sleep for an initial period, giving the master time to start up and
    // old data files to be unused
    if (!offline) {
      try {
        getZooLock(startStatsService());
      } catch (Exception ex) {
        log.error(ex, ex);
        System.exit(1);
      }
     
      try {
        log.debug("Sleeping for " + gcStartDelay + " milliseconds before beginning garbage collection cycles");
        Thread.sleep(gcStartDelay);
      } catch (InterruptedException e) {
        log.warn(e, e);
        return;
      }
    }
   
    Sampler sampler = new CountSampler(100);
   
    while (true) {
      if (sampler.next())
        Trace.on("gc");
     
      Span gcSpan = Trace.start("loop");
      tStart = System.currentTimeMillis();
      try {
        // STEP 1: gather candidates
        System.gc(); // make room
        candidateMemExceeded = false;
        checkForBulkProcessingFiles = false;
       
        Span candidatesSpan = Trace.start("getCandidates");
        status.current.started = System.currentTimeMillis();
        SortedSet<String> candidates = getCandidates();
        status.current.candidates = candidates.size();
        candidatesSpan.stop();
       
        // STEP 2: confirm deletes
        // WARNING: This line is EXTREMELY IMPORTANT.
        // You MUST confirm candidates are okay to delete
        Span confirmDeletesSpan = Trace.start("confirmDeletes");
        confirmDeletes(candidates);
        status.current.inUse = status.current.candidates - candidates.size();
        confirmDeletesSpan.stop();
       
        // STEP 3: delete files
        if (safemode) {
          if (verbose)
            System.out.println("SAFEMODE: There are " + candidates.size() + " data file candidates marked for deletion.\n"
                + "          Examine the log files to identify them.\n" + "          They can be removed by executing: bin/accumulo gc --offline\n"
                + "WARNING:  Do not run the garbage collector in offline mode unless you are positive\n"
                + "          that the accumulo METADATA table is in a clean state, or that accumulo\n"
                + "          has not yet been run, in the case of an upgrade.");
          log.info("SAFEMODE: Listing all data file candidates for deletion");
          for (String s : candidates)
            log.info("SAFEMODE: " + s);
          log.info("SAFEMODE: End candidates for deletion");
        } else {
          Span deleteSpan = Trace.start("deleteFiles");
          deleteFiles(candidates);
          log.info("Number of data file candidates for deletion: " + status.current.candidates);
          log.info("Number of data file candidates still in use: " + status.current.inUse);
          log.info("Number of successfully deleted data files: " + status.current.deleted);
          log.info("Number of data files delete failures: " + status.current.errors);
          deleteSpan.stop();
         
          // check bulk dirs we just to deleted files from to see if empty
          deleteEmptyBulkDirs(candidates);
        }
       
        status.current.finished = System.currentTimeMillis();
        status.last = status.current;
        status.current = new GcCycleStats();
       
      } catch (Exception e) {
        log.error(e, e);
      }
      tStop = System.currentTimeMillis();
      log.info(String.format("Collect cycle took %.2f seconds", ((tStop - tStart) / 1000.0)));
     
      if (offline)
        break;
     
      if (candidateMemExceeded) {
        log.info("Gathering of candidates was interrupted due to memory shortage. Bypassing cycle delay to collect the remaining candidates.");
        continue;
      }
     
      // Clean up any unused write-ahead logs
      Span waLogs = Trace.start("walogs");
      try {
        log.info("Beginning garbage collection of write-ahead logs");
        GarbageCollectWriteAheadLogs.collect(fs, status);
      } catch (Exception e) {
        log.error(e, e);
      }
      waLogs.stop();
      gcSpan.stop();
     
      Trace.offNoFlush();
      try {
        log.debug("Sleeping for " + gcDelay + " milliseconds");
        Thread.sleep(gcDelay);
      } catch (InterruptedException e) {
        log.warn(e, e);
        return;
      }
    }
  }
 
  private void getZooLock(InetSocketAddress addr) throws KeeperException, InterruptedException {
    String address = addr.getHostName() + ":" + addr.getPort();
    String path = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZGC_LOCK;
   
    LockWatcher lockWatcher = new LockWatcher() {
      public void lostLock(LockLossReason reason) {
        Halt.halt("GC lock in zookeeper lost (reason = " + reason + "), exiting!");
      }
    };
   
    while (true) {
      lock = new ZooLock(path);
      if (lock.tryLock(lockWatcher, new ServerServices(address, Service.GC_CLIENT).toString().getBytes())) {
        break;
      }
      UtilWaitThread.sleep(1000);
    }
  }
 
  private InetSocketAddress startStatsService() throws UnknownHostException {
    GCMonitorService.Processor processor = new GCMonitorService.Processor(TraceWrap.service(this));
    TServerTransport serverTransport = null;
    int port = AccumuloConfiguration.getSystemConfiguration().getPort(Property.GC_PORT);
    try {
      serverTransport = TServerUtils.openPort(port);
    } catch (Exception ex) {
      log.fatal(ex, ex);
      throw new RuntimeException(ex);
    }
    TServerUtils.startTServer(processor, serverTransport, this.getClass().getSimpleName(), "GC Monitor Service", -1);
    return new InetSocketAddress(Accumulo.getLocalAddress(new String[] {"--address", address}), port);
  }
 
  /**
   * This method gets a set of candidates for deletion by scanning the METADATA table deleted flag keyspace
   */
  private SortedSet<String> getCandidates() {
    TreeSet<String> candidates = new TreeSet<String>();
   
    if (offline) {
      checkForBulkProcessingFiles = true;
      try {
        for (String validExtension : FileOperations.getValidExtensions()) {
          for (FileStatus stat : fs.globStatus(new Path(Constants.getTablesDir() + "/*/*/*." + validExtension))) {
            String cand = stat.getPath().toUri().getPath();
            if (!cand.contains(Constants.getRootTabletDir())) {
              candidates.add(cand.substring(Constants.getTablesDir().length()));
              log.debug("Offline candidate: " + cand);
            }
          }
        }
      } catch (IOException e) {
        log.error("Unable to check the filesystem for offline candidates. Removing all candidates for deletion to be safe.", e);
        candidates.clear();
      }
      return candidates;
    }
   
    Scanner scanner = new ScannerImpl(instance, credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS);
   
    // scan the reserved keyspace for deletes
    scanner.setRange(Constants.METADATA_DELETES_KEYSPACE);
   
    if (continueKey != null) {
      // want to ensure GC makes progress... if the 1st N deletes are stable and we keep processing them, then will never inspect deletes after N
      scanner.setRange(new Range(continueKey, true, Constants.METADATA_DELETES_KEYSPACE.getEndKey(), Constants.METADATA_DELETES_KEYSPACE.isEndKeyInclusive()));
      continueKey = null;
    } else {
      // scan the reserved keyspace for deletes
      scanner.setRange(Constants.METADATA_DELETES_KEYSPACE);
    }
   
    // find candidates for deletion; chop off the prefix
    checkForBulkProcessingFiles = false;
    for (Entry<Key,Value> entry : scanner) {
      String cand = entry.getKey().getRow().toString().substring(Constants.METADATA_DELETE_FLAG_PREFIX.length());
      candidates.add(cand);
      checkForBulkProcessingFiles |= cand.toLowerCase(Locale.ENGLISH).contains("bulk");
      if (almostOutOfMemory()) {
        candidateMemExceeded = true;
        log.info("List of delete candidates has exceeded the memory threshold. Attempting to delete what has been gathered so far.");
        continueKey = entry.getKey();
        break;
      }
    }
   
    return candidates;
  }
 
  static public boolean almostOutOfMemory() {
    Runtime runtime = Runtime.getRuntime();
    return runtime.totalMemory() - runtime.freeMemory() > CANDIDATE_MEMORY_PERCENTAGE * runtime.maxMemory();
  }
 
  /**
   * This method removes candidates from the candidate list under two conditions: 1. They are in the same folder as a bulk processing file, if that option is
   * selected 2. They are still in use in the file column family in the METADATA table
   */
  private void confirmDeletes(SortedSet<String> candidates) throws AccumuloException {
    // skip candidates that are in a bulk processing folder
    if (checkForBulkProcessingFiles) {
      log.debug("Checking for bulk processing files");
      HashSet<String> bulks = new HashSet<String>();
      for (String candidate : candidates) {
        if (candidate.contains("/bulk_"))
          bulks.add(candidate.substring(0, candidate.lastIndexOf('/')));
      }
      log.debug("... looking at " + bulks.size() + " bulk directories");
      TreeSet<String> processing = new TreeSet<String>();
      try {
        for (String bulk : bulks) {
          Path glob = new Path(Constants.getTablesDir() + bulk + "/processing_proc_*");
          log.debug("Looking for processing flags in " + glob);
          FileStatus[] flags = fs.globStatus(glob);
          if (flags != null && flags.length > 0) {
            String parent = flags[0].getPath().getParent().toUri().getPath();
            processing.add(parent);
            log.debug("Folder contains bulk processing file: " + parent);
          }
        }
      } catch (IOException e) {
        log.error("Unable to check the filesystem for bulk processing files. Removing all candidates for deletion to be safe.", e);
        candidates.clear();
        return;
      }
      log.debug("Found " + processing.size() + " processing files");
     
      // WARNING: This block is IMPORTANT
      // You MUST REMOVE candidates that are in the same folder as a bulk
      // processing file!
      Iterator<String> iter = candidates.iterator();
      while (iter.hasNext()) {
        String next = Constants.getTablesDir() + iter.next();
        if (processing.contains(new Path(next).getParent().toUri().getPath())) {
          iter.remove();
          log.debug("Candidate is in a bulk folder with a processing file: " + next);
        }
      }
    }
   
    // skip candidates that are still in use in the file column family in
    // the metadata table
    Scanner scanner;
    if (offline) {
      try {
        scanner = new OfflineMetadataScanner();
      } catch (IOException e) {
        throw new IllegalStateException("Unable to create offline metadata scanner", e);
      }
    } else {
      scanner = new IsolatedScanner(new ScannerImpl(instance, credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS));
    }
   
    scanner.setRange(Constants.METADATA_KEYSPACE);
    scanner.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY);
    scanner.fetchColumnFamily(Constants.METADATA_SCANFILE_COLUMN_FAMILY);
   
    TabletIterator tabletIterator = new TabletIterator(scanner, false, false);
   
    while (tabletIterator.hasNext()) {
      Map<Key,Value> tabletKeyValues = tabletIterator.next();
     
      for (Entry<Key,Value> entry : tabletKeyValues.entrySet()) {
        if (entry.getKey().getColumnFamily().equals(Constants.METADATA_DATAFILE_COLUMN_FAMILY)
            || entry.getKey().getColumnFamily().equals(Constants.METADATA_SCANFILE_COLUMN_FAMILY)) {
          String table = new String(KeyExtent.tableOfMetadataRow(entry.getKey().getRow()));
          String delete = "/" + table + entry.getKey().getColumnQualifier().toString();
         
          // WARNING: This line is EXTREMELY IMPORTANT.
          // You MUST REMOVE candidates that are still in use
          if (candidates.remove(delete))
            log.debug("Candidate was still in use in the METADATA table: " + delete);
        } else
          throw new AccumuloException("Scanner over metadata table returned unexpected column : " + entry.getKey());
      }
    }
  }
 
  /**
   * This method attempts to do its best to remove files from the filesystem that have been confirmed for deletion.
   */
  private void deleteFiles(SortedSet<String> confirmedDeletes) {
    // create a batchwriter to remove the delete flags for successful
    // deletes
    BatchWriter writer = null;
    if (!offline) {
      Connector c;
      try {
        c = instance.getConnector(SecurityConstants.SYSTEM_USERNAME, SecurityConstants.systemCredentials.password);
        writer = c.createBatchWriter(Constants.METADATA_TABLE_NAME, 10000000, 60000l, 3);
      } catch (Exception e) {
        log.error("Unable to create writer to remove file from the !METADATA table", e);
      }
    }
   
    final BatchWriter finalWriter = writer;
   
    ExecutorService deleteThreadPool = Executors.newFixedThreadPool(numDeleteThreads);
   
    for (final String file : confirmedDeletes) {
     
      Runnable deleteTask = new Runnable() {
        @Override
        public void run() {
          boolean removeFlag;
         
          log.debug("Deleting " + Constants.getTablesDir() + file);
          try {
           
            Path p = new Path(Constants.getTablesDir() + file);
           
            if (fs.delete(p, true)) {
              // delete succeeded, still want to delete
              removeFlag = true;
              synchronized (SimpleGarbageCollector.this) {
                ++status.current.deleted;
              }
            } else if (fs.exists(p)) {
              // leave the entry in the METADATA table; we'll try again
              // later
              removeFlag = false;
              synchronized (SimpleGarbageCollector.this) {
                ++status.current.errors;
              }
              log.warn("File exists, but was not deleted for an unknown reason: " + p);
            } else {
              // this failure, we still want to remove the METADATA table
              // entry
              removeFlag = true;
              synchronized (SimpleGarbageCollector.this) {
                ++status.current.errors;
              }
              String parts[] = file.split("/");
              if (parts.length > 1) {
                String tableId = parts[1];
                TableManager.getInstance().updateTableStateCache(tableId);
                TableState tableState = TableManager.getInstance().getTableState(tableId);
                if (tableState != null && tableState != TableState.DELETING)
                  log.warn("File doesn't exist: " + p);
              } else {
                log.warn("Very strange path name: " + file);
              }
            }
           
            // proceed to clearing out the flags for successful deletes and
            // non-existent files
            if (removeFlag && finalWriter != null) {
              Mutation m = new Mutation(new Text(Constants.METADATA_DELETE_FLAG_PREFIX + file));
              m.putDelete(EMPTY_TEXT, EMPTY_TEXT);
              finalWriter.addMutation(m);
            }
          } catch (Exception e) {
            log.error(e, e);
          }
         
        }
      };
     
      deleteThreadPool.execute(deleteTask);
    }
   
    deleteThreadPool.shutdown();
   
    try {
      while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) {}
    } catch (InterruptedException e1) {
      log.error(e1, e1);
    }
   
    if (writer != null) {
      try {
        writer.close();
      } catch (MutationsRejectedException e) {
        log.error("Problem removing entries from the metadata table: ", e);
      }
    }
  }
 
  private void deleteEmptyBulkDirs(SortedSet<String> candidates) {
    HashSet<String> bulkDirs = new HashSet<String>();
   
    // get unique set of bulk dirs inorder to avoid unneeded calls to namenode
    for (String candidate : candidates) {
      Path parent = new Path(candidate).getParent();
      if (parent.getName().startsWith("bulk_")) {
        bulkDirs.add(parent.toString());
      }
    }
   
    for (String bulkDir : bulkDirs) {
      try {
        Path path = new Path(Constants.getTablesDir() + bulkDir);
        FileStatus[] entries = fs.listStatus(path);
        if (entries != null && entries.length == 0) {
          log.debug("Deleting empty bulk dir " + bulkDir);
          if (!fs.delete(path, false)) {
            log.warn("Empty bulk dir " + bulkDir + " was not deleted");
          }
        }
       
      } catch (IOException e) {
        log.warn("Failed to list files in bulk dir " + bulkDir, e);
      }
     
    }
  }
 
  @Override
  public GCStatus getStatus(TInfo info, AuthInfo credentials) {
    return status;
  }
}
TOP

Related Classes of org.apache.accumulo.server.gc.SimpleGarbageCollector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.