Package org.apache.hadoop.hbase.client

Source Code of org.apache.hadoop.hbase.client.TableSnapshotScanner

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.UUID;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.FSUtils;

/**
* A Scanner which performs a scan over snapshot files. Using this class requires copying the
* snapshot to a temporary empty directory, which will copy the snapshot reference files into that
* directory. Actual data files are not copied.
*
* <p>
* This also allows one to run the scan from an
* online or offline hbase cluster. The snapshot files can be exported by using the
* {@link ExportSnapshot} tool, to a pure-hdfs cluster, and this scanner can be used to
* run the scan directly over the snapshot files. The snapshot should not be deleted while there
* are open scanners reading from snapshot files.
*
* <p>
* An internal RegionScanner is used to execute the {@link Scan} obtained
* from the user for each region in the snapshot.
* <p>
* HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
* snapshot files and data files. HBase also enforces security because all the requests are handled
* by the server layer, and the user cannot read from the data files directly. To read from snapshot
* files directly from the file system, the user who is running the MR job must have sufficient
* permissions to access snapshot and reference files. This means that to run mapreduce over
* snapshot files, the job has to be run as the HBase user or the user must have group or other
* priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
* snapshot/data files will completely circumvent the access control enforced by HBase.
* @see TableSnapshotInputFormat
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class TableSnapshotScanner extends AbstractClientScanner {

  private static final Log LOG = LogFactory.getLog(TableSnapshotScanner.class);

  private Configuration conf;
  private String snapshotName;
  private FileSystem fs;
  private Path rootDir;
  private Path restoreDir;
  private Scan scan;
  private ArrayList<HRegionInfo> regions;
  private HTableDescriptor htd;

  private ClientSideRegionScanner currentRegionScanner  = null;
  private int currentRegion = -1;

  /**
   * Creates a TableSnapshotScanner.
   * @param conf the configuration
   * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
   * The scanner deletes the contents of the directory once the scanner is closed.
   * @param snapshotName the name of the snapshot to read from
   * @param scan a Scan representing scan parameters
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path restoreDir,
      String snapshotName, Scan scan) throws IOException {
    this(conf, FSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
  }

  /**
   * Creates a TableSnapshotScanner.
   * @param conf the configuration
   * @param rootDir root directory for HBase.
   * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
   * The scanner deletes the contents of the directory once the scanner is closed.
   * @param snapshotName the name of the snapshot to read from
   * @param scan a Scan representing scan parameters
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path rootDir,
      Path restoreDir, String snapshotName, Scan scan) throws IOException {
    this.conf = conf;
    this.snapshotName = snapshotName;
    this.rootDir = rootDir;
    // restoreDir will be deleted in close(), use a unique sub directory
    this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
    this.scan = scan;
    this.fs = rootDir.getFileSystem(conf);
    init();
  }

  private void init() throws IOException {
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);

    // load table descriptor
    htd = manifest.getTableDescriptor();

    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
    if (regionManifests == null) {
      throw new IllegalArgumentException("Snapshot seems empty");
    }

    regions = new ArrayList<HRegionInfo>(regionManifests.size());
    for (SnapshotRegionManifest regionManifest : regionManifests) {
      // load region descriptor
      HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());

      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
          hri.getStartKey(), hri.getEndKey())) {
        regions.add(hri);
      }
    }

    // sort for regions according to startKey.
    Collections.sort(regions);

    initScanMetrics(scan);

    RestoreSnapshotHelper.copySnapshotForScanner(conf, fs,
      rootDir, restoreDir, snapshotName);
  }

  @Override
  public Result next() throws IOException {
    Result result = null;
    while (true) {
      if (currentRegionScanner == null) {
        currentRegion++;
        if (currentRegion >= regions.size()) {
          return null;
        }

        HRegionInfo hri = regions.get(currentRegion);
        currentRegionScanner = new ClientSideRegionScanner(conf, fs,
          restoreDir, htd, hri, scan, scanMetrics);
        if (this.scanMetrics != null) {
          this.scanMetrics.countOfRegions.incrementAndGet();
        }
      }

      try {
        result = currentRegionScanner.next();
        if (result != null) {
          return result;
        }
      } finally {
        if (result == null) {
          currentRegionScanner.close();
          currentRegionScanner = null;
        }       
      }
    }
  }

  @Override
  public void close() {
    if (currentRegionScanner != null) {
      currentRegionScanner.close();
    }
    try {
      fs.delete(this.restoreDir, true);
    } catch (IOException ex) {
      LOG.warn("Could not delete restore directory for the snapshot:" + ex);
    }
  }

}
TOP

Related Classes of org.apache.hadoop.hbase.client.TableSnapshotScanner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.