Package org.apache.hadoop.hdfs.server.namenode

Source Code of org.apache.hadoop.hdfs.server.namenode.SnapshotNode

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.commons.logging.*;

import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSLocatedBlocks;
import org.apache.hadoop.hdfs.DFSInputStream;
import org.apache.hadoop.hdfs.protocol.*;
import org.apache.hadoop.hdfs.server.common.Storage.*;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.FSImage.CheckpointStates;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.*;
import org.apache.hadoop.hdfs.server.namenode.WaitingRoom.*;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.SnapshotProtocol;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.ipc.*;
import org.apache.hadoop.util.Daemon;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.*;

import org.apache.hadoop.metrics.jvm.JvmMetrics;

/**********************************************************
* The SnapshotNode is responsible for taking periodic
* snapshots of the HDFS. The current design only allows
* one SnapshotNode per cluster.
*
* The SnapshotNode is a daemon that periodically wakes
* up (determined by the schedule specified in the configuration),
* triggers a periodic snapshot and then goes back to sleep.
* The SnapshotNode uses the Namesystem's jetty server to
* retrieve files.
*
**********************************************************/
public class SnapshotNode implements SnapshotProtocol {

  // Logger shared by SnapshotNode and its nested worker classes.
  public static final Log LOG =
      LogFactory.getLog(SnapshotNode.class);

  // Sub-directory of tempDir into which downloaded namenode files are placed.
  public static final String CURRENT_DIR = "/current";
  // Prefix of snapshot file names inside ssDir; the snapshot id follows it.
  // NOTE(review): this is a mutable public static; reassigning it changes
  // which files are treated as snapshots.
  public static String SSNAME = "dfs_snapshot_"; // prefix of ss files

  private Configuration conf; // configuration supplied to the constructor

  private String fileServer; // jetty image server namenode listens on
  private FileSystem dfs; // file system obtained from conf in init()

  private String tempDir; // temp dir to download files from namenode
  private String ssDir; // path to store snapshots in

  private Daemon purgeThread; //waiting room purger thread

  // Pool used by updateLeasedFiles(); re-created on every call.
  private ExecutorService leaseUpdateThreadPool;
  // Value of "fs.snapshot.leaseupdatethreads" (default 100), read in init().
  private int maxLeaseUpdateThreads;

  private Server server; // RPC Server
  private InetSocketAddress serverAddress = null; // RPC server address

  private NamenodeProtocol namenode; // RPC proxy to the namenode
  private InetSocketAddress nameNodeAddr; // address the namenode proxy connects to

  public SnapshotNode(Configuration conf) {
    try {
      this.conf = conf;
      init();
    } catch (IOException e) {
      LOG.error("Failed to start SnapshotNode");
      shutdown();
    }
  }

  /**
   * Initialize SnapshotNode
   * @throws IOException
   */
  private void init() throws IOException {
    ssDir = conf.get("fs.snapshot.dir", "/.SNAPSHOT");
    tempDir = conf.get("fs.snapshot.tempdir", "/tmp/snapshot");

    fileServer = getImageServer();
    dfs = FileSystem.get(conf);

    Path ssPath = new Path(ssDir);
    if (!dfs.exists(ssPath)) {
      dfs.mkdirs(ssPath);
    }

    maxLeaseUpdateThreads = conf.getInt("fs.snapshot.leaseupdatethreads", 100);

    // Waiting room purge thread
    purgeThread = new Daemon((new WaitingRoom(conf)).getPurger());
    purgeThread.start();

    // Get namenode rpc connection
    nameNodeAddr = NameNode.getAddress(conf);
    namenode = (NamenodeProtocol) RPC.waitForProxy(NamenodeProtocol.class,
                               NamenodeProtocol.versionID, nameNodeAddr, conf);

    // Snapshot RPC Server
    InetSocketAddress socAddr = SnapshotNode.getAddress(conf);
    int handlerCount = conf.getInt("fs.snapshot.handler.count", 10);
    server = RPC.getServer(this, socAddr.getHostName(), socAddr.getPort(),
                           handlerCount, false, conf);
    // The rpc-server port can be ephemeral... ensure we have the correct info
    serverAddress = server.getListenerAddress();
    LOG.info("SnapshotNode up at: " + serverAddress);

    server.start(); // start rpc server
  }

  private static InetSocketAddress getAddress(String address) {
    return NetUtils.createSocketAddr(address);
  }

  public static InetSocketAddress getAddress(Configuration conf) {
    String nodeport = conf.get("fs.snapshot.server.address");
    if (nodeport == null) {
      nodeport = "localhost:" + 60000; // DEFAULT PORT
    }
    return getAddress(nodeport);
  }

  @Override
  public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
    if (protocol.equals(SnapshotProtocol.class.getName())) {
      return SnapshotProtocol.versionID;
    }

    throw new IOException("Unknown protocol to snapshot node: " + protocol);
  }

  @Override
  public ProtocolSignature getProtocolSignature(String protocol, long clientVersion,
                                         int clientMethodsHash) throws IOException {
    return ProtocolSignature.getProtocolSignature(this, protocol, clientVersion,
                                                  clientMethodsHash);
  }

  void prepareDownloadDirs() throws IOException {
    // Check if temp dir exists
    File temp = new File(tempDir);
    if (!temp.exists()) temp.mkdirs();
    if(!temp.isDirectory()) throw new IOException("Temp Dir: " +
                               tempDir + " is not a directory.");

    // Check if current dir in temp exists
    temp = new File(tempDir + CURRENT_DIR);
    if (!temp.exists()) temp.mkdir();
    if(!temp.isDirectory()) throw new IOException("Current in Temp Dir: " +
                           tempDir + CURRENT_DIR + " is not a directory.");

    // Delete all previously downloaded files
    for (File f: temp.listFiles()) {
      f.delete();
    }
  }

  /**
   * Shutdown snapshot node and attached daemons
   */
  public void shutdown() {
    if (purgeThread != null) {
      WaitingRoomPurger purger = (WaitingRoomPurger) purgeThread.getRunnable();
      purger.shutdown();
    }

    RPC.stopProxy(namenode);
    if (server != null) server.stop();
  }

  /**
   * Shutdown snapshot node and attached daemons
   */
  public void shutdownWaitingRoomPurger() {
    if (purgeThread != null) {
      WaitingRoomPurger purger = (WaitingRoomPurger) purgeThread.getRunnable();
      purger.shutdown();
    }
  }


  // SNAPSHOT PROTOCOL //

  @Override
  public String[] listSnapshots() throws IOException {
    Path ssPath = new Path(ssDir);

    if (!dfs.exists(ssPath)) {
      throw new FileNotFoundException("Snapshot dir doesn't exist");
    }

    FileStatus ssStatus = dfs.getFileStatus(ssPath);
      if (!ssStatus.isDir()) {
        throw new IOException("ssDir " + ssDir  +" is not a directory");
      }

    FileStatus[] files = dfs.listStatus(ssPath);
    List<String> ssIds = new ArrayList<String>();

    // Separate snapshot files
    for (FileStatus ss: files) {
      if (ss.isDir()) continue; // skips dirs
      String name = ss.getPath().getName();
      if (!name.startsWith("dfs_snapshot_")) continue;
      ssIds.add(name.substring(13));
    }

    String[] rtn = new String[ssIds.size()];
    for (int i = 0; i < ssIds.size(); i++) {
      rtn[i] = ssIds.get(i);
    }

    return rtn;
  }

  @Override
  public FileStatus getSnapshotFileStatus(String id) throws IOException {
    Path ss = new Path(ssDir + "/" + SSNAME + id);
    return dfs.getFileStatus(ss);
  }

  @Override
  public boolean deleteSnapshot(String id) throws IOException {
    Path fileToDelete = new Path(ssDir + "/" + SSNAME + id);
    return dfs.delete(fileToDelete, false);
  }

  @Override
  public LocatedBlocksWithMetaInfo[] getLocatedBlocks(String snapshotId,
      String path)
  throws IOException {
    FSImage fsImage = new FSImage();
    FSNamesystem namesystem = new FSNamesystem(fsImage, conf);
    Path ssPath = new Path(ssDir + "/" + SSNAME + snapshotId);
    FSDataInputStream in = dfs.open(ssPath);
    fsImage.loadFSImage(new File(ssPath.toString()), in);
    INode inode = namesystem.dir.getInode(path);

    if (inode == null) {
      throw new IOException("File/dir at " + path +
                            " does not exist in snapshot " + snapshotId);
    }

    List<LocatedBlocksWithMetaInfo> blocks = new ArrayList<LocatedBlocksWithMetaInfo>();
    getAllLocatedBlocks(inode, blocks); // fill blocks with LocatedBlocks for all files

    LocatedBlocksWithMetaInfo[] blocksArr = new LocatedBlocksWithMetaInfo[blocks
        .size()];
    for (int i = 0; i < blocksArr.length; ++i) {
      blocksArr[i] = blocks.get(i);
    }

    fsImage.close();
    return blocksArr;
  }

  @Override
  public void createSnapshot(String snapshotId, boolean updateLeases) throws IOException {
    // Create new SnapshotStore
    SnapshotStorage ssStore = new SnapshotStorage(conf, new File(tempDir));

    // Download image & edit files from namenode
    downloadSnapshotFiles(ssStore);

    // Merge image and edit files
    doMerge(ssStore);

    // Update file lengths for leased files (optional)
    if (updateLeases) {
      updateLeasedFiles(ssStore);
    }

    // Save snapshot
    saveSnapshot(ssStore, snapshotId);
    ssStore.close();
  }

  private void getAllLocatedBlocks(INode inode,
      List<LocatedBlocksWithMetaInfo> blocks)
  throws IOException {
    if (inode.isDirectory()) {
      INodeDirectory dir = (INodeDirectory) inode;
      for (INode child: dir.getChildren()) {
        getAllLocatedBlocks(child, blocks);
      }
    } else {
      INodeFile file = (INodeFile) inode;
      BlockInfo[] fileBlocks = file.getBlocks();
      List<LocatedBlock> lb = new ArrayList<LocatedBlock>();
      for (BlockInfo block: fileBlocks) {
        // DatanodeInfo is unavailable, so set as empty for now
        lb.add(new LocatedBlock(block, new DatanodeInfo[0]));
      }

      LocatedBlocks locatedBlocks =  new LocatedBlocks(
                             file.computeContentSummary().getLength(), // flength
                             lb, // blks
                             false); // isUnderConstruction

      // Update DatanodeInfo from NN
      blocks.add(namenode.updateDatanodeInfo(locatedBlocks));
    }
  }

  void saveSnapshot(SnapshotStorage ssStore, String id) throws IOException {
    // Create new snapshot in temp file
    Path tmpPath = new Path("/tmp/" + SSNAME + id);
    FSDataOutputStream out = dfs.create(tmpPath);
    ssStore.saveSnapshot(tmpPath.toString(), out);
    out.close();

    // Rename snapshot
    Path ssPath = new Path(ssDir + "/" + SSNAME + id);
    if (!dfs.rename(tmpPath, ssPath)) {
      throw new IOException("Could not rename temp snapshot file");
    }
  }

  void doMerge(SnapshotStorage ssStore) throws IOException {
    FSNamesystem namesystem = new FSNamesystem(ssStore, conf);
    ssStore.doMerge();
  }

  /**
   * Create a snapshot with id equals to
   * current system time.
   */
  void createSnapshot() throws IOException {
    createSnapshot(Long.toString(System.currentTimeMillis()), true);
  }

  void createSnapshot(String id) throws IOException {
    createSnapshot(id, true);
  }

  /**
   * Tries to get the most up to date lengths of files under construction.
   */
  void updateLeasedFiles(SnapshotStorage ssStore) throws IOException {
    FSNamesystem fsNamesys = ssStore.getFSNamesystem();
    List<Block> blocksForNN = new ArrayList<Block>();

    leaseUpdateThreadPool = new ThreadPoolExecutor(1, maxLeaseUpdateThreads, 60,
                                                TimeUnit.SECONDS,
                                                new LinkedBlockingQueue<Runnable>());
    ((ThreadPoolExecutor)leaseUpdateThreadPool).allowCoreThreadTimeOut(true);

    // Try to update lengths for leases from DN
    LightWeightLinkedSet<Lease> sortedLeases = fsNamesys.leaseManager.getSortedLeases();
    Iterator<Lease> itr = sortedLeases.iterator();
    while (itr.hasNext()) {
      Lease lease = itr.next();
      for (String path : lease.getPaths()) {
        // Update file lengths using worker threads to increase throughput
        leaseUpdateThreadPool.execute(
                   new LeaseUpdateWorker(conf, path, fsNamesys, blocksForNN));
      }
    }

    try {
      leaseUpdateThreadPool.shutdown();
      // Wait till update tasks finish successfully (max 20 mins?)
      if (!leaseUpdateThreadPool.awaitTermination(1200, TimeUnit.SECONDS)) {
        throw new IOException("Updating lease files failed");
      }
    } catch (InterruptedException e) {
        throw new IOException("Snapshot creation interrupted while updating leased files");
    }

    // Fetch block lengths for renamed/deleted leases from NN
    long[] blockIds = new long[blocksForNN.size()];

    for (int i = 0; i < blocksForNN.size(); ++i) {
      blockIds[i] = blocksForNN.get(i).getBlockId();
    }

    long[] lengths = namenode.getBlockLengths(blockIds);

    for (int i = 0; i < blocksForNN.size(); ++i) {
      if (lengths[i] == -1) {
        // Couldn't update block length, keep preferred length
        LOG.error("Couldn't update length for block " + blocksForNN.get(i));
      } else {
        blocksForNN.get(i).setNumBytes(lengths[i]);
      }
    }
  }

  /**
   * Download fsimage, edits and edits.new files from the name-node.
   * Files will be downloaded in CURRENT_DIR
   * @throws IOException
   */
  void downloadSnapshotFiles(SnapshotStorage ssStore) throws IOException {
    CheckpointSignature start = namenode.getCheckpointSignature();
    CheckpointSignature end = null;
    boolean success;

    do {
      // Clear temp files
      prepareDownloadDirs();

      // get fsimage
      String fileId = "getimage=1";
      File[] srcNames = ssStore.getImageFiles();
      assert srcNames.length == 1 : "No snapshot temporary dir.";
      TransferFsImage.getFileClient(fileServer, fileId, srcNames, false);
      LOG.info("Downloaded file " + srcNames[0].getName() + " size " +
               srcNames[0].length() + " bytes.");

      // get edits file
      fileId = "getedit=1";
      srcNames = ssStore.getEditsFiles();
      assert srcNames.length == 1 : "No snapshot temporary dir.";
      TransferFsImage.getFileClient(fileServer, fileId, srcNames, false);
      LOG.info("Downloaded file " + srcNames[0].getName() + " size " +
               srcNames[0].length() + " bytes.");

      // get edits.new file (only if in the middle of ckpt)
      try {
        fileId = "geteditnew=1";
        srcNames = ssStore.getEditsNewFiles();
        assert srcNames.length == 1 : "No snapshot temporary dir.";
        TransferFsImage.getFileClient(fileServer, fileId, srcNames, false);
        LOG.info("Downloaded file " + srcNames[0].getName() + " size " +
               srcNames[0].length() + " bytes.");
      } catch (FileNotFoundException e) {
        // do nothing
      }

      end = namenode.getCheckpointSignature();

      // Are the downloaded files consistent?
      success = end.checkpointTime == start.checkpointTime &&
                end.checkpointState != CheckpointStates.UPLOAD_DONE;

      start = end;
    } while (!success);
  }

  /**
   * Returns the jetty image server that the Namenode is listening on.
   * @throws IOException
   */
  private String getImageServer() throws IOException {
    URI fsName = FileSystem.getDefaultUri(conf);

    if (!"hdfs".equals(fsName.getScheme())) {
      throw new IOException("This is not a DFS");
    }

    return NetUtils.getServerAddress(conf, "dfs.info.bindAddress",
                             "dfs.info.port", "dfs.http.address");
  }

  static class SnapshotStorage extends FSImage {
    Configuration conf;
    File tempDir;
    DataOutputStream out;

    public SnapshotStorage(Configuration conf, File tempDir) throws IOException {
      super(tempDir);
      this.conf = conf;
      this.tempDir = tempDir;
    }

    @Override
    public boolean isConversionNeeded(StorageDirectory sd) {
      return false;
    }

    /**
     * Merge image and edit log (in memory).
     * Files to merge include fsimage, edits, and possibly edits.new
     * @throws IOException
     */
    void doMerge() throws IOException {
      StorageDirectory sdTemp = null;
      Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE_AND_EDITS);
      if (it.hasNext()) {
        sdTemp = it.next();
      } else {
        throw new IOException("Could not locate snapshot temp directory.");
      }

      loadFSImage(getImageFile(sdTemp, NameNodeFile.IMAGE));
      loadFSEdits(sdTemp);
    }

    /**
     * Writes snapshot to the OutputStream.
     * @param out Stream to write snapshot to
     */
    void saveSnapshot(String dest, DataOutputStream out) throws IOException {
      saveFSImage(dest, out);
    }
  }

  private class LeaseUpdateWorker implements Runnable {
    String path;
    Configuration conf;
    List<Block> blocks;
    FSNamesystem fsNamesys;

    public LeaseUpdateWorker(Configuration conf, String path,
                             FSNamesystem namesystem, List<Block> blocks) {
      this.path = path;
      this.conf = conf;
      this.blocks = blocks;
      this.fsNamesys = namesystem;
    }
 
    @Override
    public void run() {
      boolean error = false;
      INodeFile node = null;
      DFSClient client = null;

      try {
        client = new DFSClient(conf);

  LOG.info("Trying to update lease for file at " + path);

        // verify that path exists in namespace
        node = fsNamesys.dir.getFileINode(path);
        if (node == null) {
          error = true;
        }
        if (!node.isUnderConstruction()) {
          error = true;
        }
      }
      catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        error = true;
      }

      // Could not find inode in FSNamespace, quit now
      if (error) {
        LOG.error("Couldn't update length for leased file at " + path +
                  " because file not in namespace");
  return;
      }

      BlockInfo[] blks = node.getBlocks();

      // If NN has not leased out any block, return
      if (blks.length == 0) return;

      int index = blks.length - 1; // index of last file block

      LOG.info("Block at index " + index + " being written for file at  " +
               path);

      // Pessimistically update last block length from DataNode.
      // File could have been renamed, and a new file created in its place.
      try {
        DFSInputStream stm = client.open(path);
        DFSLocatedBlocks locBlks = stm.fetchLocatedBlocks();

        if (locBlks.locatedBlockCount() >= blks.length) {
          if (blks[index] != null && locBlks.get(index) != null) {
            if (blks[index].getBlockId() == locBlks.get(index).getBlock().getBlockId()) {
              blks[index].setNumBytes(locBlks.get(index).getBlock().getNumBytes());
              return;
            }
          }
        }

        stm.close();
        client.close(); // close dfs client
      }
      catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
      }

      // If file was renamed/deleted, set block length to preferred size
      // and add it to list of blocks which we should try to update from NN
      LOG.info("Couldn't update block " + blks[index] + " for file " +
               "at " + path + " from DN. Setting length to preferred length " +
               "and queuing block to be checked from NN for updated length.");
      blks[index].setNumBytes(node.getPreferredBlockSize());

      synchronized(blocks) {     
        blocks.add(blks[index]);
      }
    }
  }
}
TOP

Related Classes of org.apache.hadoop.hdfs.server.namenode.SnapshotNode

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.