Package org.apache.hadoop.chukwa.extraction.archive

Source Code of org.apache.hadoop.chukwa.extraction.archive.ChukwaArchiveManager

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.chukwa.extraction.archive;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;

import org.apache.hadoop.chukwa.conf.ChukwaConfiguration;
import org.apache.hadoop.chukwa.extraction.CHUKWA_CONSTANT;
import org.apache.hadoop.chukwa.util.DaemonWatcher;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

public class ChukwaArchiveManager implements CHUKWA_CONSTANT {
  static Logger log = Logger.getLogger(ChukwaArchiveManager.class);
  static SimpleDateFormat day = new java.text.SimpleDateFormat("yyyyMMdd");
 
  static final  int ONE_HOUR = 60 * 60 * 1000;
  static final int ONE_DAY = 24*ONE_HOUR;
  static final int MAX_FILES = 500;
 
  protected ChukwaConfiguration conf = null;
  protected FileSystem fs = null;
  protected boolean isRunning = true;
 
  public ChukwaArchiveManager() throws Exception {
    conf = new ChukwaConfiguration();
    init();
  }

  public ChukwaArchiveManager(ChukwaConfiguration conf) throws Exception {
    conf = new ChukwaConfiguration();
    init();
  }
 
  protected void init() throws IOException, URISyntaxException {
    String fsName = conf.get(WRITER_HDFS_FILESYSTEM_FIELD);
    fs = FileSystem.get(new URI(fsName), conf);
  }

  public static void main(String[] args) throws Exception {
    DaemonWatcher.createInstance("ArchiveManager");
   
    ChukwaArchiveManager manager = new ChukwaArchiveManager();
    manager.start();
  }

  public void shutdown() {
    this.isRunning = false;
  }
 
  public void start() throws Exception {
   
    String chukwaRootDir = conf.get(CHUKWA_ROOT_DIR_FIELD, DEFAULT_DEMUX_ROOT_DIR_NAME);
    if ( ! chukwaRootDir.endsWith("/") ) {
      chukwaRootDir += "/";
    }
    log.info("chukwaRootDir:" + chukwaRootDir);
   
    String archiveRootDir = conf.get(CHUKWA_ARCHIVE_DIR_FIELD, chukwaRootDir +DEFAULT_CHUKWA_DATASINK_DIR_NAME);
    if ( ! archiveRootDir.endsWith("/") ) {
      archiveRootDir += "/";
    }
    log.info("archiveDir:" + archiveRootDir);
    Path pArchiveRootDir = new Path(archiveRootDir);
    setup(pArchiveRootDir);
   
    String archivesRootProcessingDir = chukwaRootDir + DEFAULT_ARCHIVES_PROCESSING_DIR_NAME;
    // String archivesErrorDir = archivesRootProcessingDir + DEFAULT_ARCHIVES_IN_ERROR_DIR_NAME;
    String archivesMRInputDir = archivesRootProcessingDir + DEFAULT_ARCHIVES_MR_INPUT_DIR_NAME;
    String archivesMROutputDir = archivesRootProcessingDir+ DEFAULT_ARCHIVES_MR_OUTPUT_DIR_NAME;
    String finalArchiveOutput = chukwaRootDir + DEFAULT_FINAL_ARCHIVES;


   
    Path pDailyRawArchivesInput = new Path(archiveRootDir);
    Path pArchivesMRInputDir = new Path(archivesMRInputDir);
    Path pArchivesRootProcessingDir = new Path(archivesRootProcessingDir);
    Path pFinalArchiveOutput =  new Path(finalArchiveOutput);
   
   
    if (!archivesMRInputDir.endsWith("/")) {
      archivesMRInputDir +="/";
    }
    setup( pArchivesRootProcessingDir );
    setup( pDailyRawArchivesInput );
    setup( pFinalArchiveOutput );
   
    int errorCount = 0;
   
    long lastRun = 0l;
   
    while (isRunning) {
      try {
       
        if (errorCount >= 4 ) {
          // it's better to exit, Watchdog will re-start it
          log.warn("Too many error - bail out!");
          DaemonWatcher.bailout(-1);
        }
        // /chukwa/archives/<YYYYMMDD>/dataSinkDirXXX
        //  to
        // /chukwa/archives/final/<YYYYMMDD>_<TS>
       
        if (fs.exists(pArchivesMRInputDir)) {
          FileStatus[] days = fs.listStatus(pArchivesMRInputDir);
          if (days.length > 0) {
            log.info("reprocessing current Archive input" +  days[0].getPath());
           
            runArchive(archivesMRInputDir + days[0].getPath().getName() + "/",archivesMROutputDir,finalArchiveOutput)
            continue;
          }
        }
       
       
        log.info("Raw Archive dir:" + pDailyRawArchivesInput);
        long now = System.currentTimeMillis();
        int currentDay = Integer.parseInt(day.format(System.currentTimeMillis()));
        FileStatus[] daysInRawArchiveDir = fs.listStatus(pDailyRawArchivesInput);
       
        if (daysInRawArchiveDir.length == 0 ) {
          log.debug( pDailyRawArchivesInput + " is empty, going to sleep for 1 minute");
          Thread.sleep(1 * 60 * 1000);
          continue;
        }
        // We don't want to process DataSink file more than once every 2 hours
        // for current day
        if (daysInRawArchiveDir.length == 1 ) {
          int workingDay = Integer.parseInt(daysInRawArchiveDir[0].getPath().getName());
          long nextRun = lastRun + (2*ONE_HOUR) - (1*60*1000);// 2h -1min
          if (workingDay == currentDay && now < nextRun) {
            log.info("lastRun < 2 hours so skip archive for now, going to sleep for 30 minutes, currentDate is:" + new java.util.Date());
            Thread.sleep(30 * 60 * 1000);
            continue;
          }
        }
       
        String dayArchivesMRInputDir = null;
        for (FileStatus fsDay : daysInRawArchiveDir) {
          dayArchivesMRInputDir = archivesMRInputDir + fsDay.getPath().getName() + "/";
          processDay(fsDay, dayArchivesMRInputDir,archivesMROutputDir, finalArchiveOutput);
          lastRun = now;
        }
       
      }catch (Throwable e) {
        errorCount ++;
        e.printStackTrace();
        log.warn(e);
      }
     
    }
   
  }
 
  public void processDay(FileStatus fsDay, String archivesMRInputDir,
      String archivesMROutputDir,String finalArchiveOutput) throws Exception {
    FileStatus[] dataSinkDirsInRawArchiveDir = fs.listStatus(fsDay.getPath());
    long now = System.currentTimeMillis();
   
    int currentDay = Integer.parseInt(day.format(System.currentTimeMillis()));
    int workingDay = Integer.parseInt(fsDay.getPath().getName());
   
    long oneHourAgo = now -  ONE_HOUR;
    if (dataSinkDirsInRawArchiveDir.length == 0 && workingDay < currentDay) {
      fs.delete(fsDay.getPath(),false);
      log.info("deleting raw dataSink dir for day:" + fsDay.getPath().getName());
      return;
    }
   
    int fileCount = 0;
    for (FileStatus fsDataSinkDir : dataSinkDirsInRawArchiveDir) {
      long modificationDate = fsDataSinkDir.getModificationTime();
      if (modificationDate < oneHourAgo) {
        log.info("processDay,modificationDate:" + modificationDate +", adding: " + fsDataSinkDir.getPath() );
        fileCount += fs.listStatus(fsDataSinkDir.getPath()).length;
        moveDataSinkFilesToArchiveMrInput(fsDataSinkDir,archivesMRInputDir);
        // process no more than MAX_FILES directories
        if (fileCount >= MAX_FILES) {
          log.info("processDay, reach capacity");
          runArchive(archivesMRInputDir,archivesMROutputDir,finalArchiveOutput)
          fileCount = 0;
        } else {
          log.info("processDay,modificationDate:" + modificationDate +", skipping: " + fsDataSinkDir.getPath() );
        }
      }
    }   
  }
 
  public void runArchive(String archivesMRInputDir,String archivesMROutputDir,
      String finalArchiveOutput) throws Exception {
    String[] args = new String[3];
    args[0] = "Stream";
    args[1] = archivesMRInputDir + "*/*.done" ;
    args[2] = archivesMROutputDir;
   
    Path pArchivesMRInputDir = new Path(archivesMRInputDir);
    Path pArchivesMROutputDir = new Path(archivesMROutputDir);

   
    if (fs.exists(pArchivesMROutputDir)) {
      log.warn("Deleteing mroutput dir for archive ...");
      fs.delete(pArchivesMROutputDir, true);
    }
   
    log.info("ChukwaArchiveManager processing :" + args[1] + " going to output to " + args[2] );
    int res = ToolRunner.run(this.conf, new ChukwaArchiveBuilder(),args);
    log.info("Archive result: " + res);
    if (res != 0) {
      throw new Exception("Archive result != 0");
    }
  
    if (!finalArchiveOutput.endsWith("/")) {
      finalArchiveOutput +="/";
    }
    String day = pArchivesMRInputDir.getName();
    finalArchiveOutput += day;
    Path pDay = new Path(finalArchiveOutput);
    setup(pDay);
   
    finalArchiveOutput += "/archive_" + System.currentTimeMillis();
    Path pFinalArchiveOutput = new Path(finalArchiveOutput);
   
    log.info("Final move: moving " + pArchivesMROutputDir + " to " + pFinalArchiveOutput);
   
    if (fs.rename(pArchivesMROutputDir, pFinalArchiveOutput ) ) {
      log.info("deleting " + pArchivesMRInputDir);
      fs.delete(pArchivesMRInputDir, true);
    } else {
      log.warn("move to final archive folder failed!");
    }
   

   
  }
 
  public void moveDataSinkFilesToArchiveMrInput(FileStatus fsDataSinkDir,
      String archivesMRInputDir) throws IOException {
   
    if (!archivesMRInputDir.endsWith("/")) {
      archivesMRInputDir +="/";
    }
   
    Path pArchivesMRInputDir = new Path(archivesMRInputDir);
    setup(pArchivesMRInputDir);
    fs.rename(fsDataSinkDir.getPath(), pArchivesMRInputDir);
    log.info("moving " + fsDataSinkDir.getPath() + " to " + pArchivesMRInputDir);
  }
 
  /**
   * Create directory if !exists
   * @param directory
   * @throws IOException
   */
  protected void setup(Path directory) throws IOException {
     if ( ! fs.exists(directory)) {
       fs.mkdirs(directory);
     }
  }
}
TOP

Related Classes of org.apache.hadoop.chukwa.extraction.archive.ChukwaArchiveManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.