Package org.apache.hadoop.mapred

Source Code of org.apache.hadoop.mapred.TaskLogsMonitor$PerJVMInfo

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapred;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.mapred.TaskLog.LogName;
import org.apache.hadoop.mapred.TaskLog.LogFileDetail;
import org.apache.hadoop.util.StringUtils;

class TaskLogsMonitor extends Thread {
  static final Log LOG = LogFactory.getLog(TaskLogsMonitor.class);

  long mapRetainSize, reduceRetainSize;

  public TaskLogsMonitor(long mapRetSize, long reduceRetSize) {
    mapRetainSize = mapRetSize;
    reduceRetainSize = reduceRetSize;
    LOG.info("Starting logs' monitor with mapRetainSize=" + mapRetainSize
        + " and reduceRetainSize=" + reduceRetSize);
  }

  /**
   * The list of tasks that have finished and so need their logs to be
   * truncated.
   */
  private Map<TaskAttemptID, PerJVMInfo> finishedJVMs =
      new HashMap<TaskAttemptID, PerJVMInfo>();

  private static final int DEFAULT_BUFFER_SIZE = 4 * 1024;

  static final int MINIMUM_RETAIN_SIZE_FOR_TRUNCATION = 0;

  private static class PerJVMInfo {

    List<Task> allAttempts;

    public PerJVMInfo(List<Task> allAtmpts) {
      this.allAttempts = allAtmpts;
    }
  }

  /**
   * Process(JVM/debug script) has finished. Asynchronously truncate the logs of
   * all the corresponding tasks to the configured limit. In case of JVM, both
   * the firstAttempt as well as the list of all attempts that ran in the same
   * JVM have to be passed. For debug script, the (only) attempt itself should
   * be passed as both the firstAttempt as well as the list of attempts.
   *
   * @param firstAttempt
   * @param isTaskCleanup
   */
  void addProcessForLogTruncation(TaskAttemptID firstAttempt,
      List<Task> allAttempts) {
    LOG.info("Adding the jvm with first-attempt " + firstAttempt
        + " for logs' truncation");
    PerJVMInfo lInfo = new PerJVMInfo(allAttempts);
    synchronized (finishedJVMs) {
      finishedJVMs.put(firstAttempt, lInfo);
      finishedJVMs.notify();
    }
  }

  /**
   * Process the removed task's logs. This involves truncating them to
   * retainSize.
   */
  void truncateLogs(TaskAttemptID firstAttempt, PerJVMInfo lInfo) {

    // Read the log-file details for all the attempts that ran in this JVM
    Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails;
    try {
      taskLogFileDetails = getAllLogsFileDetails(lInfo.allAttempts);
    } catch (IOException e) {
      LOG.warn(
          "Exception in truncateLogs while getting allLogsFileDetails()."
              + " Ignoring the truncation of logs of this process.", e);
      return;
    }

    Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails =
        new HashMap<Task, Map<LogName, LogFileDetail>>();

    File attemptLogDir = TaskLog.getBaseDir(firstAttempt.toString());

    FileWriter tmpFileWriter;
    FileReader logFileReader;
    // Now truncate file by file
    logNameLoop: for (LogName logName : LogName.values()) {

      File logFile = TaskLog.getTaskLogFile(firstAttempt, logName);
      if (!logFile.exists()) {
        continue;
      }
      // //// Optimization: if no task is over limit, just skip truncation-code
      if (logFile.exists()
          && !isTruncationNeeded(lInfo, taskLogFileDetails, logName)) {
        LOG.debug("Truncation is not needed for "
            + logFile.getAbsolutePath());
        continue;
      }
      // //// End of optimization

      // Truncation is needed for this log-file. Go ahead now.
      File tmpFile = new File(attemptLogDir, "truncate.tmp");
      try {
        tmpFileWriter = new FileWriter(tmpFile);
      } catch (IOException ioe) {
        LOG.warn("Cannot open " + tmpFile.getAbsolutePath()
            + " for writing truncated log-file "
            + logFile.getAbsolutePath()
            + ". Continuing with other log files. ", ioe);
        continue;
      }

      try {
        logFileReader = new FileReader(logFile);
      } catch (FileNotFoundException fe) {
        LOG.warn("Cannot open " + logFile.getAbsolutePath()
            + " for reading. Continuing with other log files");
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
        continue;
      }

      long newCurrentOffset = 0;
      // Process each attempt from the ordered list passed.
      for (Task task : lInfo.allAttempts) {

        // Truncate the log files of this task-attempt so that only the last
        // retainSize many bytes of this log file is retained and the log
        // file is reduced in size saving disk space.
        long retainSize =
            (task.isMapTask() ? mapRetainSize : reduceRetainSize);
        LogFileDetail newLogFileDetail = new LogFileDetail();
        try {
          newLogFileDetail =
              truncateALogFileOfAnAttempt(task.getTaskID(),
                  taskLogFileDetails.get(task).get(logName), retainSize,
                  tmpFileWriter, logFileReader);
        } catch (IOException ioe) {
          LOG.warn("Cannot truncate the log file "
              + logFile.getAbsolutePath()
              + ". Caught exception while handling " + task.getTaskID(),
              ioe);
          // revert back updatedTaskLogFileDetails
          revertIndexFileInfo(lInfo, taskLogFileDetails,
              updatedTaskLogFileDetails, logName);
          if (!tmpFile.delete()) {
            LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
          }
          continue logNameLoop;
        }

        // Track information for updating the index file properly.
        // Index files don't track DEBUGOUT and PROFILE logs, so skip'em.
        if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) {
          if (!updatedTaskLogFileDetails.containsKey(task)) {
            updatedTaskLogFileDetails.put(task,
                new HashMap<LogName, LogFileDetail>());
          }
          // newLogFileDetail already has the location and length set, just
          // set the start offset now.
          newLogFileDetail.start = newCurrentOffset;
          updatedTaskLogFileDetails.get(task).put(logName, newLogFileDetail);
          newCurrentOffset += newLogFileDetail.length;
        }
      }

      try {
        tmpFileWriter.close();
      } catch (IOException ioe) {
        LOG.warn("Couldn't close the tmp file " + tmpFile.getAbsolutePath()
            + ". Deleting it.", ioe);
        revertIndexFileInfo(lInfo, taskLogFileDetails,
            updatedTaskLogFileDetails, logName);
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
        continue;
      }

      if (!tmpFile.renameTo(logFile)) {
        // If the tmpFile cannot be renamed revert back
        // updatedTaskLogFileDetails to maintain the consistency of the
        // original log file
        revertIndexFileInfo(lInfo, taskLogFileDetails,
            updatedTaskLogFileDetails, logName);
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
      }
    }

    // Update the index files
    updateIndicesAfterLogTruncation(firstAttempt, updatedTaskLogFileDetails);
  }

  /**
   * @param lInfo
   * @param taskLogFileDetails
   * @param updatedTaskLogFileDetails
   * @param logName
   */
  private void revertIndexFileInfo(PerJVMInfo lInfo,
      Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails,
      Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails,
      LogName logName) {
    if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) {
      for (Task task : lInfo.allAttempts) {
        if (!updatedTaskLogFileDetails.containsKey(task)) {
          updatedTaskLogFileDetails.put(task,
              new HashMap<LogName, LogFileDetail>());
        }
        updatedTaskLogFileDetails.get(task).put(logName,
            taskLogFileDetails.get(task).get(logName));
      }
    }
  }

  /**
   * Get the logFileDetails of all the list of attempts passed.
   *
   * @param lInfo
   * @return a map of task to the log-file detail
   * @throws IOException
   */
  private Map<Task, Map<LogName, LogFileDetail>> getAllLogsFileDetails(
      final List<Task> allAttempts) throws IOException {
    Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails =
        new HashMap<Task, Map<LogName, LogFileDetail>>();
    for (Task task : allAttempts) {
      Map<LogName, LogFileDetail> allLogsFileDetails;
      allLogsFileDetails =
          TaskLog.getAllLogsFileDetails(task.getTaskID(),
              task.isTaskCleanupTask());
      taskLogFileDetails.put(task, allLogsFileDetails);
    }
    return taskLogFileDetails;
  }

  /**
   * Check if truncation of logs is needed for the given jvmInfo. If all the
   * tasks that ran in a JVM are within the log-limits, then truncation is not
   * needed. Otherwise it is needed.
   *
   * @param lInfo
   * @param taskLogFileDetails
   * @param logName
   * @return true if truncation is needed, false otherwise
   */
  private boolean isTruncationNeeded(PerJVMInfo lInfo,
      Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails,
      LogName logName) {
    boolean truncationNeeded = false;
    LogFileDetail logFileDetail = null;
    for (Task task : lInfo.allAttempts) {
      long taskRetainSize =
          (task.isMapTask() ? mapRetainSize : reduceRetainSize);
      Map<LogName, LogFileDetail> allLogsFileDetails =
          taskLogFileDetails.get(task);
      logFileDetail = allLogsFileDetails.get(logName);
      if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION
          && logFileDetail.length > taskRetainSize) {
        truncationNeeded = true;
        break;
      }
    }
    return truncationNeeded;
  }

  /**
   * Truncate the log file of this task-attempt so that only the last retainSize
   * many bytes of each log file is retained and the log file is reduced in size
   * saving disk space.
   *
   * @param taskID Task whose logs need to be truncated
   * @param oldLogFileDetail contains the original log details for the attempt
   * @param taskRetainSize retain-size
   * @param tmpFileWriter New log file to write to. Already opened in append
   *          mode.
   * @param logFileReader Original log file to read from.
   * @return
   * @throws IOException
   */
  private LogFileDetail truncateALogFileOfAnAttempt(
      final TaskAttemptID taskID, final LogFileDetail oldLogFileDetail,
      final long taskRetainSize, final FileWriter tmpFileWriter,
      final FileReader logFileReader) throws IOException {
    LogFileDetail newLogFileDetail = new LogFileDetail();

    // ///////////// Truncate log file ///////////////////////

    // New location of log file is same as the old
    newLogFileDetail.location = oldLogFileDetail.location;
    if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION
        && oldLogFileDetail.length > taskRetainSize) {
      LOG.info("Truncating logs for " + taskID + " from "
          + oldLogFileDetail.length + "bytes to " + taskRetainSize
          + "bytes.");
      newLogFileDetail.length = taskRetainSize;
    } else {
      LOG.info("No truncation needed for " + taskID + " length is "
          + oldLogFileDetail.length + " retain size " + taskRetainSize
          + "bytes.");
      newLogFileDetail.length = oldLogFileDetail.length;
    }
    long charsSkipped =
        logFileReader.skip(oldLogFileDetail.length
            - newLogFileDetail.length);
    if (charsSkipped != oldLogFileDetail.length - newLogFileDetail.length) {
      throw new IOException("Erroneously skipped " + charsSkipped
          + " instead of the expected "
          + (oldLogFileDetail.length - newLogFileDetail.length));
    }
    long alreadyRead = 0;
    while (alreadyRead < newLogFileDetail.length) {
      char tmpBuf[]; // Temporary buffer to read logs
      if (newLogFileDetail.length - alreadyRead >= DEFAULT_BUFFER_SIZE) {
        tmpBuf = new char[DEFAULT_BUFFER_SIZE];
      } else {
        tmpBuf = new char[(int) (newLogFileDetail.length - alreadyRead)];
      }
      int bytesRead = logFileReader.read(tmpBuf);
      if (bytesRead < 0) {
        break;
      } else {
        alreadyRead += bytesRead;
      }
      tmpFileWriter.write(tmpBuf);
    }
    // ////// End of truncating log file ///////////////////////

    return newLogFileDetail;
  }

  /**
   * Truncation of logs is done. Now sync the index files to reflect the
   * truncated sizes.
   *
   * @param firstAttempt
   * @param updatedTaskLogFileDetails
   */
  private void updateIndicesAfterLogTruncation(TaskAttemptID firstAttempt,
      Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails) {
    for (Entry<Task, Map<LogName, LogFileDetail>> entry :
                                updatedTaskLogFileDetails.entrySet()) {
      Task task = entry.getKey();
      Map<LogName, LogFileDetail> logFileDetails = entry.getValue();
      Map<LogName, Long[]> logLengths = new HashMap<LogName, Long[]>();
      // set current and previous lengths
      for (LogName logName : TaskLog.LOGS_TRACKED_BY_INDEX_FILES) {
        logLengths.put(logName, new Long[] { Long.valueOf(0L),
            Long.valueOf(0L) });
        LogFileDetail lfd = logFileDetails.get(logName);
        if (lfd != null) {
          // Set previous lengths
          logLengths.get(logName)[0] = Long.valueOf(lfd.start);
          // Set current lengths
          logLengths.get(logName)[1] = Long.valueOf(lfd.start + lfd.length);
        }
      }
      try {
        TaskLog.writeToIndexFile(firstAttempt, task.getTaskID(),
            task.isTaskCleanupTask(), logLengths);
      } catch (IOException ioe) {
        LOG.warn("Exception in updateIndicesAfterLogTruncation : "
            + StringUtils.stringifyException(ioe));
        LOG.warn("Exception encountered while updating index file of task "
            + task.getTaskID()
            + ". Ignoring and continuing with other tasks.");
      }
    }
  }

  /**
   *
   * @throws IOException
   */
  void monitorTaskLogs() throws IOException {

    Map<TaskAttemptID, PerJVMInfo> tasksBeingTruncated =
      new HashMap<TaskAttemptID, PerJVMInfo>();

    // Start monitoring newly added finishedJVMs
    synchronized (finishedJVMs) {
      tasksBeingTruncated.clear();
      tasksBeingTruncated.putAll(finishedJVMs);
      finishedJVMs.clear();
    }

    for (Entry<TaskAttemptID, PerJVMInfo> entry :
                tasksBeingTruncated.entrySet()) {
      truncateLogs(entry.getKey(), entry.getValue());
    }
  }

  @Override
  public void run() {

    while (true) {
      try {
        monitorTaskLogs();
        try {
          synchronized (finishedJVMs) {
            while (finishedJVMs.isEmpty()) {
              finishedJVMs.wait();
            }
          }
        } catch (InterruptedException e) {
          LOG.warn(getName() + " is interrupted. Returning");
          return;
        }
      } catch (Throwable e) {
        LOG.warn(getName()
            + " encountered an exception while monitoring : "
            + StringUtils.stringifyException(e));
        LOG.info("Ingoring the exception and continuing monitoring.");
      }
    }
  }
}
TOP

Related Classes of org.apache.hadoop.mapred.TaskLogsMonitor$PerJVMInfo

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.