Source Code of com.linkedin.whiteelephant.analysis.ComputeUsagePerHour$TheReducer

/*
* Copyright 2012 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.linkedin.whiteelephant.analysis;

import java.io.IOException;
import java.util.Calendar;
import java.util.List;
import java.util.ArrayList;
import java.util.Properties;
import java.util.TimeZone;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyValueInputFormat;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.log4j.Logger;

import com.linkedin.whiteelephant.analysis.AttemptStatsKey;
import com.linkedin.whiteelephant.analysis.AttemptStatsValue;
import com.linkedin.whiteelephant.analysis.TaskStatus;
import com.linkedin.whiteelephant.analysis.TaskType;
import com.linkedin.whiteelephant.parsing.LogData;
import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob;
import com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJobExecutor;

import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

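/**
 * Computes hourly usage statistics from parsed Hadoop job logs. For each day
 * of each cluster it launches a MapReduce job that splits every task attempt
 * across the hour buckets it spans and aggregates elapsed time, CPU time,
 * spilled records, and shuffle bytes per cluster, user, task type, and status.
 */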
public class ComputeUsagePerHour
{
  private static final String REDUCE_SHUFFLE_BYTES = "REDUCE_SHUFFLE_BYTES";
  private static final String CPU_MILLISECONDS = "CPU_MILLISECONDS";
  private static final String SPILLED_RECORDS = "SPILLED_RECORDS";
 
  private final Logger _log;
  private final FileSystem _fs;
  private final Properties _props;
  private final String _name;
 
  private final static TimeZone timeZone = TimeZone.getTimeZone("GMT");
 
  private final String _jobsOutputPathRoot;
  private final String _usageOutputPathRoot;
  private final boolean _incremental;
  private final int _numDaysForced;
 
  public ComputeUsagePerHour(String name, Properties props) throws IOException
  {
    _log = Logger.getLogger(name);
    _name = name;
    _props = props;
    _fs = FileSystem.get(StagedOutputJob.createConfigurationFromProps(_props));
   
    if (_props.get("jobs.output.path") == null) {
      throw new IllegalArgumentException("joined.output.path is not specified.");
    }
   
    if (_props.get("usage.output.path") == null) {
      throw new IllegalArgumentException("usage.output.path is not specified.");
    }
   
    if (_props.get("num.days.forced") == null) {
      throw new IllegalArgumentException("num.days.forced is not specified");
    }
   
    _usageOutputPathRoot = (String)_props.get("usage.output.path");
    _jobsOutputPathRoot = (String)_props.get("jobs.output.path");
    _incremental = Boolean.parseBoolean((String)_props.get("incremental"));
    _numDaysForced = Integer.parseInt((String)_props.get("num.days.forced"));
  }
 
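  /**
   * Walks the jobs output tree (cluster/year/day) and, for each day that has
   * Avro input files, submits an hourly aggregation job. In incremental mode
   * a day is skipped when its output already exists, unless it falls within
   * the last num.days.forced days.
   */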
  public void execute(StagedOutputJobExecutor executor) throws IOException, InterruptedException, ExecutionException
  {
    FileStatus[] clusterPaths = _fs.listStatus(new Path(_jobsOutputPathRoot));
   
    for (FileStatus clusterPath : clusterPaths)
    {
      String clusterName = clusterPath.getPath().getName();
     
      FileStatus[] yearPaths = _fs.listStatus(clusterPath.getPath());
     
      for (FileStatus yearPath : yearPaths)     
      {
        String year = yearPath.getPath().getName();
       
        System.out.println("Searching under " + yearPath.getPath());
        FileStatus[] dayPaths = _fs.listStatus(yearPath.getPath());
        for (FileStatus dayPath : dayPaths)
        {
          String day = dayPath.getPath().getName();
         
          String pattern = dayPath.getPath().toString() + "/*.avro";
         
          String outputPathForDay = String.format("%s/%s/%s/%s",_usageOutputPathRoot,clusterName,year,day);
         
          FileStatus[] inputFiles = _fs.globStatus(new Path(pattern));
         
          StringBuilder msg = new StringBuilder(pattern + " => " + inputFiles.length + " files");
         
          if (inputFiles.length > 0)
          {
            Calendar cal = Calendar.getInstance(timeZone);
           
            long nowMillis = cal.getTimeInMillis();
           
            cal.set(Integer.parseInt(year), Integer.parseInt(day.substring(0, 2)) - 1, Integer.parseInt(day.substring(2, 4)));
           
            long thenMillis = cal.getTimeInMillis();
           
            double elapsedDays = Math.max(0.0, ((double)(nowMillis - thenMillis))/(24*3600*1000));
           
            if (!_incremental || !_fs.exists(new Path(outputPathForDay)) || elapsedDays < _numDaysForced)
            {
              long totalLength = 0;
              for (FileStatus stat : inputFiles)
              {        
                totalLength += stat.getLen();
              }
             
              msg.append(String.format(", %s MB",totalLength/1024/1024));           
              System.out.println(msg);
             
              // one reducer per 1 GB
              int numReducers = (int)Math.ceil(((double)totalLength)/1024/1024/1024);
             
              submitJob(executor, pattern, outputPathForDay, clusterName, year, day, numReducers);
            }
            else if (_incremental && _fs.exists(new Path(outputPathForDay)))
            {
              msg.append(" (skipping)");
              System.out.println(msg);
            }
          }
        }
       
        executor.waitForCompletion();
      }
    }
  }
 
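  /**
   * Configures and submits a staged job for a single day's input: Avro
   * key-value input of (string, LogData), map and final output keyed by
   * AttemptStatsKey with AttemptStatsValue values, written to a staging
   * path under /tmp before being moved to the final output location.
   */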
  private void submitJob(StagedOutputJobExecutor executor, String inputPattern, String output, String clusterName, String year, String day, int numReducers)
  {
    List<String> inputPaths = new ArrayList<String>();
   
    inputPaths.add(inputPattern);
   
    final StagedOutputJob job = StagedOutputJob.createStagedJob(
      _props,
      _name + "-" + "usage-per-hour-" + clusterName + "-" + year + "-" + day,
      inputPaths,
      "/tmp" + output,
      output,
      _log);
   
    final Configuration conf = job.getConfiguration();
   
    conf.set("cluster.name", clusterName);
               
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
   
    job.setInputFormatClass(AvroKeyValueInputFormat.class);
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
   
    AvroJob.setInputKeySchema(job, Schema.create(Type.STRING));
    AvroJob.setInputValueSchema(job, LogData.SCHEMA$);
   
    AvroJob.setMapOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
   
    AvroJob.setOutputKeySchema(job, AttemptStatsKey.SCHEMA$);
    AvroJob.setOutputValueSchema(job, AttemptStatsValue.SCHEMA$);
   
    job.setNumReduceTasks(numReducers);
   
    job.setMapperClass(ComputeUsagePerHour.TheMapper.class);
    job.setReducerClass(ComputeUsagePerHour.TheReducer.class);
   
    executor.submit(job);
  }
 
  public static class TheMapper extends Mapper<AvroKey<String>, AvroValue<LogData>, AvroWrapper<AttemptStatsKey>, AvroWrapper<AttemptStatsValue>>
  {
    private String clusterName;
   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      System.out.println("Setting up reducer");
      Configuration conf = context.getConfiguration();
      clusterName = conf.get("cluster.name");
      System.out.println("Got cluster " + clusterName);
      super.setup(context);
    }
   
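    /**
     * Emits per-hour usage records for every task attempt of each parsed job.
     * Attempts with a null status, missing start/finish times, or
     * non-positive times are counted against the "Job Analysis" counter
     * group and skipped.
     */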
    @Override
    protected void map(AvroKey<String> key, AvroValue<LogData> value, Context context) throws java.io.IOException, java.lang.InterruptedException
    {
      LogData data = value.datum();
     
      if (data.getEntry() != null && data.getEntry() instanceof com.linkedin.whiteelephant.parsing.Job)
      {
        com.linkedin.whiteelephant.parsing.Job job = (com.linkedin.whiteelephant.parsing.Job)data.getEntry();
        for (com.linkedin.whiteelephant.parsing.Task task : job.getTasks())
        {
          for (com.linkedin.whiteelephant.parsing.Attempt attempt : task.getAttempts())
          {
            if (attempt.getTaskStatus() == null)
            {
              System.out.println("Status null for job " + attempt.getJobId() + " attempt " + attempt.getTaskAttemptId());           
              context.getCounter("Job Analysis", "Status null").increment(1);           
              continue;
            }
            else
            {
              context.getCounter("Job Analysis", "Status " + attempt.getTaskStatus()).increment(1);
            }
           
            if (attempt.getStartTime() == null || attempt.getFinishTime() == null)
            {
              System.out.println("Missing either startTime or finishTime");
              context.getCounter("Job Analysis", "Missing start or finish").increment(1);
              continue;
            }
           
            if (!(attempt.getStartTime() > 0 && attempt.getFinishTime() > 0))
            {
              System.out.println("Either startTime or finishTime is not positive");
              context.getCounter("Job Analysis", "Not positive start or finish").increment(1);
              continue;
            }

            AttemptStatsKey statsKey = new AttemptStatsKey();
            statsKey.setCluster(clusterName);
            statsKey.setExcess(((com.linkedin.whiteelephant.parsing.DerivedAttemptData)attempt.getDerived()).getExcess());
            statsKey.setStatus(TaskStatus.valueOf(attempt.getTaskStatus().toString()));
            statsKey.setType(TaskType.valueOf(attempt.getType().toString().toUpperCase()));
            statsKey.setUser(job.getUser());
           
            writeStats(statsKey, attempt, context);
          }
        }
      }       
    }
   
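    /**
     * Splits the attempt's [start, end) interval at hour boundaries and
     * writes one record per bucket. Elapsed time and counters are prorated
     * by the fraction of the attempt falling in the bucket, and the buckets
     * containing the start and end times are flagged as started/finished.
     */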
    private void writeStats(AttemptStatsKey key, com.linkedin.whiteelephant.parsing.Attempt attempt, Context context) throws IOException, InterruptedException
    {     
      Long start = attempt.getStartTime();
      Long end = attempt.getFinishTime();
     
      if (end < start)
      {
        throw new RuntimeException(String.format("finishTime %s is less than startTime %s", end, start));
      }
     
      TimeUnit unit = TimeUnit.HOURS;
      Long currentTime = start;
     
      key.setUnit(com.linkedin.whiteelephant.analysis.TimeUnit.HOURS);
     
      while (currentTime < end)
      {       
        Calendar currentUnitStart = getCalendarForTime(unit, currentTime);
        Calendar currentUnitEnd = getCalendarForTime(unit, currentTime);
       
        if (unit == TimeUnit.HOURS)
        {         
          currentUnitEnd.add(Calendar.HOUR, 1);
        }
        else
        {
          throw new RuntimeException("Unsupported time unit: " + unit);
        }
       
        long nextMillis = Math.min(currentUnitEnd.getTimeInMillis(),end);
       
        double percentOfTotal = (nextMillis - currentTime)/((double)(end - start));  
       
        AttemptStatsValue value = new AttemptStatsValue();
               
        value.setElapsedMinutes((nextMillis - currentTime)/1000.0/60.0);
       
        if (attempt.getCounters().get(CPU_MILLISECONDS) != null)
        {
          value.setCpuMinutes(percentOfTotal * attempt.getCounters().get(CPU_MILLISECONDS)/1000.0/60.0);
        }
       
        if (attempt.getCounters().get(SPILLED_RECORDS) != null)
        {
          value.setSpilledRecords((long)(percentOfTotal * attempt.getCounters().get(SPILLED_RECORDS)));
        }
       
        if (attempt.getCounters().get(REDUCE_SHUFFLE_BYTES) != null)
        {
          // prorate like the other counters so attempts spanning multiple hours are not double counted
          value.setReduceShuffleBytes((long)(percentOfTotal * attempt.getCounters().get(REDUCE_SHUFFLE_BYTES)));
        }
               
        key.setTime(currentUnitStart.getTimeInMillis());
       
        if ((key.getTime() + unit.toMillis(1)) >= start && start >= key.getTime())
        {
          value.setStarted(1);
        }
       
        if ((key.getTime() + unit.toMillis(1)) >= end && end >= key.getTime())
        {
          value.setFinished(1);
        }
       
        currentTime = nextMillis;
       
        context.write(new AvroKey<AttemptStatsKey>(key), new AvroValue<AttemptStatsValue>(value));
      }
    }
       
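    /**
     * Returns a calendar truncated to the start of the hour (or day)
     * containing the given time, preserving the DST offset so that adding
     * one unit advances exactly to the next boundary.
     */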
    private static Calendar getCalendarForTime(TimeUnit unit, Long time)
    {
      Calendar cal = Calendar.getInstance(timeZone);
      cal.setTimeInMillis(time);       

      if (unit == TimeUnit.HOURS)
      {
        int dstOffset = cal.get(Calendar.DST_OFFSET);
       
        // zero these out so we can advance to the next boundary simply by adding an hour
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
       
        // reset the DST offset, since setting above fields to 0 for some reason alters the value
        cal.set(Calendar.DST_OFFSET, dstOffset);
      }
      else if (unit == TimeUnit.DAYS)
      {
        int dstOffset = cal.get(Calendar.DST_OFFSET);
       
        // zero these out so we can advance to the next boundary simply by adding a day
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
       
        // reset the DST offset, since setting above fields to 0 for some reason alters the value
        cal.set(Calendar.DST_OFFSET, dstOffset);
      }
      else
      {
        throw new RuntimeException("Unsupported time unit: " + unit);
      }
     
      return cal;
    }
  }
 
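  /**
   * Sums the per-hour values emitted by the mapper for each stats key,
   * leaving the optional counter fields null when no input supplied them.
   */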
  public static class TheReducer extends Reducer<AvroKey<AttemptStatsKey>, AvroValue<AttemptStatsValue>, AvroWrapper<AttemptStatsKey>, AvroWrapper<AttemptStatsValue>>
  {
    private String clusterName;
   
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      System.out.println("Setting up reducer");
      Configuration conf = context.getConfiguration();
      clusterName = conf.get("cluster.name");
      System.out.println("Got cluster " + clusterName);
      super.setup(context);
    }
   
    @Override
    protected void reduce(AvroKey<AttemptStatsKey> key, Iterable<AvroValue<AttemptStatsValue>> values, final Context context) throws IOException, InterruptedException
    {
      AttemptStatsValue merged = new AttemptStatsValue();
           
      merged.setElapsedMinutes(0.0);
      merged.setStarted(0);
      merged.setFinished(0);
     
      for (AvroValue<AttemptStatsValue> valueWrapped : values)
      {
        AttemptStatsValue value = valueWrapped.datum();
        merged.setElapsedMinutes(value.getElapsedMinutes() + merged.getElapsedMinutes());
        merged.setStarted(value.getStarted() + merged.getStarted());
        merged.setFinished(value.getFinished() + merged.getFinished());
       
        if (value.getCpuMinutes() != null)
        {
          if (merged.getCpuMinutes() == null)
          {
            merged.setCpuMinutes(value.getCpuMinutes());
          }
          else
          {
            merged.setCpuMinutes(merged.getCpuMinutes() + value.getCpuMinutes());
          }
        }
       
        if (value.getSpilledRecords() != null)
        {
          if (merged.getSpilledRecords() == null)
          {
            merged.setSpilledRecords(value.getSpilledRecords());
          }
          else
          {
            merged.setSpilledRecords(merged.getSpilledRecords() + value.getSpilledRecords());
          }
        }
       
        if (value.getReduceShuffleBytes() != null)
        {
          if (merged.getReduceShuffleBytes() == null)
          {
            merged.setReduceShuffleBytes(value.getReduceShuffleBytes());
          }
          else
          {
            merged.setReduceShuffleBytes(merged.getReduceShuffleBytes() + value.getReduceShuffleBytes());
          }
        }
      }
     
      context.write(key, new AvroWrapper<AttemptStatsValue>(merged));
    }
  }
}