Package org.apache.hadoop.chukwa.extraction.demux.processor.mapper

Source Code of org.apache.hadoop.chukwa.extraction.demux.processor.mapper.JobLogHistoryProcessor

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;


import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.chukwa.extraction.engine.Record;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;

public class JobLogHistoryProcessor extends AbstractProcessor {
  static Logger log = Logger.getLogger(JobLogHistoryProcessor.class);

  private static final String recordType = "JobLogHistory";
  private static String internalRegex = null;
  private static Pattern ip = null;

  private Matcher internalMatcher = null;

  public JobLogHistoryProcessor() {
    internalRegex = "(.*?)=\"(.*?)\"(.*)([\\n])?";
    ip = Pattern.compile(internalRegex);
    internalMatcher = ip.matcher("-");
  }

  @Override
  protected void parse(String recordEntry,
      OutputCollector<ChukwaRecordKey, ChukwaRecord> output, Reporter reporter)
      throws Throwable {

    // log.info("JobLogHistoryProcessor record: [" + recordEntry + "] type["
    // + chunk.getDataType() + "]");

    try {

      HashMap<String, String> keys = new HashMap<String, String>();
      ChukwaRecord record = null;

      int firstSep = recordEntry.indexOf(" ");
      keys.put("RECORD_TYPE", recordEntry.substring(0, firstSep));
      // log.info("JobLogHistoryProcessor Add field: [RECORD_TYPE]["
      // + keys.get("RECORD_TYPE") + "]");

      String body = recordEntry.substring(firstSep);

      internalMatcher.reset(body);

      // String fieldName = null;
      // String fieldValue = null;

      while (internalMatcher.matches()) {

        keys.put(internalMatcher.group(1).trim(), internalMatcher.group(2)
            .trim());

        // TODO Remove debug info before production
        // fieldName = internalMatcher.group(1).trim();
        // fieldValue = internalMatcher.group(2).trim();
        // log.info("JobLogHistoryProcessor Add field: [" + fieldName +
        // "][" + fieldValue +"]" );
        // log.info("EOL : [" + internalMatcher.group(3) + "]" );
        internalMatcher.reset(internalMatcher.group(3));
      }

      if (!keys.containsKey("JOBID")) {
        // Extract JobID from taskID
        // JOBID = "job_200804210403_0005"
        // TASKID = "tip_200804210403_0005_m_000018"
        String jobId = keys.get("TASKID");
        int idx1 = jobId.indexOf('_', 0);
        int idx2 = jobId.indexOf('_', idx1 + 1);
        idx2 = jobId.indexOf('_', idx2 + 1);
        keys.put("JOBID", jobId.substring(idx1 + 1, idx2));
        // log.info("JobLogHistoryProcessor Add field: [JOBID]["
        // + keys.get("JOBID") + "]");
      } else {
        String jobId = keys.get("JOBID").replace("_", "").substring(3);
        keys.put("JOBID", jobId);
      }
      // if (keys.get("RECORD_TYPE").equalsIgnoreCase("Job") &&
      // keys.containsKey("SUBMIT_TIME"))
      // {
      // // Job JOBID="job_200804210403_0005" JOBNAME="MY_JOB"
      // USER="userxxx"
      // // SUBMIT_TIME="1208760436751"
      // JOBCONF="/mapredsystem/xxx.yyy.com/job_200804210403_0005/job.xml"
      //         
      //         
      // }
      // else if (keys.get("RECORD_TYPE").equalsIgnoreCase("Job") &&
      // keys.containsKey("LAUNCH_TIME"))
      // {
      // // Job JOBID="job_200804210403_0005" LAUNCH_TIME="1208760437110"
      // TOTAL_MAPS="5912" TOTAL_REDUCES="739"
      //         
      // }
      // else if (keys.get("RECORD_TYPE").equalsIgnoreCase("Job") &&
      // keys.containsKey("FINISH_TIME"))
      // {
      // // Job JOBID="job_200804210403_0005" FINISH_TIME="1208760906816"
      // JOB_STATUS="SUCCESS" FINISHED_MAPS="5912" FINISHED_REDUCES="739"
      // FAILED_MAPS="0" FAILED_REDUCES="0"
      // // COUNTERS="File Systems.Local bytes read:1735053407244,File
      // Systems.Local bytes written:2610106384012,File Systems.HDFS bytes
      // read:801605644910,File Systems.HDFS bytes written:44135800,
      // // Job Counters .Launched map tasks:5912,Job Counters .Launched
      // reduce tasks:739,Job Counters .Data-local map tasks:5573,Job
      // Counters .Rack-local map tasks:316,Map-Reduce Framework.
      // // Map input records:9410696067,Map-Reduce Framework.Map output
      // records:9410696067,Map-Reduce Framework.Map input
      // bytes:801599188816,Map-Reduce Framework.Map output
      // bytes:784427968116,
      // // Map-Reduce Framework.Combine input records:0,Map-Reduce
      // Framework.Combine output records:0,Map-Reduce Framework.Reduce
      // input groups:477265,Map-Reduce Framework.Reduce input
      // records:739000,
      // // Map-Reduce Framework.Reduce output records:739000"
      //         
      // }
      // else
      if (keys.get("RECORD_TYPE").equalsIgnoreCase("MapAttempt")
          && keys.containsKey("START_TIME")) {
        // MapAttempt TASK_TYPE="MAP"
        // TASKID="tip_200804210403_0005_m_000018"
        // TASK_ATTEMPT_ID="task_200804210403_0005_m_000018_0"
        // START_TIME="1208760437531"
        // HOSTNAME="tracker_xxx.yyy.com:xxx.yyy.com/xxx.xxx.xxx.xxx:53734"

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/Map/" + keys.get("JOBID") + "/"
            + keys.get("START_TIME"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("START_TIME")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("START_TIME", keys.get("START_TIME"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/Map/S");
        output.collect(key, record);

      } else if (keys.get("RECORD_TYPE").equalsIgnoreCase("MapAttempt")
          && keys.containsKey("FINISH_TIME")) {
        // MapAttempt TASK_TYPE="MAP"
        // TASKID="tip_200804210403_0005_m_005494"
        // TASK_ATTEMPT_ID="task_200804210403_0005_m_005494_0"
        // TASK_STATUS="SUCCESS"
        // FINISH_TIME="1208760624124"
        // HOSTNAME="tracker_xxxx.yyyy.com:xxx.yyy.com/xxx.xxx.xxx.xxx:55491"

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/Map/" + keys.get("JOBID") + "/"
            + keys.get("FINISH_TIME"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("FINISH_TIME")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("FINISH_TIME", keys.get("FINISH_TIME"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/Map/E");
        output.collect(key, record);
      }

      else if (keys.get("RECORD_TYPE").equalsIgnoreCase("ReduceAttempt")
          && keys.containsKey("START_TIME")) {
        // ReduceAttempt TASK_TYPE="REDUCE"
        // TASKID="tip_200804210403_0005_r_000138"
        // TASK_ATTEMPT_ID="task_200804210403_0005_r_000138_0"
        // START_TIME="1208760454885"
        // HOSTNAME="tracker_xxxx.yyyy.com:xxx.yyy.com/xxx.xxx.xxx.xxx:51947"

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/SHUFFLE/" + keys.get("JOBID") + "/"
            + keys.get("START_TIME"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("START_TIME")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("START_TIME", keys.get("START_TIME"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/SHUFFLE/S");
        output.collect(key, record);

      } else if (keys.get("RECORD_TYPE").equalsIgnoreCase("ReduceAttempt")
          && keys.containsKey("FINISH_TIME")) {
        // ReduceAttempt TASK_TYPE="REDUCE"
        // TASKID="tip_200804210403_0005_r_000138"
        // TASK_ATTEMPT_ID="task_200804210403_0005_r_000138_0"
        // TASK_STATUS="SUCCESS" SHUFFLE_FINISHED="1208760787167"
        // SORT_FINISHED="1208760787354" FINISH_TIME="1208760802395"
        // HOSTNAME="tracker__xxxx.yyyy.com:xxx.yyy.com/xxx.xxx.xxx.xxx:51947"

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/SHUFFLE/" + keys.get("JOBID") + "/"
            + keys.get("SHUFFLE_FINISHED"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("SHUFFLE_FINISHED")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("SHUFFLE_FINISHED", keys.get("SHUFFLE_FINISHED"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/SHUFFLE/E");
        output.collect(key, record);

        // SORT
        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/SORT/" + keys.get("JOBID") + "/"
            + keys.get("SHUFFLE_FINISHED"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("SHUFFLE_FINISHED")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("START_TIME", keys.get("SHUFFLE_FINISHED"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/SORT/S");
        output.collect(key, record);

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/SORT/" + keys.get("JOBID") + "/"
            + keys.get("SORT_FINISHED"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("SORT_FINISHED")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("SORT_FINISHED", keys.get("SORT_FINISHED"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/SORT/E");
        output.collect(key, record);

        // Reduce
        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/REDUCE/" + keys.get("JOBID") + "/"
            + keys.get("SORT_FINISHED"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("SORT_FINISHED")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("START_TIME", keys.get("SORT_FINISHED"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/REDUCE/S");
        output.collect(key, record);

        key = new ChukwaRecordKey();
        key.setKey("JobLogHist/REDUCE/" + keys.get("JOBID") + "/"
            + keys.get("FINISH_TIME"));
        key.setReduceType("JobLogHistoryReduceProcessor");
        record = new ChukwaRecord();
        record.setTime(Long.parseLong(keys.get("SORT_FINISHED")));
        record.add("JOBID", keys.get("JOBID"));
        record.add("FINISH_TIME", keys.get("SORT_FINISHED"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("JobLogHist/REDUCE/E");
        output.collect(key, record);

      } else if (keys.get("RECORD_TYPE").equalsIgnoreCase("Job")) {
        // 1
        // Job JOBID="job_200809062051_0001" JOBNAME="wordcount" USER="xxx"
        // SUBMIT_TIME="1208760906812"
        // JOBCONF=
        // "/user/xxx/mapredsystem/563976.yyy.zzz.com/job_200809062051_0001/job.xml"

        // 2
        // Job JOBID="job_200809062051_0001" LAUNCH_TIME="1208760906816"
        // TOTAL_MAPS="3" TOTAL_REDUCES="7"

        // 3
        // Job JOBID="job_200804210403_0005" FINISH_TIME="1208760906826"
        // JOB_STATUS="SUCCESS" FINISHED_MAPS="5912"
        // FINISHED_REDUCES="739" FAILED_MAPS="0" FAILED_REDUCES="0"
        // COUNTERS="File Systems.Local bytes read:1735053407244,File
        // Systems.Local bytes written:2610106384012,File Systems.HDFS
        // bytes read:801605644910,File Systems.HDFS bytes
        // written:44135800,
        // Job Counters .Launched map tasks:5912,Job Counters .Launched
        // reduce tasks:739,Job Counters .Data-local map tasks:5573,Job
        // Counters .Rack-local map tasks:316,Map-Reduce Framework.
        // Map input records:9410696067,Map-Reduce Framework.Map output
        // records:9410696067,Map-Reduce Framework.Map input
        // bytes:801599188816,Map-Reduce Framework.Map output
        // bytes:784427968116,
        // Map-Reduce Framework.Combine input records:0,Map-Reduce
        // Framework.Combine output records:0,Map-Reduce
        // Framework.Reduce input groups:477265,Map-Reduce
        // Framework.Reduce input records:739000,
        // Map-Reduce Framework.Reduce output records:739000"

        record = new ChukwaRecord();
        key = new ChukwaRecordKey();
        buildGenericRecord(record, null, Long
            .parseLong(keys.get("FINISH_TIME")), "MRJob");
        if (keys.containsKey("COUNTERS")) {
          extractCounters(record, keys.get("COUNTERS"));
        }

        key = new ChukwaRecordKey();
        key.setKey("MRJob/" + keys.get("JOBID"));
        key.setReduceType("MRJobReduceProcessor");

        record = new ChukwaRecord();
        record.add(Record.tagsField, chunk.getTags());
        if (keys.containsKey("SUBMIT_TIME")) {
          record.setTime(Long.parseLong(keys.get("SUBMIT_TIME")));
        } else if (keys.containsKey("LAUNCH_TIME")) {
          record.setTime(Long.parseLong(keys.get("LAUNCH_TIME")));
        } else if (keys.containsKey("FINISH_TIME")) {
          record.setTime(Long.parseLong(keys.get("FINISH_TIME")));
        }

        Iterator<String> it = keys.keySet().iterator();
        while (it.hasNext()) {
          String field = it.next();
          record.add(field, keys.get(field));
        }

        output.collect(key, record);
      }

      if (keys.containsKey("TASK_TYPE")
          && keys.containsKey("COUNTERS")
          && (keys.get("TASK_TYPE").equalsIgnoreCase("REDUCE") || keys.get(
              "TASK_TYPE").equalsIgnoreCase("MAP"))) {
        // MAP
        // Task TASKID="tip_200804210403_0005_m_000154" TASK_TYPE="MAP"
        // TASK_STATUS="SUCCESS" FINISH_TIME="1208760463883"
        // COUNTERS="File Systems.Local bytes read:159265655,File
        // Systems.Local bytes written:318531310,
        // File Systems.HDFS bytes read:145882417,Map-Reduce
        // Framework.Map input records:1706604,
        // Map-Reduce Framework.Map output records:1706604,Map-Reduce
        // Framework.Map input bytes:145882057,
        // Map-Reduce Framework.Map output bytes:142763253,Map-Reduce
        // Framework.Combine input records:0,Map-Reduce
        // Framework.Combine output records:0"

        // REDUCE
        // Task TASKID="tip_200804210403_0005_r_000524"
        // TASK_TYPE="REDUCE" TASK_STATUS="SUCCESS"
        // FINISH_TIME="1208760877072"
        // COUNTERS="File Systems.Local bytes read:1179319677,File
        // Systems.Local bytes written:1184474889,File Systems.HDFS
        // bytes written:59021,
        // Map-Reduce Framework.Reduce input groups:684,Map-Reduce
        // Framework.Reduce input records:1000,Map-Reduce
        // Framework.Reduce output records:1000"

        record = new ChukwaRecord();
        key = new ChukwaRecordKey();
        buildGenericRecord(record, null, Long
            .parseLong(keys.get("FINISH_TIME")), "SizeVsFinishTime");
        extractCounters(record, keys.get("COUNTERS"));
        record.add("JOBID", keys.get("JOBID"));
        record.add("TASKID", keys.get("TASKID"));
        record.add("TASK_TYPE", keys.get("TASK_TYPE"));
        record.add(Record.tagsField, chunk.getTags());
        // log.info("MR_Graph +1");
        output.collect(key, record);

      }
    } catch (IOException e) {
      log.warn("Unable to collect output in JobLogHistoryProcessor ["
          + recordEntry + "]", e);
      e.printStackTrace();
      throw e;
    }

  }

  protected void extractCounters(ChukwaRecord record, String input) {

    String[] data = null;
    String[] counters = input.split(",");

    for (String counter : counters) {
      data = counter.split(":");
      record.add(data[0].replaceAll(" ", "_").replaceAll("\\.", "_")
          .toUpperCase(), data[1]);
    }
  }

  public String getDataType() {
    return JobLogHistoryProcessor.recordType;
  }
}
TOP

Related Classes of org.apache.hadoop.chukwa.extraction.demux.processor.mapper.JobLogHistoryProcessor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.