Source Code of edu.berkeley.chukwa_xtrace.XtrIndex$MapClass

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.berkeley.chukwa_xtrace;

import java.io.IOException;
import java.util.*;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.chukwa.extraction.engine.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import edu.berkeley.xtrace.reporting.Report;

/**
* Builds a start-end index for X-Trace graphs.
*
* Input is a sequence file with the task ID as a BytesWritable key
* and an ArrayWritable of Texts as the value, one Report per Text.
* (An illustrative sketch of writing such a file appears below.)
*
* Map output is a BytesWritable key (the task ID) and a
* TextArrayWritable value; each Text holds the elapsed-time
* distribution for a single start/end tag pair.
*/
public class XtrIndex extends Configured implements Tool {
 
  /**
   * Hadoop docs say to do this if you pass an ArrayWritable to reduce.
   */
  public static class TextArrayWritable extends ArrayWritable {
    public TextArrayWritable() { super(Text.class); }
  }
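
  /* Illustrative only: a minimal sketch of how an input SequenceFile in the
   * format described in the class comment (BytesWritable task ID -> array of
   * serialized Reports) could be written. The method name and signature are
   * hypothetical; real input is normally produced by XtrExtract.
   */
  static void writeExampleInput(Configuration conf, Path file, byte[] taskId,
      String[] serializedReports) throws IOException {
    org.apache.hadoop.fs.FileSystem fs = file.getFileSystem(conf);
    org.apache.hadoop.io.SequenceFile.Writer writer =
        org.apache.hadoop.io.SequenceFile.createWriter(fs, conf, file,
            BytesWritable.class, TextArrayWritable.class);
    try {
      Text[] texts = new Text[serializedReports.length];
      for(int i = 0; i < serializedReports.length; ++i)
        texts[i] = new Text(serializedReports[i]);
      TextArrayWritable val = new TextArrayWritable();
      val.set(texts);
      writer.append(new BytesWritable(taskId), val);  //one record per task
    } finally {
      writer.close();
    }
  }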
  public static class MapClass extends Mapper<BytesWritable, ArrayWritable, BytesWritable, TextArrayWritable>  {
   
    @Override
    protected void map(BytesWritable key, ArrayWritable value,
        Mapper<BytesWritable, ArrayWritable,BytesWritable, TextArrayWritable>.Context context)
        throws IOException, InterruptedException
    {
      Map<String, Report> reports = new LinkedHashMap<String, Report>();
     
      Writable[] repts = value.get();
      if(repts.length == 0 || !(repts[0] instanceof Text)) {
        System.out.println("error: bad input.");
        return; //bail out more drastically
      }
      //ArrayWritable.get() returns a Writable[] after deserialization, so
      //convert each element individually rather than casting the whole array.
      for(Writable w: repts) {
        Report r = Report.createFromString(w.toString());
        reports.put(r.getMetadata().getOpIdString(), r);
      }
     
      Text[] indexed = indexGraph(reports);
      TextArrayWritable output = new TextArrayWritable();
      output.set(indexed);
     
      context.write(key, output);
    }

  }
 
 
 
  /**
   * Indexes a set of reports using their Start and End tags.
   * Output is a list of entries of the form:
   *   startTag time1,time2,time3
   * where the times are the elapsed times between each start and its
   * matching end.
   *
   * If there are no matches, returns an empty array.
   */
  @SuppressWarnings("unchecked")
  public static Text[] indexGraph(Map<String, Report> reports) {
    //map from start tag to elapsed times between each start and matching end
    org.apache.commons.collections.MultiMap index =
        new org.apache.commons.collections.MultiHashMap();
   
   
    for(Map.Entry<String, Report> report: reports.entrySet()) {
      Report start = report.getValue();
      List<String> starts = start.get("Start");
      if(starts != null) {
        for(String s: starts) {
          Report end = findMatchingEnd(reports, start, s);
          if(end == null)
            continue;
          List<String> endTL = end.get("Timestamp");
          List<String> staTL = start.get("Timestamp");
          if(staTL != null && endTL != null && staTL.size() > 0 && endTL.size() > 0) {
           
            //FIXME: perhaps parse more cleverly?
            double startT = Double.parseDouble(staTL.get(0));
            double endT = Double.parseDouble(endTL.get(0));
           
            Long diff = new Long( (long) (1000 * (endT - startT)));
            index.put(s, diff);
          }
        }
      }
    }
   
    Text[] out = new Text[index.size()];
    int i = 0;
    for(Object k: index.keySet()) {
      StringBuilder sb = new StringBuilder();
      sb.append(k.toString());
      sb.append(' ');
      Collection coll = (Collection) index.get(k);
      for(Object v: coll) {
        assert v instanceof Long: "how did a non-Long get into my collection?";
        sb.append(v.toString());
        sb.append(",");
      }
      sb.deleteCharAt(sb.length() -1);
      Text t = new Text(sb.toString());
      out[i++] = t;
    }
   
    return out;
  }
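
  /* Illustrative only: turning the index entries produced above back into a
   * map. Each Text has the form "<startTag> <t1>,<t2>,..."; this helper is a
   * hypothetical convenience, not part of the original pipeline.
   */
  static Map<String, List<Long>> parseIndex(Text[] entries) {
    Map<String, List<Long>> parsed = new LinkedHashMap<String, List<Long>>();
    for(Text entry: entries) {
      String line = entry.toString();
      int space = line.indexOf(' ');
      if(space < 0)
        continue;  //malformed entry; skip it
      List<Long> times = new ArrayList<Long>();
      for(String t: line.substring(space + 1).split(","))
        times.add(Long.parseLong(t));
      parsed.put(line.substring(0, space), times);
    }
    return parsed;
  }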
 
  //BFS from start to find the closest report carrying the given End tag
  static Report findMatchingEnd(Map<String, Report> reports,
      Report start, String endTag) {
   
    LinkedList<Report> bfsQ = new LinkedList<Report>();
    Set<String> seen = new HashSet<String>();
    bfsQ.add(start);
   
    while(!bfsQ.isEmpty()) {
      Report cur = bfsQ.poll();
      List<String> ends = cur.get("End");
      if(ends != null && ends.contains(endTag))
        return cur;

      //follow the current node's outlinks; a node without any is a dead end
      List<String> outlinks = cur.get(XtrExtract.OUTLINK_FIELD);
      if(outlinks == null)
        continue;
      for(String s: outlinks) {
        if(seen.contains(s))
          continue;
        else
          seen.add(s);
        Report r = reports.get(s);
        if(r != null)
          bfsQ.add(r);
      }
    }
    return null;
  }

  @Override
  public int run(String[] arg) throws Exception {
    Job extractor = new Job(getConf());
    extractor.setMapperClass(MapClass.class);
    //no reduce, just identity

    extractor.setJobName("x-trace indexer");
    extractor.setJarByClass(this.getClass());
   
    extractor.setMapOutputKeyClass(BytesWritable.class);
    extractor.setMapOutputValueClass(TextArrayWritable.class);
   
    extractor.setOutputKeyClass(BytesWritable.class);
    extractor.setOutputValueClass(TextArrayWritable.class);
   
    extractor.setInputFormatClass(SequenceFileInputFormat.class);
    extractor.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(extractor, new Path(arg[0]));
    FileOutputFormat.setOutputPath(extractor, new Path(arg[1]));
    System.out.println("looks OK.  Submitting.");
    extractor.submit();
//    extractor.waitForCompletion(false);
    return 0;

  }
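
  /* Illustrative only: run() expects two arguments, the input directory of
   * extracted SequenceFiles and the output directory for the index, e.g.
   * (jar name and paths hypothetical):
   *
   *   hadoop jar chukwa-xtrace.jar edu.berkeley.chukwa_xtrace.XtrIndex \
   *       /chukwa/xtrace/extracted /chukwa/xtrace/indexed
   */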

  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(),
        new XtrIndex(), args);
    System.exit(res);
  }

}