/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.berkeley.chukwa_xtrace;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.chukwa.extraction.engine.Record;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import edu.berkeley.xtrace.reporting.Report;
/**
* Builds a start-end index for xtrace graphs.
*
* Input is a sequence file, with task ID as byteswritable for key
* and an ArrayWritable of Texts for value; one Report per Text.
*
* Map output is a bytesWritable for key [task ID]
* and a an ArrayWritable of Texts for value.
* Each text is the distribution for a single start/stop pair
*
*
*
*/
public class XtrIndex extends Configured implements Tool {
/**
* Hadoop docs say to do this if you pass an ArrayWritable to reduce.
*/
public static class TextArrayWritable extends ArrayWritable {
public TextArrayWritable() { super(Text.class); }
}
public static class MapClass extends Mapper<BytesWritable, ArrayWritable, BytesWritable, TextArrayWritable> {
@Override
protected void map(BytesWritable key, ArrayWritable value,
Mapper<BytesWritable, ArrayWritable,BytesWritable, TextArrayWritable>.Context context)
throws IOException, InterruptedException
{
Map<String, Report> reports = new LinkedHashMap<String, Report>();
Writable[] repts = value.get();
if(repts.length == 0 || !(repts[0] instanceof Text)) {
System.out.println("error: bad input.");
return; //bail out more drastically
}
Text[] repts_as_text = (Text[]) repts;
for(Text t: repts_as_text) {
Report r = Report.createFromString(t.toString());
reports.put(r.getMetadata().getOpIdString(), r);
}
Text[] indexed = indexGraph(reports);
TextArrayWritable output = new TextArrayWritable();
output.set(indexed);
context.write(key, output);
}
}
/**
* Indexes a set of reports, using Start and End tags
* output is a list of entries of the form:
* A: time1,time2,time3
*
* If no matches, will return an empty array
*/
@SuppressWarnings("unchecked")
public static Text[] indexGraph(Map<String, Report> reports) {
org.apache.commons.collections.MultiMap index = new
org.apache.commons.collections.MultiHashMap();
//map from start tag to opIds of nodes containing the ends
for(Map.Entry<String, Report> report: reports.entrySet()) {
Report start = report.getValue();
List<String> starts = start.get("Start");
if(starts != null) {
for(String s: starts) {
Report end = findMatchingEnd(reports, start, s);
if(end == null)
continue;
List<String> endTL = end.get("Timestamp");
List<String> staTL = start.get("Timestamp");
if(staTL != null && endTL != null && staTL.size() > 0 && endTL.size() > 0) {
//FIXME: perhaps parse more cleverly?
double startT = Double.parseDouble(staTL.get(0));
double endT = Double.parseDouble(endTL.get(0));
Long diff = new Long( (long) (1000 * (endT - startT)));
index.put(s, diff);
}
}
}
}
Text[] out = new Text[index.size()];
int i = 0;
for(Object k: index.keySet()) {
StringBuilder sb = new StringBuilder();
sb.append(k.toString());
sb.append(' ');
Collection coll = (Collection) index.get(k);
for(Object v: coll) {
assert v instanceof Long: "how did a non-Long get into my collection?";
sb.append(v.toString());
sb.append(",");
}
sb.deleteCharAt(sb.length() -1);
Text t = new Text(sb.toString());
out[i++] = t;
}
return out;
}
//do a BFS find closest report to start with endTag
static Report findMatchingEnd(Map<String, Report> reports,
Report start, String endTag) {
LinkedList<Report> bfsQ = new LinkedList<Report>();
Set<String> seen = new HashSet<String>();
bfsQ.add(start);
while(!bfsQ.isEmpty()) {
Report cur = bfsQ.poll();
List<String> ends = cur.get("End");
if(ends != null && ends.contains(endTag))
return cur;
List<String> outlinks = start.get(XtrExtract.OUTLINK_FIELD);
if(outlinks == null)
return null;
for(String s: outlinks) {
if(seen.contains(s))
continue;
else
seen.add(s);
Report r = reports.get(s);
if(r != null)
bfsQ.add(r);
}
}
return null;
}
@Override
public int run(String[] arg) throws Exception {
Job extractor = new Job(getConf());
extractor.setMapperClass(MapClass.class);
//no reduce, just identity
extractor.setJobName("x-trace indexer");
extractor.setJarByClass(this.getClass());
extractor.setMapOutputKeyClass(BytesWritable.class);
extractor.setMapOutputValueClass(TextArrayWritable.class);
extractor.setOutputKeyClass(BytesWritable.class);
extractor.setOutputValueClass(TextArrayWritable.class);
extractor.setInputFormatClass(SequenceFileInputFormat.class);
extractor.setOutputFormatClass(SequenceFileOutputFormat.class);
FileInputFormat.setInputPaths(extractor, new Path(arg[0]));
FileOutputFormat.setOutputPath(extractor, new Path(arg[1]));
System.out.println("looks OK. Submitting.");
extractor.submit();
// extractor.waitForCompletion(false);
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(),
new XtrExtract(), args);
System.exit(res);
}
}