/* Copyright (C) 2012 Intel Corporation.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more about this software visit:
* http://www.01.org/GraphBuilder
*/
package com.intel.hadoop.graphbuilder.preprocess.mapreduce;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import com.intel.hadoop.graphbuilder.preprocess.functional.Functional;
import com.intel.hadoop.graphbuilder.preprocess.mapreduce.keyvalue.VertexEdgeUnionType;
import com.intel.hadoop.graphbuilder.util.Pair;
/**
* The Reducer class applies user defined {@code Functional}s to reduce
* duplicate edges and vertices. If no such {@code Functional} is provide, it
* outputs the first instance and discards the rest with the same identifier. It
* also discards self edges: v - > v. An option for discard bidirectional edge
* is provided by {@code cleanBidirectionalEdge(boolean)}.
* <p>
* Output directory structure:
* <ul>
* <li>$outputdir/edata contains edge data output</li>
* <li>$outputdir/vdata contains vertex data output</li>
* </ul>
* </p>
*
*/
public class CreateGraphReducer extends MapReduceBase implements
Reducer<IntWritable, VertexEdgeUnionType, Text, Text> {
public static enum CREATE_GRAPH_COUNTER {
NUM_VERTICES, NUM_EDGES
};
@Override
public void configure(JobConf job) {
super.configure(job);
this.valClass = job.getMapOutputValueClass();
this.noBidir = job.getBoolean("noBidir", false);
try {
if (job.get("EdgeFunc") != null) {
this.EdgeFunc = (Functional) Class.forName(job.get("EdgeFunc"))
.newInstance();
this.EdgeFunc.configure(job);
}
if (job.get("VertexFunc") != null) {
this.VertexFunc = (Functional) Class.forName(job.get("VertexFunc"))
.newInstance();
this.VertexFunc.configure(job);
}
} catch (InstantiationException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public void reduce(IntWritable key, Iterator<VertexEdgeUnionType> iter,
OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
VertexEdgeUnionType next;
HashMap<Pair<Object, Object>, Writable> edgeset = new HashMap();
HashMap<Object, Writable> vertexset = new HashMap();
while (iter.hasNext()) {
next = iter.next();
// Apply reduce on vertex
if (next.flag() == VertexEdgeUnionType.VERTEXVAL) {
Object vid = next.vertex().vid();
if (vertexset.containsKey(vid)) { // duplicate vertex
if (VertexFunc != null)
vertexset.put(vid,
VertexFunc.reduce(next.vertex().vdata(), vertexset.get(vid)));
} else {
if (VertexFunc != null)
vertexset.put(vid,
VertexFunc.reduce(next.vertex().vdata(), VertexFunc.base()));
else
vertexset.put(vid, next.vertex().vdata());
}
} else {
// Apply reduce on edges, remove self and (or merge) duplicate edges.
// Optionally remove bidirectional edge.
Pair p = new Pair(next.edge().source(), next.edge().target());
// self edge
if (p.getL().equals(p.getR()))
continue;
// duplicate edge
if (edgeset.containsKey(p)) {
if (EdgeFunc != null)
edgeset.put(p,
EdgeFunc.reduce(next.edge().EdgeData(), edgeset.get(p)));
} else {
if (EdgeFunc != null)
edgeset.put(p,
EdgeFunc.reduce(next.edge().EdgeData(), EdgeFunc.base()));
else
edgeset.put(p, next.edge().EdgeData());
}
}
}
int nverts = 0;
int nedges = 0;
// Output vertex records
Iterator<Entry<Object, Writable>> vertexiter = vertexset.entrySet()
.iterator();
while (vertexiter.hasNext()) {
Entry e = vertexiter.next();
out.collect(new Text("vdata"), new Text(e.getKey().toString() + "\t"
+ e.getValue().toString()));
nverts++;
}
reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_VERTICES, nverts);
// Output edge records
Iterator<Entry<Pair<Object, Object>, Writable>> edgeiter = edgeset
.entrySet().iterator();
while (edgeiter.hasNext()) {
Entry<Pair<Object, Object>, Writable> e = edgeiter.next();
if (noBidir && edgeset.containsKey(e.getKey().reverse())) {
continue;
} else {
out.collect(new Text("edata"), new Text(e.getKey().getL() + "\t"
+ e.getKey().getR() + "\t" + e.getValue().toString()));
}
nedges++;
}
reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_EDGES, nedges);
}
protected boolean noBidir;
protected Class keyClass;
protected Class valClass;
protected Functional EdgeFunc;
protected Functional VertexFunc;
}