/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.recordJobs.graph;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.functions.MapFunction;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.test.recordJobs.graph.triangleEnumUtil.EdgeInputFormat;
import eu.stratosphere.test.recordJobs.graph.triangleEnumUtil.EdgeWithDegreesOutputFormat;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;
public class ComputeEdgeDegrees implements Program, ProgramDescription {
private static final long serialVersionUID = 1L;
// --------------------------------------------------------------------------------------------
// Vertex Degree Computation
// --------------------------------------------------------------------------------------------
/**
 * Mirrors every edge so that it is visible from both of its endpoints: for an
 * input edge (u,v) the function emits the reversed edge (v,u) followed by the
 * original edge (u,v).
 */
public static final class ProjectEdge extends MapFunction implements Serializable {
    private static final long serialVersionUID = 1L;

    // reused output record that carries the reversed edge
    private final Record copy = new Record();

    @Override
    public void map(Record record, Collector<Record> out) throws Exception {
        final IntValue target = record.getField(1, IntValue.class);
        final IntValue source = record.getField(0, IntValue.class);

        // emit the flipped edge (v,u) first ...
        this.copy.setField(0, target);
        this.copy.setField(1, source);
        out.collect(this.copy);

        // ... then forward the incoming edge (u,v) as it is
        out.collect(record);
    }
}
/**
 * Creates for all records in the group a record of the form (v1, v2, c1, c2), where
 * v1 is the numerically smaller vertex id. The count that belongs to the vertex that
 * was the grouping key holds the number of distinct neighbors of that vertex; the
 * other count is zero. This reducer also eliminates duplicate edges.
 */
public static final class CountEdges extends ReduceFunction implements Serializable {
    private static final long serialVersionUID = 1L;

    // reused output objects
    private final Record result = new Record();
    private final IntValue firstVertex = new IntValue();
    private final IntValue secondVertex = new IntValue();
    private final IntValue firstCount = new IntValue();
    private final IntValue secondCount = new IntValue();

    // buffer for the neighbor ids of one group; grown on demand and kept across calls
    private int[] vals = new int[1024];

    /**
     * Collects the neighbor ids (field 1) of all records in the group, removes duplicates
     * and emits one (v1, v2, c1, c2) record per distinct neighbor.
     *
     * @param records the records of one group; field 0 is the key vertex, field 1 a neighbor
     * @param out collector that receives the resulting records
     */
    @Override
    public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
        int[] vals = this.vals;
        int len = 0;
        int key = 0;

        // collect all values
        while (records.hasNext()) {
            final Record rec = records.next();
            final int id = rec.getField(1, IntValue.class).getValue();

            // the key is identical for the whole group, so read it from the first record only;
            // no sentinel value is used, hence every int (including -1) is a valid vertex id
            if (len == 0) {
                key = rec.getField(0, IntValue.class).getValue();
            }

            if (len == vals.length) {
                vals = Arrays.copyOf(vals, vals.length * 2);
                this.vals = vals;
            }
            vals[len++] = id;
        }

        // sort the values and remove duplicates
        len = sortAndUniquify(vals, len);

        // create the records such that the vertex with the lower id is always the first;
        // both vertices carry a count, which is zero for the non-key vertex
        for (int i = 0; i < len; i++) {
            final int e2 = vals[i];
            if (key <= e2) {
                firstVertex.setValue(key);
                secondVertex.setValue(e2);
                firstCount.setValue(len);
                secondCount.setValue(0);
            } else {
                firstVertex.setValue(e2);
                secondVertex.setValue(key);
                firstCount.setValue(0);
                secondCount.setValue(len);
            }
            this.result.setField(0, firstVertex);
            this.result.setField(1, secondVertex);
            this.result.setField(2, firstCount);
            this.result.setField(3, secondCount);
            out.collect(result);
        }
    }

    /**
     * Sorts the first {@code len} entries of {@code vals} and compacts them in place so that
     * each distinct value occurs exactly once.
     *
     * @return the number of distinct values, which then occupy the front of the array
     */
    private static int sortAndUniquify(int[] vals, int len) {
        Arrays.sort(vals, 0, len);
        int unique = 0;
        for (int i = 0; i < len; i++) {
            // compare against the last value that was kept instead of against a sentinel
            if (unique == 0 || vals[i] != vals[unique - 1]) {
                vals[unique++] = vals[i];
            }
        }
        return unique;
    }
}
/**
 * Takes the two separate edge entries (v1, v2, c1, 0) and (v1, v2, 0, c2)
 * and creates an entry (v1, v2, c1, c2).
 */
public static final class JoinCountsAndUniquify extends ReduceFunction implements Serializable {
    private static final long serialVersionUID = 1L;

    // reused objects for the merged counts
    private final IntValue count1 = new IntValue();
    private final IntValue count2 = new IntValue();

    /**
     * Sums up the counts (fields 2 and 3) of all records in the group and emits the last
     * record of the group with both counts filled in.
     *
     * @param records the records of one group, i.e. all entries for one edge (v1, v2)
     * @param out collector that receives the merged record
     * @throws RuntimeException if the group is empty, does not consist of exactly two
     *         records, or if one of the summed counts is zero
     */
    @Override
    public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
        Record rec = null;
        int c1 = 0, c2 = 0;
        int numValues = 0;

        while (records.hasNext()) {
            rec = records.next();
            final int f1 = rec.getField(2, IntValue.class).getValue();
            final int f2 = rec.getField(3, IntValue.class).getValue();
            c1 += f1;
            c2 += f2;
            numValues++;
        }

        // without any record there are no keys to report, so fail with a dedicated message
        // rather than dereferencing a null record below
        if (rec == null) {
            throw new RuntimeException("JoinCountsAndUniquify Problem: received a group without records");
        }

        if (numValues != 2 || c1 == 0 || c2 == 0) {
            throw new RuntimeException("JoinCountsAndUniquify Problem: key1=" +
                rec.getField(0, IntValue.class).getValue() + ", key2=" +
                rec.getField(1, IntValue.class).getValue() +
                ", values=" + numValues + ", c1=" + c1 + ", c2=" + c2);
        }

        count1.setValue(c1);
        count2.setValue(c2);
        rec.setField(2, count1);
        rec.setField(3, count2);
        out.collect(rec);
    }
}
// --------------------------------------------------------------------------------------------
// Plan Assembly
// --------------------------------------------------------------------------------------------
/**
 * Assembles the plan that normalizes the edges and attaches the vertex degrees to them:
 * edge source, edge projection (map), per-vertex edge counting (reduce on field 0),
 * joining of the two counts per edge (reduce on fields 0 and 1) and the file sink.
 *
 * @param args optional positional parameters: number of sub tasks, edge input path,
 *        output path, and the vertex id delimiter given as a numeric character code;
 *        missing parameters default to 1, "", "" and ','
 * @return the assembled plan
 */
@Override
public Plan getPlan(String... args) {
    // parse job parameters, falling back to the defaults for everything not supplied
    int numSubTasks = 1;
    if (args.length > 0) {
        numSubTasks = Integer.parseInt(args[0]);
    }

    String edgeInput = "";
    if (args.length > 1) {
        edgeInput = args[1];
    }

    String output = "";
    if (args.length > 2) {
        output = args[2];
    }

    char delimiter = ',';
    if (args.length > 3) {
        delimiter = (char) Integer.parseInt(args[3]);
    }

    // source reading the edges
    FileDataSource edgeSource = new FileDataSource(new EdgeInputFormat(), edgeInput, "Input Edges");
    edgeSource.setParameter(EdgeInputFormat.ID_DELIMITER_CHAR, delimiter);

    // emit every edge in both directions
    MapOperator mirroredEdges = MapOperator.builder(new ProjectEdge())
        .input(edgeSource)
        .name("Project Edge")
        .build();

    // count the edges per vertex
    ReduceOperator degreeCounter = ReduceOperator.builder(new CountEdges(), IntValue.class, 0)
        .input(mirroredEdges)
        .name("Count Edges for Vertex")
        .build();

    // merge the two partial count records of each edge
    ReduceOperator degreeJoiner = ReduceOperator.builder(new JoinCountsAndUniquify())
        .keyField(IntValue.class, 0)
        .keyField(IntValue.class, 1)
        .input(degreeCounter)
        .name("Join Counts")
        .build();

    FileDataSink sink = new FileDataSink(new EdgeWithDegreesOutputFormat(), output, degreeJoiner,
        "Unique Edges With Degrees");

    Plan plan = new Plan(sink, "Normalize Edges and compute Vertex Degrees");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
/**
 * Returns the usage string listing the positional parameters accepted by
 * {@code getPlan}. The delimiter is expected as a numeric character code, because
 * it is parsed as an integer and then cast to a char.
 */
@Override
public String getDescription() {
    return "Parameters: [numSubTasks] [input file] [output file] [vertex delimiter char code]";
}
}