Package com.intel.hadoop.graphbuilder.preprocess.mapreduce

Source Code of com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphReducer

/* Copyright (C) 2012 Intel Corporation.
*     All rights reserved.
*          
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*       http://www.apache.org/licenses/LICENSE-2.0
*
*   Unless required by applicable law or agreed to in writing, software
*   distributed under the License is distributed on an "AS IS" BASIS,
*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*   See the License for the specific language governing permissions and
*   limitations under the License.
*
* For more about this software visit:
*      http://www.01.org/GraphBuilder
*/
package com.intel.hadoop.graphbuilder.preprocess.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import com.intel.hadoop.graphbuilder.preprocess.functional.Functional;
import com.intel.hadoop.graphbuilder.preprocess.mapreduce.keyvalue.VertexEdgeUnionType;
import com.intel.hadoop.graphbuilder.util.Pair;

/**
* The Reducer class applies user defined {@code Functional}s to reduce
* duplicate edges and vertices. If no such {@code Functional} is provide, it
* outputs the first instance and discards the rest with the same identifier. It
* also discards self edges: v - > v. An option for discard bidirectional edge
* is provided by {@code cleanBidirectionalEdge(boolean)}.
* <p>
* Output directory structure:
* <ul>
* <li>$outputdir/edata contains edge data output</li>
* <li>$outputdir/vdata contains vertex data output</li>
* </ul>
* </p>
*
*/
public class CreateGraphReducer extends MapReduceBase implements
    Reducer<IntWritable, VertexEdgeUnionType, Text, Text> {
  public static enum CREATE_GRAPH_COUNTER {
    NUM_VERTICES, NUM_EDGES
  };

  @Override
  public void configure(JobConf job) {
    super.configure(job);
    this.valClass = job.getMapOutputValueClass();
    this.noBidir = job.getBoolean("noBidir", false);
    try {
      if (job.get("EdgeFunc") != null) {
        this.EdgeFunc = (Functional) Class.forName(job.get("EdgeFunc"))
            .newInstance();
        this.EdgeFunc.configure(job);
      }
      if (job.get("VertexFunc") != null) {
        this.VertexFunc = (Functional) Class.forName(job.get("VertexFunc"))
            .newInstance();
        this.VertexFunc.configure(job);
      }
    } catch (InstantiationException e) {
      e.printStackTrace();
    } catch (IllegalAccessException e) {
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  @Override
  public void reduce(IntWritable key, Iterator<VertexEdgeUnionType> iter,
      OutputCollector<Text, Text> out, Reporter reporter) throws IOException {

    VertexEdgeUnionType next;
    HashMap<Pair<Object, Object>, Writable> edgeset = new HashMap();
    HashMap<Object, Writable> vertexset = new HashMap();

    while (iter.hasNext()) {
      next = iter.next();
      // Apply reduce on vertex
      if (next.flag() == VertexEdgeUnionType.VERTEXVAL) {
        Object vid = next.vertex().vid();
        if (vertexset.containsKey(vid)) { // duplicate vertex
          if (VertexFunc != null)
            vertexset.put(vid,
                VertexFunc.reduce(next.vertex().vdata(), vertexset.get(vid)));
        } else {
          if (VertexFunc != null)
            vertexset.put(vid,
                VertexFunc.reduce(next.vertex().vdata(), VertexFunc.base()));
          else
            vertexset.put(vid, next.vertex().vdata());
        }
      } else {
        // Apply reduce on edges, remove self and (or merge) duplicate edges.
        // Optionally remove bidirectional edge.
        Pair p = new Pair(next.edge().source(), next.edge().target());

        // self edge
        if (p.getL().equals(p.getR()))
          continue;

        // duplicate edge
        if (edgeset.containsKey(p)) {
          if (EdgeFunc != null)
            edgeset.put(p,
                EdgeFunc.reduce(next.edge().EdgeData(), edgeset.get(p)));
        } else {
          if (EdgeFunc != null)
            edgeset.put(p,
                EdgeFunc.reduce(next.edge().EdgeData(), EdgeFunc.base()));
          else
            edgeset.put(p, next.edge().EdgeData());
        }
      }
    }

    int nverts = 0;
    int nedges = 0;

    // Output vertex records
    Iterator<Entry<Object, Writable>> vertexiter = vertexset.entrySet()
        .iterator();
    while (vertexiter.hasNext()) {
      Entry e = vertexiter.next();
      out.collect(new Text("vdata"), new Text(e.getKey().toString() + "\t"
          + e.getValue().toString()));
      nverts++;
    }
    reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_VERTICES, nverts);

    // Output edge records
    Iterator<Entry<Pair<Object, Object>, Writable>> edgeiter = edgeset
        .entrySet().iterator();
    while (edgeiter.hasNext()) {
      Entry<Pair<Object, Object>, Writable> e = edgeiter.next();
      if (noBidir && edgeset.containsKey(e.getKey().reverse())) {
        continue;
      } else {
        out.collect(new Text("edata"), new Text(e.getKey().getL() + "\t"
            + e.getKey().getR() + "\t" + e.getValue().toString()));
      }
      nedges++;
    }
    reporter.incrCounter(CREATE_GRAPH_COUNTER.NUM_EDGES, nedges);
  }

  protected boolean noBidir;
  protected Class keyClass;
  protected Class valClass;
  protected Functional EdgeFunc;
  protected Functional VertexFunc;
}
TOP

Related Classes of com.intel.hadoop.graphbuilder.preprocess.mapreduce.CreateGraphReducer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.