Package com.intel.hadoop.graphbuilder.idnormalize.mapreduce

Source Code of com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortEdgeMR

/* Copyright (C) 2012 Intel Corporation.
*     All rights reserved.
*          
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*       http://www.apache.org/licenses/LICENSE-2.0
*
*   Unless required by applicable law or agreed to in writing, software
*   distributed under the License is distributed on an "AS IS" BASIS,
*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*   See the License for the specific language governing permissions and
*   limitations under the License.
*
* For more about this software visit:
*      http://www.01.org/GraphBuilder
*/
package com.intel.hadoop.graphbuilder.idnormalize.mapreduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.log4j.Logger;

import com.intel.hadoop.graphbuilder.graph.Edge;
import com.intel.hadoop.graphbuilder.parser.FieldParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;

/**
* This class partitions the edge list input by the hash of the source vertex.
*
*/
public class SortEdgeMR {
  private static final Logger LOG = Logger.getLogger(SortEdgeMR.class);

  /**
   * This mapper class maps each edge to (h(edge.source), edge). The hash
   * function depends on {@code numChunks} passed through the {@code JobConf}.
   *
   * @author Haijie Gu
   */
  public static class SortEdgeMapper extends MapReduceBase implements
      Mapper<LongWritable, Text, IntWritable, Text> {

    @Override
    public void configure(JobConf conf) {
      super.configure(conf);
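      // numChunks controls how many hash buckets the edge list is split into.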
      numChunks = conf.getInt("numChunks", 256);
      try {
        this.graphparser = (GraphParser) Class.forName(conf.get("GraphParser"))
            .newInstance();
        this.vidparser = (FieldParser) Class.forName(conf.get("VidParser"))
            .newInstance();
        this.edataparser = (FieldParser) Class.forName(conf.get("EdataParser"))
            .newInstance();
      } catch (InstantiationException e) {
        throw new RuntimeException("Could not instantiate parser class", e);
      } catch (IllegalAccessException e) {
        throw new RuntimeException("Illegal access while creating parser", e);
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("Parser class not found", e);
      }
    }

    @Override
    public void map(LongWritable key, Text val,
        OutputCollector<IntWritable, Text> out, Reporter arg3)
        throws IOException {
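      // Parse the edge from the text line, then assign it to a bucket using a
      // non-negative hash of its source vertex id.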
      Edge e = graphparser.parseEdge(val.toString(), vidparser, edataparser);
      int hash = e.source().hashCode() % numChunks;
      if (hash < 0)
        hash += numChunks;
      out.collect(new IntWritable(hash), val);
    }

    private int numChunks;
    private GraphParser graphparser;
    private FieldParser vidparser;
    private FieldParser edataparser;
  }

  /**
   * This reducer class takes the (hashval, edge) pairs from the mapper and
   * outputs the edge text directly, dropping the hash key.
   *
   * @author Haijie Gu
   */
  public static class SortEdgeReducer extends MapReduceBase implements
      Reducer<IntWritable, Text, IntWritable, Text> {

    @Override
    public void reduce(IntWritable key, Iterator<Text> iter,
        OutputCollector<IntWritable, Text> out, Reporter reporter)
        throws IOException {
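      // Emit each edge line under a null key; TextOutputFormat then writes only
      // the edge text, so each reduce output file holds one hash bucket of edges.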
      while (iter.hasNext()) {
        out.collect(null, iter.next());
      }
    }
  }

  public SortEdgeMR(int numChunks, GraphParser graphparser,
      FieldParser vidparser, FieldParser edataparser) {
    this.numChunks = numChunks;
    this.graphparser = graphparser;
    this.vidparser = vidparser;
    this.edataparser = edataparser;
  }

  public void run(String inputpath, String outputpath) throws IOException {

    JobConf conf = new JobConf(SortEdgeMR.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SortEdgeMapper.class);
    conf.setReducerClass(SortEdgeReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

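    // Pass the chunk count and the parser class names to the mappers through the
    // job configuration; SortEdgeMapper re-instantiates the parsers in configure().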
    conf.setInt("numChunks", numChunks);
    conf.set("GraphParser", graphparser.getClass().getName());
    conf.set("VidParser", vidparser.getClass().getName());
    conf.set("EdataParser", edataparser.getClass().getName());

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    LOG.info("==== Job: Partition the input edges by hash(sourceid) =========");
    LOG.info("Input = " + inputpath);
    LOG.info("Output = " + outputpath);
    LOG.debug("numChunks = " + numChunks);
    LOG.debug("GraphParser = " + graphparser.getClass().getName());
    LOG.debug("VidParser = " + vidparser.getClass().getName());
    LOG.debug("EdataParser = " + edataparser.getClass().getName());
    LOG.info("===============================================================");

    JobClient.runJob(conf);
    LOG.info("=================== Done ====================================\n");
  }

  private int numChunks;
  private GraphParser graphparser;
  private FieldParser vidparser;
  private FieldParser edataparser;

}
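
Below is a minimal driver sketch showing how this job is typically wired up. It is illustrative only: the parser classes MyGraphParser, MyVidParser, and MyEdataParser are hypothetical stand-ins, not part of GraphBuilder; substitute the GraphParser and FieldParser implementations your pipeline actually uses.

import java.io.IOException;

import com.intel.hadoop.graphbuilder.idnormalize.mapreduce.SortEdgeMR;
import com.intel.hadoop.graphbuilder.parser.FieldParser;
import com.intel.hadoop.graphbuilder.parser.GraphParser;

public class SortEdgeDriver {
  public static void main(String[] args) throws IOException {
    // Hypothetical parser implementations; replace with the ones used by your pipeline.
    GraphParser graphparser = new MyGraphParser();
    FieldParser vidparser = new MyVidParser();
    FieldParser edataparser = new MyEdataParser();

    // Partition the raw edge list into 256 buckets keyed by hash(source vertex id).
    SortEdgeMR sortEdge = new SortEdgeMR(256, graphparser, vidparser, edataparser);

    // args[0] = input edge list path, args[1] = output directory
    sortEdge.run(args[0], args[1]);
  }
}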