Package org.apache.giraph.io.formats

Source Code of org.apache.giraph.io.formats.WattsStrogatzVertexInputFormat

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.io.formats;

import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.giraph.bsp.BspInputSplit;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.OutEdges;
import org.apache.giraph.edge.ReusableEdge;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.io.VertexInputFormat;
import org.apache.giraph.io.VertexReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
* Generates a random Watts-Strogatz graph by re-wiring a ring lattice.
* The resulting graph is a random graph with high clustering coefficient
* and low average path length. The graph has these two characteristics that
* are typical of small-world scale-free graphs, however the degree
* distribution is more similar to a random graph.
* It supports a seed for pseudo-random generation.
*/
public class WattsStrogatzVertexInputFormat extends
  VertexInputFormat<LongWritable, DoubleWritable, DoubleWritable> {
  /** The number of vertices in the graph */
  private static final String AGGREGATE_VERTICES =
      "wattsStrogatz.aggregateVertices";
  /** The number of outgoing edges per vertex */
  private static final String EDGES_PER_VERTEX =
      "wattsStrogatz.edgesPerVertex";
  /** The probability to re-wire an outgoing edge from the ring lattice */
  private static final String BETA =
      "wattsStrogatz.beta";
  /** The seed to generate random values for pseudo-randomness */
  private static final String SEED =
      "wattsStrogatz.seed";

  @Override
  public void checkInputSpecs(Configuration conf) { }

  @Override
  public final List<InputSplit> getSplits(final JobContext context,
      final int minSplitCountHint) throws IOException, InterruptedException {
    return PseudoRandomUtils.getSplits(minSplitCountHint);
  }

  @Override
  public VertexReader<LongWritable, DoubleWritable, DoubleWritable>
  createVertexReader(InputSplit split,
      TaskAttemptContext context) throws IOException {
    return new WattsStrogatzVertexReader();
  }

  /**
   * Vertex reader used to generate the graph
   */
  private static class WattsStrogatzVertexReader extends
    VertexReader<LongWritable, DoubleWritable, DoubleWritable> {
    /** the re-wiring probability */
    private float beta = 0;
    /** The total number of vertices */
    private long aggregateVertices = 0;
    /** The starting vertex id for this split */
    private long startingVertexId = -1;
    /** The number of vertices read so far */
    private long verticesRead = 0;
    /** The total number of vertices in the split */
    private long totalSplitVertices = -1;
    /** the total number of outgoing edges per vertex */
    private int edgesPerVertex = -1;
    /** The target ids of the outgoing edges */
    private final LongSet destVertices = new LongOpenHashSet();
    /** The random values generator */
    private Random rnd;
    /** The reusable edge */
    private ReusableEdge<LongWritable, DoubleWritable> reusableEdge = null;

    /**
     * Default constructor
     */
    public WattsStrogatzVertexReader() { }

    @Override
    public void initialize(InputSplit inputSplit,
        TaskAttemptContext context) throws IOException {
      beta = getConf().getFloat(
          BETA, 0.0f);
      aggregateVertices = getConf().getLong(
          AGGREGATE_VERTICES, 0);
      BspInputSplit bspInputSplit = (BspInputSplit) inputSplit;
      long extraVertices = aggregateVertices % bspInputSplit.getNumSplits();
      totalSplitVertices = aggregateVertices / bspInputSplit.getNumSplits();
      if (bspInputSplit.getSplitIndex() < extraVertices) {
        ++totalSplitVertices;
      }
      startingVertexId = bspInputSplit.getSplitIndex() *
          (aggregateVertices / bspInputSplit.getNumSplits()) +
          Math.min(bspInputSplit.getSplitIndex(), extraVertices);
      edgesPerVertex = getConf().getInt(
          EDGES_PER_VERTEX, 0);
      if (getConf().reuseEdgeObjects()) {
        reusableEdge = getConf().createReusableEdge();
      }
      int seed = getConf().getInt(SEED, -1);
      if (seed != -1) {
        rnd = new Random(seed);
      } else {
        rnd = new Random();
      }
    }

    @Override
    public boolean nextVertex() throws IOException, InterruptedException {
      return totalSplitVertices > verticesRead;
    }

    /**
     * Return a long value uniformly distributed between 0 (inclusive) and n.
     *
     * @param n the upper bound for the random long value
     * @return the random value
     */
    private long nextLong(long n) {
      long bits;
      long val;
      do {
        bits = (rnd.nextLong() << 1) >>> 1;
        val = bits % n;
      } while (bits - val + (n - 1) < 0L);
      return val;
    }

    /**
     * Get a destination id that is not already in the neighborhood and
     * that is not the vertex itself (no self-loops). For the second condition
     * it expects destVertices to contain the own id already.
     *
     * @return the destination vertex id
     */
    private long getRandomDestination() {
      long randomId;
      do {
        randomId = nextLong(aggregateVertices);
      } while (!destVertices.add(randomId));
      return randomId;
    }

    @Override
    public Vertex<LongWritable, DoubleWritable, DoubleWritable>
    getCurrentVertex() throws IOException, InterruptedException {
      Vertex<LongWritable, DoubleWritable, DoubleWritable> vertex =
          getConf().createVertex();
      long vertexId = startingVertexId + verticesRead;
      OutEdges<LongWritable, DoubleWritable> edges =
          getConf().createOutEdges();
      edges.initialize(edgesPerVertex);
      destVertices.clear();
      destVertices.add(vertexId);
      long destVertexId = vertexId - edgesPerVertex / 2;
      if (destVertexId < 0) {
        destVertexId = aggregateVertices + destVertexId;
      }
      for (int i = 0; i < edgesPerVertex + 1; ++i) {
        if (destVertexId != vertexId) {
          Edge<LongWritable, DoubleWritable> edge =
              (reusableEdge == null) ? getConf().createEdge() : reusableEdge;
          edge.getTargetVertexId().set(
              rnd.nextFloat() < beta ? getRandomDestination() : destVertexId);
          edge.getValue().set(rnd.nextDouble());
          edges.add(edge);
        }
        destVertexId = (destVertexId + 1) % aggregateVertices;
      }
      vertex.initialize(new LongWritable(vertexId),
          new DoubleWritable(rnd.nextDouble()), edges);
      ++verticesRead;
      return vertex;
    }

    @Override
    public void close() throws IOException { }

    @Override
    public float getProgress() throws IOException {
      return verticesRead * 100.0f / totalSplitVertices;
    }
  }
}
TOP

Related Classes of org.apache.giraph.io.formats.WattsStrogatzVertexInputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.