Package com.tinkerpop.gremlin.algorithm.generator

Source Code of com.tinkerpop.gremlin.algorithm.generator.CommunityGenerator$Builder

package com.tinkerpop.gremlin.algorithm.generator;

import com.tinkerpop.gremlin.structure.Edge;
import com.tinkerpop.gremlin.structure.Graph;
import com.tinkerpop.gremlin.structure.Vertex;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Supplier;

/**
* Generates a synthetic network with a community structure, that is, several densely connected
* sub-networks that are loosely connected with one another.
*
* @author Matthias Broecheler (me@matthiasb.com)
* @author Stephen Mallette (http://stephen.genoprime.com)
*/
public class CommunityGenerator extends AbstractGenerator {

    public static final double DEFAULT_CROSS_COMMUNITY_PERCENTAGE = 0.1;
    public static final int DEFAULT_NUMBER_OF_COMMUNITIES = 2;

    private final Distribution communitySize;
    private final Distribution edgeDegree;
    private final double crossCommunityPercentage;
    private final Iterable<Vertex> vertices;
    private final int expectedNumCommunities;
    private final int expectedNumEdges;

    private final Random random;

    private CommunityGenerator(final Graph g, final String label, final Optional<Consumer<Edge>> edgeProcessor,
                               final Optional<BiConsumer<Vertex, Map<String, Object>>> vertexProcessor,
                               final Supplier<Long> seedGenerator, final Distribution communitySize,
                               final Distribution edgeDegree, final double crossCommunityPercentage,
                               final Iterable<Vertex> vertices, final int expectedNumCommunities,
                               final int expectedNumEdges) {
        super(g, label, edgeProcessor, vertexProcessor, seedGenerator);
        random = new Random(this.seedSupplier.get());
        this.communitySize = communitySize;
        this.edgeDegree = edgeDegree;
        this.crossCommunityPercentage = crossCommunityPercentage;
        this.vertices = vertices;
        this.expectedNumCommunities = expectedNumCommunities;
        this.expectedNumEdges = expectedNumEdges;
    }

    /**
     * Generates a synthetic network for provided vertices in the given graph such that the provided expected number
     * of communities are generated with the specified expected number of edges.
     *
     * @return The actual number of edges generated. May be different from the expected number.
     */
    @Override
    public int generate() {
        int numVertices = SizableIterable.sizeOf(vertices);
        final Iterator<Vertex> iter = vertices.iterator();
        final ArrayList<ArrayList<Vertex>> communities = new ArrayList<>(expectedNumCommunities);
        final Distribution communityDist = communitySize.initialize(expectedNumCommunities, numVertices);
        final Map<String, Object> context = new HashMap<>();
        while (iter.hasNext()) {
            final int nextSize = communityDist.nextValue(random);
            context.put("communityIndex", communities.size());
            final ArrayList<Vertex> community = new ArrayList<>(nextSize);
            for (int i = 0; i < nextSize && iter.hasNext(); i++) {
                community.add(processVertex(iter.next(), context));
            }
            if (!community.isEmpty()) communities.add(community);
        }

        final double inCommunityPercentage = 1.0 - crossCommunityPercentage;
        final Distribution degreeDist = edgeDegree.initialize(numVertices, expectedNumEdges);
        if (crossCommunityPercentage > 0 && communities.size() < 2)
            throw new IllegalArgumentException("Cannot have cross links with only one community");
        int addedEdges = 0;

        //System.out.println("Generating links on communities: "+communities.size());

        for (ArrayList<Vertex> community : communities) {
            for (Vertex v : community) {
                final int randomDegree = degreeDist.nextValue(random);
                final int degree = Math.min(randomDegree, (int) Math.ceil((community.size() - 1) / inCommunityPercentage) - 1);
                final Set<Vertex> inlinks = new HashSet<>();
                final Set<Vertex> outlinks = new HashSet<>();
                for (int i = 0; i < degree; i++) {
                    Vertex selected = null;
                    if (random.nextDouble() < crossCommunityPercentage || (community.size() - 1 <= inlinks.size())) {
                        //Cross community
                        int tries = 0;
                        ArrayList<Vertex> othercomm = null;

                        // this limit on the number of tries prevents infinite loop where the selected vertex to
                        // link to doesn't exist given the nature and structure of the graph.
                        while (null == selected && tries < 100) {
                            // choose another community to connect to and make sure it's not in the current
                            // community of the current vertex
                            while (null == othercomm) {
                                othercomm = communities.get(random.nextInt(communities.size()));
                                if (othercomm.equals(community)) othercomm = null;
                            }
                            selected = othercomm.get(random.nextInt(othercomm.size()));
                            if (outlinks.contains(selected)) selected = null;

                            tries++;
                        }

                        // if tries expires then the value of selected is null in which case it should not be added.
                        if (selected != null) outlinks.add(selected);
                    } else {
                        //In community
                        int tries = 0;
                        while (selected == null && tries < 100) {
                            selected = community.get(random.nextInt(community.size()));
                            if (v.equals(selected) || inlinks.contains(selected)) selected = null;
                            tries++;
                        }

                        if (selected != null) inlinks.add(selected);
                    }

                    // only add an edge if the vertex was actually selected.
                    if (selected != null) {
                        addEdge(v, selected);
                        addedEdges++;
                    }
                }
            }
        }
        return addedEdges;
    }

    public static Builder build(final Graph g) {
        return new Builder(g);
    }

    public static class Builder extends AbstractGeneratorBuilder<Builder> {
        private final Graph g;
        private Distribution communitySize = null;
        private Distribution edgeDegree = null;
        private double crossCommunityPercentage = DEFAULT_CROSS_COMMUNITY_PERCENTAGE;
        private Iterable<Vertex> vertices;
        private int expectedNumCommunities = DEFAULT_NUMBER_OF_COMMUNITIES;
        private int expectedNumEdges;

        private Builder(final Graph g) {
            super(Builder.class);
            this.g = g;
            final List<Vertex> allVertices = g.V().toList();
            this.vertices = allVertices;
            this.expectedNumEdges = allVertices.size() * 2;
        }

        public Builder verticesToGenerateEdgesFor(final Iterable<Vertex> vertices) {
            this.vertices = vertices;
            return this;
        }

        public Builder expectedNumCommunities(final int expectedNumCommunities) {
            this.expectedNumCommunities = expectedNumCommunities;
            return this;
        }

        public Builder expectedNumEdges(final int expectedNumEdges) {
            this.expectedNumEdges = expectedNumEdges;
            return this;
        }

        /**
         * Sets the distribution to be used to generate the sizes of communities.
         */
        public Builder communityDistribution(final Distribution community) {
            this.communitySize = community;
            return this;
        }

        /**
         * Sets the distribution to be used to generate the out-degrees of vertices.
         */
        public Builder degreeDistribution(final Distribution degree) {
            this.edgeDegree = degree;
            return this;
        }

        /**
         * Sets the percentage of edges that cross a community, i.e. connect a vertex to a vertex in
         * another community. The lower this value, the higher the modularity of the generated communities.
         *
         * @param percentage Percentage of community crossing edges. Must be in [0,1]
         */
        public Builder crossCommunityPercentage(final double percentage) {
            if (percentage < 0.0 || percentage > 1.0)
                throw new IllegalArgumentException("Percentage must be between 0 and 1");
            this.crossCommunityPercentage = percentage;
            return this;
        }

        public CommunityGenerator create() {
            if (null == communitySize)
                throw new IllegalStateException("Need to initialize community size distribution");
            if (null == edgeDegree) throw new IllegalStateException("Need to initialize degree distribution");
            return new CommunityGenerator(this.g, this.label, this.edgeProcessor, this.vertexProcessor, this.seedSupplier,
                    this.communitySize, this.edgeDegree, crossCommunityPercentage, vertices,
                    expectedNumCommunities, expectedNumEdges);
        }
    }
}
TOP

Related Classes of com.tinkerpop.gremlin.algorithm.generator.CommunityGenerator$Builder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.