Package edu.cmu.graphchi.queries

Source Code of edu.cmu.graphchi.queries.VertexQuery$Shard

package edu.cmu.graphchi.queries;

import java.io.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.logging.Logger;

import edu.cmu.graphchi.ChiFilenames;
import edu.cmu.graphchi.ChiLogger;
import edu.cmu.graphchi.datablocks.BytesToValueConverter;
import edu.cmu.graphchi.engine.auxdata.DegreeData;
import edu.cmu.graphchi.engine.auxdata.VertexDegree;
import edu.cmu.graphchi.io.CompressedIO;
import edu.cmu.graphchi.shards.ShardIndex;
import edu.cmu.graphchi.vertexdata.VertexIdValue;
import ucar.unidata.io.RandomAccessFile;

/**
* Disk-based queries of out-edges of a vertex.
* <b>Note:</b> all vertex-ids in *internal* vertex id space.
* @author Aapo Kyrola
*/
public class VertexQuery {

    private static final int NTHREADS = 4;

    private static final Logger logger = ChiLogger.getLogger("vertexquery");
    private ArrayList<Shard> shards;
    private ExecutorService executor;

    public VertexQuery(String baseFilename, int numShards) throws IOException{
        shards = new ArrayList<Shard>();
        for(int i=0; i<numShards; i++) {
            shards.add(new Shard(baseFilename, i, numShards));
        }
        executor = Executors.newFixedThreadPool(NTHREADS);
    }


    /**
     * Queries all out neighbors of given vertices and returns a hashmap with (vertex-id, count),
     * where count is the number of queryAndCombine vertices who had the vertex-id as neighbor.
     * @param queryVertices
     * @return
     */
    public HashMap<Integer, Integer> queryOutNeighborsAndCombine(final Collection<Integer> queryVertices) {
        HashMap<Integer, Integer> results;
        List<Future<HashMap<Integer, Integer>>> queryFutures = new ArrayList<Future<HashMap<Integer, Integer>>>();

        /* Check which ones are in cache */
        long st = System.currentTimeMillis();
        HashMap<Integer, Integer> fromCache = new HashMap<Integer, Integer>(1000000);

        logger.info("Cached queries took: " + (System.currentTimeMillis() - st));

        /* Execute queries in parallel */
        for(Shard shard : shards) {
            final Shard _shard = shard;
            queryFutures.add(executor.submit(new Callable<HashMap<Integer, Integer>>() {
                @Override
                public HashMap<Integer, Integer> call() throws Exception {
                    HashMap<Integer, Integer> edges = _shard.queryAndCombine(queryVertices);
                    return edges;
                }
            }));
        }

        /* Combine
        */
        try {
            results = fromCache;

            for(int i=0; i < queryFutures.size(); i++) {
                HashMap<Integer, Integer> shardResults = queryFutures.get(i).get();

                for(Map.Entry<Integer, Integer> e : shardResults.entrySet()) {
                    if (results.containsKey(e.getKey())) {
                        results.put(e.getKey(), e.getValue() + results.get(e.getKey()));
                    } else {
                        results.put(e.getKey(), e.getValue());
                    }
                }
            }

        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException(e);
        }


        return  results;
    }

    /**
     * Queries out=neighbors for a given set of vertices.
     * @param queryVertices
     * @return
     */
    public HashMap<Integer, ArrayList<Integer>> queryOutNeighbors(final Collection<Integer> queryVertices) {
        HashMap<Integer,  ArrayList<Integer>> results;
        List<Future<HashMap<Integer, ArrayList<Integer>> >> queryFutures
                = new ArrayList<Future<HashMap<Integer, ArrayList<Integer>> >>();

        /* Check which ones are in cache */
        long st = System.currentTimeMillis();
        HashMap<Integer, ArrayList<Integer>> fromCache = new HashMap<Integer, ArrayList<Integer>>(1000);


        /* Execute queries in parallel */
        for(Shard shard : shards) {
            final Shard _shard = shard;
            queryFutures.add(executor.submit(new Callable<HashMap<Integer, ArrayList<Integer>>>() {
                @Override
                public HashMap<Integer, ArrayList<Integer>> call() throws Exception {
                    HashMap<Integer, ArrayList<Integer>>  edges = _shard.query(queryVertices);
                    return edges;
                }
            }));
        }

        /* Combine
        */
        try {
            results = fromCache;

            for(int i=0; i < queryFutures.size(); i++) {
                HashMap<Integer,  ArrayList<Integer>> shardResults = queryFutures.get(i).get();

                for(Map.Entry<Integer,  ArrayList<Integer>> e : shardResults.entrySet()) {
                    ArrayList<Integer> existing = results.get(e.getKey());
                    if (existing == null) {
                        results.put(e.getKey(), e.getValue());
                    } else {
                        existing.addAll(e.getValue());
                    }
                }
            }

        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException(e);
        }

        return  results;
    }


    /**
     * Return out-neighbors of given vertex
     * @param internalId
     * @return
     * @throws IOException
     */
    public HashSet<Integer> queryOutNeighbors(final int internalId) throws IOException  {
        HashSet<Integer> friends;
        List<Future<HashSet<Integer>>> queryFutures = new ArrayList<Future<HashSet<Integer>>>();

        /* Query from shards in parallel */
        for(Shard shard : shards) {
            final Shard _shard = shard;
            queryFutures.add(executor.submit(new Callable<HashSet<Integer>>() {
                @Override
                public HashSet<Integer> call() throws Exception {
                    return _shard.query(internalId);
                }
            }));
        }
        try {
            friends = queryFutures.get(0).get();

            for(int i=1; i < queryFutures.size(); i++) {
                HashSet<Integer> shardFriends = queryFutures.get(i).get();
                for(Integer fr : shardFriends) {
                    friends.add(fr);
                }
            }
        } catch (Exception err) {
            throw new RuntimeException(err);
        }
        return friends;
    }

    /**
     * Shutdowns the executor threads.
     */
    public void shutdown() {
        executor.shutdown();
    }


    static class Shard {
        RandomAccessFile adjFile;
        ShardIndex index;
        int shardNum;
        int numShards;
        String fileName;


        private Shard(String fileName, int shardNum, int numShards) throws IOException {
            this.shardNum = shardNum;
            this.numShards = numShards;
            this.fileName = fileName;
            File f = new File(ChiFilenames.getFilenameShardsAdj(fileName, shardNum, numShards));
            adjFile = new RandomAccessFile(f.getAbsolutePath(), "r", 64 * 1024);
            index = new ShardIndex(f);
        }

        /**
         * Query efficiently all vertices
         * @param queryIds
         * @return
         * @throws IOException
         */
        public HashMap<Integer, Integer> queryAndCombine(Collection<Integer> queryIds) throws IOException {
            /* Sort the ids because the index-entries will be in same order */
            ArrayList<Integer> sortedIds = new ArrayList<Integer>(queryIds);
            Collections.sort(sortedIds);

            ArrayList<ShardIndex.IndexEntry> indexEntries = new ArrayList<ShardIndex.IndexEntry>(sortedIds.size());
            for(Integer a : sortedIds) {
                indexEntries.add(index.lookup(a));
            }

            HashMap<Integer, Integer> results = new HashMap<Integer, Integer>(5000);
            ShardIndex.IndexEntry entry = null, lastEntry = null;
            int curvid=0, adjOffset=0;
            for(int qIdx=0; qIdx < sortedIds.size(); qIdx++) {
                entry = indexEntries.get(qIdx);
                int vertexId = sortedIds.get(qIdx);

                /* If consecutive vertices are in same indexed block, i.e their
                   index entries are the same, then we just continue.
                 */
                if (qIdx == 0 || !entry.equals(lastEntry))   {
                    curvid = entry.vertex;
                    adjOffset = entry.fileOffset;
                    adjFile.seek(adjOffset);
                }
                while(curvid <= vertexId) {
                    int n;
                    int ns = adjFile.readUnsignedByte();
                    assert(ns >= 0);
                    adjOffset++;

                    if (ns == 0) {
                        curvid++;
                        int nz = adjFile.readUnsignedByte();

                        adjOffset++;
                        assert(nz >= 0);
                        curvid += nz;
                        continue;
                    }

                    if (ns == 0xff) {
                        n = adjFile.readInt();
                        adjOffset += 4;
                    } else {
                        n = ns;
                    }

                    if (curvid == vertexId) {
                        while (--n >= 0) {
                            int target = adjFile.readInt();
                            Integer curCount = results.get(target);
                            if (curCount == null) {
                                results.put(target, 1);
                            } else {
                                results.put(target, 1 + curCount);
                            }
                        }
                    } else {
                        adjFile.skipBytes(n * 4);
                    }
                    curvid++;
                }
            }
            return results;
        }


        public HashMap<Integer, ArrayList<Integer>> query(Collection<Integer> queryIds) throws IOException {
            /* Sort the ids because the index-entries will be in same order */
            ArrayList<Integer> sortedIds = new ArrayList<Integer>(queryIds);
            Collections.sort(sortedIds);

            ArrayList<ShardIndex.IndexEntry> indexEntries = new ArrayList<ShardIndex.IndexEntry>(sortedIds.size());
            for(Integer a : sortedIds) {
                indexEntries.add(index.lookup(a));
            }

            HashMap<Integer, ArrayList<Integer>> results = new HashMap<Integer, ArrayList<Integer>>(queryIds.size());

            ShardIndex.IndexEntry entry = null, lastEntry = null;
            int curvid=0, adjOffset=0;
            for(int qIdx=0; qIdx < sortedIds.size(); qIdx++) {
                entry = indexEntries.get(qIdx);
                int vertexId = sortedIds.get(qIdx);

                 boolean found = false;

                /* If consecutive vertices are in same indexed block, i.e their
                   index entries are the same, then we just continue.
                 */
                if (qIdx == 0 || !entry.equals(lastEntry))   {
                    curvid = entry.vertex;
                    adjOffset = entry.fileOffset;
                    adjFile.seek(adjOffset);
                }
                while(curvid <= vertexId) {
                    int n;
                    int ns = adjFile.readUnsignedByte();
                    assert(ns >= 0);
                    adjOffset++;

                    if (ns == 0) {
                        curvid++;
                        int nz = adjFile.readUnsignedByte();

                        adjOffset++;
                        assert(nz >= 0);
                        curvid += nz;
                        continue;
                    }

                    if (ns == 0xff) {
                        n = adjFile.readInt();
                        adjOffset += 4;
                    } else {
                        n = ns;
                    }

                    if (curvid == vertexId) {
                        ArrayList<Integer> nbrs = new ArrayList<Integer>(n);
                        found = true;

                        while (--n >= 0) {
                            int target = adjFile.readInt();
                            nbrs.add(target);
                        }
                        results.put(vertexId, nbrs);
                    } else {
                        adjFile.skipBytes(n * 4);
                    }
                    curvid++;
                }
                if (!found) {
                    results.put(vertexId, new ArrayList<Integer>(0));
                }
            }
            return results;
        }

        public HashSet<Integer> query(int vertexId) throws IOException {
            return new HashSet<Integer>(query(Collections.singletonList(vertexId)).get(vertexId));
        }

        public <VT> List<VertexIdValue<VT>> queryWithValues(int vertexId, BytesToValueConverter<VT> conv) throws
                IOException {

            List<VertexIdValue<VT>> results = new ArrayList<VertexIdValue<VT>>();
            ShardIndex.IndexEntry entry = index.lookup(vertexId);

            int curvid = entry.vertex;
            int adjOffset = entry.fileOffset;
            int edgeOffset = entry.edgePointer;
            String edataShardName = ChiFilenames.getFilenameShardEdata(fileName, conv, shardNum, numShards);
            int fileSize = ChiFilenames.getShardEdataSize(edataShardName);


            adjFile.seek(adjOffset);

            /* Edge data block*/
            int blockSize = ChiFilenames.getBlocksize(conv.sizeOf());

            byte[] edgeDataBlock = new byte[blockSize];
            int curBlockId = (-1);
            byte[] tmp = new byte[conv.sizeOf()];

            while(curvid <= vertexId) {
                int n;
                int ns = adjFile.readUnsignedByte();
                assert(ns >= 0);
                adjOffset++;

                if (ns == 0) {
                    curvid++;
                    int nz = adjFile.readUnsignedByte();

                    adjOffset++;
                    assert(nz >= 0);
                    curvid += nz;
                    continue;
                }

                if (ns == 0xff) {
                    n = adjFile.readInt();
                    adjOffset += 4;
                } else {
                    n = ns;
                }

                if (curvid == vertexId) {
                    while (--n >= 0) {
                        int target = adjFile.readInt();

                        int blockId = edgeOffset * conv.sizeOf() / blockSize;
                        if (blockId != curBlockId) {
                            String blockFileName = ChiFilenames.getFilenameShardEdataBlock(
                                    edataShardName,
                                    blockId, blockSize);
                            curBlockId = blockId;
                            int len = Math.min(blockSize, fileSize - blockId * blockSize);
                            CompressedIO.readCompressed(new File(blockFileName), edgeDataBlock, len);
                        }
                        System.arraycopy(edgeDataBlock, (edgeOffset * conv.sizeOf()) % blockSize, tmp, 0, conv.sizeOf());
                        VT value = conv.getValue(tmp);
                        results.add(new VertexIdValue<VT>(target, value));
                        edgeOffset++;
                    }
                } else {
                    adjFile.skipBytes(n * 4);
                    edgeOffset += n;
                }
                curvid++;
            }
            return results;
        }


    }


}
TOP

Related Classes of edu.cmu.graphchi.queries.VertexQuery$Shard

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.