Package org.grouplens.lenskit.data.snapshot

Source Code of org.grouplens.lenskit.data.snapshot.PackedPreferenceDataBuilder

/*
* LensKit, an open source recommender systems toolkit.
* Copyright 2010-2014 LensKit Contributors.  See CONTRIBUTORS.md.
* Work on LensKit has been funded by the National Science Foundation under
* grants IIS 05-34939, 08-08692, 08-12148, and 10-17697.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.grouplens.lenskit.data.snapshot;

import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntHeapPriorityQueue;
import org.apache.commons.lang3.builder.Builder;
import org.grouplens.lenskit.data.pref.Preference;
import org.grouplens.lenskit.indexes.MutableIdIndexMapping;

import java.util.Arrays;
import java.util.Random;

import static org.grouplens.lenskit.data.snapshot.PackedPreferenceData.*;

/**
* Build a packed rating data structure.
*
* @author <a href="http://www.grouplens.org">GroupLens Research</a>
* @since 0.11
*/
class PackedPreferenceDataBuilder implements Builder<PackedPreferenceData> {
    static final int INITIAL_CHUNK_COUNT = 512;

    private int[][] users;
    private int[][] items;
    private double[][] values;
    private int nprefs = 0;

    private MutableIdIndexMapping itemIndex;
    private MutableIdIndexMapping userIndex;
   
    private IntHeapPriorityQueue freeList;

    public PackedPreferenceDataBuilder() {
        itemIndex = new MutableIdIndexMapping();
        userIndex = new MutableIdIndexMapping();
        freeList = new IntHeapPriorityQueue();
        allocate(INITIAL_CHUNK_COUNT);
    }

    /**
     * Allocate (or re-allocate) the internal packed storage.
     *
     * @param nchunks The number of chunks to have space for.
     */
    private void allocate(int nchunks) {
        if (users != null && nchunks == users.length) {
            return;
        }
        int[][] utmp = new int[nchunks][];
        int[][] itmp = new int[nchunks][];
        double[][] vtmp = new double[nchunks][];
        if (users != null) {
            assert items != null;
            assert values != null;
            int n = Math.min(nchunks, users.length);
            System.arraycopy(users, 0, utmp, 0, n);
            System.arraycopy(items, 0, itmp, 0, n);
            System.arraycopy(values, 0, vtmp, 0, n);
        }
        users = utmp;
        items = itmp;
        values = vtmp;
    }

    /**
     * Get the size of this data pack.
     *
     * @return The number of preferences in the data pack.
     */
    public int size() {
        return nprefs - freeList.size();
    }

    /**
     * Internal set method that takes individual indexes.
     *
     * @param ci   The chunk index.
     * @param ei   The element index.
     * @param pref The preference data to set.
     */
    private void set(int ci, int ei, Preference pref) {
        users[ci][ei] = userIndex.internId(pref.getUserId());
        items[ci][ei] = itemIndex.internId(pref.getItemId());
        values[ci][ei] = pref.getValue();
    }

    /**
     * Set the preference data at a particular index.
     *
     * @param idx  The index.
     * @param pref The preference data.
     */
    public void set(int idx, Preference pref) {
        Preconditions.checkElementIndex(idx, nprefs);
        final int ci = chunk(idx);
        final int ei = element(idx);
        set(ci, ei, pref);
    }

    /**
     * Add a preference to the pack.
     *
     * @param pref The preference to add
     * @return The index of the newly-added preference.
     */
    public int add(Preference pref) {
        assert users != null;
        assert items != null;
        assert values != null;
        assert users.length == items.length;
        assert values.length == users.length;

        final int idx = freeList.isEmpty() ? nprefs : freeList.dequeueInt();
        if (idx == Integer.MAX_VALUE) {
            throw new RuntimeException("data pack full");
        }
        final int ci = chunk(idx);
        final int ei = element(idx);

        if (ci >= users.length) {
            // must resize
            allocate(users.length * 2);
        }
        if (users[ci] == null) {
            assert items[ci] == null;
            assert values[ci] == null;
            users[ci] = new int[CHUNK_SIZE];
            items[ci] = new int[CHUNK_SIZE];
            values[ci] = new double[CHUNK_SIZE];
        }

        set(ci, ei, pref);
        if (idx == nprefs) {
            nprefs += 1;
        }
        return idx;
    }

    /**
     * Release the specified index. The index can then be re-used by a later call
     * to {@link #add(Preference)}.
     *
     * @param idx The index to remove.
     */
    public void release(int idx) {
        Preconditions.checkElementIndex(idx, nprefs);
        freeList.enqueue(idx);
    }

    private PackedPreferenceData internalBuild() {
        return new PackedPreferenceData(users, items, values, nprefs,
                                        userIndex.immutableCopy(),
                                        itemIndex.immutableCopy());
    }

    private void repack() {
        assert users.length == items.length;
        assert items.length == values.length;

        // if there are no free items, no reallocation is needed
        if (freeList.isEmpty()) {
            return;
        }

        // create an internal PRD so we can use preferences
        PackedPreferenceData tmpPack = internalBuild();
        // create an internal flyweight
        // after this point, this method does no allocation
        PackedPreferenceData.IndirectPreference pref = tmpPack.preference(-1);

        /*
         * we have to do this backwards so we don't copy free slots from the
         * end of the arrays. So, we first create an array of free indices
         * in reverse order (the greatest index is first).
         */
        int[] fidxes = new int[freeList.size()];
        for (int i = freeList.size() - 1; !freeList.isEmpty(); i--) {
            assert i >= 0;
            fidxes[i] = freeList.dequeueInt();
        }

        int n = nprefs;

        /*
         * We then start with the last free index, and copy into it if it
         * isn't at the end of the array.
         */
        for (int i : fidxes) {
            final int lasti = n - 1;    // the index of the last preference
            assert i <= lasti;          // only way for this to fail is duplicate fidxes
            if (i < lasti) {
                // if it is not the last element, move the last to it
                pref.setIndex(lasti);
                set(i, pref);
            }
            // finally, we decrease our count by 1
            n -= 1;
        }

        // Now that we have used all free indexes, we can update nprefs
        assert nprefs == n + fidxes.length;
        assert freeList.isEmpty();
        nprefs = n;
    }

    private void swap(int i, int j) {
        if (i == j) {
            return;
        }

        int ci = chunk(i);
        int ei = element(i);
        int cj = chunk(j);
        int ej = element(j);

        int tidx;
        double tv;

        tidx = users[ci][ei];
        users[ci][ei] = users[cj][ej];
        users[cj][ej] = tidx;

        tidx = items[ci][ei];
        items[ci][ei] = items[cj][ej];
        items[cj][ej] = tidx;

        tv = values[ci][ei];
        values[ci][ei] = values[cj][ej];
        values[cj][ej] = tv;
    }

    /**
     * Shuffle the data. This uses a Fischer-Yates shuffle to uniformly permute
     * (subject to limitations of the PRNG) the data. The arrays are repacked
     * to eliminate free slots prior to shuffling.
     *
     * @param rng The random number generator to use.
     */
    public void shuffle(Random rng) {
        repack();
        // do a reverse Fisher-Yates shuffle on the arrays
        final int np = nprefs;
        for (int i = 0; i < np - 1; i++) {
            // swap w/ j s.t. i <= j < end
            int j = i + rng.nextInt(np - i);
            assert j >= i;
            assert j < np;
            swap(i, j);
        }
    }

    /**
     * Trim the arrays.
     */
    private void trim() {
        // shortcut out if nprefs == 0
        if (nprefs == 0) {
            allocate(0);
            return;
        }

        // how many chunks? and how many in last?
        // but be careful to avoid integer overflow
        int nchunks = nprefs >> CHUNK_SHIFT;
        int nlast = nprefs & CHUNK_MASK;
        if (nlast == 0) {
            // we we can't have 0 - we must have CHUNK_SIZE
            nlast = CHUNK_SIZE;
        } else {
            // then we undercounted the number of chunks
            nchunks += 1;
        }
        assert nchunks * CHUNK_SIZE - CHUNK_SIZE + nlast == nprefs;

        // trim chunks
        allocate(nchunks);

        // trim last element
        if (nlast < CHUNK_SIZE) {
            final int lci = nchunks - 1;
            users[lci] = Arrays.copyOf(users[lci], nlast);
            items[lci] = Arrays.copyOf(items[lci], nlast);
            values[lci] = Arrays.copyOf(values[lci], nlast);
        }
    }

    /**
     * Build the packed rating data. This first moves records from the end to fill
     * any released but not reused slots.
     *
     * @return The packed rating data structure.
     */
    @Override
    public PackedPreferenceData build() {
        repack();
        trim();
        return internalBuild();
    }
}
TOP

Related Classes of org.grouplens.lenskit.data.snapshot.PackedPreferenceDataBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.