Package net.agkn.hll

Source Code of net.agkn.hll.ExplicitHLLTest

package net.agkn.hll;

/*
* Copyright 2013 Aggregate Knowledge, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import static org.powermock.reflect.Whitebox.getInternalState;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;

import java.util.HashSet;
import java.util.Random;

import net.agkn.hll.serialization.ISchemaVersion;
import net.agkn.hll.serialization.SerializationUtil;
import org.testng.annotations.Test;

/**
* Tests {@link HLL} of type {@link HLLType#EXPLICIT}.
*
* @author timon
*/
public class ExplicitHLLTest {
    /**
     * Tests basic set semantics of {@link HLL#addRaw(long)}.
     */
    @Test
    public void addBasicTest() {
        { // Adding a single positive value to an empty set should work.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(1L/*positive*/);
            assertEquals(hll.cardinality(), 1L);
        }
        { // Adding a single negative value to an empty set should work.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(-1L/*negative*/);
            assertEquals(hll.cardinality(), 1L);
        }
        { // Adding a duplicate value to a set should be a no-op.
            final HLL hll = newHLL(128/*arbitrary*/);
            hll.addRaw(1L/*positive*/);
            assertEquals(hll.cardinality(), 1L/*arbitrary*/);
            assertEquals(hll.cardinality(), 1L/*dupe*/);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link HLL#union(HLL)}.
     */
    @Test
    public void unionTest() {
        {// Unioning two distinct sets should work
            final HLL hllA = newHLL(128/*arbitrary*/);
            final HLL hllB = newHLL(128/*arbitrary*/);
            hllA.addRaw(1L);
            hllA.addRaw(2L);
            hllB.addRaw(3L);

            hllA.union(hllB);
            assertEquals(hllA.cardinality(), 3);
        }
        {// Unioning two sets whose union doesn't exceed the cardinality cap should not promote
            final HLL hllA = newHLL(128/*arbitrary*/);
            final HLL hllB = newHLL(128/*arbitrary*/);
            hllA.addRaw(1L);
            hllA.addRaw(2L);
            hllB.addRaw(1L);

            hllA.union(hllB);
            assertEquals(hllA.cardinality(), 2);
        }
        {// unioning two sets whose union exceeds the cardinality cap should promote
            final HLL hllA = newHLL(128/*arbitrary*/);
            final HLL hllB = newHLL(128/*arbitrary*/);

            // fill up sets to explicitThreshold
            for(long i=0; i<128/*explicitThreshold*/; i++) {
                hllA.addRaw(i);
                hllB.addRaw(i + 128);
            }

            hllA.union(hllB);
            assertEquals(hllA.getType(), HLLType.SPARSE);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link HLL#clear()}
     */
    @Test
    public void clearTest() {
        final HLL hll = newHLL(128/*arbitrary*/);
        hll.addRaw(1L);
        assertEquals(hll.cardinality(), 1L);
        hll.clear();
        assertEquals(hll.cardinality(), 0L);
    }

    // ------------------------------------------------------------------------
    /**
     * Tests {@link LongSetSlab#toBytes(int, ISchemaVersion)} and
     * {@link LongSetSlab#fromBytes(int, byte[], ISchemaVersion)}.
     */
    @Test
    public void toFromBytesTest() {
        final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
        final HLLType type = HLLType.EXPLICIT;
        final int padding = schemaVersion.paddingBytes(type);
        final int bytesPerWord = 8;

        {// Should work on an empty set
            final HLL hll = newHLL(128/*arbitrary*/);

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding/*no elements, just padding*/);

            final HLL inHLL = HLL.fromBytes(bytes);

            assertElementsEqual(hll, inHLL);
        }
        {// Should work on a partially filled set
            final HLL hll = newHLL(128/*arbitrary*/);

            for(int i=0; i<3; i++) {
                hll.addRaw(i);
            }

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding + (bytesPerWord * 3/*elements*/));

            final HLL inHLL = HLL.fromBytes(bytes);

            assertElementsEqual(hll, inHLL);
        }
        {// Should work on a full set
            final int explicitThreshold = 128;
            final HLL hll = newHLL(explicitThreshold);

            for(int i=0; i<explicitThreshold; i++) {
                hll.addRaw(27 + i/*arbitrary*/);
            }

            final byte[] bytes = hll.toBytes(schemaVersion);

            // assert output has correct byte length
            assertEquals(bytes.length, padding + (bytesPerWord * explicitThreshold/*elements*/));

            final HLL inHLL = HLL.fromBytes(bytes);

            assertElementsEqual(hll, inHLL);
        }
    }

    // ------------------------------------------------------------------------
    /**
     * Tests correctness against {@link java.util.HashSet}.
     */
    @Test
    public void randomValuesTest() {
        final int explicitThreshold = 4096;
        final HashSet<Long> canonical = new HashSet<Long>();
        final HLL hll = newHLL(explicitThreshold);

        final long seed = 1L/*constant so results are reproducible*/;
        final Random random = new Random(seed);
        for(int i=0;i<explicitThreshold;i++){
            long randomLong = random.nextLong();
            canonical.add(new Long(randomLong));
            hll.addRaw(randomLong);
        }
        final int canonicalCardinality = canonical.size();
        assertEquals(hll.cardinality(), canonicalCardinality);
    }

    // ------------------------------------------------------------------------
    /**
     * Tests promotion to {@link HLLType#SPARSE} and {@link HLLType#FULL}.
     */
    @Test
    public void promotionTest() {
        { // locally scoped for sanity
            final int explicitThreshold = 128;
            final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold*/, HLLType.EXPLICIT);

            for(int i=0;i<explicitThreshold + 1;i++){
                hll.addRaw(i);
            }
            assertEquals(hll.getType(), HLLType.SPARSE);
        }
        { // locally scoped for sanity
            final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, 4/*expthresh => explicitThreshold = 8*/, false/*sparseon*/, HLLType.EXPLICIT);

            for(int i=0;i<9/* > explicitThreshold */;i++){
                hll.addRaw(i);
            }
            assertEquals(hll.getType(), HLLType.FULL);
        }
    }

    // ************************************************************************
    // assertion helpers
    /**
     * Asserts that values in both sets are exactly equal.
     */
    private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
        final LongOpenHashSet internalSetA = (LongOpenHashSet)getInternalState(hllA, "explicitStorage");
        final LongOpenHashSet internalSetB = (LongOpenHashSet)getInternalState(hllB, "explicitStorage");

        assertTrue(internalSetA.equals(internalSetB));
    }

    /**
     * Builds a {@link HLLType#EXPLICIT} {@link HLL} instance with the specified
     * explicit threshold.
     *
     * @param  explicitThreshold explicit threshold to use for the constructed
     *         {@link HLL}. This must be greater than zero.
     * @return a default-sized {@link HLLType#EXPLICIT} empty {@link HLL} instance.
     *         This will never be <code>null</code>.
     */
    private static HLL newHLL(final int explicitThreshold) {
        return new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold, arbitrary, unused*/, HLLType.EXPLICIT);
    }
}
TOP

Related Classes of net.agkn.hll.ExplicitHLLTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.