// Source: net.agkn.hll.IntegrationTestGenerator (package net.agkn.hll)

package net.agkn.hll;

/*
* Copyright 2013 Aggregate Knowledge, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;

import net.agkn.hll.HLL;
import net.agkn.hll.HLLType;
import net.agkn.hll.serialization.ISchemaVersion;
import net.agkn.hll.serialization.SerializationUtil;
import net.agkn.hll.util.NumberUtil;
import static net.agkn.hll.ProbabilisticTestUtil.constructHLLValue;

/**
* Generates test files for testing other implementations of HLL
* serialization/deserialization, namely the PostgreSQL implementation.
*
* @author timon
*/
public class IntegrationTestGenerator {
    // ************************************************************************
    // directory to output the generated tests
    private static final String OUTPUT_DIRECTORY = "/tmp/hll_test/";
    // seed to make results reproducible
    private static final long SEED = 1L;

    // ------------------------------------------------------------------------
    // configurations for HLLs, should mirror settings in PostgreSQL impl. tests
    private static final int REGWIDTH = 5;
    private static final int LOG2M = 11;
    // NOTE:  This differs from the PostgreSQL impl. parameter 'expthresh'. This
    //        is a literal threshold to use in the promotion hierarchy, implying
    //        that both EXPLICIT representation should be used and it should
    //        NOT be automatically computed. This is done to ensure that the
    //        parameters of the test are very explicitly defined.
    private static final int EXPLICIT_THRESHOLD = 256;
    // NOTE:  This is not the PostgreSQL impl. parameter 'sparseon'. 'sparseon'
    //        is assumed to be true and this is a literal register-count threshold
    //        to use in the promotion hierarchy. This is done to ensure that the
    //        parameters of the test are very explicitly defined.
    private static final int SPARSE_THRESHOLD = 850;

    // ------------------------------------------------------------------------
    // computed constants
    private static final int REGISTER_COUNT = (1 << LOG2M);
    private static final int REGISTER_MAX_VALUE = (1 << REGWIDTH) - 1;

    // ========================================================================
    // Tests
    /**
     * Cumulatively adds random values to a FULL HLL through the small range
     * correction, uncorrected range, and large range correction of the HLL's
     * cardinality estimator.
     *
     * Format: cumulative add
     * Tests:
     * - FULL cardinality computation
     */
    private static void fullCardinalityCorrectionTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "cardinality_correction", TestType.ADD);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.FULL);
        initLineAdd(output, hll, schemaVersion);

        // run through some values in the small range correction
        for(int i=0; i<((1 << LOG2M) - 1); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 1);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        // run up past some values in the uncorrected range
        for(int i=0; i<(1 << LOG2M); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 7);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        // run through some values in the large range correction
        for(int i=0; i<(1 << LOG2M); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 30);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Cumulatively adds random values to an EMPTY HLL.
     *
     * Format: cumulative add
     * Tests:
     * - EMPTY, EXPLICIT, SPARSE, PROBABILSTIC addition
     * - EMPTY to EXPLICIT promotion
     * - EXPLICIT to SPARSE promotion
     * - SPARSE to FULL promotion
     */
    private static void globalStepTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "comprehensive_promotion", TestType.ADD);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        initLineAdd(output, hll, schemaVersion);

        for(int i=0; i<10000/*arbitrary*/; i++) {
            cumulativeAddLine(output, hll, random.nextLong(), schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Cumulatively unions "underpopulated" FULL HLLs into the
     * accumulator to verify the correct behavior from the PostgreSQL implementation.
     * The PostgreSQL implementation's representations of probabilistic HLLs should
     * depend exclusively on the chosen SPARSE-to-FULL cutoff.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U "underpopulated" FULL => SPARSE
     * - SPARSE U "underpopulated" FULL => SPARSE
     * - SPARSE U "barely underpopulated" FULL => FULL
     */
    private static void sparseFullRepresentationTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_full_representation", TestType.UNION);

        final HLL emptyHLL1 = newHLL(HLLType.EMPTY);
        final HLL emptyHLL2 = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, emptyHLL1, emptyHLL2, schemaVersion);

        // NOTE:  In this test the sparseReference will be the "expected" value
        //        from the C representation, since it doesn't choose representation
        //        based on original encoding, but rather on the promotion rules
        //        and the declared type of the "receiving" field.
        //        It is the manually-constructed union result.

        // "underpopulated" FULL U EMPTY => SPARSE
        final HLL fullHLL = newHLL(HLLType.FULL);
        fullHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/));

        final HLL sparseHLL = newHLL(HLLType.SPARSE);
        sparseHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/));

        output.write(stringCardinality(fullHLL) + "," + toByteA(fullHLL, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();

        // "underpopulated" FULL (small) U SPARSE (small) => SPARSE
        final HLL fullHLL2 = newHLL(HLLType.FULL);
        fullHLL2.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/));

        sparseHLL.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/));

        output.write(stringCardinality(fullHLL2) + "," + toByteA(fullHLL2, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();

        // "underpopulated" FULL (just on edge) U SPARSE (small) => FULL
        final HLL fullHLL3 = newHLL(HLLType.FULL);
        for(int i=2; i<(SPARSE_THRESHOLD + 1); i++) {
            fullHLL3.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/));
            sparseHLL.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/));
        }

        output.write(stringCardinality(fullHLL3) + "," + toByteA(fullHLL3, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();
    }

    /**
     * Cumulatively sets successive registers to:
     *
     *     <code>(registerIndex % REGISTER_MAX_VALUE) + 1</code>
     *
     * by adding specifically constructed values to a SPARSE HLL.
     * Does not induce promotion.
     *
     * Format: cumulative add
     * Tests:
     * - SPARSE addition (predictable)
     */
    private static void sparseStepTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_step", TestType.ADD);

        // the accumulator, starts empty sparse probabilistic
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            final long rawValue = constructHLLValue(LOG2M, i, ((i % REGISTER_MAX_VALUE) + 1));
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Cumulatively sets random registers of a SPARSE HLL to
     * random values by adding random values. Does not induce promotion.
     *
     * Format: cumulative add
     * Tests:
     * - SPARSE addition (random)
     */
    private static void sparseRandomTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_random", TestType.ADD);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);

            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Cumulatively sets the first register (index 0) to value 2, the last
     * register (index m-1) to value 2, and then sets registers with indices in
     * the range 2 to (sparseCutoff + 2) to value 1 to trigger promotion.
     *
     * This tests for register alignment in the promotion from SPARSE
     * to FULL.
     *
     * Format: cumulative add
     * Tests:
     * - SPARSE addition
     * - SPARSE to FULL promotion
     */
    private static void sparseEdgeTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_edge", TestType.ADD);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        final long firstValue = constructHLLValue(LOG2M, 0, 2);
        cumulativeAddLine(output, hll, firstValue, schemaVersion);

        final long lastValue = constructHLLValue(LOG2M, (1 << LOG2M) - 1, 2);
        cumulativeAddLine(output, hll, lastValue, schemaVersion);

        for(int i=2; i<(SPARSE_THRESHOLD + 2); i++) {
            final long middleValue = constructHLLValue(LOG2M, i, 1);

            cumulativeAddLine(output, hll, middleValue, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with EXPLICIT HLLs, each containing a
     * single random value.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U EXPLICIT
     * - EXPLICIT U EXPLICIT
     * - EXPLICIT to SPARSE promotion
     * - SPARSE U EXPLICIT
     */
    private static void explicitPromotionTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "explicit_promotion", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);
        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<(EXPLICIT_THRESHOLD+500)/*should be greater than promotion cutoff*/; i++) {
            // make an EXPLICIT set and populate with cardinality 1
            final HLL explicitHLL = newHLL(HLLType.EXPLICIT);
            explicitHLL.addRaw(random.nextLong());

            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with SPARSE HLLs, each
     * having one register set.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U SPARSE
     * - SPARSE U SPARSE
     * - SPARSE promotion
     * - SPARSE U FULL
     */
    private static void sparseProbabilisticPromotionTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_promotion", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);
        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);


        for(int i=0; i<(SPARSE_THRESHOLD + 1000)/*should be greater than promotion cutoff*/; i++) {
            // make a SPARSE set and populate with cardinality 1
            final HLL sparseHLL = newHLL(HLLType.SPARSE);

            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);
            sparseHLL.addRaw(rawValue);

            cumulativeUnionLine(output, hll, sparseHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with EXPLICIT HLLs, each having a single
     * random value, twice in a row to verify that the set properties are
     * satisfied.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U EXPLICIT
     * - EXPLICIT U EXPLICIT
     */
    private static void explicitOverlapTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "explicit_explicit", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<EXPLICIT_THRESHOLD; i++) {
            // make an EXPLICIT set and populate with cardinality 1
            final HLL explicitHLL = newHLL(HLLType.EXPLICIT);
            explicitHLL.addRaw(random.nextLong());

            // union it into the accumulator twice, to test overlap (cardinality should not change)
            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with SPARSE HLLs, each
     * having a single register set, twice in a row to verify that the set
     * properties are satisfied.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U SPARSE
     * - SPARSE U SPARSE
     */
    private static void sparseProbabilisticOverlapTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "sparse_sparse", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            // make a SPARSE set and populate with cardinality 1
            final HLL sparseHLL = newHLL(HLLType.SPARSE);
            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);
            sparseHLL.addRaw(rawValue);

            cumulativeUnionLine(output, hll, sparseHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with FULL HLLs, each having
     * many registers set, twice in a row to verify that the set properties are
     * satisfied.
     *
     * Format: cumulative union
     * Tests:
     * - EMPTY U FULL
     * - FULL U FULL
     */
    private static void probabilisticUnionTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "probabilistic_probabilistic", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);
        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<1000/*number of rows to generate*/; i++) {
            // make a FULL set and populate with
            final HLL fullHLL = newHLL(HLLType.FULL);
            final int elementCount = random.nextInt(10000/*arbitrary maximum cardinality*/);
            for(int j=0;j<elementCount;j++) {
                fullHLL.addRaw(random.nextLong());
            }

            cumulativeUnionLine(output, hll, fullHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    /**
     * Unions an EMPTY accumulator with random HLLs.
     *
     * Format: cumulative union
     * Tests:
     * - hopefully all union possibilities
     */
    private static void globalUnionTest(final ISchemaVersion schemaVersion) throws IOException {
        final FileWriter output = openOutput(schemaVersion, "comprehensive", TestType.UNION);

        final Random random = new Random(SEED);

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<1000/*number of rows to generate*/; i++) {
            final HLL randomHLL = generateRandomHLL(random);
            cumulativeUnionLine(output, hll, randomHLL, schemaVersion);
        }

        output.flush();
        output.close();
    }

    // ========================================================================
    // Main
    /**
     * Runs every test generator in this file against the given schema version,
     * writing one CSV file per test into {@link #OUTPUT_DIRECTORY}.
     *
     * @param  schemaVersion the schema with which to serialize the HLLs. This
     *         cannot be <code>null</code>.
     * @throws IOException if any output file cannot be written
     */
    public static void fullSuite(final ISchemaVersion schemaVersion) throws IOException {
        // each test writes its own file, so the order of the calls below only
        // affects file-creation order
        fullCardinalityCorrectionTest(schemaVersion);
        globalUnionTest(schemaVersion);
        globalStepTest(schemaVersion);
        probabilisticUnionTest(schemaVersion);
        explicitPromotionTest(schemaVersion);
        explicitOverlapTest(schemaVersion);
        sparseFullRepresentationTest(schemaVersion);
        sparseStepTest(schemaVersion);
        sparseRandomTest(schemaVersion);
        sparseEdgeTest(schemaVersion);
        sparseProbabilisticPromotionTest(schemaVersion);
        sparseProbabilisticOverlapTest(schemaVersion);
    }

    public static void main(String[] args) throws IOException {
        fullSuite(SerializationUtil.VERSION_ONE);
    }

    // ************************************************************************
    // Helpers
    /**
     * Shortcut for testing constructor, which uses the constants defined at
     * the top of the file as default parameters.
     *
     * @return a new {@link HLL} of specified type, which uses the parameters
     *         ({@link #LOG2M}, {@link #REGWIDTH}, {@link #EXPLICIT_THRESHOLD},
     *         and {@link #SPARSE_THRESHOLD}) specified above.
     */
    private static HLL newHLL(final HLLType type) {
        return newHLL(type);
    }

    /**
     * Returns the algorithm-specific cardinality of the specified {@link HLL}
     * as a {@link String} appropriate for comparison with the algorithm-specific
     * cardinality provided by the PostgreSQL implementation.
     *
     * @param  hll the HLL whose algorithm-specific cardinality is to be printed.
     *         This cannot be <code>null</code>.
     * @return the algorithm-specific cardinality of the instance as a PostgreSQL-
     *         compatible String. This will never be <code>null</code>
     */
    private static String stringCardinality(final HLL hll) {
        switch(hll.getType()) {
            case EMPTY:
                return "0";
            case EXPLICIT:/*promotion has not yet occurred*/
                return Long.toString(hll.cardinality());
            case SPARSE:
                return Double.toString(hll.sparseProbabilisticAlgorithmCardinality());
            case FULL:
                return Double.toString(hll.fullProbabilisticAlgorithmCardinality());
            default:
                throw new RuntimeException("Unknown HLL type " + hll.getType());
        }
    }

    /**
     * Generates a random HLL and populates it with random values.
     *
     * NOTE: the number and order of draws from <code>random</code> determine
     *       the generated test vectors; do not reorder the calls below.
     *
     * @param  random the {@link Random random number generator} used to populate
     *         the HLL. This cannot be <code>null</code>.
     * @return the populated HLL. This will never be <code>null</code>.
     */
    public static HLL generateRandomHLL(final Random random) {
        // draw a type: ordinals 0..4 map onto the cases below
        final int randomTypeInt = random.nextInt(HLLType.values().length);
        final HLLType type;
        switch(randomTypeInt) {
            case 0:
                type = HLLType.EMPTY;
                break;
            case 1:
                type = HLLType.EXPLICIT;
                break;
            case 2:
                type = HLLType.FULL;
                break;
            case 3:
                // NOTE(review): duplicates case 0, so EMPTY is drawn twice as
                //               often as the other types -- confirm this
                //               weighting is intentional
                type = HLLType.EMPTY;
                break;
            case 4:
                type = HLLType.SPARSE;
                break;
            default:
                throw new RuntimeException("Unassigned type int " + randomTypeInt);
        }

        // cardinality bounds chosen so the accumulator (which starts EMPTY and
        // promotes as values are added) ends up in the desired representation
        final int cardinalityCap;
        final int cardinalityBaseline;

        switch(type) {
            case EMPTY:
                return newHLL(HLLType.EMPTY);
            case EXPLICIT:
                cardinalityCap = EXPLICIT_THRESHOLD;
                cardinalityBaseline = 1;
                break;
            case SPARSE:
                cardinalityCap = SPARSE_THRESHOLD;
                cardinalityBaseline = (EXPLICIT_THRESHOLD + 1);
                break;
            case FULL:
                cardinalityCap = 100000;
                cardinalityBaseline = (SPARSE_THRESHOLD*10);
                break;
            default:
                throw new RuntimeException("We should never be here.");
        }

        // add at least 'cardinalityBaseline' values, then a random number more
        final HLL hll = newHLL(HLLType.EMPTY);
        for(int i=0; i<cardinalityBaseline; i++) {
            hll.addRaw(random.nextLong());
        }
        // NOTE(review): the loop condition re-draws random.nextInt(...) on every
        //               iteration, so the extra count is not one uniform draw --
        //               confirm this is intended; changing it would change vectors
        for(int i=0; i<random.nextInt(cardinalityCap - cardinalityBaseline); i++) {
            hll.addRaw(random.nextLong());
        }

        return hll;
    }

    /**
     * Opens a {@link FileWriter} and writes out an appropriate CSV header.
     *
     * @param  schemaVersion Schema version of the output. This cannot be
     *         <code>null</code>.
     * @param  description Description string used to build the filename.
     *         This cannot be <code>null</code>.
     * @param  type {@link TestType type} of the test file to be written.
     *         This cannot be <code>null</code>.
     * @return The opened {@link FileWriter writer}. This will never be <code>null</code>.
     */
    private static FileWriter openOutput(final ISchemaVersion schemaVersion, final String description, final TestType type) throws IOException {
        final String schemaVersionPrefix = "v"+ schemaVersion.schemaVersionNumber() + "_";
        final String header;
        final String filename;
        switch(type) {
            case ADD:
                header = "cardinality,raw_value,HLL\n";
                filename = schemaVersionPrefix + "cumulative_add_" + description + ".csv";
                break;
            case UNION:
                header = "cardinality,HLL,union_cardinality,union_HLL\n";
                filename = schemaVersionPrefix + "cumulative_union_" + description + ".csv";
                break;
            default:
                throw new RuntimeException("Unknown test type " + type);
        }

        final FileWriter output = new FileWriter(OUTPUT_DIRECTORY + filename);
        output.write(header);
        output.flush();
        return output;
    }

    /**
     * Writes out a {@link TestType#ADD}-formatted test line.
     *
     * @param  output The output {@link FileWriter writer}. This cannot be <code>null</code>.
     * @param  hll The "accumulator" HLL instance. This cannot be <code>null</code>.
     * @param  rawValue The raw value added to the HLL.
     * @param  schemaVersion the schema with which to serialize the HLLs. This cannot
     *         be <code>null</code>.
     */
    private static void cumulativeAddLine(final FileWriter output, final HLL hll, final long rawValue, final ISchemaVersion schemaVersion) throws IOException {
        hll.addRaw(rawValue);
        final String accumulatorCardinality = stringCardinality(hll);

        output.write(accumulatorCardinality + "," + rawValue + "," + toByteA(hll, schemaVersion) + "\n");
        output.flush();
    }

    /**
     * Writes an initial line for a {@link TestType#ADD}-formatted test.
     *
     * @param  output The output {@link FileWriter writer}. This cannot be <code>null</code>.
     * @param  hll The "accumulator" HLL instance. This cannot be <code>null</code>.
     * @param  rawValue The raw value added to the HLL.
     * @param  schemaVersion the schema with which to serialize the HLLs. This cannot
     *         be <code>null</code>.
     */
    private static void initLineAdd(final FileWriter output, final HLL hll, final ISchemaVersion schemaVersion) throws IOException {
        output.write(0 + "," + 0 + "," + toByteA(hll, schemaVersion) + "\n");
        output.flush();
    }

    /**
     * Writes out a {@link TestType#UNION}-formatted test line.
     *
     * @param  output The output {@link FileWriter writer}. This cannot be <code>null</code>.
     * @param  hll The "accumulator" HLL instance. This cannot be <code>null</code>.
     * @param  increment The "increment" HLL instance which will be unioned into
     *         the accumulator. This cannot be <code>null</code>.
     * @param  schemaVersion the schema with which to serialize the HLLs. This cannot
     *         be <code>null</code>.
     */
    private static void cumulativeUnionLine(final FileWriter output, final HLL hll, final HLL increment, final ISchemaVersion schemaVersion) throws IOException {
        hll.union(increment);

        final String incrementCardinality = stringCardinality(increment);
        final String accumulatorCardinality = stringCardinality(hll);
        output.write(incrementCardinality + "," + toByteA(increment, schemaVersion) + "," + accumulatorCardinality + "," + toByteA(hll, schemaVersion) + "\n");
        output.flush();
    }

    /**
     * Serializes a HLL to Postgres 9 'bytea' hex-format, for CSV ingest.
     *
     * @param  hll the HLL to serialize. This cannot be <code>null</code>.
     * @param  schemaVersion the schema with which to serialize the HLLs. This cannot
     *         be <code>null</code>.
     * @return a PostgreSQL 'bytea' string representing the HLL.
     */
    private static String toByteA(final HLL hll, final ISchemaVersion schemaVersion) {
        final byte[] bytes = hll.toBytes(schemaVersion);
        return ("\\x" + NumberUtil.toHex(bytes, 0, bytes.length));
    }

    /**
     * Indicates what kind of test output a test will generate.
     */
    private static enum TestType {
        /**
         * This type of test is characterized by values being added to an
         * accumulator HLL whose serialized representation (after the value is added)
         * is printed to each line along with the cardinality and added value.
         */
        ADD,
        /**
         * This type of test is characterized by HLLs being unioned into an
         * accumulator HLL whose serialized representation (after the HLL is
         * union'd) is printed to each line along with the cardinalities and the
         * serialized representation of the HLL union'd in.
         */
        UNION;
    }
}
// Related Classes of net.agkn.hll.IntegrationTestGenerator
//
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code are property of their respective owners. Java is a trademark
// of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.