Package com.facebook.presto.operator.aggregation

Source Code of com.facebook.presto.operator.aggregation.ApproximateSetAggregation$ApproximateSetGroupedAccumulator

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation;

import com.facebook.presto.operator.GroupByIdBlock;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockCursor;
import com.facebook.presto.spi.type.HyperLogLogType;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.util.array.ObjectBigArray;
import com.google.common.base.Optional;
import io.airlift.stats.cardinality.HyperLogLog;

import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.HyperLogLogType.HYPER_LOG_LOG;
import static com.facebook.presto.spi.type.VarcharType.VARCHAR;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;

public class ApproximateSetAggregation
        extends SimpleAggregationFunction
{
    private static final int NUMBER_OF_BUCKETS = 4096;
    private final Type parameterType;

    public ApproximateSetAggregation(Type parameterType)
    {
        super(HYPER_LOG_LOG, HYPER_LOG_LOG, parameterType);

        checkArgument(parameterType.equals(BIGINT) || parameterType.equals(DOUBLE) || parameterType.equals(VARCHAR),
                "Expected parameter type to be BIGINT, DOUBLE, or VARCHAR, but was %s",
                parameterType);

        this.parameterType = parameterType;
    }

    @Override
    protected GroupedAccumulator createGroupedAccumulator(Optional<Integer> maskChannel, Optional<Integer> sampleWeightChannel, double confidence, int valueChannel)
    {
        checkArgument(confidence == 1.0, "confidence level must be 1.0");
        return new ApproximateSetGroupedAccumulator(parameterType, valueChannel, maskChannel, sampleWeightChannel);
    }

    @Override
    protected Accumulator createAccumulator(Optional<Integer> maskChannel, Optional<Integer> sampleWeightChannel, double confidence, int valueChannel)
    {
        checkArgument(confidence == 1.0, "confidence level must be 1.0");
        return new ApproximateSetAccumulator(parameterType, valueChannel, maskChannel);
    }

    static class ApproximateSetAccumulator
            extends SimpleAccumulator
    {
        private final Type parameterType;

        private HyperLogLog estimator;

        public ApproximateSetAccumulator(Type parameterType, int valueChannel, Optional<Integer> maskChannel)
        {
            super(valueChannel, HyperLogLogType.HYPER_LOG_LOG, HyperLogLogType.HYPER_LOG_LOG, maskChannel, Optional.<Integer>absent());
            this.parameterType = parameterType;
        }

        @Override
        protected void processInput(Block block, Optional<Block> maskBlock, Optional<Block> sampleWeightBlock)
        {
            BlockCursor values = block.cursor();
            BlockCursor masks = null;
            if (maskBlock.isPresent()) {
                masks = maskBlock.get().cursor();
            }

            for (int position = 0; position < block.getPositionCount(); position++) {
                checkState(values.advanceNextPosition());
                checkState(masks == null || masks.advanceNextPosition());
                if (!values.isNull() && (masks == null || masks.getBoolean())) {
                    if (estimator == null) {
                        estimator = HyperLogLog.newInstance(NUMBER_OF_BUCKETS);
                    }

                    add(values, parameterType, estimator);
                }
            }
        }

        @Override
        protected void processIntermediate(Block block)
        {
            BlockCursor intermediates = block.cursor();

            for (int position = 0; position < block.getPositionCount(); position++) {
                checkState(intermediates.advanceNextPosition());
                if (!intermediates.isNull()) {
                    HyperLogLog instance = HyperLogLog.newInstance(intermediates.getSlice());

                    if (estimator == null) {
                        estimator = instance;
                    }
                    else {
                        estimator.mergeWith(instance);
                    }
                }
            }
        }

        @Override
        public void evaluateIntermediate(BlockBuilder out)
        {
            evaluateFinal(out);
        }

        @Override
        public void evaluateFinal(BlockBuilder out)
        {
            if (estimator == null) {
                out.appendNull();
            }
            else {
                out.appendSlice(estimator.serialize());
            }
        }
    }

    private static void add(BlockCursor cursor, Type parameterType, HyperLogLog estimator)
    {
        if (parameterType.equals(BIGINT)) {
            estimator.add(cursor.getLong());
        }
        else if (parameterType.equals(DOUBLE)) {
            estimator.add(Double.doubleToLongBits(cursor.getDouble()));
        }
        else if (parameterType.equals(VARCHAR)) {
            estimator.add(cursor.getSlice());
        }
        else {
            throw new IllegalArgumentException("Expected parameter type to be BIGINT, DOUBLE, or VARCHAR");
        }
    }

    static class ApproximateSetGroupedAccumulator
            extends SimpleGroupedAccumulator
    {
        private final ObjectBigArray<HyperLogLog> estimators = new ObjectBigArray<>();
        private final Type parameterType;
        private long sizeOfValues;

        public ApproximateSetGroupedAccumulator(Type parameterType, int valueChannel, Optional<Integer> maskChannel, Optional<Integer> sampleWeightChannel)
        {
            super(valueChannel, HYPER_LOG_LOG, HYPER_LOG_LOG, maskChannel, sampleWeightChannel);
            this.parameterType = parameterType;
        }

        @Override
        public long getEstimatedSize()
        {
            return sizeOfValues;
        }

        @Override
        protected void processInput(GroupByIdBlock groupIdsBlock, Block valuesBlock, Optional<Block> maskBlock, Optional<Block> sampleWeightBlock)
        {
            estimators.ensureCapacity(groupIdsBlock.getGroupCount());

            BlockCursor values = valuesBlock.cursor();
            BlockCursor masks = null;
            if (maskBlock.isPresent()) {
                masks = maskBlock.get().cursor();
            }

            for (int position = 0; position < groupIdsBlock.getPositionCount(); position++) {
                checkState(values.advanceNextPosition());
                checkState(masks == null || masks.advanceNextPosition());

                // skip null values
                if (!values.isNull() && (masks == null || masks.getBoolean())) {
                    long groupId = groupIdsBlock.getGroupId(position);

                    HyperLogLog hll = estimators.get(groupId);
                    if (hll == null) {
                        hll = HyperLogLog.newInstance(NUMBER_OF_BUCKETS);
                        estimators.set(groupId, hll);
                        sizeOfValues += hll.estimatedInMemorySize();
                    }

                    sizeOfValues -= hll.estimatedInMemorySize();
                    add(values, parameterType, hll);
                    sizeOfValues += hll.estimatedInMemorySize();
                }
            }
            checkState(!values.advanceNextPosition(), "group id and value blocks have different number of entries");
        }

        @Override
        protected void processIntermediate(GroupByIdBlock groupIdsBlock, Block valuesBlock)
        {
            estimators.ensureCapacity(groupIdsBlock.getGroupCount());

            BlockCursor intermediates = valuesBlock.cursor();

            for (int position = 0; position < groupIdsBlock.getPositionCount(); position++) {
                checkState(intermediates.advanceNextPosition());

                // skip null values
                if (!intermediates.isNull()) {
                    long groupId = groupIdsBlock.getGroupId(position);

                    HyperLogLog input = HyperLogLog.newInstance(intermediates.getSlice());

                    HyperLogLog previous = estimators.get(groupId);
                    if (previous == null) {
                        estimators.set(groupId, input);
                        sizeOfValues += input.estimatedInMemorySize();
                    }
                    else {
                        sizeOfValues -= previous.estimatedInMemorySize();
                        previous.mergeWith(input);
                        sizeOfValues += previous.estimatedInMemorySize();
                    }
                }
            }
            checkState(!intermediates.advanceNextPosition());
        }

        @Override
        public void evaluateIntermediate(int groupId, BlockBuilder output)
        {
            evaluateFinal(groupId, output);
        }

        @Override
        public void evaluateFinal(int groupId, BlockBuilder output)
        {
            HyperLogLog estimator = estimators.get(groupId);
            if (estimator == null) {
                output.appendNull();
            }
            else {
                output.appendSlice(estimator.serialize());
            }
        }
    }
}
TOP

Related Classes of com.facebook.presto.operator.aggregation.ApproximateSetAggregation$ApproximateSetGroupedAccumulator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.