Package com.liveramp.cascading_ext.combiner.lib

Source Code of com.liveramp.cascading_ext.combiner.lib.HyperLogLogExactAggregator

/**
*  Copyright 2012 LiveRamp
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package com.liveramp.cascading_ext.combiner.lib;

import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import com.clearspring.analytics.stream.cardinality.CardinalityMergeException;
import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus;
import com.clearspring.analytics.stream.cardinality.ICardinality;
import com.liveramp.cascading_ext.Bytes;
import com.liveramp.cascading_ext.combiner.ExactAggregator;
import org.apache.hadoop.io.BytesWritable;

import java.io.IOException;

public class HyperLogLogExactAggregator implements ExactAggregator<ICardinality> {
 
  public static enum OutputOption {CARDINALITY_AND_HLL_OBJECT, CARDINALITY, HLL_OBJECT};
  public static final int DEFAULT_PRECISION = 16;
  public static final OutputOption DEFAULT_OUTPUT_OPTION = OutputOption.CARDINALITY;
 
  private final int precision;
  private OutputOption outputOption;
 
  public HyperLogLogExactAggregator() {
    this(DEFAULT_PRECISION, DEFAULT_OUTPUT_OPTION);
  }
 
  public HyperLogLogExactAggregator(int precision) {
    this(precision, DEFAULT_OUTPUT_OPTION);
  }
 
  public HyperLogLogExactAggregator(OutputOption outputOption) {
    this(DEFAULT_PRECISION, outputOption);
  }
 
  public HyperLogLogExactAggregator(int precision, OutputOption outputOption) {
    this.precision = precision;
    this.outputOption = outputOption;
  }

  @Override
  public ICardinality initialize() {
    return new HyperLogLogPlus(precision);
  }

  @Override
  public ICardinality partialAggregate(ICardinality aggregate, TupleEntry nextValue) {
    aggregate.offer(nextValue.getTuple());
    return aggregate;
  }

  @Override
  public Tuple toPartialTuple(ICardinality aggregate) {
    try {
      return new Tuple(new BytesWritable(aggregate.getBytes()));
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public ICardinality finalAggregate(ICardinality aggregate, TupleEntry partialAggregate) {
    try {
      ICardinality hll = HyperLogLogPlus.Builder.build(Bytes.getBytes((BytesWritable) partialAggregate.getObject(0)));
      aggregate.merge(hll);
      return aggregate;
    } catch (CardinalityMergeException cme) {
      throw new RuntimeException(cme);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  public Tuple toFinalTuple(ICardinality aggregate) {
    try {
      // Choose what tuple to return according to the outputOption
      switch (outputOption) {
      case CARDINALITY_AND_HLL_OBJECT:
        return new Tuple(new BytesWritable(aggregate.getBytes()), aggregate.cardinality());
       
      case HLL_OBJECT:
        return new Tuple(new BytesWritable(aggregate.getBytes()));
       
      case CARDINALITY:
        return new Tuple(aggregate.cardinality());
       
      default:
        throw new IllegalArgumentException("Invalid value for OutputOption: " + outputOption.toString());
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
TOP

Related Classes of com.liveramp.cascading_ext.combiner.lib.HyperLogLogExactAggregator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.