Package com.clearspring.analytics.stream.cardinality

Examples of com.clearspring.analytics.stream.cardinality.HyperLogLog


    }

    @Override
    public void prepare(FlowProcess flowProcess, OperationCall operationCall) {
      JobConf conf = (JobConf) flowProcess.getConfigCopy();
      approxCounter = new HyperLogLog(BloomProps.getHllErr(conf));
      sampleRate = BloomProps.getKeySampleRate(conf);
      tupleSerializationUtil = new TupleSerializationUtil((JobConf) flowProcess.getConfigCopy());
    }
View Full Code Here


    List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();

    long totalSum = 0;
    while (in.hasNext()) {
      TupleEntry tuple = in.next();
      HyperLogLog card = HyperLogLog.Builder.build(Bytes.getBytes((BytesWritable) tuple.getObject("bytes")));
      countParts.add(card);
      totalSum += card.cardinality();
    }

    HyperLogLog merged = (HyperLogLog) new HyperLogLog(BloomProps.getHllErr(conf)).merge(countParts.toArray(new ICardinality[countParts.size()]));
    long cardinality = merged.cardinality();

    //  HLL estimation doesn't work over 2^32, and the cardinality code just returns 0.
    //  Honestly if you get this high, your bloom filter is probably saturated anyway, so just return that max.
    if (cardinality == 0 && totalSum != 0) {
      LOG.info("HyperLogLog likely reached its max estimation of 2^32! Returning that max, but true count likely higher.");
View Full Code Here

TOP

Related Classes of com.clearspring.analytics.stream.cardinality.HyperLogLog

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.