Package io.druid.query.aggregation.cardinality

Source Code of io.druid.query.aggregation.cardinality.CardinalityAggregator

/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013, 2014  Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package io.druid.query.aggregation.cardinality;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import io.druid.query.aggregation.Aggregator;
import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;
import io.druid.segment.DimensionSelector;
import io.druid.segment.data.IndexedInts;

import java.util.Arrays;
import java.util.List;

public class CardinalityAggregator implements Aggregator
{
  private static final String NULL_STRING = "\u0000";

  private final String name;
  private final List<DimensionSelector> selectorList;
  private final boolean byRow;

  private static final HashFunction hashFn = Hashing.murmur3_128();
  public static final char SEPARATOR = '\u0001';

  protected static void hashRow(List<DimensionSelector> selectorList, HyperLogLogCollector collector)
  {
    final Hasher hasher = hashFn.newHasher();
    for (int k = 0; k < selectorList.size(); ++k) {
      if (k != 0) {
        hasher.putByte((byte) 0);
      }
      final DimensionSelector selector = selectorList.get(k);
      final IndexedInts row = selector.getRow();
      final int size = row.size();
      // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases.
      if (size == 1) {
        final String value = selector.lookupName(row.get(0));
        hasher.putUnencodedChars(value != null ? value : NULL_STRING);
      } else if (size != 0) {
        final String[] values = new String[size];
        for (int i = 0; i < size; ++i) {
          final String value = selector.lookupName(row.get(i));
          values[i] = value != null ? value : NULL_STRING;
        }
        // Values need to be sorted to ensure consistent multi-value ordering across different segments
        Arrays.sort(values);
        for (int i = 0; i < size; ++i) {
          if (i != 0) {
            hasher.putChar(SEPARATOR);
          }
          hasher.putUnencodedChars(values[i]);
        }
      }
    }
    collector.add(hasher.hash().asBytes());
  }

  protected static void hashValues(final List<DimensionSelector> selectors, HyperLogLogCollector collector)
  {
    for (final DimensionSelector selector : selectors) {
      for (final Integer index : selector.getRow()) {
        final String value = selector.lookupName(index);
        collector.add(hashFn.hashUnencodedChars(value == null ? NULL_STRING : value).asBytes());
      }
    }
  }

  private HyperLogLogCollector collector;

  public CardinalityAggregator(
      String name,
      List<DimensionSelector> selectorList,
      boolean byRow
  )
  {
    this.name = name;
    this.selectorList = selectorList;
    this.collector = HyperLogLogCollector.makeLatestCollector();
    this.byRow = byRow;
  }

  @Override
  public void aggregate()
  {
    if (byRow) {
      hashRow(selectorList, collector);
    } else {
      hashValues(selectorList, collector);
    }
  }

  @Override
  public void reset()
  {
    collector = HyperLogLogCollector.makeLatestCollector();
  }

  @Override
  public Object get()
  {
    return collector;
  }

  @Override
  public float getFloat()
  {
    throw new UnsupportedOperationException("CardinalityAggregator does not support getFloat()");
  }

  @Override
  public String getName()
  {
    return name;
  }

  @Override
  public Aggregator clone()
  {
    return new CardinalityAggregator(name, selectorList, byRow);
  }

  @Override
  public void close()
  {
    // no resources to cleanup
  }
}
TOP

Related Classes of io.druid.query.aggregation.cardinality.CardinalityAggregator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.