Package io.druid.query.aggregation.histogram

Source Code of io.druid.query.aggregation.histogram.ApproximateHistogramErrorBenchmark

/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013  Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package io.druid.query.aggregation.histogram;

import com.google.common.primitives.Floats;
import io.druid.query.aggregation.Histogram;

import java.util.Arrays;
import java.util.Random;

public class ApproximateHistogramErrorBenchmark
{
  private boolean debug = true;
  private int numBuckets = 20;
  private int numBreaks = numBuckets + 1;
  private int numPerHist = 50;
  private int numHists = 10;
  private int resolution = 50;
  private int combinedResolution = 100;
  private Random rand = new Random(2);

  public ApproximateHistogramErrorBenchmark setDebug(boolean debug)
  {
    this.debug = debug;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setNumBuckets(int numBuckets)
  {
    this.numBuckets = numBuckets;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setNumBreaks(int numBreaks)
  {
    this.numBreaks = numBreaks;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setNumPerHist(int numPerHist)
  {
    this.numPerHist = numPerHist;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setNumHists(int numHists)
  {
    this.numHists = numHists;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setResolution(int resolution)
  {
    this.resolution = resolution;
    return this;
  }

  public ApproximateHistogramErrorBenchmark setCombinedResolution(int combinedResolution)
  {
    this.combinedResolution = combinedResolution;
    return this;
  }


  public static void main(String[] args)
  {
    ApproximateHistogramErrorBenchmark approxHist = new ApproximateHistogramErrorBenchmark();
    System.out.println(
        Arrays.toString(
            approxHist.setDebug(true)
                      .setNumPerHist(50)
                      .setNumHists(10000)
                      .setResolution(50)
                      .setCombinedResolution(100)
                      .getErrors()
        )
    );


    ApproximateHistogramErrorBenchmark approxHist2 = new ApproximateHistogramErrorBenchmark();
    int[] numHistsArray = new int[]{10, 100, 1000, 10000, 100000};
    float[] errs1 = new float[numHistsArray.length];
    float[] errs2 = new float[numHistsArray.length];
    for (int i = 0; i < numHistsArray.length; ++i) {
      float[] tmp = approxHist2.setDebug(false).setNumHists(numHistsArray[i]).setCombinedResolution(100).getErrors();
      errs1[i] = tmp[0];
      errs2[i] = tmp[1];
    }

    System.out
          .format("Number of histograms for folding                           : %s \n", Arrays.toString(numHistsArray));
    System.out.format("Errors for approximate histogram                           : %s \n", Arrays.toString(errs1));
    System.out.format("Errors for approximate histogram, ruleFold                 : %s \n", Arrays.toString(errs2));
  }

  private float[] getErrors()
  {
    final int numValues = numHists * numPerHist;
    final float[] values = new float[numValues];

    for (int i = 0; i < numValues; ++i) {
      values[i] = (float) rand.nextGaussian();
    }

    float min = Floats.min(values);
    min = (float) (min < 0 ? 1.02 : .98) * min;
    float max = Floats.max(values);
    max = (float) (max < 0 ? .98 : 1.02) * max;
    final float stride = (max - min) / numBuckets;
    final float[] breaks = new float[numBreaks];
    for (int i = 0; i < numBreaks; i++) {
      breaks[i] = min + stride * i;
    }

    Histogram h = new Histogram(breaks);
    for (float v : values) {
      h.offer(v);
    }
    double[] hcounts = h.asVisual().counts;

    ApproximateHistogram ah1 = new ApproximateHistogram(resolution);
    ApproximateHistogram ah2 = new ApproximateHistogram(combinedResolution);
    ApproximateHistogram tmp = new ApproximateHistogram(resolution);
    for (int i = 0; i < numValues; ++i) {
      tmp.offer(values[i]);
      if ((i + 1) % numPerHist == 0) {
        ah1.fold(tmp);
        ah2.foldRule(tmp, null, null);
        tmp = new ApproximateHistogram(resolution);
      }
    }
    double[] ahcounts1 = ah1.toHistogram(breaks).getCounts();
    double[] ahcounts2 = ah2.toHistogram(breaks).getCounts();

    float err1 = 0;
    float err2 = 0;
    for (int j = 0; j < hcounts.length; j++) {
      err1 += Math.abs((hcounts[j] - ahcounts1[j]) / numValues);
      err2 += Math.abs((hcounts[j] - ahcounts2[j]) / numValues);
    }

    if (debug) {
      float sum = 0;
      for (double v : hcounts) {
        sum += v;
      }
      System.out.println("Exact Histogram Sum:");
      System.out.println(sum);
      sum = 0;
      for (double v : ahcounts1) {
        sum += v;
      }
      System.out.println("Approximate Histogram Sum:");
      System.out.println(sum);
      sum = 0;
      for (double v : ahcounts2) {
        sum += v;
      }
      System.out.println("Approximate Histogram Rule Fold Sum:");
      System.out.println(sum);
      System.out.println("Exact Histogram:");
      System.out.println(h.asVisual());
      System.out.println("Approximate Histogram:");
      System.out.println(ah1.toHistogram(breaks));
      System.out.println("Approximate Histogram Rule Fold:");
      System.out.println(ah2.toHistogram(breaks));
      System.out.format("Error for approximate histogram: %s \n", err1);
      System.out.format("Error for approximate histogram, ruleFold: %s \n", err2);
      System.out.format("Error ratio for AHRF: %s \n", err2 / err1);
    }
    return new float[]{err1, err2, err2 / err1};
  }

}
TOP

Related Classes of io.druid.query.aggregation.histogram.ApproximateHistogramErrorBenchmark

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.