Package de.lmu.ifi.dbs.elki.math.histograms

Source Code of de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram$Adapter

package de.lmu.ifi.dbs.elki.math.histograms;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.ArrayList;
import java.util.Iterator;

import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

/**
* Histogram with flexible size, guaranteed to be in [bin, 2*bin[
*
* @author Erich Schubert
*
* @apiviz.composedOf de.lmu.ifi.dbs.elki.math.FlexiHistogram.Adapter
*
* @param <T> Type of data in histogram
* @param <D> Type of input data
*/
public class FlexiHistogram<T, D> extends AggregatingHistogram<T, D> {
  /**
   * Adapter class, extended "maker".
   */
  private Adapter<T, D> downsampler;

  /**
   * Cache for elements when not yet initialized.
   */
  private ArrayList<DoubleObjPair<D>> tempcache = null;

  /**
   * Destination (minimum) size of the structure. At most 2*destsize bins are
   * allowed.
   */
  private int destsize;

  /**
   * Adapter interface to specify bin creation, data caching and combination.
   *
   * @author Erich Schubert
   *
   * @param <T> Type of data in histogram
   * @param <D> Type of input data
   */
  public static abstract class Adapter<T, D> extends AggregatingHistogram.Adapter<T, D> {
    /**
     * Rule to combine two bins into one.
     *
     * first and second MAY be modified and returned.
     *
     * @param first First bin value
     * @param second Second bin value
     * @return combined bin value
     */
    public abstract T downsample(T first, T second);

    /**
     * Clone a data passed to the algorithm for computing the initial size.
     *
     * @param data Data to be cloned
     * @return cloned data
     */
    public abstract D cloneForCache(D data);
  }

  /**
   * Create a new histogram for an unknown data range.
   *
   * The generated histogram is guaranteed to have within {@code bins} and
   * {@code 2*bins} bins in length.
   *
   * @param bins Target number of bins
   * @param adapter Adapter for data types and combination rules.
   */
  public FlexiHistogram(int bins, Adapter<T, D> adapter) {
    super(bins, 0.0, 1.0, adapter);
    this.destsize = bins;
    this.downsampler = adapter;
    tempcache = new ArrayList<DoubleObjPair<D>>(this.destsize * 2);
  }

  private synchronized void materialize() {
    // already materialized?
    if(tempcache == null) {
      return;
    }
    // we can't really initialize, but since we have to, we'll just stick
    // to 0.0 and 1.0 as used in the constructor.
    if(tempcache.size() <= 0) {
      tempcache = null;
      return;
    }
    double min = Double.MAX_VALUE;
    double max = Double.MIN_VALUE;
    for(DoubleObjPair<D> pair : tempcache) {
      min = Math.min(min, pair.first);
      max = Math.max(max, pair.first);
    }
    // use the LinearScale magic to round to "likely suiteable" step sizes.
    LinearScale scale = new LinearScale(min, max);
    min = scale.getMin();
    max = scale.getMax();
    this.base = min;
    this.max = max;
    this.binsize = (max - min) / this.destsize;
    // initialize array
    this.data = new ArrayList<T>(this.destsize * 2);
    for(int i = 0; i < this.destsize; i++) {
      this.data.add(downsampler.make());
    }
    // re-insert data we have
    for(DoubleObjPair<D> pair : tempcache) {
      super.aggregate(pair.first, pair.second);
    }
    // delete cache, signal that we're initialized
    tempcache = null;
  }

  @Override
  public synchronized void replace(double coord, T d) {
    materialize();
    // super class put will already handle histogram resizing
    super.replace(coord, d);
    // but not resampling
    testResample();
  }

  private void testResample() {
    while(super.size >= 2 * this.destsize) {
      // Resampling.
      ArrayList<T> newdata = new ArrayList<T>(this.destsize * 2);
      for(int i = 0; i < super.size; i += 2) {
        if(i + 1 < super.size) {
          newdata.add(downsampler.downsample(super.data.get(i), super.data.get(i + 1)));
        }
        else {
          newdata.add(downsampler.downsample(super.data.get(i), super.make()));
        }
      }
      // recalculate histogram base.
      double base = super.base - super.offset * super.binsize;
      // update data
      super.data = newdata;
      // update sizes
      super.base = base;
      super.offset = 0;
      super.size = newdata.size();
      super.binsize = super.binsize * 2;
      super.max = super.base + super.binsize * super.size;
    }
  }

  @Override
  public T get(double coord) {
    materialize();
    return super.get(coord);
  }

  @Override
  public double getBinsize() {
    materialize();
    return super.getBinsize();
  }

  @Override
  public double getCoverMaximum() {
    materialize();
    return super.getCoverMaximum();
  }

  @Override
  public double getCoverMinimum() {
    materialize();
    return super.getCoverMinimum();
  }

  @Override
  public ArrayList<T> getData() {
    materialize();
    return super.getData();
  }

  @Override
  public int getNumBins() {
    materialize();
    return super.getNumBins();
  }

  @Override
  public double getBinMean(int bin) {
    materialize();
    return super.getBinMean(bin);
  }

  @Override
  public double getBinMin(int bin) {
    materialize();
    return super.getBinMin(bin);
  }

  @Override
  public double getBinMax(int bin) {
    materialize();
    return super.getBinMax(bin);
  }

  @Override
  public Iterator<DoubleObjPair<T>> iterator() {
    materialize();
    return super.iterator();
  }

  @Override
  public Iterator<DoubleObjPair<T>> reverseIterator() {
    materialize();
    return super.reverseIterator();
  }

  @Override
  public void aggregate(double coord, D value) {
    if(tempcache != null) {
      if(tempcache.size() < this.destsize * 2) {
        tempcache.add(new DoubleObjPair<D>(coord, downsampler.cloneForCache(value)));
        return;
      }
      else {
        materialize();
        // .. and continue below!
      }
    }
    // super class put will already handle histogram resizing
    super.aggregate(coord, value);
    // but not resampling
    testResample();
  }

  /**
   * Convenience constructor for Integer-based Histograms. Uses a constructor to
   * initialize bins with Integer(0)
   *
   * @param bins Number of bins
   * @return New histogram for Integer.
   */
  public static FlexiHistogram<Integer, Integer> IntSumHistogram(int bins) {
    return new FlexiHistogram<Integer, Integer>(bins, new Adapter<Integer, Integer>() {
      @Override
      public Integer make() {
        return new Integer(0);
      }

      @Override
      public Integer cloneForCache(Integer data) {
        // no need to clone, Integer are singletons
        return data;
      }

      @Override
      public Integer downsample(Integer first, Integer second) {
        return first + second;
      }

      @Override
      public Integer aggregate(Integer existing, Integer data) {
        return existing + data;
      }
    });
  }

  /**
   * Convenience constructor for Long-based Histograms. Uses a constructor to
   * initialize bins with Long(0)
   *
   * @param bins Number of bins
   * @return New histogram for Long.
   */
  public static FlexiHistogram<Long, Long> LongSumHistogram(int bins) {
    return new FlexiHistogram<Long, Long>(bins, new Adapter<Long, Long>() {
      @Override
      public Long make() {
        return new Long(0);
      }

      @Override
      public Long cloneForCache(Long data) {
        // no need to clone, Long are singletons
        return data;
      }

      @Override
      public Long downsample(Long first, Long second) {
        return first + second;
      }

      @Override
      public Long aggregate(Long existing, Long data) {
        return existing + data;
      }
    });
  }

  /**
   * Convenience constructor for Double-based Histograms. Uses a constructor to
   * initialize bins with Double(0), and downsampling is done by summation.
   *
   * @param bins Number of bins
   * @return New histogram for Doubles.
   */
  public static FlexiHistogram<Double, Double> DoubleSumHistogram(int bins) {
    return new FlexiHistogram<Double, Double>(bins, new Adapter<Double, Double>() {
      @Override
      public Double make() {
        return new Double(0.0);
      }

      @Override
      public Double cloneForCache(Double data) {
        // no need to clone, Doubles are singletons
        return data;
      }

      @Override
      public Double downsample(Double first, Double second) {
        return first + second;
      }

      @Override
      public Double aggregate(Double existing, Double data) {
        return existing + data;
      }
    });
  }

  /**
   * Convenience constructor for {@link MeanVariance}-based Histograms. Uses a
   * constructor to initialize bins with new {@link MeanVariance} objects
   *
   * @param bins Number of bins
   * @return New histogram for {@link MeanVariance}.
   */
  public static FlexiHistogram<MeanVariance, Double> MeanVarianceHistogram(int bins) {
    return new FlexiHistogram<MeanVariance, Double>(bins, new Adapter<MeanVariance, Double>() {
      @Override
      public MeanVariance make() {
        return new MeanVariance();
      }

      @Override
      public Double cloneForCache(Double data) {
        return data;
      }

      @Override
      public MeanVariance downsample(MeanVariance first, MeanVariance second) {
        first.put(second);
        return first;
      }

      @Override
      public MeanVariance aggregate(MeanVariance existing, Double data) {
        existing.put(data);
        return existing;
      }
    });
  }

  /**
   * Histograms that work like two {@link #IntSumHistogram}, component wise.
   *
   * @param bins Number of bins.
   * @return New Histogram object
   */
  public static FlexiHistogram<IntIntPair, IntIntPair> IntSumIntSumHistogram(int bins) {
    return new FlexiHistogram<IntIntPair, IntIntPair>(bins, new Adapter<IntIntPair, IntIntPair>() {
      @Override
      public IntIntPair make() {
        return new IntIntPair(0, 0);
      }

      @Override
      public IntIntPair cloneForCache(IntIntPair data) {
        return new IntIntPair(data.first, data.second);
      }

      @Override
      public IntIntPair downsample(IntIntPair first, IntIntPair second) {
        return new IntIntPair(first.first + second.first, first.second + second.second);
      }

      @Override
      public IntIntPair aggregate(IntIntPair existing, IntIntPair data) {
        existing.first = existing.first + data.first;
        existing.second = existing.second + data.second;
        return existing;
      }
    });
  }

  /**
   * Histograms that work like two {@link #LongSumHistogram}, component wise.
   *
   * @param bins Number of bins.
   * @return New Histogram object
   */
  public static FlexiHistogram<Pair<Long, Long>, Pair<Long, Long>> LongSumLongSumHistogram(int bins) {
    return new FlexiHistogram<Pair<Long, Long>, Pair<Long, Long>>(bins, new Adapter<Pair<Long, Long>, Pair<Long, Long>>() {
      @Override
      public Pair<Long, Long> make() {
        return new Pair<Long, Long>(0L, 0L);
      }

      @Override
      public Pair<Long, Long> cloneForCache(Pair<Long, Long> data) {
        return new Pair<Long, Long>(data.getFirst(), data.getSecond());
      }

      @Override
      public Pair<Long, Long> downsample(Pair<Long, Long> first, Pair<Long, Long> second) {
        return new Pair<Long, Long>(first.getFirst() + second.getFirst(), first.getSecond() + second.getSecond());
      }

      @Override
      public Pair<Long, Long> aggregate(Pair<Long, Long> existing, Pair<Long, Long> data) {
        existing.setFirst(existing.getFirst() + data.getFirst());
        existing.setSecond(existing.getSecond() + data.getSecond());
        return existing;
      }
    });
  }

  /**
   * Histograms that work like two {@link #DoubleSumHistogram}, component wise.
   *
   * @param bins Number of bins.
   * @return New Histogram object
   */
  public static FlexiHistogram<DoubleDoublePair, DoubleDoublePair> DoubleSumDoubleSumHistogram(int bins) {
    return new FlexiHistogram<DoubleDoublePair, DoubleDoublePair>(bins, new Adapter<DoubleDoublePair, DoubleDoublePair>() {
      @Override
      public DoubleDoublePair make() {
        return new DoubleDoublePair(0., 0.);
      }

      @Override
      public DoubleDoublePair cloneForCache(DoubleDoublePair data) {
        return new DoubleDoublePair(data.first, data.second);
      }

      @Override
      public DoubleDoublePair downsample(DoubleDoublePair first, DoubleDoublePair second) {
        return new DoubleDoublePair(first.first + second.first, first.second + second.second);
      }

      @Override
      public DoubleDoublePair aggregate(DoubleDoublePair existing, DoubleDoublePair data) {
        existing.first = existing.first + data.first;
        existing.second = existing.second + data.second;
        return existing;
      }
    });
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram$Adapter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.