Package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash

Source Code of de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHIntervalSplit

package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.HashMap;
import java.util.Map;

import de.lmu.ifi.dbs.elki.data.HyperBoundingBox;
import de.lmu.ifi.dbs.elki.data.ParameterizationFunction;
import de.lmu.ifi.dbs.elki.data.spatial.SpatialUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;

/**
* Supports the splitting of CASH intervals.
*
* @author Elke Achtert
*/
public class CASHIntervalSplit {
  /**
   * The database storing the parameterization functions.
   */
  private Relation<ParameterizationFunction> database;

  /**
   * Caches minimum function values for given intervals, used for better split
   * performance.
   */
  private Map<HyperBoundingBox, Map<DBID, Double>> f_minima;

  /**
   * Caches maximum function values for given intervals, used for better split
   * performance.
   */
  private Map<HyperBoundingBox, Map<DBID, Double>> f_maxima;

  /**
   * Minimum points.
   */
  private int minPts;

  /**
   * The logger of the class.
   */
  private final static Logging logger = Logging.getLogger(CASHIntervalSplit.class);

  /**
   * Initializes the logger and sets the debug status to the given value.
   *
   * @param database the database storing the parameterization functions
   * @param minPts the number of minimum points
   */
  public CASHIntervalSplit(Relation<ParameterizationFunction> database, int minPts) {
    super();

    this.database = database;
    this.minPts = minPts;
    this.f_minima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
    this.f_maxima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
  }

  /**
   * Determines the ids belonging to the given interval, i.e. the
   * parameterization functions falling within the interval.
   *
   * @param superSetIDs a superset of the ids to be determined
   * @param interval the hyper bounding box defining the interval of alpha
   *        values
   * @param d_min the minimum distance value for the interval
   * @param d_max the maximum distance value for the interval
   * @return the ids belonging to the given interval, if the number ids of
   *         exceeds minPts, null otherwise
   */
  public ModifiableDBIDs determineIDs(DBIDs superSetIDs, HyperBoundingBox interval, double d_min, double d_max) {
    StringBuffer msg = new StringBuffer();
    if(logger.isDebugging()) {
      msg.append("interval ").append(interval);
    }

    ModifiableDBIDs childIDs = DBIDUtil.newHashSet(superSetIDs.size());

    Map<DBID, Double> minima = f_minima.get(interval);
    Map<DBID, Double> maxima = f_maxima.get(interval);
    if(minima == null || maxima == null) {
      minima = new HashMap<DBID, Double>();
      f_minima.put(interval, minima);
      maxima = new HashMap<DBID, Double>();
      f_maxima.put(interval, maxima);
    }

    for(DBID id : superSetIDs) {
      Double f_min = minima.get(id);
      Double f_max = maxima.get(id);

      if(f_min == null) {
        ParameterizationFunction f = database.get(id);
        HyperBoundingBox minMax = f.determineAlphaMinMax(interval);
        f_min = f.function(SpatialUtil.getMin(minMax));
        f_max = f.function(SpatialUtil.getMax(minMax));
        minima.put(id, f_min);
        maxima.put(id, f_max);
      }

      if(logger.isDebugging()) {
        msg.append("\n\nf_min ").append(f_min);
        msg.append("\nf_max ").append(f_max);
        msg.append("\nd_min ").append(d_min);
        msg.append("\nd_max ").append(d_max);
      }

      if(f_min - f_max > ParameterizationFunction.DELTA) {
        throw new IllegalArgumentException("Houston, we have a problem: f_min > f_max! " + "\nf_min[" + FormatUtil.format(SpatialUtil.centroid(interval)) + "] = " + f_min + "\nf_max[" + FormatUtil.format(SpatialUtil.centroid(interval)) + "] = " + f_max + "\nf " + database.get(id));
      }

      if(f_min <= d_max && f_max >= d_min) {
        childIDs.add(id);
        if(logger.isDebugging()) {
          msg.append("\nid ").append(id).append(" appended");
        }
      }

      else {
        if(logger.isDebugging()) {
          msg.append("\nid ").append(id).append(" NOT appended");
        }
      }
    }

    if(logger.isDebugging()) {
      msg.append("\nchildIds ").append(childIDs.size());
      logger.debugFine(msg.toString());
    }

    if(childIDs.size() < minPts) {
      return null;
    }
    else {
      return childIDs;
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHIntervalSplit

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.