Package de.lmu.ifi.dbs.elki.index.preprocessed.preference

Source Code of de.lmu.ifi.dbs.elki.index.preprocessed.preference.HiSCPreferenceVectorIndex

package de.lmu.ifi.dbs.elki.index.preprocessed.preference;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.BitSet;
import java.util.Iterator;
import java.util.List;

import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.HiSC;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;

/**
* Preprocessor for HiSC preference vector assignment to objects of a certain
* database.
*
* @author Elke Achtert
*
* @see HiSC
*/
@Title("HiSC Preprocessor")
@Description("Computes the preference vector of objects of a certain database according to the HiSC algorithm.")
public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> {
  /**
   * Logger to use
   */
  protected static final Logging logger = Logging.getLogger(HiSCPreferenceVectorIndex.class);

  /**
   * Holds the value of parameter alpha.
   */
  protected double alpha;

  /**
   * Holds the value of parameter k.
   */
  protected int k;

  /**
   * Constructor.
   *
   * @param relation Relation in use
   * @param alpha Alpha value
   * @param k k value
   */
  public HiSCPreferenceVectorIndex(Relation<V> relation, double alpha, int k) {
    super(relation);
    this.alpha = alpha;
    this.k = k;
  }

  @Override
  protected void preprocess() {
    if(relation == null || relation.size() <= 0) {
      throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY);
    }

    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class);

    StringBuffer msg = new StringBuffer();

    long start = System.currentTimeMillis();
    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null;

    KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k);

    Iterator<DBID> it = relation.iterDBIDs();
    while(it.hasNext()) {
      DBID id = it.next();

      if(logger.isDebugging()) {
        msg.append("\n\nid = ").append(id);
        ///msg.append(" ").append(database.getObjectLabelQuery().get(id));
        msg.append("\n knns: ");
      }

      List<DistanceResultPair<DoubleDistance>> knns = knnQuery.getKNNForDBID(id, k);
      ModifiableDBIDs knnIDs = DBIDUtil.newArray(knns.size());
      for(DistanceResultPair<DoubleDistance> knn : knns) {
        knnIDs.add(knn.getDBID());
        //if(logger.isDebugging()) {
        //  msg.append(database.getObjectLabelQuery().get(knn.getID())).append(" ");
        //}
      }

      BitSet preferenceVector = determinePreferenceVector(relation, id, knnIDs, msg);
      storage.put(id, preferenceVector);

      if(progress != null) {
        progress.incrementProcessed(logger);
      }
    }
    if(progress != null) {
      progress.ensureCompleted(logger);
    }

    if(logger.isDebugging()) {
      logger.debugFine(msg.toString());
    }

    long end = System.currentTimeMillis();
    // TODO: re-add timing code!
    if(logger.isVerbose()) {
      long elapsedTime = end - start;
      logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds.");
    }
  }

  /**
   * Determines the preference vector according to the specified neighbor ids.
   *
   * @param relation the database storing the objects
   * @param id the id of the object for which the preference vector should be
   *        determined
   * @param neighborIDs the ids of the neighbors
   * @param msg a string buffer for debug messages
   * @return the preference vector
   */
  private BitSet determinePreferenceVector(Relation<V> relation, DBID id, DBIDs neighborIDs, StringBuffer msg) {
    // variances
    double[] variances = DatabaseUtil.variances(relation, relation.get(id), neighborIDs);

    // preference vector
    BitSet preferenceVector = new BitSet(variances.length);
    for(int d = 0; d < variances.length; d++) {
      if(variances[d] < alpha) {
        preferenceVector.set(d);
      }
    }

    if(msg != null && logger.isDebugging()) {
      msg.append("\nalpha " + alpha);
      msg.append("\nvariances ");
      msg.append(FormatUtil.format(variances, ", ", 4));
      msg.append("\npreference ");
      msg.append(FormatUtil.format(variances.length, preferenceVector));
    }

    return preferenceVector;
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  @Override
  public String getLongName() {
    return "HiSC Preference Vectors";
  }

  @Override
  public String getShortName() {
    return "hisc-pref";
  }

  /**
   * Factory class
   *
   * @author Erich Schubert
   *
   * @apiviz.stereotype factory
   * @apiviz.uses HiSCPreferenceVectorIndex oneway - - «create»
   *
   * @param <V> Vector type
   */
  public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> {
    /**
     * The default value for alpha.
     */
    public static final double DEFAULT_ALPHA = 0.01;

    /**
     * The maximum absolute variance along a coordinate axis. Must be in the
     * range of [0.0, 1.0).
     * <p>
     * Default value: {@link #DEFAULT_ALPHA}
     * </p>
     * <p>
     * Key: {@code -hisc.alpha}
     * </p>
     */
    public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hisc.alpha", "The maximum absolute variance along a coordinate axis.");

    /**
     * The number of nearest neighbors considered to determine the preference
     * vector. If this value is not defined, k is set to three times of the
     * dimensionality of the database objects.
     * <p>
     * Key: {@code -hisc.k}
     * </p>
     * <p>
     * Default value: three times of the dimensionality of the database objects
     * </p>
     */
    public static final OptionID K_ID = OptionID.getOrCreateOptionID("hisc.k", "The number of nearest neighbors considered to determine the preference vector. If this value is not defined, k ist set to three times of the dimensionality of the database objects.");

    /**
     * Holds the value of parameter {@link #ALPHA_ID}.
     */
    protected double alpha;

    /**
     * Holds the value of parameter {@link #K_ID}.
     */
    protected Integer k;

    /**
     * Constructor.
     *
     * @param alpha Alpha
     * @param k k
     */
    public Factory(double alpha, Integer k) {
      super();
      this.alpha = alpha;
      this.k = k;
    }

    @Override
    public HiSCPreferenceVectorIndex<V> instantiate(Relation<V> relation) {
      final int usek;
      if(k == null) {
        usek = 3 * DatabaseUtil.dimensionality(relation);
      }
      else {
        usek = k;
      }
      return new HiSCPreferenceVectorIndex<V>(relation, alpha, usek);
    }

    /**
     * Parameterization class.
     *
     * @author Erich Schubert
     *
     * @apiviz.exclude
     */
    public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer {
      /**
       * Holds the value of parameter {@link #ALPHA_ID}.
       */
      protected double alpha;

      /**
       * Holds the value of parameter {@link #K_ID}.
       */
      protected Integer k;

     @Override
      protected void makeOptions(Parameterization config) {
        super.makeOptions(config);
        final DoubleParameter ALPHA_PARAM = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA);
        if(config.grab(ALPHA_PARAM)) {
          alpha = ALPHA_PARAM.getValue();
        }

        final IntParameter K_PARAM = new IntParameter(K_ID, new GreaterConstraint(0), true);
        if(config.grab(K_PARAM)) {
          k = K_PARAM.getValue();
        }
      }

      @Override
      protected Factory<V> makeInstance() {
        return new Factory<V>(alpha, k);
      }
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.index.preprocessed.preference.HiSCPreferenceVectorIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.