Package de.lmu.ifi.dbs.elki.algorithm.outlier

Source Code of de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP$Parameterizer

package de.lmu.ifi.dbs.elki.algorithm.outlier;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.List;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;

/**
* LoOP: Local Outlier Probabilities
*
* Distance/density based algorithm similar to LOF to detect outliers, but with
* statistical methods to achieve better result stability.
*
* @author Erich Schubert
*
* @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
*/
@Title("LoOP: Local Outlier Probabilities")
@Description("Variant of the LOF algorithm normalized using statistical values.")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195")
public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(LoOP.class);

  /**
   * The distance function to determine the reachability distance between
   * database objects.
   */
  public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");

  /**
   * The distance function to determine the reachability distance between
   * database objects.
   */
  public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");

  /**
   * Parameter to specify the number of nearest neighbors of an object to be
   * considered for computing its LOOP_SCORE, must be an integer greater than 1.
   */
  public static final OptionID KREACH_ID = OptionID.getOrCreateOptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");

  /**
   * Parameter to specify the number of nearest neighbors of an object to be
   * considered for computing its LOOP_SCORE, must be an integer greater than 1.
   */
  public static final OptionID KCOMP_ID = OptionID.getOrCreateOptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");

  /**
   * Parameter to specify the number of nearest neighbors of an object to be
   * considered for computing its LOOP_SCORE, must be an integer greater than 1.
   */
  public static final OptionID LAMBDA_ID = OptionID.getOrCreateOptionID("loop.lambda", "The number of standard deviations to consider for density computation.");

  /**
   * Holds the value of {@link #KREACH_ID}.
   */
  int kreach;

  /**
   * Holds the value of {@link #KCOMP_ID}.
   */
  int kcomp;

  /**
   * Hold the value of {@link #LAMBDA_ID}.
   */
  double lambda;

  /**
   * Preprocessor Step 1
   */
  protected DistanceFunction<? super O, D> reachabilityDistanceFunction;

  /**
   * Preprocessor Step 2
   */
  protected DistanceFunction<? super O, D> comparisonDistanceFunction;

  /**
   * Include object itself in kNN neighborhood.
   */
  static boolean objectIsInKNN = false;

  /**
   * Constructor with parameters.
   *
   * @param kreach
   * @param kcomp
   * @param reachabilityDistanceFunction
   * @param comparisonDistanceFunction
   * @param lambda
   */
  public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) {
    super();
    this.kreach = kreach;
    this.kcomp = kcomp;
    this.reachabilityDistanceFunction = reachabilityDistanceFunction;
    this.comparisonDistanceFunction = comparisonDistanceFunction;
    this.lambda = lambda;
  }

  /**
   * Get the kNN queries for the algorithm.
   *
   * @param database Database
   * @param stepprog Progress logger
   * @return result
   */
  protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
    KNNQuery<O, D> knnComp;
    KNNQuery<O, D> knnReach;
    if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
      // We need each neighborhood twice - use "HEAVY" flag.
      knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
      // No optimized kNN query - use a preprocessor!
      if(knnComp == null) {
        if(stepprog != null) {
          stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", logger);
        }
        MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp);
        database.addIndex(preproc);
        DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
        knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
      }
      else {
        if(stepprog != null) {
          stepprog.beginStep(1, "Optimized neighborhoods provided by database.", logger);
        }
      }
      knnReach = knnComp;
    }
    else {
      if(stepprog != null) {
        stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", logger);
      }
      knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
      knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
    }
    return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnComp, knnReach);
  }

  /**
   * Performs the LoOP algorithm on the given database.
   */
  public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
    final double sqrt2 = Math.sqrt(2.0);

    StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;

    Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
    KNNQuery<O, D> knnComp = pair.getFirst();
    KNNQuery<O, D> knnReach = pair.getSecond();

    // Assert we got something
    if(knnComp == null) {
      throw new AbortException("No kNN queries supported by database for comparison distance function.");
    }
    if(knnReach == null) {
      throw new AbortException("No kNN queries supported by database for density estimation distance function.");
    }

    // Probabilistic distances
    WritableDataStore<Double> pdists = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
    {// computing PRDs
      if(stepprog != null) {
        stepprog.beginStep(3, "Computing pdists", logger);
      }
      FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        List<DistanceResultPair<D>> neighbors = knnReach.getKNNForDBID(id, kreach);
        double sqsum = 0.0;
        // use first kref neighbors as reference set
        int ks = 0;
        for(DistanceResultPair<D> neighbor : neighbors) {
          if(objectIsInKNN || neighbor.getDBID() != id) {
            double d = neighbor.getDistance().doubleValue();
            sqsum += d * d;
            ks++;
            if(ks >= kreach) {
              break;
            }
          }
        }
        Double pdist = lambda * Math.sqrt(sqsum / ks);
        pdists.put(id, pdist);
        if(prdsProgress != null) {
          prdsProgress.incrementProcessed(logger);
        }
      }
    }
    // Compute PLOF values.
    WritableDataStore<Double> plofs = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.class);
    MeanVariance mvplof = new MeanVariance();
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(4, "Computing PLOF", logger);
      }

      FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        List<DistanceResultPair<D>> neighbors = knnComp.getKNNForDBID(id, kcomp);
        MeanVariance mv = new MeanVariance();
        // use first kref neighbors as comparison set.
        int ks = 0;
        for(DistanceResultPair<D> neighbor1 : neighbors) {
          if(objectIsInKNN || neighbor1.getDBID() != id) {
            mv.put(pdists.get(neighbor1.getDBID()));
            ks++;
            if(ks >= kcomp) {
              break;
            }
          }
        }
        double plof = Math.max(pdists.get(id) / mv.getMean(), 1.0);
        if(Double.isNaN(plof) || Double.isInfinite(plof)) {
          plof = 1.0;
        }
        plofs.put(id, plof);
        mvplof.put((plof - 1.0) * (plof - 1.0));

        if(progressPLOFs != null) {
          progressPLOFs.incrementProcessed(logger);
        }
      }
    }

    double nplof = lambda * Math.sqrt(mvplof.getMean());
    if(logger.isDebugging()) {
      logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
    }

    // Compute final LoOP values.
    WritableDataStore<Double> loops = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
    {// compute LOOP_SCORE of each db object
      if(stepprog != null) {
        stepprog.beginStep(5, "Computing LoOP scores", logger);
      }

      FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
      for(DBID id : relation.iterDBIDs()) {
        loops.put(id, MathUtil.erf((plofs.get(id) - 1) / (nplof * sqrt2)));

        if(progressLOOPs != null) {
          progressLOOPs.incrementProcessed(logger);
        }
      }
    }

    if(stepprog != null) {
      stepprog.setCompleted(logger);
    }

    // Build result representation.
    Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
    return new OutlierResult(scoreMeta, scoreResult);
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    final TypeInformation type;
    if(reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
      type = reachabilityDistanceFunction.getInputTypeRestriction();
    }
    else {
      type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction());
    }
    return TypeUtil.array(type);
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
    /**
     * Holds the value of {@link #KREACH_ID}.
     */
    int kreach = 0;

    /**
     * Holds the value of {@link #KCOMP_ID}.
     */
    int kcomp = 0;

    /**
     * Hold the value of {@link #LAMBDA_ID}.
     */
    double lambda = 2.0;

    /**
     * Preprocessor Step 1
     */
    protected DistanceFunction<O, D> reachabilityDistanceFunction = null;

    /**
     * Preprocessor Step 2
     */
    protected DistanceFunction<O, D> comparisonDistanceFunction = null;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      final IntParameter kcompP = new IntParameter(KCOMP_ID, new GreaterConstraint(1));
      if(config.grab(kcompP)) {
        kcomp = kcompP.getValue();
      }

      final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
      if(config.grab(compDistP)) {
        comparisonDistanceFunction = compDistP.instantiateClass(config);
      }

      final IntParameter kreachP = new IntParameter(KREACH_ID, new GreaterConstraint(1), true);
      if(config.grab(kreachP)) {
        kreach = kreachP.getValue();
      }
      else {
        kreach = kcomp;
      }

      final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
      if(config.grab(reachDistP)) {
        reachabilityDistanceFunction = reachDistP.instantiateClass(config);
      }

      // TODO: make default 1.0?
      final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, new GreaterConstraint(0.0), 2.0);
      if(config.grab(lambdaP)) {
        lambda = lambdaP.getValue();
      }
    }

    @Override
    protected LoOP<O, D> makeInstance() {
      DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction;
      return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.