Package de.lmu.ifi.dbs.elki.application.greedyensemble

Source Code of de.lmu.ifi.dbs.elki.application.greedyensemble.VisualizePairwiseGainMatrix

package de.lmu.ifi.dbs.elki.application.greedyensemble;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.awt.image.BufferedImage;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.batik.util.SVGConstants;

import de.lmu.ifi.dbs.elki.application.AbstractApplication;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage;
import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling;
import de.lmu.ifi.dbs.elki.visualization.VisualizationTask;
import de.lmu.ifi.dbs.elki.visualization.VisualizerContext;
import de.lmu.ifi.dbs.elki.visualization.VisualizerParameterizer;
import de.lmu.ifi.dbs.elki.visualization.gui.SimpleSVGViewer;
import de.lmu.ifi.dbs.elki.visualization.svg.SVGPlot;
import de.lmu.ifi.dbs.elki.visualization.visualizers.Visualization;
import de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer;
import de.lmu.ifi.dbs.elki.workflow.InputStep;

/**
* Class to load an outlier detection summary file, as produced by
* {@link ComputeKNNOutlierScores}, and compute a matrix with the pairwise
* gains. It will have one column / row obtained for each combination.
*
* The gain is always computed in relation to the better of the two input
* methods. Green colors indicate the result has improved, red indicate it
* became worse.
*
* Reference:
* <p>
* E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
* On Evaluation of Outlier Rankings and Outlier Scores<br/>
* In Proceedings of the 12th SIAM International Conference on Data Mining
* (SDM), Anaheim, CA, 2012.
* </p>
*
* @author Erich Schubert
*/
@Reference(authors = "E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel", title = "On Evaluation of Outlier Rankings and Outlier Scores", booktitle = "Proc. 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.")
public class VisualizePairwiseGainMatrix extends AbstractApplication {
  /**
   * Get static logger
   */
  private static final Logging logger = Logging.getLogger(VisualizePairwiseGainMatrix.class);

  /**
   * The data input part.
   */
  private InputStep inputstep;

  /**
   * Parameterizer for visualizers
   */
  private VisualizerParameterizer vispar;

  /**
   * Constructor.
   *
   * @param verbose Verbosity
   * @param inputstep Input step
   * @param vispar Visualizer parameterizer
   */
  public VisualizePairwiseGainMatrix(boolean verbose, InputStep inputstep, VisualizerParameterizer vispar) {
    super(verbose);
    this.inputstep = inputstep;
    this.vispar = vispar;
  }

  @Override
  public void run() {
    final Database database = inputstep.getDatabase();
    final Relation<NumberVector<?, ?>> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
    final DBID firstid = labels.iterDBIDs().next();
    final String firstlabel = labels.get(firstid);
    if(!firstlabel.matches("bylabel")) {
      throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
    }

    // Dimensionality and reference vector
    final int dim = DatabaseUtil.dimensionality(relation);
    final NumberVector<?, ?> refvec = relation.get(firstid);

    // Build the truth vector
    Set<Integer> pos = new TreeSet<Integer>();
    final DoubleIntPair[] combined = new DoubleIntPair[dim];
    {
      for(int d = 0; d < dim; d++) {
        combined[d] = new DoubleIntPair(0, d);
        if(refvec.doubleValue(d + 1) > 0) {
          pos.add(d);
        }
      }
    }

    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    ids.remove(firstid);
    final int size = ids.size();

    double[][] data = new double[size][size];
    DoubleMinMax minmax = new DoubleMinMax();

    for(int a = 0; a < size; a++) {
      final NumberVector<?, ?> veca = relation.get(ids.get(a));
      // Direct AUC score:
      {
        for(int d = 0; d < dim; d++) {
          combined[d].first = veca.doubleValue(d + 1);
          combined[d].second = d;
        }
        Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR));
        double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator()));
        data[a][a] = auc;
        // minmax.put(auc);
        // logger.verbose(auc + " " + labels.get(ids.get(a)));
      }
      // Compare to others, exploiting symmetry
      for(int b = a + 1; b < size; b++) {
        final NumberVector<?, ?> vecb = relation.get(ids.get(b));
        for(int d = 0; d < dim; d++) {
          combined[d].first = veca.doubleValue(d + 1) + vecb.doubleValue(d + 1);
          combined[d].second = d;
        }
        Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR));
        double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator()));
        // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
        // labels.get(ids.get(b)));
        data[a][b] = auc;
        data[b][a] = auc;
        // minmax.put(auc);
      }
    }
    for(int a = 0; a < size; a++) {
      for(int b = a + 1; b < size; b++) {
        double ref = Math.max(data[a][a], data[b][b]);
        data[a][b] = (data[a][b] - ref) / (1 - ref);
        data[b][a] = (data[b][a] - ref) / (1 - ref);
        // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
        // labels.get(ids.get(b)));
        minmax.put(data[a][b]);
      }
    }
    for(int a = 0; a < size; a++) {
      data[a][a] = 0;
    }

    logger.verbose(minmax.toString());

    boolean hasneg = (minmax.getMin() < -1E-3);
    LinearScaling scale;
    if(!hasneg) {
      scale = new LinearScaling(minmax);
    }
    else {
      scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
    }

    BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
    for(int x = 0; x < size; x++) {
      for(int y = x; y < size; y++) {
        double val = data[x][y];
        val = scale.getScaled(val);
        // Compute color:
        final int col;
        {
          if(!hasneg) {
            int ival = 0xFF & (int) (255 * Math.max(0, val));
            col = 0xff000000 | (ival << 16) | (ival << 8) | ival;
          }
          else {
            if(val >= 0) {
              int ival = 0xFF & (int) (255 * val);
              col = 0xff000000 | (ival << 8);
            }
            else {
              int ival = 0xFF & (int) (255 * -val);
              col = 0xff000000 | (ival << 16);
            }
          }
        }
        img.setRGB(x, y, col);
        img.setRGB(y, x, col);
      }
    }
    SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
    database.getHierarchy().add(database, smat);

    VisualizerContext context = vispar.newContext(database);

    // Attach visualizers to results
    SimilarityMatrixVisualizer.Factory factory = new SimilarityMatrixVisualizer.Factory();
    factory.processNewResult(database, database);

    List<VisualizationTask> tasks = ResultUtil.filterResults(database, VisualizationTask.class);
    for(VisualizationTask task : tasks) {
      if(task.getFactory() == factory) {
        showVisualization(context, factory, task);
      }
    }
  }

  /**
   * Show a single visualization.
   *
   * @param context
   *
   * @param factory
   * @param task
   */
  private void showVisualization(VisualizerContext context, SimilarityMatrixVisualizer.Factory factory, VisualizationTask task) {
    SVGPlot plot = new SVGPlot();
    Visualization vis = factory.makeVisualization(task.clone(plot, context, null, 1.0, 1.0));
    plot.getRoot().appendChild(vis.getLayer());
    plot.getRoot().setAttribute(SVGConstants.SVG_WIDTH_ATTRIBUTE, "20cm");
    plot.getRoot().setAttribute(SVGConstants.SVG_HEIGHT_ATTRIBUTE, "20cm");
    plot.getRoot().setAttribute(SVGConstants.SVG_VIEW_BOX_ATTRIBUTE, "0 0 1 1");
    plot.updateStyleElement();

    (new SimpleSVGViewer()).setPlot(plot);
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractApplication.Parameterizer {
    /**
     * Data source
     */
    InputStep inputstep;

    /**
     * Parameterizer for visualizers
     */
    private VisualizerParameterizer vispar;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      // Data input
      inputstep = config.tryInstantiate(InputStep.class);
      // Visualization options
      vispar = config.tryInstantiate(VisualizerParameterizer.class);
    }

    @Override
    protected AbstractApplication makeInstance() {
      return new VisualizePairwiseGainMatrix(verbose, inputstep, vispar);
    }
  }

  /**
   * Main method.
   *
   * @param args Command line parameters.
   */
  public static void main(String[] args) {
    VisualizePairwiseGainMatrix.runCLIApplication(VisualizePairwiseGainMatrix.class, args);
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.application.greedyensemble.VisualizePairwiseGainMatrix

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.