Package de.lmu.ifi.dbs.elki.utilities.referencepoints

Source Code of de.lmu.ifi.dbs.elki.utilities.referencepoints.RandomSampleReferencePoints$Parameterizer

package de.lmu.ifi.dbs.elki.utilities.referencepoints;

/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures

Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;

/**
* Random-Sampling strategy for picking reference points.
*
* @author Erich Schubert
*
* @param <V> Vector type
*/
public class RandomSampleReferencePoints<V extends NumberVector<? extends V, ?>> implements ReferencePointsHeuristic<V> {
  // TODO: use reproducible Random

  /**
   * Parameter to specify the sample size.
   * <p>
   * Key: {@code -sample.n}
   * </p>
   */
  public static final OptionID N_ID = OptionID.getOrCreateOptionID("sample.n", "The number of samples to draw.");

  /**
   * Constant used in choosing optimal table sizes
   */
  private static final double log4 = Math.log(4);

  /**
   * Holds the value of {@link #N_ID}.
   */
  private int samplesize;

  /**
   * Constructor.
   *
   * @param samplesize
   */
  public RandomSampleReferencePoints(int samplesize) {
    super();
    this.samplesize = samplesize;
  }

  @Override
  public <T extends V> Collection<V> getReferencePoints(Relation<T> db) {
    if(samplesize >= db.size()) {
      LoggingUtil.warning("Sample size is larger than database size!");

      ArrayList<V> selection = new ArrayList<V>(db.size());
      for(DBID id : db.iterDBIDs()) {
        selection.add(db.get(id));
      }
      return selection;
    }

    ArrayList<V> result = new ArrayList<V>(samplesize);
    int dbsize = db.size();

    // Guess the memory requirements of a hashmap.
    // The values are based on Python code, and might need modification for
    // Java!
    // If the hashmap is likely to become too big, lazy-shuffle a list instead.
    int setsize = 21;
    if(samplesize > 5) {
      setsize += 2 << (int) Math.ceil(Math.log(samplesize * 3) / log4);
    }
    // logger.debug("Setsize: "+setsize);
    ArrayDBIDs ids = DBIDUtil.ensureArray(db.getDBIDs());
    boolean fastrandomaccess = false;
    if(ArrayList.class.isAssignableFrom(ids.getClass())) {
      fastrandomaccess = true;
    }
    if(samplesize <= setsize || !fastrandomaccess) {
      // use pool approach
      // if getIDs() is an array list, we don't need to copy it again.
      ArrayModifiableDBIDs pool = ((ArrayModifiableDBIDs.class.isAssignableFrom(ids.getClass())) ? (ArrayModifiableDBIDs) ids : DBIDUtil.newArray(ids));
      for(int i = 0; i < samplesize; i++) {
        int j = (int) Math.floor(Math.random() * (dbsize - i));
        result.add(db.get(pool.get(j)));
        pool.set(j, pool.get(dbsize - i - 1));
      }
      ids = null; // dirty!
    }
    else {
      HashSet<Integer> selected = new HashSet<Integer>();
      for(int i = 0; i < samplesize; i++) {
        int j = (int) Math.floor(Math.random() * dbsize);
        // Redraw from pool.
        while(selected.contains(j)) {
          j = (int) Math.floor(Math.random() * dbsize);
        }
        selected.add(j);
        result.add(db.get(ids.get(j)));
      }
    }
    assert (result.size() == samplesize);
    return result;
  }

  /**
   * Parameterization class.
   *
   * @author Erich Schubert
   *
   * @apiviz.exclude
   */
  public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends AbstractParameterizer {
    /**
     * Holds the value of {@link #N_ID}.
     */
    protected int samplesize;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      IntParameter samplesizeP = new IntParameter(N_ID, new GreaterConstraint(0));
      if(config.grab(samplesizeP)) {
        samplesize = samplesizeP.getValue();
      }
    }

    @Override
    protected RandomSampleReferencePoints<V> makeInstance() {
      return new RandomSampleReferencePoints<V>(samplesize);
    }
  }
}
TOP

Related Classes of de.lmu.ifi.dbs.elki.utilities.referencepoints.RandomSampleReferencePoints$Parameterizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.