Package edu.ucla.sspace.clustering

Source Code of edu.ucla.sspace.clustering.ClutoClustering

/*
* Copyright 2009 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.clustering;

import edu.ucla.sspace.matrix.Matrix;
import edu.ucla.sspace.matrix.MatrixIO;
import edu.ucla.sspace.matrix.SparseMatrix;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.IOError;
import java.io.IOException;

import java.util.Properties;

import java.util.logging.Level;
import java.util.logging.Logger;


/**
* A class for interacting with the <a
* href="http://glaros.dtc.umn.edu/gkhome/cluto/cluto/overview">CLUTO</a>
* clustering library.
*
* @author David Jurgens
*/
public class ClutoClustering implements Clustering {

    /**
     * A property prefix for specifiying options when using Cluto.
     */
    public static final String PROPERTY_PREFIX =
        "edu.ucla.sspace.clustering.ClutoClustering";

    /**
     * The property to set the name of a {@link ClutoClustering.Method} that
     * Cluto should use in clustering the data.
     */
    public static final String CLUSTER_METHOD =
        PROPERTY_PREFIX + ".clusterSimilarity";

    /**
     * The property to set the name of a {@link ClutoClustering.Criterion} that
     * Cluto should use in clustering the data.
     */
    public static final String CLUSTER_CRITERION =
        PROPERTY_PREFIX + ".clusterCriterion";

    /**
     * The method by which CLUTO should cluster the data points
     */
    public enum Method {
       
        REPEATED_BISECTIONS_REPEATED("rbr"),
        KMEANS("direct"),
        AGGLOMERATIVE("agglo"),
        NEAREST_NEIGHBOOR("graph"),
        BAGGLO("bagglo");      

        /**
         * The string abbreviation for each clustering method
         */
        private final String name;

        Method(String name) {
            this.name = name;
        }

        /**
         * Returns the name for this method that CLUTO uses on the command line.
         */
        String getClutoName() {
            return name;
        }
    }

    /**
     * The crition function by which CLUTO should evaluate the clustering
     * assignment.
     */
    public enum Criterion  {
       
        I1("i1"),
        I2("i2"),
        E1("e1"),
        G1("g1"),
        G1P("g1p"),
        H1("h1"),
        H2("h2"),
        SLINK("slink"),
        WSLINK("wslink"),
        CLINK("clink"),
        WCLINK("wclink"),
        UPGMA("upgma"),
        WUPGMA("wupgma");

        /**
         * The string abbreviation for each clustering method
         */
        private final String name;

        Criterion(String name) {
            this.name = name;
        }

        /**
         * Returns the name for this method that CLUTO uses on the command line.
         */
        String getClutoName() {
            return name;
        }
    }

    /**
     * The default clustering method to be used by Cluto.
     */
    private static Method DEFAULT_CLUSTER_METHOD = Method.AGGLOMERATIVE;

    private static Criterion DEFAULT_CRITERION = Criterion.UPGMA;

    /**
     * A logger to track the status of Cluto.
     */
    private static final Logger LOGGER =
        Logger.getLogger(ClutoClustering.class.getName());

    /**
     * Creates a new {@code ClutoClustering} instance.
     */
    public ClutoClustering() { }

    /**
     * Throws an {@link UnsupportedOperationException} if called, as CLUTO
     * requires the number of clusters to be specified.
     */
    public Assignments cluster(Matrix matrix, Properties properties) {
        throw new UnsupportedOperationException(
            "CLUTO requires the number of clusters to be specified and " +
            "therefore cannot be invoked using this method.");
    }

    /**
     * {@inheritDoc}
     *
     * @param properties the properties to use for clustering with CLUTO.  See
     *        {@link ClutoClustering} for the list of supported properties.
     */
    public Assignments cluster(Matrix matrix, int numClusters,
                                Properties properties) {
        Method clmethod = DEFAULT_CLUSTER_METHOD;
        String methodProp = properties.getProperty(CLUSTER_METHOD);
        if (methodProp != null)
            clmethod = Method.valueOf(methodProp);
        Criterion criterion = DEFAULT_CRITERION;
        String criterionProp = properties.getProperty(CLUSTER_CRITERION);
        if (criterionProp != null)
            criterion = Criterion.valueOf(criterionProp);
        return cluster(matrix, numClusters, clmethod, criterion);
    }

    /**
     * Clusters the set of rows in the given {@code Matrix} into a specified
     * number of clusters using the specified CLUTO clustering method.
     *
     * @param matrix the {@link Matrix} containing data points to cluster
     * @param numClusters the number of clusters to generate
     * @param clusterMethod the method by which cluto should cluster the rows
     *
     * @return an array of {@link Assignment} instances that indicate zero or
     *         more clusters to which each row belongs.
     */
    public Assignments cluster(Matrix matrix, int numClusters,
                                Method clusterMethod,
                                Criterion criterionMethod) {
        try {
            String clmethod = clusterMethod.getClutoName();
            String crtmethod = criterionMethod.getClutoName();
            return ClutoWrapper.cluster(matrix, clmethod,
                                        crtmethod, numClusters);
        } catch (IOException ioe) {
            throw new IOError(ioe);
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.clustering.ClutoClustering

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.