Package mia.clustering.ch10

Source Code of mia.clustering.ch10.InterClusterDistances

package mia.clustering.ch10;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.common.distance.CosineDistanceMeasure;
import org.apache.mahout.common.distance.DistanceMeasure;

public class InterClusterDistances {
 
  public static void main(String args[]) throws Exception {
   
    String inputFile = "reuters-kmeans-clusters/clusters-6/part-r-00000";
   
    Configuration conf = new Configuration();
    Path path = new Path(inputFile);
    System.out.println("Input Path: " + path);
    FileSystem fs = FileSystem.get(path.toUri(), conf);
   
    List<Cluster> clusters = new ArrayList<Cluster>();
   
    SequenceFile.Reader reader = new SequenceFile.Reader(
        fs, path, conf);
    Writable key = (Writable) reader.getKeyClass()
        .newInstance();
    Writable value = (Writable) reader.getValueClass()
        .newInstance();
   
    while (reader.next(key, value)) {
      Cluster cluster = (Cluster) value;
      clusters.add(cluster);
      value = (Writable) reader.getValueClass()
          .newInstance();
    }
   
    DistanceMeasure measure = new CosineDistanceMeasure();
    double max = 0;
    double min = Double.MAX_VALUE;
    double sum = 0;
    int count = 0;
    for (int i = 0; i < clusters.size(); i++) {
      for (int j = i + 1; j < clusters.size(); j++) {
        double d = measure.distance(clusters.get(i)
            .getCenter(), clusters.get(j).getCenter());
        min = Math.min(d, min);
        max = Math.max(d, max);
        sum += d;
        count++;
      }
    }
   
    System.out.println("Maximum Intercluster Distance: "
                       + max);
    System.out.println("Minimum Intercluster Distance: "
                       + min);
    System.out
        .println("Average Intercluster Distance(Scaled): "
                 + (sum / count - min) / (max - min));
  }
}
TOP

Related Classes of mia.clustering.ch10.InterClusterDistances

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.