Package de.jungblut.clustering.mapreduce

Source Code of de.jungblut.clustering.mapreduce.KMeansReducer

package de.jungblut.clustering.mapreduce;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.Reducer;

import de.jungblut.clustering.model.ClusterCenter;
import de.jungblut.clustering.model.VectorWritable;
import de.jungblut.math.DoubleVector;

// calculate a new cluster center from the vectors assigned to each old center
@SuppressWarnings("deprecation")
public class KMeansReducer extends
    Reducer<ClusterCenter, VectorWritable, ClusterCenter, VectorWritable> {

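  // incremented once per center whose recomputed position matches the previous one;
  // a driver can read this counter after the job to decide whether to iterate again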
  public static enum Counter {
    CONVERGED
  }

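  // collects every newly computed center across all reduce() calls for cleanup()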
  private final List<ClusterCenter> centers = new ArrayList<>();

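  // all vectors grouped under the same (previous) center arrive in one reduce call;
  // their component-wise mean becomes the new center for that cluster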
  @Override
  protected void reduce(ClusterCenter key, Iterable<VectorWritable> values,
      Context context) throws IOException, InterruptedException {

    List<VectorWritable> vectorList = new ArrayList<>();
    DoubleVector newCenter = null;
    for (VectorWritable value : values) {
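      // copy the value, since Hadoop may reuse the writable instance across iterations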
      vectorList.add(new VectorWritable(value));
      if (newCenter == null)
        newCenter = value.getVector().deepCopy();
      else
        newCenter = newCenter.add(value.getVector());
    }

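    // the new center is the mean of all vectors assigned to the old one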
    newCenter = newCenter.divide(vectorList.size());
    ClusterCenter center = new ClusterCenter(newCenter);
    centers.add(center);
    for (VectorWritable vector : vectorList) {
      context.write(center, vector);
    }

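    // if the recomputed center has not moved relative to the previous key, count it as converged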
    if (center.converged(key))
      context.getCounter(Counter.CONVERGED).increment(1);

  }

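  // after all clusters are processed, overwrite the centroid file so that a
  // following iteration (or the driver) can pick up the new centers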
  @Override
  protected void cleanup(Context context) throws IOException,
      InterruptedException {
    super.cleanup(context);
    Configuration conf = context.getConfiguration();
    Path outPath = new Path(conf.get("centroid.path"));
    FileSystem fs = FileSystem.get(conf);
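    // remove the old centroid file before writing the recomputed centers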
    fs.delete(outPath, true);
    try (SequenceFile.Writer out = SequenceFile.createWriter(fs,
        context.getConfiguration(), outPath, ClusterCenter.class,
        IntWritable.class)) {
      final IntWritable value = new IntWritable(0);
      for (ClusterCenter center : centers) {
        out.append(center, value);
      }
    }
  }
}
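
The listing above covers only the reducer. As a rough sketch of how it might be wired into one k-means iteration, the following hypothetical driver configures a SequenceFile-based job, sets the centroid.path property that cleanup() relies on, and reads the CONVERGED counter afterwards. The class name KMeansJobSketch, the file paths, and the commented-out KMeansMapper reference are assumptions for illustration and do not appear in the source shown here.

package de.jungblut.clustering.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import de.jungblut.clustering.model.ClusterCenter;
import de.jungblut.clustering.model.VectorWritable;

// hypothetical driver sketch, not part of the listing above
public class KMeansJobSketch {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // cleanup() in KMeansReducer reads this property to know where to write the new centers
    conf.set("centroid.path", "files/clustering/cen.seq");

    Job job = Job.getInstance(conf, "k-means iteration");
    job.setJarByClass(KMeansReducer.class);

    // job.setMapperClass(KMeansMapper.class); // an assumed mapper emitting <ClusterCenter, VectorWritable>
    job.setReducerClass(KMeansReducer.class);
    // a single reducer, so cleanup() writes the complete set of centers to one file
    job.setNumReduceTasks(1);

    job.setMapOutputKeyClass(ClusterCenter.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setOutputKeyClass(ClusterCenter.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path("files/clustering/in"));
    SequenceFileOutputFormat.setOutputPath(job, new Path("files/clustering/out"));

    job.waitForCompletion(true);

    // one way to check for convergence: compare the CONVERGED counter to the number of centers
    long converged = job.getCounters()
        .findCounter(KMeansReducer.Counter.CONVERGED).getValue();
    System.out.println("converged centers: " + converged);
  }
}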