
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.clustering.spectral.eigencuts;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.clustering.spectral.common.VertexWritable;
import org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob.EigencutsAffinityCutsCombiner;
import org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob.EigencutsAffinityCutsMapper;
import org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob.EigencutsAffinityCutsReducer;
import org.apache.mahout.common.DummyRecordWriter;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.junit.Test;

/**
* <p>Tests the Eigencuts affinity matrix "cut" ability, the core functionality
* of the algorithm responsible for producing the clusterings.</p>
*
* <p>Due to the complexity of this stage, and the amount of data required,
* the work proceeds in three steps: the mapper reads in the affinity/cut
* matrices and groups their entries into "vertices" of points, the combiner
* performs the actual checks on the sensitivities and zeroes out the
* necessary affinities, and finally the reducer reassembles the affinity
* matrix.</p>
*/
public class TestEigencutsAffinityCutsJob extends MahoutTestCase {
 
  private final double [][] affinity = { {0, 10, 2, 1}, {10, 0, 2, 2},
                                  {2, 2, 0, 10}, {1, 2, 10, 0} };
  private final double [][] sensitivity = { {0, 0, 1, 1}, {0, 0, 1, 1},
                                {1, 1, 0, 0}, {1, 1, 0, 0} };
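  // the affinity matrix is symmetric, with strong connections (weight 10)
  // within the pairs {0, 1} and {2, 3}; the sensitivity matrix marks every
  // entry between the two pairs for cutting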

  /**
   * Testing the mapper is fairly straightforward: there are two matrices
   * to be processed simultaneously (the cut matrix of sensitivities and the
   * affinity matrix), and since both are symmetric, the two symmetric
   * entries from each matrix are grouped under the same key - four vertices
   * in all, or only two in the case of an entry along the diagonal.
   *
   * The correct grouping of these quads or pairs of vertices is the only
   * output of the mapper.
   *
   * @throws Exception
   */
  @Test
  public void testEigencutsAffinityCutsMapper() throws Exception {
    EigencutsAffinityCutsMapper mapper = new EigencutsAffinityCutsMapper();
    Configuration conf = new Configuration();
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);
   
    // set up the writer
    DummyRecordWriter<Text, VertexWritable> writer =
      new DummyRecordWriter<Text, VertexWritable>();
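    // (DummyRecordWriter collects output in memory, so the emitted keys
    // and values can be inspected via getKeys() and getValue() below)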
    Mapper<IntWritable, VectorWritable, Text, VertexWritable>.Context context =
      DummyRecordWriter.build(mapper, conf, writer);
   
    // perform the maps
    for (int i = 0; i < this.affinity.length; i++) {
      VectorWritable aff = new VectorWritable(new DenseVector(this.affinity[i]));
      VectorWritable sens = new VectorWritable(new DenseVector(this.sensitivity[i]));
      IntWritable key = new IntWritable(i);
      mapper.map(key, aff, context);
      mapper.map(key, sens, context);
    }
   
    // were the vertices constructed correctly? if so, then for two 4x4
    // matrices, there should be 10 unique keys with 32 total entries:
    // 4 diagonal keys with 2 values each, 6 off-diagonal keys with 4 each
    assertEquals("Number of keys", 10, writer.getKeys().size());
    for (int i = 0; i < this.affinity.length; i++) {
      for (int j = 0; j < this.affinity.length; j++) {
        Text key = new Text(Math.max(i, j) + "_" + Math.min(i,j));
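        // e.g., (1, 3) and (3, 1) both map to the key "3_1", so symmetric
        // entries land in the same group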
        List<VertexWritable> values = writer.getValue(key);
       
        // if we're on a diagonal, there should only be 2 entries
        // otherwise, there should be 4
        if (i == j) {
          assertEquals("Diagonal entry", 2, values.size());
          for (VertexWritable v : values) {
            assertFalse("Diagonal values are zero", v.getValue() > 0);
          }
        } else {
          assertEquals("Off-diagonal entry", 4, values.size());
          if (i + j == 3) { // on the anti-diagonal, all values are greater than 0
            for (VertexWritable v : values) {
              assertTrue("Off-diagonal non-zero entries", v.getValue() > 0);
            }
          }
        }
      }
    }
  }
 
  /**
   * This is by far the trickiest step. However, there is one easy case: if
   * we have only two vertices - indicating an entry on the diagonal of the
   * two matrices - then we simply exit, since the algorithm does not operate
   * on the diagonal (it makes no sense to perform cuts by isolating data
   * points from themselves).
   *
   * If there are four vertices, then first we must separate the two which
   * belong to the affinity matrix from the two that are sensitivities. In
   * theory, each pair should hold exactly the same value (symmetry). If the
   * sensitivity is below a certain threshold, then the two values of the
   * affinity matrix are set to 0 - but not before the affinity values are
   * added to the diagonal, so as to maintain the overall sum of the row of
   * the affinity matrix.
   *
   * @throws Exception
   */
  @Test
  public void testEigencutsAffinityCutsCombiner() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);
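    // vertices are tagged with their source path name; setting AFFINITY_PATH
    // lets the combiner tell affinity vertices apart from sensitivity vertices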
   
    // since we need the working paths to distinguish the vertex types,
    // we can't use the mapper (there is no way of manually setting
    // the Context.workingPath())
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);
    
    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> redWriter =
      new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context
      redContext = DummyRecordWriter.build(combiner, conf, redWriter, Text.class,
      VertexWritable.class);
   
    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
      combiner.reduce(entry.getKey(), entry.getValue(), redContext);
    }
   
    // test the number of cuts: the sensitivity matrix marks four unique
    // off-diagonal pairs ({0,2}, {0,3}, {1,2}, {1,3}) for cutting
    assertEquals("Number of cuts detected", 4,
        redContext.getCounter(EigencutsAffinityCutsJob.CUTSCOUNTER.NUM_CUTS).getValue());
   
    // loop through all the results; let's see if they match up to our
    // affinity matrix (and that all the cuts appear where they should)
    Map<Text, List<VertexWritable>> results = redWriter.getData();
    for (Map.Entry<Text, List<VertexWritable>> entry : results.entrySet()) {
      List<VertexWritable> row = entry.getValue();
      IntWritable key = new IntWritable(Integer.parseInt(entry.getKey().toString()));
     
      double calcDiag = 0.0, trueDiag = sumOfRowCuts(key.get(), this.sensitivity);
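      // the combiner may emit several vertices at the diagonal column (the
      // original diagonal plus the folded-in cuts), so accumulate them and
      // compare against the expected sum afterwards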
      for (VertexWritable e : row) {

        // should the value have been cut, i.e. set to 0?
        if (key.get() == e.getCol()) {
          // we have our diagonal
          calcDiag += e.getValue();
        } else if (this.sensitivity[key.get()][e.getCol()] == 0.0) {
          // no, corresponding affinity should have same value as before
          assertEquals("Preserved affinity value",
              this.affinity[key.get()][e.getCol()], e.getValue(),EPSILON);
        } else {
          // yes, corresponding affinity value should be 0
          assertEquals("Cut affinity value", 0.0, e.getValue(),EPSILON);
        }
      }
      // check the diagonal has the correct sum
      assertEquals("Diagonal sum from cuts", trueDiag, calcDiag,EPSILON);
    }
  }
 
  /**
   * Fairly straightforward: the task here is to reassemble the rows of the
   * affinity matrix. The tricky part is that any element in the list which
   * does NOT lie on the diagonal is there because it did not drop below the
   * sensitivity threshold, and hence was not "cut".
   *
   * On the flip side, there will be many entries whose coordinate is now
   * set to the diagonal, indicating they were previously affinity entries
   * whose sensitivities fell below the threshold and hence were "cut" -
   * set to 0 at their original coordinates, with their values added to
   * the diagonal entry (hence the numerous entries with the coordinate of
   * the diagonal).
   *
   * @throws Exception
   */
  @Test
  public void testEigencutsAffinityCutsReducer() throws Exception {
    Configuration conf = new Configuration();
    Path affinity = new Path("affinity");
    Path sensitivity = new Path("sensitivity");
    conf.set(EigencutsKeys.AFFINITY_PATH, affinity.getName());
    conf.setInt(EigencutsKeys.AFFINITY_DIMENSIONS, this.affinity.length);
   
    // since we need the working paths to distinguish the vertex types,
    // we can't use the mapper (there is no way of manually setting
    // the Context.workingPath())
    Map<Text, List<VertexWritable>> data = buildMapData(affinity, sensitivity, this.sensitivity);
    
    // now, set up the combiner
    EigencutsAffinityCutsCombiner combiner = new EigencutsAffinityCutsCombiner();
    DummyRecordWriter<Text, VertexWritable> comWriter =
      new DummyRecordWriter<Text, VertexWritable>();
    Reducer<Text, VertexWritable, Text, VertexWritable>.Context
      comContext = DummyRecordWriter.build(combiner, conf, comWriter, Text.class,
      VertexWritable.class);
   
    // perform the combining
    for (Map.Entry<Text, List<VertexWritable>> entry : data.entrySet()) {
      combiner.reduce(entry.getKey(), entry.getValue(), comContext);
    }
   
    // finally, set up the reduction writers
    EigencutsAffinityCutsReducer reducer = new EigencutsAffinityCutsReducer();
    DummyRecordWriter<IntWritable, VectorWritable> redWriter = new
      DummyRecordWriter<IntWritable, VectorWritable>();
    Reducer<Text, VertexWritable, IntWritable, VectorWritable>.Context
      redContext = DummyRecordWriter.build(reducer, conf, redWriter,
      Text.class, VertexWritable.class);
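    // the reducer consumes the combiner's Text/VertexWritable pairs and
    // emits one IntWritable/VectorWritable entry per reassembled row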
   
    // perform the reduction
    for (Text key : comWriter.getKeys()) {
      reducer.reduce(key, comWriter.getValue(key), redContext);
    }
   
    // now, check that the affinity matrix is correctly formed
    for (IntWritable row : redWriter.getKeys()) {
      List<VectorWritable> results = redWriter.getValue(row);
      // there should only be 1 vector
      assertEquals("Only one vector with a given row number", 1, results.size());
      Vector therow = results.get(0).get();
      for (Vector.Element e : therow) {
        // check the diagonal
        if (row.get() == e.index()) {
          assertEquals("Correct diagonal sum of cuts", sumOfRowCuts(row.get(),
              this.sensitivity), e.get(),EPSILON);
        } else {
          // not on the diagonal...if it was an element labeled to be cut,
          // it should have a value of 0. Otherwise, it should have kept its
          // previous value
          if (this.sensitivity[row.get()][e.index()] == 0.0) {
            // should be what it was originally
            assertEquals("Preserved element", this.affinity[row.get()][e.index()], e.get(), EPSILON);
          } else {
            // should be 0
            assertEquals("Cut element", 0.0, e.get(), EPSILON);
          }
        }
      }
    }
  }
 
  /**
   * Utility method for simulating the mapper behavior: groups the symmetric
   * entries of both matrices under shared keys, as the mapper would.
   * @param affinity the path whose name tags affinity vertices
   * @param sensitivity the path whose name tags sensitivity vertices
   * @param array the sensitivity (cut) matrix to pair with the affinities
   * @return the grouped vertices, keyed by "max_min" coordinate pairs
   */
  private Map<Text, List<VertexWritable>> buildMapData(Path affinity,
      Path sensitivity, double [][] array) {
    Map<Text, List<VertexWritable>> map = new HashMap<Text, List<VertexWritable>>();
    for (int i = 0; i < this.affinity.length; i++) {
      for (int j = 0; j < this.affinity[i].length; j++) {
        Text key = new Text(Math.max(i, j) + "_" + Math.min(i, j));
        List<VertexWritable> toAdd = new ArrayList<VertexWritable>();
        if (map.containsKey(key)) {
          toAdd = map.get(key);
          map.remove(key);
        }
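        // each coordinate contributes one affinity vertex and one sensitivity
        // vertex, each tagged with its source path so the combiner can tell
        // them apart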
        toAdd.add(new VertexWritable(i, j, this.affinity[i][j], affinity.getName()));
        toAdd.add(new VertexWritable(i, j, array[i][j], sensitivity.getName()));
        map.put(key, toAdd);
      }
    }
    return map;
  }
 
  /**
   * Utility method for calculating the new diagonal element on the specified
   * row of the affinity matrix after a single iteration, given the specified
   * cut matrix.
   * @param row the row index into the affinity matrix
   * @param cuts the cut (sensitivity) matrix; nonzero entries mark cuts
   * @return the sum of the affinity values cut from the given row
   */
  private double sumOfRowCuts(int row, double [][] cuts) {
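    // a nonzero entry in the cut matrix marks (row, j) as cut, meaning its
    // affinity value was folded into the diagonal; sum those values here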
    double retval = 0.0;
    for (int j = 0; j < this.affinity[row].length; j++) {
      if (cuts[row][j] != 0.0) {
        retval += this.affinity[row][j];
      }
    }
    return retval;
  }
}