package ch.akuhn.isomap;
import ch.akuhn.graph.DijkstraAlgorithm2;
import ch.akuhn.graph.Graph;
import ch.akuhn.matrix.DenseMatrix;
import ch.akuhn.matrix.Function;
import ch.akuhn.matrix.SymmetricMatrix;
import ch.akuhn.matrix.eigenvalues.Eigenvalues;
import ch.akuhn.org.ggobi.plugins.ggvis.Points;
/** Embeds n-dimensional data in two dimensions.
*<P>
* The Isomap algorithm takes as input the distances d_x(i,j) between all
* pairs i, j from N data points in the high-dimensional input space X,
* measured either in the standard Euclidean metric or in some domain-specific
* metric. The algorithm outputs coordinate vectors y_i in a d-dimensional
* Euclidean space Y that best represent the intrinsic geometry of the data.
* The only free parameter (e or k) appears in Step 1.
*
* @see http://waldron.stanford.edu/~isomap/
* @see http://www.sciencemag.org/cgi/content/full/290/5500/2319
*
*/
public abstract class Isomap {
public double e = 0.2;
public int k = 3;
public boolean useKNN = true;
public final int n;
private DenseMatrix graph;
private Points points;
public Isomap(int size) {
this.n = size;
}
protected abstract double dist(int i, int j);
public Points getPoints() {
return points;
}
/** Step 1: Construct neighborhood graph. Define the graph G over all data
* points by connecting points i and j if [as measured by d_x(i,j)] they
* are closer than e (e-Isomap), or if i is one of the k nearest neighbors
* of j (k-Isomap). Set edge lengths equal to d_x(i,j).
*
*/
public void constructNeighborhoodGraph() {
if (useKNN) {
constructKayNearestNeighborhoodGraph();
} else {
constructCloserThanEpsilonNeighborhoodGraph();
}
}
private void constructCloserThanEpsilonNeighborhoodGraph() {
graph = new SymmetricMatrix(n);
graph.fill(Double.POSITIVE_INFINITY);
for (int i = 0; i < n; i++) {
for (int j = 0; j < i; j++) {
if (dist(i,j) < e) graph.put(i, j, e);
}
}
}
private void constructKayNearestNeighborhoodGraph() {
graph = new SymmetricMatrix(n);
graph.fill(Double.POSITIVE_INFINITY);
for (int i = 0; i < n; i++) {
double[] data = new double[n];
for (int j = 0; j < n; j++) data[j] = dist(i,j);
int[] index = indicesOfMinima(data, k+1);
for (int k = 0; k < index.length; k++) {
if (i == index[k]) continue;
graph.put(i,index[k],dist(i,index[k]));
}
}
}
static int[] indicesOfMinima(double[] ds, int k) {
if (ds.length <= k) return range(ds.length);
int[] index = new int[k];
for (int n = 0; n < index.length; n++) {
int i = indexOfMinimum(ds);
index[n] = i;
ds[i] = Double.POSITIVE_INFINITY;
}
return index;
}
private static int[] range(int k) {
int[] range = new int[k];
for (int n = 0; n < range.length; n++) range[n] = n;
return range;
}
static int indexOfMinimum(double[] ds) {
assert ds.length > 0;
int index = 0;
double min = ds[0];
for (int n = 0; n < ds.length; n++) {
if (ds[n] < min) {
index = n;
min = ds[n];
}
}
return index;
}
/** Step 2: Compute shortest path. Initialize d_G(i,j) = d_X(i,j) if i, j are
* linked by an edge; d_G(i,j) = infinity otherwise. Then for each value of
* k in 1, 2, ..., N in turn, replace all entries d_G(i,j) by min { d_G(i,j),
* d_G(i,k) + d_G(k,j)}. The matrix of final values D_G = { d_G(i,j) } will
* contain the shortest path distances between all pairs of points in G.
*
*/
public void computeShortestPathWithBruteForce() {
for (int k0 = 0; k0 < n; k0++) {
for (int i = 0; i < n; i++) {
double path_i_k = graph.get(i,k0);
for (int j = 0; j < n; j++) {
graph.put(i,j, Math.min(graph.get(i,j), path_i_k + graph.get(k0,j)));
}
}
}
}
public void computeShortestPathWithDijkstra() {
Graph g = new Graph(graph);
DijkstraAlgorithm2 dijsktra = new DijkstraAlgorithm2();
for (int i = 0; i < n; i++) {
/* NB: it is safe to update graph and use it for distances
* in the same time since we only lookup the distances of
* directly connected nodes, which are not updated.
*/
double[] cost = dijsktra.apply(g, g.nodes[i], graph);
for (int j = 0; j < n; j++) graph.put(i,j, cost[j]);
}
}
public boolean[][] getEdges() {
boolean[][] edges = new boolean[graph.rowCount()][];
for (int i = 0; i < edges.length; i++) edges[i] = new boolean[i];
for (int i = 0; i < edges.length; i++) {
for (int j = 0; j < i; j++) {
edges[i][j] = !Double.isInfinite(graph.get(i,j));
}
}
return edges;
}
/** Step 3: Construct d-dimensional embedding. Let λ_p be the p-th eigenvalue
* (in decreasing order) of the matrix tau(D_G), and v^i_p be the i-th component
* of the p-th eigenvector. Then set the p-th component of the d-dimensional
* coordinate vector y_i equal to sqrt of λ_p * v^i_p.
*
*/
public void constructDeeDimensionalEmbedding() {
this.applyTauOperator();
Eigenvalues eigen = Eigenvalues.of(graph).largest(2);
eigen.run();
points = new Points(n);
for (int i = 0; i < n; i++) {
points.x[i] = Math.sqrt(eigen.value[0]) * eigen.vector[0].get(i);
points.y[i] = Math.sqrt(eigen.value[1]) * eigen.vector[1].get(i);
}
}
/** Where the operator tau is defined by tau(D) = -HSH/2, where S is the matrix
* of squared distances { s_ij = d_ij^2 }, and H is the "centering matrix"
* { H_ij = d_ij - 1/N }.
*<P>
* This leaves open whether the centering matrix of D or S is used, and why they
* refer to a delta in the centering matrix instead of using unit matrix minus
* reciprocal N. However, in their matlab script, the isomap authors use
*<PRE>-.5*(D.^2 - sum(D.^2)'*ones(1,N)/N - ones(N,1)*sum(D.^2)/N + sum(sum(D.^2))/(N^2))</PRE>
* so I went with the same: first squaring the matrix, then subtracting column-
* and row-wise mean and adding global mean, and eventually divide by minus two.
*/
private void applyTauOperator() {
graph.apply(Function.SQUARE);
double mean = graph.mean();
double[] columnMean = graph.rowwiseMean();
int N = graph.columnCount();
for (int i = 0; i < N; i++) {
for (int j = 0; j < i; j++) {
graph.add(i,j,mean - columnMean[i] - columnMean[j]);
}
}
graph.applyMultiplication(-0.5);
}
public void run() {
if (n > 1) {
this.constructNeighborhoodGraph();
this.computeShortestPathWithDijkstra();
this.constructDeeDimensionalEmbedding();
}
else {
points = new Points(n);
}
graph = null;
}
}