Source Code of ch.akuhn.linalg.SVD

package ch.akuhn.linalg;


import ch.akuhn.edu.mit.tedlab.SMat;
import ch.akuhn.edu.mit.tedlab.SVDRec;
import ch.akuhn.edu.mit.tedlab.Svdlib;
import ch.akuhn.matrix.SparseMatrix;


/** Singular value decomposition of matrix A.
 *<P> 
 * Given a term-document matrix A, with dimension m on n, the decomposition consists of
 * S = a diagonal matrix with the k largest singular values, 
 * U = m left singular vectors of length k, one for each document, and
 * V = n right singular vectors of length k, one for each term.
 * The matrix A' = U * S * V^T is the closest rank k approximation of of matrix A
 * [Eckard and Young].
 *<P>
 * Instances of this class are immutable. 
 * 
 * @author Adrian Kuhn, 2008-2009
 *
 */
public class SVD {


    public final double[] s;
    public final double[][] U; // terms
    public final double[][] V; // documents




    public SVD(double[] s, double[][] U, double[][] V) {
        this.s = s;
        this.U = U;
        this.V = V; 
        assert invariant();
    }


    private boolean invariant() {
        if (s == null || U == null || V == null ) return false;
        if (s.length > U.length || s.length > V.length) return false;
        for (int n = 0; n < U.length; n++) if (s.length != U[n].length) return false;
        for (int n = 0; n < V.length; n++) if (s.length != V[n].length) return false;
        return true;
    }




    public SVD(SparseMatrix matrix, int dimensions) {
        if (matrix.rowCount() == 0 || matrix.columnCount() == 0) {
            s = new double[0];
            U = new double[matrix.rowCount()][0];
            V = new double[matrix.columnCount()][0];
        }
        else {
            SMat input = makeSMat(matrix);
            SVDRec r = new Svdlib().svdLAS2(input, dimensions, 0, new double[] { -1e-30, 1e-30 }, 1e-6);
            s = r.S;
            U = transpose(r.Ut.value);
            V = transpose(r.Vt.value);
        }
        assert invariant();
    }


    /*default*/ static final double[][] append(double[][] data, double[] values) {
        assert data.length == 0 || data[0].length == values.length;
        double[][] result = new double[data.length + 1][];
        System.arraycopy(data, 0, result, 0, data.length);
        result[data.length] = values;
        return result;
    }


    /*default*/ static final double[][] replace(double[][] data, int index, double[] values) {
        assert 0 <= index && index < data.length;
        assert data[index].length == values.length;
        double[][] result = new double[data.length][];
        System.arraycopy(data, 0, result, 0, data.length);
        result[index] = values;
        return result;
    }


    /*default*/ static final double[][] remove(double[][] data, int index) {
        assert 0 <= index && index < data.length;
        double[][] result = new double[data.length - 1][];
        System.arraycopy(data, 0, result, 0, index);
        System.arraycopy(data, index + 1, result, index, data.length - index - 1);
        return result;
    }




    /** Returns a copy with additional term.
     * 
     * @param u new term vector, length k.
     * @return new copy of this instance.
     */
    public SVD withAppendU(double[] u) {
        assert u.length == s.length;
        return new SVD(s, append(U, u), V);
    }


    /** Returns a copy with additional document.
     * 
     * @param v new document vector, length k.
     * @return new copy of this instance.
     */
    public SVD withAppendV(double[] v) {
        assert v.length == s.length;
        return new SVD(s, U, append(V, v));
    }


    /** Returns the rank of this singular value decomposition.
     * 
     * @return a positive integer.
     */
    public int getRank() {
        return s.length;
    }


    private double[] project(double[][] data, double[] weightings) {
        if (s.length == 0) return new double[] { };
        assert weightings.length == data.length;
        double[] result = new double[s.length];
        for (int n = 0; n < weightings.length; n++) {
            double weight = weightings[n];
            if (weight == 0) continue;
            double[] dn = data[n];
            for (int k = 0; k < s.length; k++) {
                result[k] += weight * dn[k];
            }
        }
        for (int k = 0; k < s.length; k++) {
            result[k] /= s[k];
        }
        return result;
    }


    /** Places a term at the weighted sum of its documents.
     * 
     * @param weightings document weight vector, length = m.
     * @return pseudo term vector.
     */
    public double[] makePseudoU(double[] weightings) {
        return project(V, weightings);
    }


    /** Places a document at the weighted sum of its terms.
     * 
     * @param weightings
     * @return
     */
    public double[] makePseudoV(double[] weightings) {
        return project(U, weightings);
    }


    /*default*/ static final SMat makeSMat(SparseMatrix matrix) {
        SMat S = new SMat(matrix.rowCount(), matrix.columnCount(), matrix.used());
        for (int j = 0, n = 0; j < matrix.columnCount(); j++) {
            S.pointr[j] = n;
            for (int i = 0; i < matrix.rowCount(); i++)
                if (matrix.get(i, j) != 0) {
                    S.rowind[n] = i;
                    S.value[n] = matrix.get(i, j);
                    n++;
                }
        }
        S.pointr[S.cols] = S.vals;
        return S;
    }


    private double similaritySqrt(double[] a, double[] b) {
        double sim = 0;
        double suma = 0;
        double sumb = 0;
        for (int k = 0; k < b.length; k++) {
            double s2 = s[k]; 
            sim += a[k] * b[k] * s2;
            suma += a[k] * a[k] * s2;
            sumb += b[k] * b[k] * s2;
        }
        return (sim / (Math.sqrt(suma) * Math.sqrt(sumb)));
    }




    private double similarity(double[] a, double[] b) {
        double sim = 0;
        double suma = 0;
        double sumb = 0;
        for (int k = 0; k < b.length; k++) {
            double s2 = s[k] * s[k]; 
            sim += a[k] * b[k] * s2;
            suma += a[k] * a[k] * s2;
            sumb += b[k] * b[k] * s2;
        }
        return (sim / (Math.sqrt(suma) * Math.sqrt(sumb)));
    }


    /** Computes the similarity between i-th term and pseudo-term t.
     * 
     * @param i index of a term, range 0..m.
     * @param t a pseudo-term vector, length k
     * @return a cosine value, typically greater then zero.
     */
    public double similarityU(int i, double[] t) {
        if (s.length == 0) return Double.NaN;
        return similarity(U[i], t);
    }


    /** Computes the similarity between a-th and b-th term.
     * 
     * @param a index of a term, range 0..m.
     * @param b index of a term, range 0..m.
     * @return a cosine value, typically greater then zero.
     */
    public double similarityUU(int a, int b) {
        if (s.length == 0) return Double.NaN;
        return similarity(U[a], U[b]);
    }


    /** Computes the similarity between i-th term and j-th document.
     * 
     * @param i index of a term, range 0..m.
     * @param j index of a document, range 0..n.
     * @return a cosine value, typically greater than zero.
     */
    public double similarityUV(int i, int j) {
        if (s.length == 0) return Double.NaN;
        return similaritySqrt(U[i], V[j]);
    }


    /** Computes the similarity between j-th document and pseudo-document d.
     * 
     * @param j index of a document, range 0..n. 
     * @param d a pseudo-document vector, length k.
     * @return a cosine value, typically greater than zero.
     */
    public double similarityV(int j, double[] d) {
        if (s.length == 0) return Double.NaN;
        return similarity(V[j], d);
    }


    /** Computes the similarity between a-th and b-th document.
     * 
     * @param a index of a document, range 0..n.
     * @param b index of a document, range 0..n.
     * @return a cosine value, typically greater then zero.
     */
    public double similarityVV(int a, int b) {
        if (s.length == 0) return Double.NaN;
        return similarity(V[a], V[b]);
    }


    public double euclidianVV(int a, int b) {
        if (s.length == 0) return Double.NaN;
        double[] aa = V[a];
        double[] bb = V[b];
        double sum = 0;
        for (int k = 0; k < bb.length; k++) {
            sum += (aa[k] - bb[k]) * (aa[k] - bb[k]);
        }
        return Math.sqrt(sum);
    }
    
    
    /*default*/ static final double[][] transpose(double[][] array) {
        if (array.length == 0) return new double[][] {{}};
        int width = array[0].length, height = array.length;
        double[][] t = new double[width][height];
        for (int n = 0; n < width; n++) {
            for (int m = 0; m < height; m++) {
                t[n][m] = array[m][n];
            }
        }
        return t;
    }


    /** Returns the decomposition of matrix A^T.
     * 
     * @return new copy of this instance.
     */
    public SVD transposed() {
        return new SVD(s, V, U); // swap U and V
    }


    /** Returns a copy, where one term is updated.
     * 
     * @param index index of a term, range 0..m.
     * @param values new term vector, length k.
     * @return new copy of this instance.
     */
    public SVD withReplaceU(int index, double[] values) {
        assert values.length == s.length;
        return new SVD(s, replace(U, index, values), V);
    }


    /** Returns a copy, where one document is updated.
     * 
     * @param index index of a document, range 0..n.
     * @param values new document vector, length k.
     * @return new copy of this instance.
     */
    public SVD withReplaceV(int index, double[] values) {
        assert values.length == s.length;
        return new SVD(s, U, replace(V, index, values));
    }


    /** Returns the number of right singular vectors in V,
     * that is the number of columns of A (m x n).
     * Given a term-document matrix, this is the number of documents.
     * 
     * @return the value of n.
     */
    public int columnCount() {
        return V.length;
    }


    /** Returns the number of left singular vectors in U,
     * that is the number of rows of A (m x n).
     * Given a term-document matrix, this is the number of terms.
     * 
     * @return the value of m.
     */
    public int rowCount() {
        return U.length;
    }


    /** Returns a copy, with selected documents only.
     * 
     * @param indices list of indices of documents, range 0..n.
     * @return a new copy of this instance.
     * 
     */
    public SVD withSelectV(int[] indices) {
        double[][] selectV = new double[indices.length][s.length];
        for (int n = 0; n < indices.length; n++) {
            for (int k = 0; k < s.length; k++) {
                selectV[n][k] = V[indices[n]][k];
            }
        }
        return new SVD(s, U, selectV);
    }


    /** Returns copy without given document.
     * 
     * @param doc index of a document, range 0..n.
     * @return new copy of this instance.
     */
    public SVD withoutV(int doc) {
        return new SVD(s, U, remove(V, doc));
    }




}
Source Code of ch.akuhn.linalg.SVD

Related Classes of ch.akuhn.linalg.SVD