// Source code of edu.ucla.sspace.matrix.CorrelationTransform

/*
 * Copyright 2009 Keith Stevens
 *
 * This file is part of the S-Space package and is covered under the terms and
 * conditions therein.
 *
 * The S-Space package is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation and distributed hereunder to you.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
 * EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
 * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
 * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
 * RIGHTS.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */


package edu.ucla.sspace.matrix;


import edu.ucla.sspace.matrix.MatrixIO.Format;
import edu.ucla.sspace.matrix.TransformStatistics.MatrixStatistics;


import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.SparseVector;


import java.io.File;




/**
 * Transforms a matrix using row correlation weighting.  The input matrix is
 * assumed to be formatted as rows representing terms and columns representing
 * co-occurring terms.  Each matrix cell indicates the number of times the
 * row's word co-occurs with the column's term.  See the following paper for
 * details and analysis:
 *
 * <p style="font-family:Garamond, Georgia, serif"> Rohde, D. L. T., Gonnerman,
 * L. M., Plaut, D. C. (2005).  An Improved Model of Semantic Similarity Based
 * on Lexical Co-Occurrence. <i>Cognitive Science</i> <b>(submitted)</b>.
 * Available <a
 * href="http://www.cnbc.cmu.edu/~plaut/papers/pdf/RohdeGonnermanPlautSUB-CogSci.COALS.pdf">here</a></p>
 *
 * @author Keith Stevens
 */
public class CorrelationTransform extends BaseTransform {


    /**
     * {@inheritDoc}
     */
    protected GlobalTransform getTransform(File inputMatrixFile,
                                           MatrixIO.Format format) {
        return new CorrelationGlobalTransform(inputMatrixFile, format);
    }
    
    /**
     * {@inheritDoc}
     */
    protected GlobalTransform getTransform(Matrix matrix) {
        return new CorrelationGlobalTransform(matrix);
    }


    /**
     * Returns the name of this transform.
     */
    public String toString() {
        return "Correlation";
    }


    public class CorrelationGlobalTransform implements GlobalTransform {


        /**
         * The summation of the values each row
         */
        private double[] rowSums;


        /**
         * The summation of the values each column 
         */
        private double[] colSums;


        /**
         * The total sum of all values in the matrix.
         */
        private double totalSum;


        /**
         * Creates an instance of {@code CorrelationTransform} from a {@link
         * Matrix}.
         */
        public CorrelationGlobalTransform(Matrix matrix) {
            MatrixStatistics stats =
                TransformStatistics.extractStatistics(matrix);
            rowSums = stats.rowSums;
            colSums = stats.columnSums;
            totalSum = stats.matrixSum;
        }


        /**
         * Creates an instance of {@code CorrelationTransform} from a {@code
         * File} for format {@link Format}.
         */
        public CorrelationGlobalTransform(File inputMatrixFile,
                                          Format format) {
            MatrixStatistics stats =
                TransformStatistics.extractStatistics(inputMatrixFile, format);
            rowSums = stats.rowSums;
            colSums = stats.columnSums;
            totalSum = stats.matrixSum;
        }


        /**
         * Computes the correlation, scaled using the square root, between item
         * {@code row} and feature {@code column} where {@code value} specifies
         * the number of occurances.   If {@code value} is zero, the correlation
         * is zero.
         *
         * @param row The index specifying the item being observed
         * @param column The index specifying the feature being observed
         * @param value The number of occurance of the item and feature
         *
         * @return the square root of the correlation between the item aand
         *         feature
         */
        public double transform(int row, int column, double value) {
            if (value == 0d)
                return 0;


            double newValue = 
                (totalSum * value - rowSums[row] * colSums[column]) / 
                Math.sqrt(rowSums[row] * (totalSum - rowSums[row]) *
                        colSums[column] * (totalSum - colSums[column]));
            return (newValue > 0) ? Math.sqrt(newValue) : 0;
        }


        /**
         * Computes the correlation, scaled using the square root, between item
         * {@code row} and feature {@code column} where {@code value} specifies
         * the number of occurances.   If {@code value} is zero, the correlation
         * is zero.
         *
         * @param row The index specifying the item being observed
         * @param column The index specifying the feature being observed
         * @param value The number of occurance of the item and feature
         *
         * @return the square root of the correlation between the item aand
         *         feature
         */
        public double transform(int row, DoubleVector column) {
            double value = column.get(row);
            if (value == 0d)
                return 0;


            // Calcuate the term frequencies in this new document
            double colSum = 0;
            if (column instanceof SparseVector) {
                SparseVector sv = (SparseVector)column;
                for (int nz : sv.getNonZeroIndices())
                    colSum += column.get(nz);
            }
            else {
                int length = column.length();
                for (int i = 0; i < length; ++i)
                    colSum += column.get(i);
            }


            double newValue = 
                (totalSum * value - rowSums[row] * colSum) / 
                Math.sqrt(rowSums[row] * (totalSum - rowSums[row]) *
                        colSum * (totalSum - colSum));
            return (newValue > 0) ? Math.sqrt(newValue) : 0;
        }
    }
}
// Source code of edu.ucla.sspace.matrix.CorrelationTransform
//
// Related classes of edu.ucla.sspace.matrix.CorrelationTransform