Package edu.ucla.sspace.matrix

Source Code of edu.ucla.sspace.matrix.CorrelationTransform

/*
* Copyright 2009 Keith Stevens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.matrix;

import edu.ucla.sspace.matrix.MatrixIO.Format;
import edu.ucla.sspace.matrix.TransformStatistics.MatrixStatistics;

import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.SparseVector;

import java.io.File;


/**
* Transforms a matrix using row correlation weighting.  The input matrix is
* assumed to be formatted as rows representing terms and columns representing
* co-occurring terms.  Each matrix cell indicates the number of times the
* row's term co-occurs with the column's term.  See the following paper for
* details and analysis:
*
* <p style="font-family:Garamond, Georgia, serif"> Rohde, D. L. T., Gonnerman,
* L. M., Plaut, D. C. (2005).  An Improved Model of Semantic Similarity Based
* on Lexical Co-Occurrence. <i>Cognitive Science</i> <b>(submitted)</b>.
* Available <a
* href="http://www.cnbc.cmu.edu/~plaut/papers/pdf/RohdeGonnermanPlautSUB-CogSci.COALS.pdf">here</a></p>

* @author Keith Stevens
*/
public class CorrelationTransform extends BaseTransform {

    /**
     * {@inheritDoc}
     */
    protected GlobalTransform getTransform(File inputMatrixFile,
                                           MatrixIO.Format format) {
        return new CorrelationGlobalTransform(inputMatrixFile, format);
    }
   
    /**
     * {@inheritDoc}
     */
    protected GlobalTransform getTransform(Matrix matrix) {
        return new CorrelationGlobalTransform(matrix);
    }

    /**
     * Returns the name of this transform.
     */
    public String toString() {
        return "Correlation";
    }

    public class CorrelationGlobalTransform implements GlobalTransform {

        /**
         * The summation of the values each row
         */
        private double[] rowSums;

        /**
         * The summation of the values each column
         */
        private double[] colSums;

        /**
         * The total sum of all values in the matrix.
         */
        private double totalSum;

        /**
         * Creates an instance of {@code CorrelationTransform} from a {@link
         * Matrix}.
         */
        public CorrelationGlobalTransform(Matrix matrix) {
            MatrixStatistics stats =
                TransformStatistics.extractStatistics(matrix);
            rowSums = stats.rowSums;
            colSums = stats.columnSums;
            totalSum = stats.matrixSum;
        }

        /**
         * Creates an instance of {@code CorrelationTransform} from a {@code
         * File} for format {@link Format}.
         */
        public CorrelationGlobalTransform(File inputMatrixFile,
                                          Format format) {
            MatrixStatistics stats =
                TransformStatistics.extractStatistics(inputMatrixFile, format);
            rowSums = stats.rowSums;
            colSums = stats.columnSums;
            totalSum = stats.matrixSum;
        }

        /**
         * Computes the correlation, scaled using the square root, between item
         * {@code row} and feature {@code column} where {@code value} specifies
         * the number of occurances.   If {@code value} is zero, the correlation
         * is zero.
         *
         * @param row The index specifying the item being observed
         * @param column The index specifying the feature being observed
         * @param value The number of occurance of the item and feature
         *
         * @return the square root of the correlation between the item aand
         *         feature
         */
        public double transform(int row, int column, double value) {
            if (value == 0d)
                return 0;

            double newValue =
                (totalSum * value - rowSums[row] * colSums[column]) /
                Math.sqrt(rowSums[row] * (totalSum - rowSums[row]) *
                        colSums[column] * (totalSum - colSums[column]));
            return (newValue > 0) ? Math.sqrt(newValue) : 0;
        }

        /**
         * Computes the correlation, scaled using the square root, between item
         * {@code row} and feature {@code column} where {@code value} specifies
         * the number of occurances.   If {@code value} is zero, the correlation
         * is zero.
         *
         * @param row The index specifying the item being observed
         * @param column The index specifying the feature being observed
         * @param value The number of occurance of the item and feature
         *
         * @return the square root of the correlation between the item aand
         *         feature
         */
        public double transform(int row, DoubleVector column) {
            double value = column.get(row);
            if (value == 0d)
                return 0;

            // Calcuate the term frequencies in this new document
            double colSum = 0;
            if (column instanceof SparseVector) {
                SparseVector sv = (SparseVector)column;
                for (int nz : sv.getNonZeroIndices())
                    colSum += column.get(nz);
            }
            else {
                int length = column.length();
                for (int i = 0; i < length; ++i)
                    colSum += column.get(i);
            }

            double newValue =
                (totalSum * value - rowSums[row] * colSum) /
                Math.sqrt(rowSums[row] * (totalSum - rowSums[row]) *
                        colSum * (totalSum - colSum));
            return (newValue > 0) ? Math.sqrt(newValue) : 0;
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.matrix.CorrelationTransform

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.