Examples of DoubleVector


Examples of edu.ucla.sspace.vector.DoubleVector

                docVec.add(dim, 1d);
        }       
       
        // Transform the vector according to this instance's transform's state,
        // which should normalize the vector as the original vectors were.
        DoubleVector transformed = transform.transform(docVec);

        // Represent the document as a 1-row matrix
        Matrix queryAsMatrix = new ArrayMatrix(1, numDims);
        for (int nz : docVec.getNonZeroIndices())
            queryAsMatrix.set(0, nz, transformed.get(nz));
       
        // Project the new document vector, d, by using
        //
        //   d * U_k * Sigma_k^-1
        //
        // where k is the dimensionality of the LSA space
       
        Matrix UtimesSigmaInv = null;
           
        // We cache the results of the U_k * Sigma_k^-1 multiplication since
        // this will be the same for all projections.
        while (UtimesSigmaInv == null) {
            if (UtimesSigmaInvRef != null
                    && ((UtimesSigmaInv = UtimesSigmaInvRef.get()) != null))
                break;
           
            int rows = sigma.rows();
            double[] sigmaInv = new double[rows];
            for (int i = 0; i < rows; ++i)
                sigmaInv[i] = 1d / sigma.get(i, i);
            DiagonalMatrix sigmaInvMatrix = new DiagonalMatrix(sigmaInv);

            UtimesSigmaInv =
                Matrices.multiply(U, sigmaInvMatrix);
            // Update the field with the new reference to the precomputed matrix
            UtimesSigmaInvRef = new WeakReference<Matrix>(UtimesSigmaInv);
        }

        // Compute the resulting projected vector as a matrix
        Matrix result = Matrices.multiply(queryAsMatrix, UtimesSigmaInv);

        // Copy out the vector itself so that we don't retain a reference to the
        // matrix as a result of its getRowVector call, which isn't guaranteed
        // to return a copy.
        int cols = result.columns();
        DoubleVector projected = new DenseVector(result.columns());
        for (int i = 0; i < cols; ++i)
            projected.set(i, result.get(0, i));
        return projected;
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

    public SortedMultiMap<Double,String> getMostSimilar(
             Set<String> terms, int numberOfSimilarWords) {
        if (terms.isEmpty())
            return null;
        // Compute the mean vector for all the terms
        DoubleVector mean = new DenseVector(sspace.getVectorLength());
        int found = 0;
        for (String term : terms) {
            Vector v = sspace.getVector(term);
            if (v == null)
                info(LOGGER, "No vector for term " + term);
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

            return Matrices.asSparseMatrix(scaledVectors);
        } else {
            List<DoubleVector> scaledVectors =
                new ArrayList<DoubleVector>(matrix.rows());
            for (int r = 0; r < matrix.rows(); ++r) {
                DoubleVector v = matrix.getRowVector(r);
                scaledVectors.add(new ScaledDoubleVector(v, 1/v.magnitude()));
            }
            return Matrices.asMatrix(scaledVectors);
        }
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

        for (int i = 0; i < r; ++i)
            lockRow(i, c);

        double[][] m = new double[r][0];
        for (int i = 0; i < r; ++i) {
            DoubleVector row = getRowVector(i);
            // Ensure that we see a consistent length for all the rows
            if (row.length() != c)
                row = Vectors.subview(row, 0, c);
            m[i] = row.toArray();
        }

        for (int i = 0; i < r; ++i)
            unlockRow(i, c);
       
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

        if (transform == null)
            throw new IllegalStateException(
                "the initial matrix has not been transformed yet");
        // Create a new instance of that vector's type, which will contain the
        // updated values
        DoubleVector transformed = Vectors.instanceOf(column);
        int length = column.length();
        for (int row = 0; row < length; ++row) {
            double newValue = transform.transform(row, column);
            transformed.set(row, newValue);
        }
        return transformed;
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

        candidateClusters.add(first);

        // Loop through all remaining rows, either assigning them to the most
        // similar cluster, or splitting them off into their own cluster
        for (int r = 1; r < rows; ++r) {
            DoubleVector row = matrix.getRowVector(r);
            CandidateCluster mostSim = null;
            double highestSim = -1d;
            for (CandidateCluster cc : candidateClusters) {
                double sim = simFunc.sim(cc.centerOfMass(), row);
                if (sim > highestSim) {
                    mostSim = cc;
                    highestSim = sim;
                }                   
            }
           
            if (highestSim < similarityThreshold) {
                CandidateCluster cc = new CandidateCluster();
                cc.add(r, row);
                candidateClusters.add(cc);
            }
            else {
                mostSim.add(r, row);
            }
        }       

        /*
         * Generate the list of final clusters
         */
        List<CandidateCluster> finalClusters =
            new ArrayList<CandidateCluster>();

        for (CandidateCluster cc : candidateClusters) {
            if (cc.size() < minClusterSize)
                continue;
           
            double maxSim = -1;
            for (CandidateCluster cc2 : candidateClusters) {
                if (cc == cc2)
                    continue;
                double sim = simFunc.sim(cc.centerOfMass(), cc2.centerOfMass());
                if (sim > maxSim)
                    maxSim = sim;
            }
            if (maxSim < similarityThreshold)
                finalClusters.add(cc);
            // Compute the cluster cohesiveness for all clusters with sim >
            // threshold, adding the cluster with the highest to the final set
            else {
                CandidateCluster mostCohesive = null;
                double maxCohesiveness = -1;
                for (CandidateCluster cc2 : candidateClusters) {
                    if (cc == cc2)
                        continue;
                    double sim = simFunc.sim(cc.centerOfMass(), cc2.centerOfMass());
                    if (sim < similarityThreshold)
                        continue;

                    IntIterator iter = cc2.indices().iterator();
                    double similaritySum = 0;
                    while (iter.hasNext()) {
                        DoubleVector v = matrix.getRowVector(iter.next());
                        similaritySum += simFunc.sim(cc2.centerOfMass(), v);
                    }
                    double avgSim = similaritySum / cc2.size();
                   
                    if (avgSim > maxCohesiveness) {
                        maxCohesiveness = avgSim;
                        mostCohesive = cc2;
                    }
                }
                finalClusters.add(mostCohesive);
            }
        }

       
        /*
         * OPTIONAL STEP: if we're inducing the number of clusters, keep the set
         * of final clusters as is; otherwise, ensure that the size of the set
         * is equal to the requested number of clusters
         */
        // TODO!

        int foundClusters = finalClusters.size();

        /*
         * THIRD PASS: compute the similarity distribution
         */
        double[] similarities = new double[rows];
        int[] clusterAssignments = new int[rows];
        for (int r = 0; r < rows; ++r) {
            DoubleVector v = matrix.getRowVector(r);
            double highestSim = -1;
            int mostSim = -1;
            for (int j = 0; j < foundClusters; ++j) {
                CandidateCluster cc = finalClusters.get(j);
                double sim = simFunc.sim(v, cc.centerOfMass());
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

    /**
     * Generate a new random vector using a guassian distribution for each
     * value.
     */
    public synchronized DoubleVector generate() {
        DoubleVector termVector = new DenseVector(indexVectorLength);
        for (int i = 0; i < indexVectorLength; i++)
            termVector.set(i, mean + (randomGenerator.nextGaussian() * stdev));
        return termVector;
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

     * Generates a simple random vector.
     */
    private static DoubleVector generateInitialVector(int length,
                                                      double mean,
                                                      double std) {
        DoubleVector vector = new DenseVector(length);
        for (int i = 0; i < length; ++i) {
            double v = RANDOM.nextGaussian();
            v = std * v + mean;
            vector.set(i, v);
        }
        return vector;
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

    public DoubleVector generate() {
        if (generatedVectors.size() == vectorLength)
            throw new IllegalArgumentException(
                    "Too many vectors have been generated");

        DoubleVector vector =
            generateInitialVector(vectorLength, mean, std);
        for (DoubleVector otherVector : generatedVectors) {
            double uDotV = dotProduct(otherVector, vector);
            double uDotU = dotProduct(otherVector, otherVector);
            for (int i = 0; i < vectorLength; ++i) {
                double projection = otherVector.get(i) * uDotV / uDotU;
                vector.set(i, vector.get(i) - projection);
            }
        }
        generatedVectors.add(vector);
        return vector;
    }
View Full Code Here

Examples of edu.ucla.sspace.vector.DoubleVector

            new ArrayList<CandidateCluster>(kappa);

        for (int r = 0; r < rows; /* no auto-increment */) {
           
            for ( ; facilities.size() <= kappa && r < rows; ++r) {
                DoubleVector x = matrix.getRowVector(r);
               
                CandidateCluster closest = null;
                // Delta is ultimately assigned the lowest inverse-similarity
                // (distance) to any of the current facilities' center of mass
                double delta = Double.MAX_VALUE;
                for (CandidateCluster y : facilities) {
                    double similarity =
                        simFunc.sim(x, y.centerOfMass());
                    double invSim = invertSim(similarity);
                    if (invSim < delta) {
                        delta = invSim;
                        closest = y;
                    }
                }
               
                // Base case: If this is the first data point and there are no
                // other facilities
                //
                // Or if we surpass the probability of a new event occurring
                // (line 6)
                if (closest == null || Math.random() < delta / f) {
                    CandidateCluster fac = new CandidateCluster();
                    fac.add(r, x);
                    facilities.add(fac);
                }
                // Otherwise, add this data point to an existing facility
                else {
                    closest.add(r, x);
                }

            }

            // If we still have data points left to process (line 10:)
            if (r < rows) {
                // Check whether we have more than kappa clusters (line 11).
                // Kappa provides the upper bound on the clusters (facilities)
                // that are kept at any given time.  If there are more, then we
                // need to consolidate facilities
                while (facilities.size() > kappa) {
                    f *= beta;

                    int curNumFacilities = facilities.size();
                    List<CandidateCluster> consolidated =
                        new ArrayList<CandidateCluster>(kappa);
                    consolidated.add(facilities.get(0));
                    for (int j = 1; j < curNumFacilities; ++j) {
                        CandidateCluster x = facilities.get(j);
                        int pointsAssigned = x.size();
                        // Compute the similarity of this facility to all other
                        // consolidated facilities.  Delta represents the lowest
                        // inverse-similarity (distance) to another facility.
                        // See line 17 of the algorithm.
                        double delta = Double.MAX_VALUE;                       
                        CandidateCluster closest = null;
                        for (CandidateCluster y : consolidated) {
                            double similarity =
                                simFunc.sim(x.centerOfMass(), y.centerOfMass());
                            double invSim = invertSim(similarity);
                            if (invSim < delta) {
                                delta = invSim;
                                closest = y;
                            }
                        }
                        
                        // Use (pointsAssigned * delta / f) as a threshold for
                        // whether this facility could constitute a new event.
                        // If a random check is less than it, then we nominate
                        // this facility to continue.
                        if (Math.random() < (pointsAssigned * delta) / f) {
                            consolidated.add(x);
                        }
                        // Otherwise, we consolidate the points in this
                        // community to the closest facility
                        else {
                            assert closest != null : "no closest facility";
                            closest.merge(x);
                        }
                    }
                    verbose(LOGGER, "Consolidated %d facilities down to %d",
                            facilities.size(), consolidated.size());
                    facilities = consolidated;
                }
            }
            // Once we have processed all of the items in the stream (line 23 of
            // algorithm), reduce the kappa clusters into k clusters.
            else {
                // Edge case for when we already have fewer facilities than we
                // need
                if (facilities.size() <= numClusters) {
                    verbose(LOGGER, "Had fewer facilities, %d, than the " +
                            "requested number of clusters %d",
                            facilities.size(), numClusters);

                    // There's no point in reducing the number of facilities
                    // further since we're under the desired amount, nor can we
                    // go back and increase the number of facilities since all
                    // the data has been seen at this point.  Therefore, just
                    // loop through the candidates and report their assignments.
                    Assignment[] assignments = new Assignment[rows];
                    int numFacilities = facilities.size();
                    for (int j = 0; j < numFacilities; ++j) {
                        CandidateCluster fac = facilities.get(j);
                        veryVerbose(LOGGER, "Facility %d had a center of mass at %s",
                                    j, fac.centerOfMass());
                       
                        int clusterId = j;
                        IntIterator iter = fac.indices().iterator();
                        while (iter.hasNext()) {
                            int row = iter.nextInt();
                            assignments[row] =
                                new HardAssignment(clusterId);
                        }
                    }
                    return new Assignments(numClusters, assignments, matrix);                   
                }
                else {
                    verbose(LOGGER, "Had more than %d facilities, " +
                            "consolidating to %d", facilities.size(),
                            numClusters);
                   
                    List<DoubleVector> facilityCentroids =
                        new ArrayList<DoubleVector>(facilities.size());
                    int[] weights = new int[facilities.size()];
                    int i = 0;
                    for (CandidateCluster fac : facilities) {
                        facilityCentroids.add(fac.centerOfMass());
                        weights[i++] = fac.size();
                    }
                    // Wrap the facilities centroids in a matrix for convenience
                    Matrix m = Matrices.asMatrix(facilityCentroids);

                    // Select the initial seed points for reducing the kappa
                    // clusters to k using the generalized ORSS selection
                    // process, which supports data comparisons other than
                    // Euclidean distance
                    GeneralizedOrssSeed orss = new GeneralizedOrssSeed(simFunc);
                    DoubleVector[] centroids = orss.chooseSeeds(numClusters, m);
                    assert nonNullCentroids(centroids)
                        : "ORSS seed returned too few centroids";
                   
                    // This records the assignments of the kappa facilities to
                    // the k centers.  Initially, everything is assigned to the
                    // same center and iterations repeat until convergence.
                    int[] facilityAssignments = new int[facilities.size()];
                   
                    // Using those facilities as starting points, run k-means on
                    // the facility centroids until no facilities change their
                    // membership.
                    int numChanged = 0;
                    int kmeansIters = 0;
                    do {
                        numChanged = 0;
                        // Recompute the new centroids each time
                        DoubleVector[] updatedCentroids =
                            new DoubleVector[numClusters];
                        for (i = 0; i < updatedCentroids.length; ++i)
                            updatedCentroids[i] = new DenseVector(cols);
                        int[] updatedCentroidSizes = new int[numClusters];

                        double similaritySum = 0;
                       
                        // For each CandidateCluster find the most similar centroid
                        i = 0;
                        for (CandidateCluster fac : facilities) {
                            int mostSim = -1;
                            double highestSim = -1;
                            for (int j = 0; j < centroids.length; ++j) {
//                                  System.out.printf("centroids[%d]: %s%n fac.centroid(): %s%n",
//                                                    j, centroids[j],
//                                                    fac.centerOfMass());
                                double sim = simFunc.sim(centroids[j],
                                                         fac.centerOfMass());
                                if (sim > highestSim) {
                                    highestSim = sim;
                                    mostSim = j;
                                }
                            }

                            // For the most similar centroid, update its center
                            // of mass for the next round with the weighted
                            // vector
                            VectorMath.add(updatedCentroids[mostSim],
                                           fac.sum());
                            updatedCentroidSizes[mostSim] += fac.size();
                            int curAssignment = facilityAssignments[i];
                            facilityAssignments[i] = mostSim;
                            similaritySum += highestSim;
                            if (curAssignment != mostSim) {
                                veryVerbose(LOGGER, "Facility %d changed its " +
                                            "centroid from %d to %d",
                                            i, curAssignment, mostSim);
                                numChanged++;
                            }
                            i++;
                        }

                        // Once all the facilities have been assigned to one of
                        // the k-centroids, recompute the centroids by
                        // normalizing the sum of the weighted vectors according
                        // the number of points
                        for (int j = 0; j < updatedCentroids.length; ++j) {
                            DoubleVector v = updatedCentroids[j];
                            int size = updatedCentroidSizes[j];
                            for (int k = 0; k < cols; ++k)
                                v.set(k, v.get(k) / size);
                            // Update this centroid for the next round
                            centroids[j] = v;                           
                        }

                        veryVerbose(LOGGER, "%d centroids swapped their facility",
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.