Package org.apache.mahout.math

Examples of org.apache.mahout.math.Matrix


    DistributedRowMatrix corpus = TestDistributedRowMatrix.randomDistributedMatrix(500,
        450, 400, 10, 10.0, symmetric, "testdata");
    corpus.configure(new JobConf());
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    int desiredRank = 30;
    Matrix eigenVectors = new DenseMatrix(desiredRank, corpus.numCols());
    List<Double> eigenValues = new ArrayList<Double>();
    solver.solve(corpus, desiredRank, eigenVectors, eigenValues, symmetric);
    assertOrthonormal(eigenVectors);
    assertEigen(eigenVectors, corpus, eigenVectors.numRows() / 2, 0.01, symmetric);
  }
View Full Code Here


    SingularVectorVerifier verifier = new AsyncEigenVerifier();
    HebbianSolver solver = new HebbianSolver(updater,
        verifier,
        convergence,
        maxPasses);
    Matrix corpus = null;
    if (numThreads <= 1) {
      //  corpus = new DiskBufferedDoubleMatrix(new File(corpusDir), inBufferSize);
    } else {
      //  corpus = new ParallelMultiplyingDiskBufferedDoubleMatrix(new File(corpusDir), inBufferSize, numThreads);
    }
View Full Code Here

  }

  public void testMatrixTimesVector() throws Exception {
    Vector v = new RandomAccessSparseVector(50);
    v.assign(1.0);
    Matrix m = SolverTest.randomSequentialAccessSparseMatrix(100, 90, 50, 20, 1.0);
    DistributedRowMatrix dm = randomDistributedMatrix(100, 90, 50, 20, 1.0, false);

    Vector expected = m.times(v);
    Vector actual = dm.times(v);
    assertEquals(0.0, expected.getDistanceSquared(actual), 1.0e-9);
  }
View Full Code Here

  }

  public void testMatrixTimesSquaredVector() throws Exception {
    Vector v = new RandomAccessSparseVector(50);
    v.assign(1.0);
    Matrix m = SolverTest.randomSequentialAccessSparseMatrix(100, 90, 50, 20, 1.0);
    DistributedRowMatrix dm = randomDistributedMatrix(100, 90, 50, 20, 1.0, false);

    Vector expected = m.timesSquared(v);
    Vector actual = dm.timesSquared(v);
    assertEquals(0.0, expected.getDistanceSquared(actual), 1.0e-9);
  }
View Full Code Here

    Vector actual = dm.timesSquared(v);
    assertEquals(0.0, expected.getDistanceSquared(actual), 1.0e-9);
  }

  public void testMatrixTimesMatrix() throws Exception {
    Matrix inputA = SolverTest.randomSequentialAccessSparseMatrix(20, 19, 15, 5, 10.0);
    Matrix inputB = SolverTest.randomSequentialAccessSparseMatrix(20, 13, 25, 10, 5.0);
    Matrix expected = inputA.transpose().times(inputB);

    DistributedRowMatrix distA = randomDistributedMatrix(20, 19, 15, 5, 10.0, false, "/distA");
    DistributedRowMatrix distB = randomDistributedMatrix(20, 13, 25, 10, 5.0, false, "/distB");
    DistributedRowMatrix product = distA.times(distB);
View Full Code Here

                                                             int entriesPerRow,
                                                             double entryMean,
                                                             boolean isSymmetric,
                                                             String baseTmpDir) throws IOException {
    baseTmpDir = TESTDATA + baseTmpDir;
    Matrix c = SolverTest.randomSequentialAccessSparseMatrix(numRows, nonNullRows, numCols, entriesPerRow, entryMean);
    if(isSymmetric) {
      c = c.times(c.transpose());
    }
    final Matrix m = c;
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    ClusteringTestUtils.writePointsToFile(new Iterable<VectorWritable>() {
      @Override
      public Iterator<VectorWritable> iterator() {
        final Iterator<MatrixSlice> it = m.iterator();
        final VectorWritable v = new VectorWritable();
        return new Iterator<VectorWritable>() {
          @Override
          public boolean hasNext() { return it.hasNext(); }
          @Override
          public VectorWritable next() {
            MatrixSlice slice = it.next();
            v.set(slice.vector());
            return v;
          }
          @Override
          public void remove() { it.remove(); }
        };
      }
    }, true, baseTmpDir + "/distMatrix/part-00000", fs, conf);

    DistributedRowMatrix distMatrix = new DistributedRowMatrix(baseTmpDir + "/distMatrix",
                                                               baseTmpDir + "/tmpOut",
                                                               m.numRows(),
                                                               m.numCols());
    distMatrix.configure(new JobConf(conf));

    return distMatrix;
  }
View Full Code Here

    return v;
  }

  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics,numWords);
    double[] logTotals = new double[numTopics];
    for(int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for(int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k,w,Math.log(pseudocount));
      }

      logTotals[k] = Math.log(total);
    }
View Full Code Here

                    List<Double> eigenValues,
                    boolean isSymmetric) {
    log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
    Vector currentVector = getInitialVector(corpus);
    Vector previousVector = new DenseVector(currentVector.size());
    Matrix basis = new SparseRowMatrix(new int[]{desiredRank, corpus.numCols()});
    basis.assignRow(0, currentVector);
    double alpha = 0;
    double beta = 0;
    DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
    for (int i = 1; i < desiredRank; i++) {
      startTime(TimingSection.ITERATE);
      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
      log.info("{} passes through the corpus so far...", i);
      calculateScaleFactor(nextVector);
      nextVector.assign(new Scale(1 / scaleFactor));
      nextVector.assign(previousVector, new PlusMult(-beta));
      // now orthogonalize
      alpha = currentVector.dot(nextVector);
      nextVector.assign(currentVector, new PlusMult(-alpha));
      endTime(TimingSection.ITERATE);
      startTime(TimingSection.ORTHOGANLIZE);
      orthoganalizeAgainstAllButLast(nextVector, basis);
      endTime(TimingSection.ORTHOGANLIZE);
      // and normalize
      beta = nextVector.norm(2);
      if (outOfRange(beta) || outOfRange(alpha)) {
        log.warn("Lanczos parameters out of range: alpha = {}, beta = {}.  Bailing out early!", alpha, beta);
        break;
      }
      final double b = beta;
      nextVector.assign(new Scale(1 / b));
      basis.assignRow(i, nextVector);
      previousVector = currentVector;
      currentVector = nextVector;
      // save the projections and norms!
      triDiag.set(i - 1, i - 1, alpha);
      if (i < desiredRank - 1) {
        triDiag.set(i - 1, i, beta);
        triDiag.set(i, i - 1, beta);
      }
    }
    startTime(TimingSection.TRIDIAG_DECOMP);

    log.info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
    // at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
    EigenvalueDecomposition decomp = new EigenvalueDecomposition(triDiag);

    DoubleMatrix2D eigenVects = decomp.getV();
    DoubleMatrix1D eigenVals = decomp.getRealEigenvalues();
    endTime(TimingSection.TRIDIAG_DECOMP);
    startTime(TimingSection.FINAL_EIGEN_CREATE);

    for (int i = 0; i < basis.numRows() - 1; i++) {
      Vector realEigen = new DenseVector(corpus.numCols());
      // the eigenvectors live as columns of V, in reverse order.  Weird but true.
      DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
      for (int j = 0; j < ejCol.size(); j++) {
        double d = ejCol.getQuick(j);
        realEigen.assign(basis.getRow(j), new PlusMult(d));
      }
      realEigen = realEigen.normalize();
      eigenVectors.assignRow(i, realEigen);
      log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
      eigenValues.add(eigenVals.get(i));
View Full Code Here

    return v;
  }
 
  private LDAState generateRandomState(int numWords, int numTopics) {
    double topicSmoothing = 50.0 / numTopics; // whatever
    Matrix m = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
   
    for (int k = 0; k < numTopics; ++k) {
      double total = 0.0; // total number of pseudo counts we made
      for (int w = 0; w < numWords; ++w) {
        // A small amount of random noise, minimized by having a floor.
        double pseudocount = random.nextDouble() + 1.0E-10;
        total += pseudocount;
        m.setQuick(k, w, Math.log(pseudocount));
      }
     
      logTotals[k] = Math.log(total);
    }
   
View Full Code Here

    // since some of the eigen-output is spurious and will be eliminated
    // upon verification, we have to aim to overshoot and then discard
    // unnecessary vectors later
    int overshoot = (int) ((double) clusters * OVERSHOOT_MULTIPLIER);
    List<Double> eigenValues = new ArrayList<Double>(overshoot);
    Matrix eigenVectors = new DenseMatrix(overshoot, numDims);
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  L.getRowPath(),
                  new Path(outputTmp, "lanczos-" + (System.nanoTime() & 0xFF)),
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.Matrix

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.