Package org.apache.mahout.math

Examples of org.apache.mahout.math.SparseRowMatrix


  public static Matrix randomSequentialAccessSparseMatrix(int numRows,
                                                          int nonNullRows,
                                                          int numCols,
                                                          int entriesPerRow,
                                                          double entryMean) {
    SparseRowMatrix m = new SparseRowMatrix(new int[]{numRows, numCols});
    double n = 0;
    Random r = new Random(1234L);
    for (int i = 0; i < nonNullRows; i++) {
      SequentialAccessSparseVector v = new SequentialAccessSparseVector(numCols);
      for (int j = 0; j < entriesPerRow; j++) {
        int col = r.nextInt(numCols);
        double val = r.nextGaussian();
        v.set(col, val * entryMean);
      }
      int c = r.nextInt(numRows);
      if (r.nextBoolean() || numRows == nonNullRows) {
        m.assignRow(numRows == nonNullRows ? i : c, v);
      } else {
        Vector other = m.getRow(r.nextInt(numRows));
        if (other != null && other.getLengthSquared() > 0) {
          m.assignRow(c, other.clone());
        }
      }
      n += m.getRow(c).getLengthSquared();
    }
    return m;
  }
View Full Code Here


    if(inMemory) {
      List<Vector> eigenVectors = new ArrayList<Vector>();
      for(MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }
      eigensToVerify = new SparseRowMatrix(new int[] {eigenVectors.size(), eigenVectors.get(0).size()},
                                           eigenVectors.toArray(new Vector[eigenVectors.size()]),
                                           true,
                                           true);

    } else {
View Full Code Here

                    List<Double> eigenValues,
                    boolean isSymmetric) {
    log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
    Vector currentVector = getInitialVector(corpus);
    Vector previousVector = new DenseVector(currentVector.size());
    Matrix basis = new SparseRowMatrix(new int[]{desiredRank, corpus.numCols()});
    basis.assignRow(0, currentVector);
    double alpha = 0;
    double beta = 0;
    DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
    for (int i = 1; i < desiredRank; i++) {
      startTime(TimingSection.ITERATE);
      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
      log.info("{} passes through the corpus so far...", i);
      calculateScaleFactor(nextVector);
      nextVector.assign(new Scale(1 / scaleFactor));
      nextVector.assign(previousVector, new PlusMult(-beta));
      // now orthogonalize
      alpha = currentVector.dot(nextVector);
      nextVector.assign(currentVector, new PlusMult(-alpha));
      endTime(TimingSection.ITERATE);
      startTime(TimingSection.ORTHOGANLIZE);
      orthoganalizeAgainstAllButLast(nextVector, basis);
      endTime(TimingSection.ORTHOGANLIZE);
      // and normalize
      beta = nextVector.norm(2);
      if (outOfRange(beta) || outOfRange(alpha)) {
        log.warn("Lanczos parameters out of range: alpha = {}, beta = {}.  Bailing out early!", alpha, beta);
        break;
      }
      final double b = beta;
      nextVector.assign(new Scale(1 / b));
      basis.assignRow(i, nextVector);
      previousVector = currentVector;
      currentVector = nextVector;
      // save the projections and norms!
      triDiag.set(i - 1, i - 1, alpha);
      if (i < desiredRank - 1) {
        triDiag.set(i - 1, i, beta);
        triDiag.set(i, i - 1, beta);
      }
    }
    startTime(TimingSection.TRIDIAG_DECOMP);

    log.info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
    // at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
    EigenvalueDecomposition decomp = new EigenvalueDecomposition(triDiag);

    DoubleMatrix2D eigenVects = decomp.getV();
    DoubleMatrix1D eigenVals = decomp.getRealEigenvalues();
    endTime(TimingSection.TRIDIAG_DECOMP);
    startTime(TimingSection.FINAL_EIGEN_CREATE);

    for (int i = 0; i < basis.numRows() - 1; i++) {
      Vector realEigen = new DenseVector(corpus.numCols());
      // the eigenvectors live as columns of V, in reverse order.  Weird but true.
      DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
      for (int j = 0; j < ejCol.size(); j++) {
        double d = ejCol.getQuick(j);
        realEigen.assign(basis.getRow(j), new PlusMult(d));
      }
      realEigen = realEigen.normalize();
      eigenVectors.assignRow(i, realEigen);
      log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
      eigenValues.add(eigenVals.get(i));
View Full Code Here

   */
  @Test
  public void completeJobToyExample() throws Exception {

    Double na = Double.NaN;
    Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] {
        new DenseVector(new double[] { 5.0, 5.0, 2.0, na }),
        new DenseVector(new double[] { 2.0, na,  3.0, 5.0 }),
        new DenseVector(new double[] { na,  5.0, na,  3.0 }),
        new DenseVector(new double[] { 3.0, na,  na,  5.0 }) });

    writeLines(inputFile, preferencesAsText(preferences));

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;

    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
        "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda),
        "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations) });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
        preferences.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
        preferences.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
      MatrixSlice slice = sliceIterator.next();
      Iterator<Vector.Element> elementIterator = slice.vector().iterateNonZero();
      while (elementIterator.hasNext()) {
        Vector.Element e = elementIterator.next();
View Full Code Here

  }

  @Test
  public void completeJobImplicitToyExample() throws Exception {

    Matrix observations = new SparseRowMatrix(4, 4, new Vector[] {
        new DenseVector(new double[] { 5.0, 5.0, 2.0, 0 }),
        new DenseVector(new double[] { 2.0, 0,   3.0, 5.0 }),
        new DenseVector(new double[] { 0,   5.0, 0,   3.0 }),
        new DenseVector(new double[] { 3.0, 0,   0,   5.0 }) });

    Matrix preferences = new SparseRowMatrix(4, 4, new Vector[] {
        new DenseVector(new double[] { 1.0, 1.0, 1.0, 0 }),
        new DenseVector(new double[] { 1.0, 0,   1.0, 1.0 }),
        new DenseVector(new double[] { 0,   1.0, 0,   1.0 }),
        new DenseVector(new double[] { 1.0, 0,   0,   1.0 }) });

    writeLines(inputFile, preferencesAsText(observations));

    ParallelALSFactorizationJob alsFactorization = new ParallelALSFactorizationJob();
    alsFactorization.setConf(conf);

    int numFeatures = 3;
    int numIterations = 5;
    double lambda = 0.065;
    double alpha = 20;

    alsFactorization.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
        "--tempDir", tmpDir.getAbsolutePath(), "--lambda", String.valueOf(lambda),
        "--implicitFeedback", String.valueOf(true), "--alpha", String.valueOf(alpha),
        "--numFeatures", String.valueOf(numFeatures), "--numIterations", String.valueOf(numIterations) });

    Matrix u = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "U/part-m-00000"),
        observations.numRows(), numFeatures);
    Matrix m = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(), "M/part-m-00000"),
        observations.numCols(), numFeatures);

    StringBuilder info = new StringBuilder();
    info.append("\nObservations - users x items\n");
    info.append(MathHelper.nice(observations));
    info.append("\nA - users x items\n\n");
    info.append(MathHelper.nice(preferences));
    info.append("\nU - users x features\n\n");
    info.append(MathHelper.nice(u));
    info.append("\nM - items x features\n\n");
    info.append(MathHelper.nice(m));
    Matrix Ak = u.times(m.transpose());
    info.append("\nAk - users x items\n\n");
    info.append(MathHelper.nice(Ak));
    info.append('\n');

    log.info(info.toString());

    RunningAverage avg = new FullRunningAverage();
    Iterator<MatrixSlice> sliceIterator = preferences.iterateAll();
    while (sliceIterator.hasNext()) {
      MatrixSlice slice = sliceIterator.next();
      for (Vector.Element e : slice.vector()) {
        if (!Double.isNaN(e.get())) {
          double pref = e.get();
View Full Code Here

  @Override
  public void map(IntWritable docId, VectorWritable doc, Context context)
      throws IOException, InterruptedException {
    int numTopics = getNumTopics();
    Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 /numTopics);
    Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size());
    int maxIters = getMaxIters();
    ModelTrainer modelTrainer = getModelTrainer();
    for(int i = 0; i < maxIters; i++) {
      modelTrainer.getReadModel().trainDocTopicModel(doc.get(), docTopics, docModel);
    }
View Full Code Here

    if (inMemory) {
      List<Vector> eigenVectors = Lists.newArrayList();
      for (MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }
      eigensToVerify = new SparseRowMatrix(eigenVectors.size(), eigenVectors.get(0).size(),
                                           eigenVectors.toArray(new Vector[eigenVectors.size()]),
                                           true,
                                           true);

    } else {
View Full Code Here

        vectorList.add(record.getSecond().get());
      }
    }
    int numRows = vectorList.size();
    int numCols = vectorList.get(0).size();
    return new SparseRowMatrix(numRows, numCols,
        vectorList.toArray(new Vector[vectorList.size()]), true,
        vectorList.get(0).isSequentialAccess());
  }
View Full Code Here

    while(true) {
      try {
        List<TrainerRunnable> runnables = Lists.newArrayList();
        for(Map.Entry<Vector, Vector> entry : batch.entrySet()) {
          runnables.add(new TrainerRunnable(readModel, null, entry.getKey(),
              entry.getValue(), new SparseRowMatrix(numTopics, numTerms, true),
              numDocTopicsIters));
        }
        threadPool.invokeAll(runnables);
        if(update) {
          for(TrainerRunnable runnable : runnables) {
View Full Code Here

  public void train(Vector document, Vector docTopicCounts, boolean update, int numDocTopicIters) {
    while(true) {
      try {
        workQueue.put(new TrainerRunnable(readModel,
            update ? writeModel : null, document, docTopicCounts, new SparseRowMatrix(
            numTopics, numTerms, true), numDocTopicIters));
        return;
      } catch (InterruptedException e) {
        log.warn("Interrupted waiting to submit document to work queue: " + document, e);
      }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.SparseRowMatrix

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.