Examples of org.apache.mahout.math.SparseRowMatrix

org.apache.mahout.math.SparseRowMatrix
sparse matrix with general element values whose rows are accessible quickly. Implemented as a row array of either SequentialAccessSparseVectors or RandomAccessSparseVectors.

  }


  public void trainSync(Vector document, Vector docTopicCounts, boolean update,
      int numDocTopicIters) {
    new TrainerRunnable(readModel,
            update ? writeModel : null, document, docTopicCounts, new SparseRowMatrix(
            numTopics, numTerms, true), numDocTopicIters).run();
  }

View Full Code Here

            numTopics, numTerms, true), numDocTopicIters).run();
  }


  public double calculatePerplexity(Vector document, Vector docTopicCounts, int numDocTopicIters) {
    TrainerRunnable runner =  new TrainerRunnable(readModel,
            null, document, docTopicCounts, new SparseRowMatrix(
            numTopics, numTerms, true), numDocTopicIters);
    return runner.call();
  }

View Full Code Here

    }
  }


  public static Matrix sampledCorpus(Matrix matrix, Random random,
      int numDocs, int numSamples, int numTopicsPerDoc) {
    Matrix corpus = new SparseRowMatrix(numDocs, matrix.numCols());
    LDASampler modelSampler = new LDASampler(matrix, random);
    Vector topicVector = new DenseVector(matrix.numRows());
    for(int i = 0; i < numTopicsPerDoc; i++) {
      int topic = random.nextInt(topicVector.size());
      topicVector.set(topic, topicVector.get(topic) + 1);
    }
    for(int docId = 0; docId < numDocs; docId++) {
      for(int sample : modelSampler.sample(topicVector, numSamples)) {
        corpus.set(docId, sample, corpus.get(docId, sample) + 1);
      }
    }
    return corpus;
  }

View Full Code Here

  public static Matrix randomSequentialAccessSparseMatrix(int numRows,
                                                          int nonNullRows,
                                                          int numCols,
                                                          int entriesPerRow,
                                                          double entryMean) {
    Matrix m = new SparseRowMatrix(numRows, numCols);
    //double n = 0;
    Random r = RandomUtils.getRandom();
    for (int i = 0; i < nonNullRows; i++) {
      Vector v = new SequentialAccessSparseVector(numCols);
      for (int j = 0; j < entriesPerRow; j++) {
        int col = r.nextInt(numCols);
        double val = r.nextGaussian();
        v.set(col, val * entryMean);
      }
      int c = r.nextInt(numRows);
      if (r.nextBoolean() || numRows == nonNullRows) {
        m.assignRow(numRows == nonNullRows ? i : c, v);
      } else {
        Vector other = m.viewRow(r.nextInt(numRows));
        if (other != null && other.getLengthSquared() > 0) {
          m.assignRow(c, other.clone());
        }
      }
      //n += m.getRow(c).getLengthSquared();
    }
    return m;

View Full Code Here

  public static Matrix randomSequentialAccessSparseMatrix(int numRows,
                                                          int nonNullRows,
                                                          int numCols,
                                                          int entriesPerRow,
                                                          double entryMean) {
    SparseRowMatrix m = new SparseRowMatrix(new int[]{numRows, numCols});
    //double n = 0;
    Random r = new Random(1234L);
    for (int i = 0; i < nonNullRows; i++) {
      SequentialAccessSparseVector v = new SequentialAccessSparseVector(numCols);
      for (int j = 0; j < entriesPerRow; j++) {
        int col = r.nextInt(numCols);
        double val = r.nextGaussian();
        v.set(col, val * entryMean);
      }
      int c = r.nextInt(numRows);
      if (r.nextBoolean() || numRows == nonNullRows) {
        m.assignRow(numRows == nonNullRows ? i : c, v);
      } else {
        Vector other = m.getRow(r.nextInt(numRows));
        if (other != null && other.getLengthSquared() > 0) {
          m.assignRow(c, other.clone());
        }
      }
      //n += m.getRow(c).getLengthSquared();
    }
    return m;

View Full Code Here

                    List<Double> eigenValues,
                    boolean isSymmetric) {
    log.info("Finding {} singular vectors of matrix with {} rows, via Lanczos", desiredRank, corpus.numRows());
    Vector currentVector = getInitialVector(corpus);
    Vector previousVector = new DenseVector(currentVector.size());
    Matrix basis = new SparseRowMatrix(new int[]{desiredRank, corpus.numCols()});
    basis.assignRow(0, currentVector);
    double beta = 0;
    DoubleMatrix2D triDiag = new DenseDoubleMatrix2D(desiredRank, desiredRank);
    for (int i = 1; i < desiredRank; i++) {
      startTime(TimingSection.ITERATE);
      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
      log.info("{} passes through the corpus so far...", i);
      calculateScaleFactor(nextVector);
      nextVector.assign(new Scale(1 / scaleFactor));
      nextVector.assign(previousVector, new PlusMult(-beta));
      // now orthogonalize
      double alpha = currentVector.dot(nextVector);
      nextVector.assign(currentVector, new PlusMult(-alpha));
      endTime(TimingSection.ITERATE);
      startTime(TimingSection.ORTHOGANLIZE);
      orthoganalizeAgainstAllButLast(nextVector, basis);
      endTime(TimingSection.ORTHOGANLIZE);
      // and normalize
      beta = nextVector.norm(2);
      if (outOfRange(beta) || outOfRange(alpha)) {
        log.warn("Lanczos parameters out of range: alpha = {}, beta = {}.  Bailing out early!", alpha, beta);
        break;
      }
      nextVector.assign(new Scale(1 / beta));
      basis.assignRow(i, nextVector);
      previousVector = currentVector;
      currentVector = nextVector;
      // save the projections and norms!
      triDiag.set(i - 1, i - 1, alpha);
      if (i < desiredRank - 1) {
        triDiag.set(i - 1, i, beta);
        triDiag.set(i, i - 1, beta);
      }
    }
    startTime(TimingSection.TRIDIAG_DECOMP);


    log.info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
    // at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
    EigenvalueDecomposition decomp = new EigenvalueDecomposition(triDiag);


    DoubleMatrix2D eigenVects = decomp.getV();
    DoubleMatrix1D eigenVals = decomp.getRealEigenvalues();
    endTime(TimingSection.TRIDIAG_DECOMP);
    startTime(TimingSection.FINAL_EIGEN_CREATE);


    for (int i = 0; i < basis.numRows() - 1; i++) {
      Vector realEigen = new DenseVector(corpus.numCols());
      // the eigenvectors live as columns of V, in reverse order.  Weird but true.
      DoubleMatrix1D ejCol = eigenVects.viewColumn(basis.numRows() - i - 1);
      for (int j = 0; j < ejCol.size(); j++) {
        double d = ejCol.getQuick(j);
        realEigen.assign(basis.getRow(j), new PlusMult(d));
      }
      realEigen = realEigen.normalize();
      eigenVectors.assignRow(i, realEigen);
      log.info("Eigenvector {} found with eigenvalue {}", i, eigenVals.get(i));
      eigenValues.add(eigenVals.get(i));

View Full Code Here

    if (inMemory) {
      List<Vector> eigenVectors = new ArrayList<Vector>();
      for (MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }
      eigensToVerify = new SparseRowMatrix(new int[] { eigenVectors.size(), eigenVectors.get(0).size() }, eigenVectors
          .toArray(new Vector[eigenVectors.size()]), true, true);


    } else {
      eigensToVerify = eigens;
    }

View Full Code Here

  @Override
  public void map(IntWritable docId, VectorWritable doc, Context context)
    throws IOException, InterruptedException {
    int numTopics = getNumTopics();
    Vector docTopics = new DenseVector(numTopics).assign(1.0 / numTopics);
    Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size());
    int maxIters = getMaxIters();
    ModelTrainer modelTrainer = getModelTrainer();
    for (int i = 0; i < maxIters; i++) {
      modelTrainer.getReadModel().trainDocTopicModel(doc.get(), docTopics, docModel);
    }

View Full Code Here

    while (true) {
      try {
        List<TrainerRunnable> runnables = Lists.newArrayList();
        for (Map.Entry<Vector, Vector> entry : batch.entrySet()) {
          runnables.add(new TrainerRunnable(readModel, null, entry.getKey(),
              entry.getValue(), new SparseRowMatrix(numTopics, numTerms, true),
              numDocTopicsIters));
        }
        threadPool.invokeAll(runnables);
        if (update) {
          for (TrainerRunnable runnable : runnables) {

View Full Code Here

  public void train(Vector document, Vector docTopicCounts, boolean update, int numDocTopicIters) {
    while (true) {
      try {
        workQueue.put(new TrainerRunnable(readModel, update
            ? writeModel
            : null, document, docTopicCounts, new SparseRowMatrix(numTopics, numTerms, true), numDocTopicIters));
        return;
      } catch (InterruptedException e) {
        log.warn("Interrupted waiting to submit document to work queue: {}", document, e);
      }
    }

View Full Code Here

0 1 2 3 4

TOP

Related Classes of org.apache.mahout.math.SparseRowMatrix

org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJobTest

org.apache.mahout.cf.taste.impl.recommender.svd.ALSWRFactorizerTest

org.apache.mahout.classifier.naivebayes.NaiveBayesModel

org.apache.mahout.clustering.ClusteringTestUtils

org.apache.mahout.clustering.lda.cvb.CVB0DocInferenceMapper

org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0

org.apache.mahout.clustering.lda.cvb.ModelTrainer

org.apache.mahout.math.decomposer.lanczos.LanczosSolver

org.apache.mahout.math.decomposer.SolverTest

org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.