Package org.apache.mahout.math.hadoop

Examples of org.apache.mahout.math.hadoop.DistributedRowMatrix.configure()


    maxError = Double.parseDouble(argMap.get("--maxError"));
    minEigenValue = Double.parseDouble(argMap.get("--minEigenvalue"));

    DistributedRowMatrix c = new DistributedRowMatrix(argMap.get("--corpusInput"), tmpOut, 1, 1);
    c.configure(new JobConf(getConf()));
    corpus = c;

    // set up eigenverifier and orthoverifier TODO: allow multithreaded execution

    eigenVerifier = new SimpleEigenVerifier();
View Full Code Here
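
The snippet above shows the pattern behind every example on this page: construct a DistributedRowMatrix over an HDFS path, then hand it a JobConf via configure() before using it. A minimal self-contained sketch of that pattern, using the same JobConf-era API these snippets rely on; the paths and dimensions below are hypothetical:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;

public class ConfigureExample {
  public static void main(String[] args) {
    // Hypothetical HDFS locations and matrix dimensions, for illustration only.
    Path rows = new Path("testdata/matrix");
    Path tmp = new Path("testdata/tmp");
    DistributedRowMatrix matrix = new DistributedRowMatrix(rows, tmp, 500, 450);

    // configure() supplies the Hadoop configuration the matrix's jobs will use;
    // it must be called before the matrix is iterated, transposed, or multiplied.
    matrix.configure(new JobConf());
  }
}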


    return eigenMetaData;
  }

  private void prepareEigens(String eigenInput, boolean inMemory) {
    DistributedRowMatrix eigens = new DistributedRowMatrix(eigenInput, tmpOut, 1, 1);
    eigens.configure(new JobConf(getConf()));
    if(inMemory) {
      List<Vector> eigenVectors = new ArrayList<Vector>();
      for(MatrixSlice slice : eigens) {
        eigenVectors.add(slice.vector());
      }
View Full Code Here
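
Once configure() has been called, as prepareEigens() does above, the matrix can be iterated row by row as MatrixSlice objects. A sketch of a self-contained helper built from the same iteration pattern the snippet uses; the class and method names are illustrative, not from the source:

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.hadoop.DistributedRowMatrix;

public final class EigenLoader {
  private EigenLoader() { }

  // Pulls every row of an already-constructed DistributedRowMatrix into memory.
  public static List<Vector> loadRows(DistributedRowMatrix matrix, JobConf conf) {
    matrix.configure(conf);             // required before iteration
    List<Vector> rows = new ArrayList<Vector>();
    for (MatrixSlice slice : matrix) {  // each slice is one row of the matrix
      rows.add(slice.vector());
    }
    return rows;
  }
}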

   
    DistributedRowMatrix matrix = new DistributedRowMatrix(inputPathString,
                                                           outputTmpPathString,
                                                           numRows,
                                                           numCols);
    matrix.configure(new JobConf(getConf()));
    solve(matrix, desiredRank, eigenVectors, eigenValues, isSymmetric);

    serializeOutput(eigenVectors, eigenValues, outputEigenVectorPath);
    return 0;
  }
View Full Code Here

    if (!testData.exists()) {
      testData.mkdir();
    }
    DistributedRowMatrix corpus = TestDistributedRowMatrix.randomDistributedMatrix(500,
        450, 400, 10, 10.0, symmetric, "testdata");
    corpus.configure(new JobConf());
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    int desiredRank = 30;
    Matrix eigenVectors = new DenseMatrix(desiredRank, corpus.numCols());
    List<Double> eigenValues = new ArrayList<Double>();
    solver.solve(corpus, desiredRank, eigenVectors, eigenValues, symmetric);
View Full Code Here

    DistributedRowMatrix A = new DistributedRowMatrix(affSeqFiles,
                                                      new Path(outputTmp, "afftmp-" + (System.nanoTime() & 0xFF)),
                                                      numDims,
                                                      numDims);
    JobConf depConf = new JobConf(conf);
    A.configure(depConf);

    // Next step: construct the diagonal matrix D (represented as a vector)
    // and calculate the normalized Laplacian of the form:
    // L = D^(-0.5)AD^(-0.5)
    Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
View Full Code Here
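
The comment in the snippet above describes the normalization that VectorMatrixMultiplicationJob applies to the affinity matrix A using the degree vector D. Written out entrywise in the same notation as the comments (mine, not the source's), with d_i the i-th entry of D:

    L_ij = A_ij / sqrt(d_i * d_j),   i.e.  L = D^(-0.5) A D^(-0.5)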

    // L = D^(-0.5)AD^(-0.5)
    Vector D = MatrixDiagonalizeJob.runJob(affSeqFiles, numDims);
    DistributedRowMatrix L =
        VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
            new Path(outputCalc, "laplacian-" + (System.nanoTime() & 0xFF)));
    L.configure(depConf);

    // Next step: perform eigen-decomposition using LanczosSolver
    // since some of the eigen-output is spurious and will be eliminated
    // upon verification, we have to aim to overshoot and then discard
    // unnecessary vectors later
View Full Code Here

    EigenVerificationJob verifier = new EigenVerificationJob();
    Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
    verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath, true, 1.0, 0.0, clusters);
    Path cleanedEigens = verifier.getCleanedEigensPath();
    DistributedRowMatrix W = new DistributedRowMatrix(cleanedEigens, new Path(cleanedEigens, "tmp"), clusters, numDims);
    W.configure(depConf);
    DistributedRowMatrix Wtrans = W.transpose();
    //    DistributedRowMatrix Wt = W.transpose();

    // next step: normalize the rows of Wt to unit length
    Path unitVectors = new Path(outputCalc, "unitvectors-" + (System.nanoTime() & 0xFF));
View Full Code Here

    // next step: normalize the rows of Wt to unit length
    Path unitVectors = new Path(outputCalc, "unitvectors-" + (System.nanoTime() & 0xFF));
    UnitVectorizerJob.runJob(Wtrans.getRowPath(), unitVectors);
    DistributedRowMatrix Wt = new DistributedRowMatrix(unitVectors, new Path(unitVectors, "tmp"), clusters, numDims);
    Wt.configure(depConf);

    // Finally, perform k-means clustering on the rows of L (or W)
    // generate random initial clusters
    Path initialclusters = RandomSeedGenerator.buildRandom(Wt.getRowPath(),
                                                           new Path(output, Cluster.INITIAL_CLUSTERS_DIR),
View Full Code Here
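
The UnitVectorizerJob call above rescales each row of Wt to unit length before k-means is run on it. In the same comment-style notation (again mine, not the source's), each row w_i becomes:

    w_i  ->  w_i / ||w_i||,   where ||w_i|| = sqrt(sum_j w_ij^2)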

      // 3) calculate eigenvectors of L

      DistributedRowMatrix L =
          VectorMatrixMultiplicationJob.runJob(A.getRowPath(), D,
              new Path(outputCalc, "laplacian-" + (System.nanoTime() & 0xFF)));
      L.configure(new JobConf(conf));

      // eigendecomposition (step 3)
      int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);
      List<Double> eigenValues = new ArrayList<Double>(overshoot);
      Matrix eigenVectors = new DenseMatrix(overshoot, dimensions);
View Full Code Here

      // eigendecomposition (step 3)
      int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);
      List<Double> eigenValues = new ArrayList<Double>(overshoot);
      Matrix eigenVectors = new DenseMatrix(overshoot, dimensions);
      DistributedRowMatrix U = performEigenDecomposition(conf, L, dimensions, overshoot, eigenValues, eigenVectors, outputCalc);
      U.configure(new JobConf(conf));
      eigenValues = eigenValues.subList(0, dimensions);

      // here's where things get interesting: steps 4, 5, and 6 are unique
      // to this algorithm, and depending on the final output, steps 1-3
      // may be repeated as well
View Full Code Here
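
The snippet above makes the overshoot explicit: the Lanczos solver is asked for more eigenpairs than the dimensions ultimately needed, because some of its output is spurious and is dropped during verification, after which eigenValues.subList(0, dimensions) keeps only the leading values. A worked example with hypothetical numbers (the actual value of OVERSHOOT_MULTIPLIER is not shown in these snippets):

    // Assumed values, for illustration only.
    double OVERSHOOT_MULTIPLIER = 1.5;
    int dimensions = 20;                                                  // eigenvectors actually wanted
    int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);   // 30 requested from the solver
    // after verification, eigenValues.subList(0, dimensions) keeps only the first 20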
