Package org.apache.mahout.math.hadoop.decomposer

Examples of org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver


  // @Test
  public void testKmeansSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration conf = new Configuration();
    solver.setConf(conf);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    int desiredRank = 15;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank, 0.5, 0.0, true);
    Path cleanEigenvectors = new Path(output,
        EigenVerificationJob.CLEAN_EIGENVECTORS);
   
    // build in-memory data matrix A
View Full Code Here


  // @Test
  public void testKmeansDSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // Run EigenVerificationJob from within DistributedLanczosSolver.run(...)
    int desiredRank = 13;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank, 0.5, 0.0, false);
   
    Path cleanEigenvectors = new Path(output,
        EigenVerificationJob.CLEAN_EIGENVECTORS);
   
View Full Code Here

  // @Test
  public void testKmeansDSVD2() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
    solver.setConf(config);
    Path testData = getTestTempDirPath("testdata");
    int sampleDimension = sampleData.get(0).get().size();
    // call EigenVerificationJob separately
    int desiredRank = 13;
    solver.run(testData, output, tmp, null, sampleData.size(), sampleDimension,
        false, desiredRank);
    Path rawEigenvectors = new Path(output,
        DistributedLanczosSolver.RAW_EIGENVECTORS);
    Configuration conf = new Configuration(config);
    new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp, 0.5,
View Full Code Here

    // Next step: perform eigen-decomposition using LanczosSolver
    // since some of the eigen-output is spurious and will be eliminated
    // upon verification, we have to aim to overshoot and then discard
    // unnecessary vectors later
    int overshoot = (int) ((double) clusters * OVERSHOOT_MULTIPLIER);
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    LanczosState state = new LanczosState(L, clusters, solver.getInitialVector(L));
    Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  state,
                  overshoot,
                  true,
                  lanczosSeqFiles.toString());

View Full Code Here

      L.setConf(new Configuration(conf));

      // eigendecomposition (step 3)
      int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
      LanczosState state = new LanczosState(L, eigenrank,
          new DistributedLanczosSolver().getInitialVector(L));

      DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
      U.setConf(new Configuration(conf));
      List<Double> eigenValues = Lists.newArrayList();
      for (int i=0; i<eigenrank; i++) {
View Full Code Here

                                                               DistributedRowMatrix input,
                                                               LanczosState state,
                                                               int numEigenVectors,
                                                               int overshoot,
                                                               Path tmp) throws IOException {
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Path seqFiles = new Path(tmp, "eigendecomp-" + (System.nanoTime() & 0xFF));
    solver.runJob(conf,
                  state,
                  overshoot,
                  true,
                  seqFiles.toString());
View Full Code Here

      // Perform eigen-decomposition using LanczosSolver
      // since some of the eigen-output is spurious and will be eliminated
      // upon verification, we have to aim to overshoot and then discard
      // unnecessary vectors later
      int overshoot = Math.min((int) (clusters * OVERSHOOTMULTIPLIER), numDims);
      DistributedLanczosSolver solver = new DistributedLanczosSolver();
      LanczosState state = new LanczosState(L, overshoot, DistributedLanczosSolver.getInitialVector(L));
      Path lanczosSeqFiles = new Path(outputCalc, "eigenvectors");

      solver.runJob(conf, state, overshoot, true, lanczosSeqFiles.toString());

      // perform a verification
      EigenVerificationJob verifier = new EigenVerificationJob();
      Path verifiedEigensPath = new Path(outputCalc, "eigenverifier");
      verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath, true, 1.0, clusters);
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.