Examples of OnlineSummarizer


Examples of org.apache.mahout.math.stats.OnlineSummarizer

  }

  public static void printSummaries(List<OnlineSummarizer> summarizers, String type, PrintWriter fileOut) {
    double maxDistance = 0;
    for (int i = 0; i < summarizers.size(); ++i) {
      OnlineSummarizer summarizer = summarizers.get(i);
      if (summarizer.getCount() == 0) {
        System.out.printf("Cluster %d is empty\n", i);
        continue;
      }
      maxDistance = Math.max(maxDistance, summarizer.getMax());
      System.out.printf("Average distance in cluster %d [%d]: %f\n", i, summarizer.getCount(), summarizer.getMean());
      // If there is just one point in the cluster, quartiles cannot be estimated. We'll just assume all the quartiles
      // equal the only value.
      boolean moreThanOne = summarizer.getCount() > 1;
      if (fileOut != null) {
        fileOut.printf("%d,%f,%f,%f,%f,%f,%f,%f,%d,%s\n", i, summarizer.getMean(),
            summarizer.getSD(),
            summarizer.getQuartile(0),
            moreThanOne ? summarizer.getQuartile(1) : summarizer.getQuartile(0),
            moreThanOne ? summarizer.getQuartile(2) : summarizer.getQuartile(0),
            moreThanOne ? summarizer.getQuartile(3) : summarizer.getQuartile(0),
            summarizer.getQuartile(4), summarizer.getCount(), type);
      }
    }
    System.out.printf("Num clusters: %d; maxDistance: %f\n", summarizers.size(), maxDistance);
  }
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

    // full distance computations as possible.  Our goal is to only do full distance computations for
    // vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.

    OnlineSummarizer[] distribution = new OnlineSummarizer[BITS + 1];
    for (int i = 0; i < BITS + 1; i++) {
      distribution[i] = new OnlineSummarizer();
    }

    distanceEvaluations = 0;
   
    // We keep the counts of the hash distances here.  This lets us accurately
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

    assertEquals("Total weight not preserved", totalWeight(syntheticData.getFirst()), totalWeight(clusterer), 1.0e-9);

    // Verify that each corner of the cube has a centroid very nearby.
    // This is probably FALSE for large-dimensional spaces!
    OnlineSummarizer summarizer = new OnlineSummarizer();
    for (Vector mean : syntheticData.getSecond()) {
      WeightedThing<Vector> v = searcher.search(mean, 1).get(0);
      summarizer.add(v.getWeight());
    }
    assertTrue(String.format("Median weight [%f] too large [>%f]", summarizer.getMedian(),
        DISTRIBUTION_RADIUS), summarizer.getMedian() < DISTRIBUTION_RADIUS);

    double clusterTime = (endTime - startTime) / 1000.0;
    System.out.printf("%s\n%.2f for clustering\n%.1f us per row\n\n",
        searcher.getClass().getName(), clusterTime,
        clusterTime / syntheticData.getFirst().size() * 1.0e6);
 
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

      encoder[i] = new ConstantValueEncoder("v" + 1);
    }

    OnlineSummarizer[] s = new OnlineSummarizer[FIELDS];
    for (int i = 0; i < FIELDS; i++) {
      s[i] = new OnlineSummarizer();
    }
    long t0 = System.currentTimeMillis();
    Vector v = new DenseVector(1000);
    if ("--generate".equals(args[0])) {
      PrintWriter out =
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

      if (lmp.getTargetCategories().size() <= 2) {
        collector = new Auc();
      }

      OnlineSummarizer slh = new OnlineSummarizer();
      ConfusionMatrix cm = new ConfusionMatrix(lmp.getTargetCategories(), defaultCategory);

      State<Wrapper, CrossFoldLearner> best = lr.getBest();
      if (best == null) {
        output.println("AdaptiveLogisticRegression has not be trained probably.");
        return;
      }
      CrossFoldLearner learner = best.getPayload().getLearner();

      BufferedReader in = TrainLogistic.open(inputFile);
      String line = in.readLine();
      csv.firstLine(line);
      line = in.readLine();
      if (showScores) {
        output.println("\"target\", \"model-output\", \"log-likelihood\", \"average-likelihood\"");
      }
      while (line != null) {
        Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
        //TODO: How to avoid extra target values not shown in the training process.
        int target = csv.processLine(line, v);
        double likelihood = learner.logLikelihood(target, v);
        double score = learner.classifyFull(v).maxValue();

        slh.add(likelihood);
        cm.addInstance(csv.getTargetString(line), csv.getTargetLabel(target));       

        if (showScores) {
          output.printf(Locale.ENGLISH, "%8d, %.12f, %.13f, %.13f%n", target,
              score, learner.logLikelihood(target, v), slh.getMean());
        }
        if (collector != null) {
          collector.add(target, score);
        }
        line = in.readLine();
      }

      output.printf(Locale.ENGLISH,"\nLog-likelihood:");
      output.printf(Locale.ENGLISH, "Min=%.2f, Max=%.2f, Mean=%.2f, Median=%.2f%n",
          slh.getMin(), slh.getMax(), slh.getMean(), slh.getMedian());

      if (collector != null) {       
        output.printf(Locale.ENGLISH, "%nAUC = %.2f%n", collector.auc());
      }
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

    RandomUtils.useTestSeed();
  }

  @Test
  public void testOffset() {
    OnlineSummarizer s = new OnlineSummarizer();
    Sampler<Double> sampler = new Normal(2, 5);
    for (int i = 0; i < 10001; i++) {
      s.add(sampler.sample());
    }
    assertEquals(String.format("m = %.3f, sd = %.3f", s.getMean(), s.getSD()), 2, s.getMean(), 0.04 * s.getSD());
    assertEquals(5, s.getSD(), 0.12);
  }
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

        DenseVector offset = new DenseVector(new double[]{6, 3, 0});
        MultiNormal n = new MultiNormal(
                new DenseVector(new double[]{1, 2, 5}), offset);

        OnlineSummarizer[] s = {
                new OnlineSummarizer(),
                new OnlineSummarizer(),
                new OnlineSummarizer()
        };

        OnlineSummarizer[] cross = {
                new OnlineSummarizer(),
                new OnlineSummarizer(),
                new OnlineSummarizer()
        };

        for (int i = 0; i < 10000; i++) {
            Vector v = n.sample();
            for (int j = 0; j < 3; j++) {
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer


    @Test
    public void testRadius() {
        MultiNormal gen = new MultiNormal(0.1, new DenseVector(10));
        OnlineSummarizer s = new OnlineSummarizer();
        for (int i = 0; i < 10000; i++) {
            double x = gen.sample().norm(2) / Math.sqrt(10);
            s.add(x);
        }
        assertEquals(0.1, s.getMean(), 0.01);

    }
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

    System.out.printf("speedup,q1,q2,q3\n");

    for (int i = 0; i < 12; i++) {
      double strategy = (i - 1.0) / 10.0;
      cut.setRaiseHashLimitStrategy(strategy);
      OnlineSummarizer t1 = evaluateStrategy(testData, ref, cut);
      int evals = cut.resetEvaluationCount();
      final double speedup = 10.0e6 / evals;
      System.out.printf("%.1f,%.2f,%.2f,%.2f\n", speedup, t1.getQuartile(1),
          t1.getQuartile(2), t1.getQuartile(3));
      assertTrue(t1.getQuartile(2) > 0.45);
      assertTrue(speedup > 4 || t1.getQuartile(2) > 0.9);
      assertTrue(speedup > 15 || t1.getQuartile(2) > 0.8);
    }
  }
View Full Code Here

Examples of org.apache.mahout.math.stats.OnlineSummarizer

    }
  }

  private static OnlineSummarizer evaluateStrategy(Matrix testData, BruteSearch ref,
                                                   LocalitySensitiveHashSearch cut) {
    OnlineSummarizer t1 = new OnlineSummarizer();

    for (int i = 0; i < 100; i++) {
      final Vector q = testData.viewRow(i);
      List<WeightedThing<Vector>> v1 = cut.search(q, 150);
      BitSet b1 = new BitSet();
      for (WeightedThing<Vector> v : v1) {
        b1.set(((WeightedVector)v.getValue()).getIndex());
      }

      List<WeightedThing<Vector>> v2 = ref.search(q, 100);
      BitSet b2 = new BitSet();
      for (WeightedThing<Vector> v : v2) {
        b2.set(((WeightedVector)v.getValue()).getIndex());
      }

      b1.and(b2);
      t1.add(b1.cardinality());
    }
    return t1;
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.