Examples of EuclideanDistanceMeasure


Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

      }
      List<Vector> pointsVectors = new ArrayList<Vector>();
      for(VectorWritable point : points)
        pointsVectors.add(point.get());
     
      DistanceMeasure measure = new EuclideanDistanceMeasure();
      FuzzyKMeansClusterer clusterer = new FuzzyKMeansClusterer(measure, 0.001, 2);
      FuzzyKMeansClusterer.runFuzzyKMeansIteration(pointsVectors, reference, clusterer);
     
      for (SoftCluster key : reference) {
        String clusterId = key.getIdentifier();
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

      List<Vector> pointsVectors = new ArrayList<Vector>();
      for(VectorWritable point : points)
        pointsVectors.add(point.get());
     
      List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(pointsVectors, reference,
        new EuclideanDistanceMeasure(), 0.001, 2, 1);
      computeCluster(pointsVectors, clusters.get(clusters.size() - 1), new FuzzyKMeansClusterer(
          new EuclideanDistanceMeasure(), 0.001, 2), pointClusterInfo);
     
      // Now compare the clustermapper results with reducer
      for (String key : clusterMapperCollector.getKeys()) {
        List<FuzzyKMeansOutput> value = clusterMapperCollector.getValue(key);
       
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

   * Story: test that the reducer will sum the partial cluster totals for all of the clusters and points that
   * it sees
   */
  public void testKMeansReducer() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
    JobConf conf = new JobConf();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY,
      "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  }
 
  /** Story: User can cluster points without instantiating them all in memory at once */
  public void testIterativeEuclidean() throws Exception {
    List<Vector> points = getPoints(raw);
    List<Canopy> canopies = CanopyClusterer.createCanopies(points, new EuclideanDistanceMeasure(), 3.1, 2.1);
   
    System.out.println("testIterativeEuclidean");
    printCanopies(canopies);
    verifyEuclideanCanopies(canopies);
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

      mark.incrementalCreateBenchmark();
      mark.cloneBenchmark();
      mark.dotBenchmark();
      mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
      mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
      //mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
      mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
     
      log.info("\n{}", mark.summarize());
    } catch (OptionException e) {
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    RandomUtils.useTestSeed();
    DisplayDirichlet.generateSamples();
    List<Vector> points = new ArrayList<Vector>();
    for (VectorWritable sample : sampleData)
      points.add(sample.get());
    canopies = MeanShiftCanopyClusterer.clusterPoints(points, new EuclideanDistanceMeasure(), 0.5, 1.0, 0.05,
      10);
    for (MeanShiftCanopy canopy : canopies) {
      log.info(canopy.toString());
    }
    new DisplayMeanShift();
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

   * optimal k-means solution (given good starting points).
   *
   * @param datapoints          Rows containing WeightedVectors
   */
  private void iterativeAssignment(List<? extends WeightedVector> datapoints) {
    DistanceMeasure l2 = new EuclideanDistanceMeasure();
    // closestClusterDistances.get(i) is the distance from the i'th cluster to its closest
    // neighboring cluster.
    List<Double> closestClusterDistances = Lists.newArrayListWithExpectedSize(numClusters);
    // clusterAssignments[i] == j means that the i'th point is assigned to the j'th cluster. When
    // these don't change, we are done.
    List<Integer> clusterAssignments = Lists.newArrayListWithExpectedSize(datapoints.size());
    // Each point is assigned to the invalid "-1" cluster initially.
    for (int i = 0; i < datapoints.size(); ++i) {
      clusterAssignments.add(-1);
    }

    boolean changed = true;
    for (int i = 0; changed && i < maxNumIterations; i++) {
      // We compute what the distance between each cluster and its closest neighbor is to set a
      // proportional distance threshold for points that should be involved in calculating the
      // centroid.
      closestClusterDistances.clear();
      for (Vector center : centroids) {
        Vector closestOtherCluster = centroids.search(center, 2).get(1).getValue();
        closestClusterDistances.add(l2.distance(center, closestOtherCluster));
      }

      // Copies the current cluster centroids to newClusters and sets their weights to 0. This is
      // so we calculate the new centroids as we go through the datapoints.
      List<Centroid> newCentroids = Lists.newArrayList();
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  /**
   * Calls estimateDistanceCutoff(data, EuclideanDistance, sampleLimit).
   * @see DataUtils#estimateDistanceCutoff(Iterable, org.apache.mahout.common.distance.DistanceMeasure, int)
   */
  public static double estimateDistanceCutoff(Iterable<? extends Vector> data, int sampleLimit) {
    return estimateDistanceCutoff(data, new EuclideanDistanceMeasure(), sampleLimit);
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  /**
   * Calls estimateDistanceCutoff(data, EuclideanDistanceMeasure, 100).
   * @see DataUtils#estimateDistanceCutoff(Iterable, org.apache.mahout.common.distance.DistanceMeasure, int)
   */
  public static double estimateDistanceCutoff(Iterable<? extends Vector> data) {
    return estimateDistanceCutoff(data, new EuclideanDistanceMeasure(), 100);
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  }

  @Test
  public void testRepresentativePoints() throws Exception {
    ClusteringTestUtils.writePointsToFile(referenceData, new Path(testdata, "file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    // run using MR reference point calculation
    CanopyDriver.run(conf, testdata, output, measure, 3.1, 1.1, true, true);
    int numIterations = 2;
    Path clustersIn = new Path(output, "clusters-0");
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.