Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

org.apache.mahout.common.distance.EuclideanDistanceMeasure
This class implements a Euclidean distance metric by taking the square root of the sum of the squared differences between each coordinate.
If you don't care about the true distance and only need the values for comparison, then the base class, {@link SquaredEuclideanDistanceMeasure}, will be faster since it skips the final square root of the summed squared differences.

      mark.dotBenchmark();
      mark.serializeBenchmark();
      mark.deserializeBenchmark();
      mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
      mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
      mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
      
      log.info("\n{}", mark.summarize());
    } catch (OptionException e) {

View Full Code Here

      Vector point = new DenseVector(doubles.size());
      int index = 0;
      for (Double d : doubles) {
        point.set(index++, d);
      }
      MeanShiftCanopy canopy = new MeanShiftCanopy(point, nextCanopyId++, new EuclideanDistanceMeasure());
      context.write(new Text(), canopy);
    }
  }

View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      run(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55);
    }
  }

View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(), new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 0.5, 10);
    }
  }

View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(), new Path("testdata"), output, new EuclideanDistanceMeasure(), 47.6, 1, 0.5, 10);
    }
  }

View Full Code Here

      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(),
                    new Path("testdata"),
                    output,
                    new EuclideanDistanceMeasure(),
                    80, 55, 10, (float) 2, 0.5);
    }
  }

View Full Code Here


  /** Story: Test the reference implementation */
  @Test
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = getPoints(REFERENCE);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // try all possible values of k
    for (int k = 0; k < points.size(); k++) {
      System.out.println("Test k=" + (k + 1) + ':');
      // pick k initial cluster centers at random
      List<Cluster> clusters = new ArrayList<Cluster>();

View Full Code Here


  /** Story: test that the mapper will map input points to the nearest cluster */
  @Test
  public void testKMeansMapper() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      List<Cluster> clusters = new ArrayList<Cluster>();


      for (int i = 0; i < k + 1; i++) {
        Cluster cluster = new Cluster(points.get(i).get(), i, measure);
        // add the center so the centroid will be correct upon output
        cluster.observe(cluster.getCenter(), 1);
        clusters.add(cluster);
      }
      mapper.setup(clusters, measure);


      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      assertEquals("Number of map results", k + 1, mapWriter.getData().size());
      Map<String, Cluster> clusterMap = loadClusterMap(clusters);
      for (Text key : mapWriter.getKeys()) {
        AbstractCluster cluster = clusterMap.get(key.toString());
        List<ClusterObservations> values = mapWriter.getValue(key);
        for (ClusterObservations value : values) {
          double distance = measure.distance(cluster.getCenter(), value.getS1());
          for (AbstractCluster c : clusters) {
            assertTrue("distance error", distance <= measure.distance(value.getS1(), c.getCenter()));
          }
        }
      }
    }
  }

View Full Code Here

   * it sees
   */
  @Test
  public void testKMeansCombiner() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random

View Full Code Here

   * it sees
   */
  @Test
  public void testKMeansReducer() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      System.out.println("K = " + k);
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      List<Cluster> clusters = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i).get();
        Cluster cluster = new Cluster(vec, i, measure);
        // add the center so the centroid will be correct upon output
        // cluster.addPoint(cluster.getCenter());
        clusters.add(cluster);
      }
      mapper.setup(clusters, new EuclideanDistanceMeasure());
      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      // now combine the data

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.mahout.common.distance.EuclideanDistanceMeasure

mia.clustering.ch07.SimpleKMeansClustering

mia.clustering.ch09.CanopyExample

mia.clustering.ch09.FuzzyKMeansExample

mia.clustering.ch09.KMeansExample

mia.clustering.ch09.NewsKMeansClustering

org.apache.mahout.benchmark.VectorBenchmarks

org.apache.mahout.clustering.canopy.TestCanopyCreation

org.apache.mahout.clustering.cdbw.TestCDbwEvaluator

org.apache.mahout.clustering.ClusteringUtils

org.apache.mahout.clustering.conversion.meanshift.InputMapper

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.