Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure
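EuclideanDistanceMeasure implements Mahout's DistanceMeasure interface and returns the L2 (straight-line) distance between two Vectors. Before the project snippets below, here is a minimal, self-contained sketch of the basic call; the class name and vector values are chosen purely for illustration and are not taken from any of the snippets:

    import org.apache.mahout.common.distance.DistanceMeasure;
    import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
    import org.apache.mahout.math.DenseVector;
    import org.apache.mahout.math.Vector;

    public class EuclideanDistanceExample {
      public static void main(String[] args) {
        DistanceMeasure measure = new EuclideanDistanceMeasure();
        // Two illustrative points in R^3
        Vector a = new DenseVector(new double[] {1.0, 2.0, 3.0});
        Vector b = new DenseVector(new double[] {4.0, 6.0, 3.0});
        // sqrt((4-1)^2 + (6-2)^2 + (3-3)^2) = 5.0
        System.out.println(measure.distance(a, b));
      }
    }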


      // Benchmark basic vector operations first
      mark.dotBenchmark();
      mark.serializeBenchmark();
      mark.deserializeBenchmark();
      // Then time each DistanceMeasure implementation, including EuclideanDistanceMeasure
      mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
      mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
      mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
      mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
     
      log.info("\n{}", mark.summarize());
    } catch (OptionException e) {
View Full Code Here
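The benchmark above times EuclideanDistanceMeasure next to SquaredEuclideanDistanceMeasure; in Mahout the former is simply the square root of the latter, so the squared variant is the cheaper choice when only relative comparisons matter. A small illustrative check (vector values made up; assumes the same imports as the sketch above plus SquaredEuclideanDistanceMeasure):

      DistanceMeasure euclidean = new EuclideanDistanceMeasure();
      DistanceMeasure squared = new SquaredEuclideanDistanceMeasure();
      Vector a = new DenseVector(new double[] {1.0, 0.0});
      Vector b = new DenseVector(new double[] {0.0, 1.0});
      double d = euclidean.distance(a, b);   // sqrt(2)
      double d2 = squared.distance(a, b);    // 2.0
      System.out.println(Math.abs(d2 - d * d) < 1e-9);  // true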


      // Copy the parsed values into a DenseVector
      Vector point = new DenseVector(doubles.size());
      int index = 0;
      for (Double d : doubles) {
        point.set(index++, d);
      }
      // Seed a new canopy at this point, comparing points with EuclideanDistanceMeasure
      MeanShiftCanopy canopy = new MeanShiftCanopy(point, nextCanopyId++, new EuclideanDistanceMeasure());
      context.write(new Text(), canopy);
    }
  }
View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      run(new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55);
    }
  }
View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(), new Path("testdata"), output, new EuclideanDistanceMeasure(), 80, 55, 0.5, 10);
    }
  }
View Full Code Here

      ToolRunner.run(new Configuration(), new Job(), args);
    } else {
      log.info("Running with default arguments");
      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(), new Path("testdata"), output, new EuclideanDistanceMeasure(), 47.6, 1, 0.5, 10);
    }
  }
View Full Code Here

      Path output = new Path("output");
      HadoopUtil.overwriteOutput(output);
      new Job().run(new Configuration(),
                    new Path("testdata"),
                    output,
                    new EuclideanDistanceMeasure(),
                    80, 55, 10, (float) 2, 0.5);
    }
  }
View Full Code Here

  /** Story: Test the reference implementation */
  @Test
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = getPoints(REFERENCE);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // try all possible values of k
    for (int k = 0; k < points.size(); k++) {
      System.out.println("Test k=" + (k + 1) + ':');
      // pick k initial cluster centers at random
      List<Cluster> clusters = new ArrayList<Cluster>();
View Full Code Here

  /** Story: test that the mapper will map input points to the nearest cluster */
  @Test
  public void testKMeansMapper() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      List<Cluster> clusters = new ArrayList<Cluster>();

      for (int i = 0; i < k + 1; i++) {
        Cluster cluster = new Cluster(points.get(i).get(), i, measure);
        // add the center so the centroid will be correct upon output
        cluster.observe(cluster.getCenter(), 1);
        clusters.add(cluster);
      }
      mapper.setup(clusters, measure);

      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      assertEquals("Number of map results", k + 1, mapWriter.getData().size());
      Map<String, Cluster> clusterMap = loadClusterMap(clusters);
      for (Text key : mapWriter.getKeys()) {
        AbstractCluster cluster = clusterMap.get(key.toString());
        List<ClusterObservations> values = mapWriter.getValue(key);
        for (ClusterObservations value : values) {
          double distance = measure.distance(cluster.getCenter(), value.getS1());
          for (AbstractCluster c : clusters) {
            assertTrue("distance error", distance <= measure.distance(value.getS1(), c.getCenter()));
          }
        }
      }
    }
  }
View Full Code Here

  /**
   * Story: test that the combiner will produce partial cluster totals for all of the clusters and points that
   * it sees
   */
  @Test
  public void testKMeansCombiner() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
View Full Code Here

  /**
   * Story: test that the reducer will sum the partial cluster totals for all of the clusters and points that
   * it sees
   */
  @Test
  public void testKMeansReducer() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      System.out.println("K = " + k);
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      List<Cluster> clusters = new ArrayList<Cluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i).get();
        Cluster cluster = new Cluster(vec, i, measure);
        // add the center so the centroid will be correct upon output
        // cluster.addPoint(cluster.getCenter());
        clusters.add(cluster);
      }
      mapper.setup(clusters, new EuclideanDistanceMeasure());
      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      // now combine the data
View Full Code Here


