Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure


  /** Story: Test the reference implementation */
  @Test
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = getPoints(REFERENCE);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // try all possible values of k
    for (int k = 0; k < points.size(); k++) {
      System.out.println("Test k=" + (k + 1) + ':');
      // pick k initial cluster centers at random
      List<Cluster> clusters = Lists.newArrayList();
View Full Code Here


   * test datapoints are clustered in a reasonable manner.
   */
  @Test
  public void testReferenceImplementation() {
    MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(
        new EuclideanDistanceMeasure(), new TriangularKernelProfile(), 4.0,
        1.0, 0.5, true);
    List<MeanShiftCanopy> canopies = Lists.newArrayList();
    // add all points to the canopies
    int nextCanopyId = 0;
    for (Vector aRaw : raw) {
View Full Code Here

  /** Story: test that the mapper will map input points to the nearest cluster */
  @Test
  public void testKMeansMapper() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      Collection<Cluster> clusters = Lists.newArrayList();

      for (int i = 0; i < k + 1; i++) {
        Cluster cluster = new Cluster(points.get(i).get(), i, measure);
        // add the center so the centroid will be correct upon output
        cluster.observe(cluster.getCenter(), 1);
        clusters.add(cluster);
      }
      mapper.setup(clusters, measure);

      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      assertEquals("Number of map results", k + 1, mapWriter.getData().size());
      Map<String, Cluster> clusterMap = loadClusterMap(clusters);
      for (Text key : mapWriter.getKeys()) {
        AbstractCluster cluster = clusterMap.get(key.toString());
        List<ClusterObservations> values = mapWriter.getValue(key);
        for (ClusterObservations value : values) {
          double distance = measure.distance(cluster.getCenter(), value.getS1());
          for (AbstractCluster c : clusters) {
            assertTrue("distance error", distance <= measure.distance(value.getS1(), c.getCenter()));
          }
        }
      }
    }
  }
View Full Code Here

   * it sees
   */
  @Test
  public void testKMeansCombiner() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      // pick k initial cluster centers at random
View Full Code Here

   * it sees
   */
  @Test
  public void testKMeansReducer() throws Exception {
    KMeansMapper mapper = new KMeansMapper();
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Configuration conf = new Configuration();
    conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
    conf.set(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
    conf.set(KMeansConfigKeys.CLUSTER_PATH_KEY, "");
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    for (int k = 0; k < points.size(); k++) {
      System.out.println("K = " + k);
      // pick k initial cluster centers at random
      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext = DummyRecordWriter
          .build(mapper, conf, mapWriter);
      Collection<Cluster> clusters = Lists.newArrayList();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = points.get(i).get();
        Cluster cluster = new Cluster(vec, i, measure);
        // add the center so the centroid will be correct upon output
        // cluster.addPoint(cluster.getCenter());
        clusters.add(cluster);
      }
      mapper.setup(clusters, new EuclideanDistanceMeasure());
      // map the data
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }
      // now combine the data
View Full Code Here

  }

  /** Story: User wishes to run kmeans job on reference data */
  @Test
  public void testKMeansSeqJob() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    List<VectorWritable> points = getPointsWritable(REFERENCE);

    Path pointsPath = getTestTempDirPath("points");
    Path clustersPath = getTestTempDirPath("clusters");
    Configuration conf = new Configuration();
View Full Code Here

  }

  /** Story: User wishes to run kmeans job on reference data */
  @Test
  public void testKMeansMRJob() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    List<VectorWritable> points = getPointsWritable(REFERENCE);

    Path pointsPath = getTestTempDirPath("points");
    Path clustersPath = getTestTempDirPath("clusters");
    Configuration conf = new Configuration();
View Full Code Here

    // now run the KMeans job
    KMeansDriver.run(pointsPath,
                     new Path(outputPath, "clusters-0-final"),
                     outputPath,
                     new EuclideanDistanceMeasure(),
                     0.001,
                     10,
                     true,
                     false);
View Full Code Here

    return null;
  }
 
  @Test
  public void testCanopy() throws Exception { // now run the Job
    DistanceMeasure measure = new EuclideanDistanceMeasure();
   
    Path output = getTestTempDirPath("output");
    CanopyDriver.run(new Configuration(), getTestTempDirPath("testdata"),
        output, measure, 8, 4, true, true);
    // run ClusterDumper
View Full Code Here

    clusterDumper.printClusters(termDictionary);
  }
 
  @Test
  public void testKmeans() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // now run the Canopy job to prime kMeans canopies
    Path output = getTestTempDirPath("output");
    Configuration conf = new Configuration();
    CanopyDriver.run(conf, getTestTempDirPath("testdata"), output, measure, 8,
        4, false, true);
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.distance.EuclideanDistanceMeasure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.