Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure


   * Story: User can exercise the reference implementation to verify that the test datapoints are clustered in
   * a reasonable manner.
   */
  @Test
  public void testReferenceImplementation() {
    MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(new EuclideanDistanceMeasure(), 4.0, 1.0, 0.5);
    List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
    // add all points to the canopies
    int nextCanopyId = 0;
    for (Vector aRaw : raw) {
      clusterer.mergeCanopy(new MeanShiftCanopy(aRaw, nextCanopyId++, euclideanDistanceMeasure), canopies);
View Full Code Here


  }

  @Test
  public void testFuzzyKmeans() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // now run the Canopy job to prime kMeans canopies
    CanopyDriver.run(new Configuration(), testdata, output, measure, 3.1, 2.1, false, true);
    // now run the KMeans job
    FuzzyKMeansDriver.run(testdata, new Path(output, "clusters-0"), output, measure, 0.001, 10, 2, true, true, 0, true);
    int numIterations = 10;
View Full Code Here

  }

  @Test
  public void testMeanShift() throws Exception {
    ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    new MeanShiftCanopyDriver().run(conf, testdata, output, measure, 2.1, 1.0, 0.001, 10, false, true, true);
    int numIterations = 10;
    Path clustersIn = new Path(output, "clusters-2");
    RepresentativePointsDriver.run(conf, clustersIn, new Path(output, "clusteredPoints"), output, measure, numIterations, true);
    CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
View Full Code Here

    Path clustersIn = new Path(output, "clusters-0");
    RepresentativePointsDriver.run(conf,
                                   clustersIn,
                                   new Path(output, "clusteredPoints"),
                                   output,
                                   new EuclideanDistanceMeasure(),
                                   numIterations,
                                   true);
    CDbwEvaluator evaluator = new CDbwEvaluator(conf, clustersIn);
    //printRepPoints(numIterations);
    // now print out the Results
View Full Code Here

  }

  @Test
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = TestKmeansClustering.getPoints(TestKmeansClustering.REFERENCE);
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    for (int k = 0; k < points.size(); k++) {
      System.out.println("test k= " + k);

      List<SoftCluster> clusterList = new ArrayList<SoftCluster>();
      // pick k initial cluster centers at random
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();
      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();
      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();

      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");

      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext =
          DummyRecordWriter.build(mapper, conf, mapWriter);
      mapper.setup(mapContext);
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }

      // run combiner
      FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
      DummyRecordWriter<Text, ClusterObservations> combinerWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Reducer<Text, ClusterObservations, Text, ClusterObservations>.Context combinerContext =
          DummyRecordWriter.build(combiner, conf, combinerWriter, Text.class, ClusterObservations.class);
      combiner.setup(combinerContext);
      for (Text key : mapWriter.getKeys()) {
        List<ClusterObservations> values = mapWriter.getValue(key);
        combiner.reduce(new Text(key), values, combinerContext);
      }

      // run reducer
      FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
      DummyRecordWriter<Text, SoftCluster> reducerWriter = new DummyRecordWriter<Text, SoftCluster>();
      Reducer<Text, ClusterObservations, Text, SoftCluster>.Context reducerContext =
          DummyRecordWriter.build(reducer, conf, reducerWriter, Text.class, ClusterObservations.class);
      reducer.setup(clusterList, conf);

      for (Text key : combinerWriter.getKeys()) {
        List<ClusterObservations> values = combinerWriter.getValue(key);
        reducer.reduce(new Text(key), values, reducerContext);
      }

      // run clusterMapper
      List<SoftCluster> reducerClusters = new ArrayList<SoftCluster>();
      for (Text key : reducerWriter.getKeys()) {
        List<SoftCluster> values = reducerWriter.getValue(key);
        reducerClusters.add(values.get(0));
      }
      for (SoftCluster softCluster : reducerClusters) {
        softCluster.computeParameters();
      }

      FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
      DummyRecordWriter<IntWritable, WeightedVectorWritable> clusterWriter = new DummyRecordWriter<IntWritable, WeightedVectorWritable>();
      Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context clusterContext =
          DummyRecordWriter.build(clusterMapper, conf, clusterWriter);
      clusterMapper.setup(reducerClusters, conf);

      for (VectorWritable point : points) {
        clusterMapper.map(new Text(), point, clusterContext);
      }

      // compute the reference result after one iteration and compare
      List<SoftCluster> reference = new ArrayList<SoftCluster>();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = tweakValue(points.get(i).get());
        reference.add(new SoftCluster(vec, i, measure));
      }
      Map<Integer, List<WeightedVectorWritable>> refClusters = new HashMap<Integer, List<WeightedVectorWritable>>();
      Collection<Vector> pointsVectors = new ArrayList<Vector>();
      for (VectorWritable point : points) {
        pointsVectors.add(point.get());
      }

      List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(pointsVectors,
                                                                            reference,
                                                                            new EuclideanDistanceMeasure(),
                                                                            0.001,
                                                                            2,
                                                                            1);

      computeCluster(pointsVectors, clusters.get(clusters.size() - 1), new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(),
                                                                                                0.001,
                                                                                                2), refClusters);

      // Now compare the clustermapper results with reference implementation
      assertEquals("mapper and reference sizes", refClusters.size(), clusterWriter.getKeys().size());
View Full Code Here

    return null;
  }

  @Test
  public void testCanopy() throws Exception { // now run the Job
    DistanceMeasure measure = new EuclideanDistanceMeasure();

    Path output = getTestTempDirPath("output");
    CanopyDriver.run(new Configuration(), getTestTempDirPath("testdata"), output, measure, 8, 4, true, false);
    // run ClusterDumper
    ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-0"), new Path(output, "clusteredPoints"));
View Full Code Here

    clusterDumper.printClusters(termDictionary);
  }

  @Test
  public void testKmeans() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // now run the Canopy job to prime kMeans canopies
    Path output = getTestTempDirPath("output");
    Configuration conf = new Configuration();
    CanopyDriver.run(conf, getTestTempDirPath("testdata"), output, measure, 8, 4, false, false);
    // now run the KMeans job
View Full Code Here

TOP

Related Classes of org.apache.mahout.common.distance.EuclideanDistanceMeasure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.