Package org.apache.mahout.common.distance

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure
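EuclideanDistanceMeasure implements the DistanceMeasure interface and returns the ordinary (L2) Euclidean distance between two Vectors. As a minimal, self-contained sketch of that contract before the collected snippets below (the class name, vector values, and printed output are illustrative, not taken from the Mahout test suite):

import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class EuclideanDistanceExample {
  public static void main(String[] args) {
    // Two illustrative 2-dimensional points
    Vector a = new DenseVector(new double[] {1.0, 2.0});
    Vector b = new DenseVector(new double[] {4.0, 6.0});

    // distance() returns sqrt(sum_i (a_i - b_i)^2); here sqrt(9 + 16) = 5.0
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    System.out.println("distance = " + measure.distance(a, b));
  }
}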


    clusterDumper.printClusters(termDictionary);
  }

  @Test
  public void testFuzzyKmeans() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // now run the Canopy job to prime kMeans canopies
    Path output = getTestTempDirPath("output");
    Configuration conf = new Configuration();
    CanopyDriver.run(conf, getTestTempDirPath("testdata"), output, measure, 8, 4, false, false);
    // now run the Fuzzy KMeans job
View Full Code Here


    clusterDumper.printClusters(termDictionary);
  }

  @Test
  public void testKmeansSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 15;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration conf = new Configuration();
View Full Code Here

    clusterDumper.printClusters(termDictionary);
  }

  @Test
  public void testKmeansDSVD() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 13;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
View Full Code Here

    clusterDumper.printClusters(termDictionary);
  }

  @Test
  public void testKmeansDSVD2() throws Exception {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    Path output = getTestTempDirPath("output");
    Path tmp = getTestTempDirPath("tmp");
    int desiredRank = 13;
    DistributedLanczosSolver solver = new DistributedLanczosSolver();
    Configuration config = new Configuration();
View Full Code Here

  }

  @Test
  public void testDMClusterDistribution2() {
    DistanceMeasureClusterDistribution dist =
        new DistanceMeasureClusterDistribution(new VectorWritable(new DenseVector(2)), new EuclideanDistanceMeasure());
    String json = dist.asJsonString();
    GsonBuilder builder = new GsonBuilder();
    builder.registerTypeAdapter(ModelDistribution.class, new JsonModelDistributionAdapter());
    builder.registerTypeAdapter(DistanceMeasure.class, new JsonDistanceMeasureAdapter());
    Gson gson = builder.create();
View Full Code Here
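The snippet above stops at building the Gson instance. With adapters registered for the ModelDistribution and DistanceMeasure interfaces, the usual next step is to read the JSON back through those adapters and compare it with the original distribution; the lines below are an illustrative sketch of that round trip, not the verbatim continuation of the test (the variable name dist2 is assumed):

    // Hedged sketch: round-trips the JSON produced by dist.asJsonString() through the
    // type adapters registered above.
    ModelDistribution<?> dist2 = gson.fromJson(json, ModelDistribution.class);
    System.out.println("round-tripped type: " + dist2.getClass().getName());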

  }

  public static void main(String[] args) throws Exception {
    t1 = 1.5;
    t2 = 0.5;
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    significance = 0.02;

    Path samples = new Path("samples");
    Path output = new Path("output");
    HadoopUtil.overwriteOutput(samples);
View Full Code Here
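In the main() above, t1 and t2 are the canopy thresholds: a point within t1 of an existing canopy center joins that canopy, while a point within the tighter t2 can no longer seed a canopy of its own. A standalone sketch of that seeding rule with EuclideanDistanceMeasure (illustrative only, not the CanopyDriver internals; requires t1 > t2):

import java.util.ArrayList;
import java.util.List;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.Vector;

public class CanopySeedingSketch {
  // One sequential pass of the classic canopy rule over a list of points.
  static List<List<Vector>> canopies(List<Vector> points, double t1, double t2) {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    List<Vector> candidates = new ArrayList<Vector>(points);
    List<List<Vector>> canopies = new ArrayList<List<Vector>>();
    while (!candidates.isEmpty()) {
      Vector center = candidates.remove(0);          // next remaining point seeds a canopy
      List<Vector> canopy = new ArrayList<Vector>();
      canopy.add(center);
      List<Vector> remaining = new ArrayList<Vector>();
      for (Vector p : candidates) {
        double d = measure.distance(center, p);
        if (d < t1) {
          canopy.add(p);                             // within t1: p joins this canopy
        }
        if (d > t2) {
          remaining.add(p);                          // outside t2: p may still seed a later canopy
        }
      }
      canopies.add(canopy);
      candidates = remaining;
    }
    return canopies;
  }
}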

  }

  @Test
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = TestKmeansClustering.getPoints(TestKmeansClustering.REFERENCE);
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    for (int k = 0; k < points.size(); k++) {
      System.out.println("test k= " + k);

      List<SoftCluster> clusterList = Lists.newArrayList();
      // pick k initial cluster centers at random
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();
      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();
      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");
View Full Code Here

      }

      // run mapper
      FuzzyKMeansMapper mapper = new FuzzyKMeansMapper();
      mapper.config(clusterList);
      DistanceMeasure measure = new EuclideanDistanceMeasure();

      Configuration conf = new Configuration();
      conf.set(FuzzyKMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
      conf.set(FuzzyKMeansConfigKeys.CLUSTER_CONVERGENCE_KEY, "0.001");
      conf.set(FuzzyKMeansConfigKeys.M_KEY, "2");
      conf.set(FuzzyKMeansConfigKeys.EMIT_MOST_LIKELY_KEY, "true");
      conf.set(FuzzyKMeansConfigKeys.THRESHOLD_KEY, "0");

      DummyRecordWriter<Text, ClusterObservations> mapWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations>.Context mapContext =
          DummyRecordWriter.build(mapper, conf, mapWriter);
      mapper.setup(mapContext);
      for (VectorWritable point : points) {
        mapper.map(new Text(), point, mapContext);
      }

      // run combiner
      FuzzyKMeansCombiner combiner = new FuzzyKMeansCombiner();
      DummyRecordWriter<Text, ClusterObservations> combinerWriter = new DummyRecordWriter<Text, ClusterObservations>();
      Reducer<Text, ClusterObservations, Text, ClusterObservations>.Context combinerContext =
          DummyRecordWriter.build(combiner, conf, combinerWriter, Text.class, ClusterObservations.class);
      combiner.setup(combinerContext);
      for (Text key : mapWriter.getKeys()) {
        List<ClusterObservations> values = mapWriter.getValue(key);
        combiner.reduce(new Text(key), values, combinerContext);
      }

      // run reducer
      FuzzyKMeansReducer reducer = new FuzzyKMeansReducer();
      DummyRecordWriter<Text, SoftCluster> reducerWriter = new DummyRecordWriter<Text, SoftCluster>();
      Reducer<Text, ClusterObservations, Text, SoftCluster>.Context reducerContext =
          DummyRecordWriter.build(reducer, conf, reducerWriter, Text.class, ClusterObservations.class);
      reducer.setup(clusterList, conf);

      for (Text key : combinerWriter.getKeys()) {
        List<ClusterObservations> values = combinerWriter.getValue(key);
        reducer.reduce(new Text(key), values, reducerContext);
      }

      // run clusterMapper
      Collection<SoftCluster> reducerClusters = Lists.newArrayList();
      for (Text key : reducerWriter.getKeys()) {
        List<SoftCluster> values = reducerWriter.getValue(key);
        reducerClusters.add(values.get(0));
      }
      for (SoftCluster softCluster : reducerClusters) {
        softCluster.computeParameters();
      }

      FuzzyKMeansClusterMapper clusterMapper = new FuzzyKMeansClusterMapper();
      DummyRecordWriter<IntWritable, WeightedVectorWritable> clusterWriter =
          new DummyRecordWriter<IntWritable, WeightedVectorWritable>();
      Mapper<WritableComparable<?>, VectorWritable, IntWritable, WeightedVectorWritable>.Context clusterContext =
          DummyRecordWriter.build(clusterMapper, conf, clusterWriter);
      clusterMapper.setup(reducerClusters, conf);

      for (VectorWritable point : points) {
        clusterMapper.map(new Text(), point, clusterContext);
      }

      // compute the reference result after one iteration and compare
      List<SoftCluster> reference = Lists.newArrayList();
      for (int i = 0; i < k + 1; i++) {
        Vector vec = tweakValue(points.get(i).get());
        reference.add(new SoftCluster(vec, i, measure));
      }
      Map<Integer, List<WeightedVectorWritable>> refClusters = Maps.newHashMap();
      Collection<Vector> pointsVectors = Lists.newArrayList();
      for (VectorWritable point : points) {
        pointsVectors.add(point.get());
      }

      List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(pointsVectors,
                                                                            reference,
                                                                            new EuclideanDistanceMeasure(),
                                                                            0.001,
                                                                            2,
                                                                            1);

      computeCluster(pointsVectors, clusters.get(clusters.size() - 1),
                     new FuzzyKMeansClusterer(new EuclideanDistanceMeasure(), 0.001, 2), refClusters);

      // Now compare the clustermapper results with reference implementation
      assertEquals("mapper and reference sizes", refClusters.size(), clusterWriter.getKeys().size());
      for (Map.Entry<Integer, List<WeightedVectorWritable>> entry : refClusters.entrySet()) {
        int key = entry.getKey();
View Full Code Here
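The reference path above (FuzzyKMeansClusterer.clusterPoints with m = 2 and a convergence delta of 0.001) computes soft memberships for every point. As a standalone illustration of the underlying fuzzy k-means membership formula, u_i = 1 / sum_j (d_i / d_j)^(2/(m-1)), using EuclideanDistanceMeasure (class, method, and variable names are illustrative, and the degenerate case of a point sitting exactly on a center is not handled):

import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class FuzzyMembershipSketch {
  // Fuzzy k-means membership of a point in each cluster, for fuzziness m > 1.
  static double[] memberships(Vector point, Vector[] centers, double m) {
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    double[] d = new double[centers.length];
    for (int i = 0; i < centers.length; i++) {
      d[i] = measure.distance(point, centers[i]);
    }
    double[] u = new double[centers.length];
    for (int i = 0; i < centers.length; i++) {
      double sum = 0.0;
      for (int j = 0; j < centers.length; j++) {
        sum += Math.pow(d[i] / d[j], 2.0 / (m - 1.0));
      }
      u[i] = 1.0 / sum;   // memberships across all centers sum to 1
    }
    return u;
  }

  public static void main(String[] args) {
    Vector p = new DenseVector(new double[] {1.0, 1.0});
    Vector[] centers = {
        new DenseVector(new double[] {0.0, 0.0}),
        new DenseVector(new double[] {3.0, 3.0})
    };
    // m = 2, matching the M_KEY value set in the configurations above
    double[] u = memberships(p, centers, 2.0);
    System.out.println("memberships = " + u[0] + ", " + u[1]);
  }
}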
