Examples of EuclideanDistanceMeasure


Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    }
    System.out.printf("Generated query matrix.\n");

    for (int threads : new int[]{1, 2, 3, 4, 5, 6, 10, 20, 50}) {
      for (int block : new int[]{1, 10, 50}) {
        BruteSearch search = new BruteSearch(new EuclideanDistanceMeasure());
        search.addAll(referenceVectors);
        long t0 = System.nanoTime();
        search.search(queryVectors, block, threads);
        long t1 = System.nanoTime();
        System.out.printf("%d\t%d\t%.2f\n", threads, block, (t1 - t0) / 1e9);
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
        path, Text.class, Cluster.class);
   
    for (int i = 0; i < k; i++) {
      Vector vec = vectors.get(i);
      Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
      writer.append(new Text(cluster.getIdentifier()), cluster);
    }
    writer.close();
   
    KMeansDriver.run(conf, new Path("testdata/points"), new Path("testdata/clusters"),
      new Path("output"), new EuclideanDistanceMeasure(), 0.001, 10,
      true, false);
   
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
        new Path("output/" + Cluster.CLUSTERED_POINTS_DIR
                 + "/part-m-00000"), conf);
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(sampleData,  k);
    List<SoftCluster> clusters = new ArrayList<SoftCluster>();

    int clusterId = 0;
    for (Vector v : randomPoints) {
      clusters.add(new SoftCluster(v, clusterId++, new EuclideanDistanceMeasure()));
    }

    List<List<SoftCluster>> finalClusters = FuzzyKMeansClusterer
        .clusterPoints(sampleData, clusters,
            new EuclideanDistanceMeasure(), 0.01, 3, 10);
    for (SoftCluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Fuzzy Cluster id: " + cluster.getId()
          + " center: " + cluster.getCenter().asFormatString());
    }
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

        sampleData, k);
    List<Cluster> clusters = new ArrayList<Cluster>();

    int clusterId = 0;
    for (Vector v : randomPoints) {
      clusters.add(new Cluster(v, clusterId++, new EuclideanDistanceMeasure()));
    }

    List<List<Cluster>> finalClusters = KMeansClusterer.clusterPoints(
        sampleData, clusters, new EuclideanDistanceMeasure(), 3, 0.01);
    for (Cluster cluster : finalClusters.get(finalClusters.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
          + cluster.getCenter().asFormatString());
    }
  }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    Path vectorsFolder = new Path(outputDir, "tfidf-vectors");
    Path canopyCentroids = new Path(outputDir , "canopy-centroids");
    Path clusterOutput = new Path(outputDir , "clusters");
   
    CanopyDriver.run(vectorsFolder, canopyCentroids,
      new EuclideanDistanceMeasure(), 250, 120, false, false);
    KMeansDriver.run(conf, vectorsFolder, new Path(canopyCentroids, "clusters-0"),
      clusterOutput, new TanimotoDistanceMeasure(), 0.01,
      20, true, false);
   
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

    RandomPointsUtil.generateSamples(sampleData, 400, 1, 1, 2);
    RandomPointsUtil.generateSamples(sampleData, 300, 1, 0, 0.5);
    RandomPointsUtil.generateSamples(sampleData, 300, 0, 2, 0.1);

    List<Canopy> canopies = CanopyClusterer.createCanopies(sampleData,
        new EuclideanDistanceMeasure(), 3.0, 1.5);

    for (Canopy canopy : canopies) {
      System.out.println("Canopy id: " + canopy.getId() + " center: "
          + canopy.getCenter().asFormatString());
    }
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  /**
   * Story: User can exercise the reference implementation to verify that the test datapoints are clustered in
   * a reasonable manner.
   */
  public void testReferenceImplementation() {
    MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(new EuclideanDistanceMeasure(), 4.0,
        1.0, 0.5);
    List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
    // add all points to the canopies
    int nextCanopyId = 0;
    for (Vector aRaw : raw) {
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

        clusterList.add(cluster);
      }
      Map<String,String> pointClusterInfo = new HashMap<String,String>();
      // run reference FuzzyKmeans algorithm
      List<List<SoftCluster>> clusters = FuzzyKMeansClusterer.clusterPoints(points, clusterList,
        new EuclideanDistanceMeasure(), 0.001, 2, 2);
      computeCluster(points, clusters.get(clusters.size() - 1), new FuzzyKMeansClusterer(
          new EuclideanDistanceMeasure(), 0.001, 2), pointClusterInfo);
     
      // iterate for each point
      for (String value : pointClusterInfo.values()) {
        String clusterInfoStr = value.substring(1, value.length() - 1);
        String[] clusterInfoList = clusterInfoStr.split(" ");
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

  }
 
  /** Story: Test the reference implementation */
  public void testReferenceImplementation() throws Exception {
    List<Vector> points = getPoints(reference);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    // try all possible values of k
    for (int k = 0; k < points.size(); k++) {
      System.out.println("Test k=" + (k + 1) + ':');
      // pick k initial cluster centers at random
      List<Cluster> clusters = new ArrayList<Cluster>();
View Full Code Here

Examples of org.apache.mahout.common.distance.EuclideanDistanceMeasure

      for (VectorWritable point : points) {
        mapper.map(new Text(), point, collector, null);
      }
      assertEquals("Number of map results", k + 1, collector.getData().size());
      // now verify that all points are correctly allocated
      EuclideanDistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
      Map<String,Cluster> clusterMap = loadClusterMap(clusters);
      for (String key : collector.getKeys()) {
        Cluster cluster = clusterMap.get(key);
        List<KMeansInfo> values = collector.getValue(key);
        for (KMeansInfo value : values) {
          double distance = euclideanDistanceMeasure.distance(cluster.getCenter(), value.getPointTotal());
          for (Cluster c : clusters) {
            assertTrue("distance error", distance <= euclideanDistanceMeasure.distance(value.getPointTotal(),
              c.getCenter()));
          }
        }
      }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.