Examples of StreamingKMeans


Examples of org.apache.mahout.clustering.streaming.cluster.StreamingKMeans

        estimatePoints.add(datapointsIterator.next());
      }
      estimateDistanceCutoff = ClusteringUtils.estimateDistanceCutoff(estimatePoints, searcher.getDistanceMeasure());
    }

    StreamingKMeans clusterer = new StreamingKMeans(searcher, numClusters, estimateDistanceCutoff);
    while (datapointsIterator.hasNext()) {
      clusterer.cluster(datapointsIterator.next());
    }
    clusterer.reindexCentroids();

    return clusterer;
  }
View Full Code Here
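
The excerpt above is only the tail end of a clustering routine. As a rough, self-contained sketch of the same flow (written against Mahout's streaming clustering API; the data, class name, and parameter values below are made up for illustration), one might sample a few points, estimate a distance cutoff with ClusteringUtils, stream the points through StreamingKMeans, and finally reindex the centroids:

    import java.util.List;
    import java.util.Random;

    import com.google.common.collect.Lists;

    import org.apache.mahout.clustering.ClusteringUtils;
    import org.apache.mahout.clustering.streaming.cluster.StreamingKMeans;
    import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
    import org.apache.mahout.math.Centroid;
    import org.apache.mahout.math.DenseVector;
    import org.apache.mahout.math.Vector;
    import org.apache.mahout.math.neighborhood.BruteSearch;
    import org.apache.mahout.math.neighborhood.UpdatableSearcher;

    public class StreamingKMeansSketch {
      public static void main(String[] args) {
        // Toy data: 200 random two-dimensional points (purely illustrative).
        List<Centroid> dataPoints = Lists.newArrayList();
        Random random = new Random(42);
        for (int i = 0; i < 200; i++) {
          Vector v = new DenseVector(new double[] {random.nextGaussian(), random.nextGaussian()});
          dataPoints.add(new Centroid(i, v, 1));
        }

        // A brute-force searcher is the simplest UpdatableSearcher; ProjectionSearch scales better.
        UpdatableSearcher searcher = new BruteSearch(new EuclideanDistanceMeasure());

        // Estimate the distance cutoff from a small sample, as in the excerpt above.
        List<Centroid> estimatePoints = dataPoints.subList(0, 20);
        double estimatedDistanceCutoff =
            ClusteringUtils.estimateDistanceCutoff(estimatePoints, searcher.getDistanceMeasure());

        // Stream every point through the clusterer, then renumber the resulting centroids.
        StreamingKMeans clusterer = new StreamingKMeans(searcher, 10, estimatedDistanceCutoff);
        for (Centroid dataPoint : dataPoints) {
          clusterer.cluster(dataPoint);
        }
        clusterer.reindexCentroids();

        for (Vector v : clusterer) {
          Centroid centroid = (Centroid) v;
          System.out.printf("centroid %d: weight %.1f%n", centroid.getIndex(), centroid.getWeight());
        }
      }
    }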

Examples of org.apache.mahout.clustering.streaming.cluster.StreamingKMeans

    if (estimatedDistanceCutoff == StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF) {
      estimateDistanceCutoff = true;
      estimatePoints = Lists.newArrayList();
    }
    // There is no way of estimating the distance cutoff unless we have some data.
    clusterer = new StreamingKMeans(searcher, numClusters, estimatedDistanceCutoff);
  }
View Full Code Here
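
In this excerpt estimateDistanceCutoff (a boolean flag) and estimatedDistanceCutoff (the configured numeric cutoff) are distinct fields: when the driver passes the StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF sentinel, the code notes that a cutoff still has to be estimated and starts collecting points for that estimate. A minimal sketch of this deferral pattern, using hypothetical names and a made-up sample size rather than Mahout's own mapper internals, could look like:

    import java.util.List;

    import com.google.common.collect.Lists;

    import org.apache.mahout.clustering.ClusteringUtils;
    import org.apache.mahout.clustering.streaming.cluster.StreamingKMeans;
    import org.apache.mahout.math.Centroid;
    import org.apache.mahout.math.neighborhood.UpdatableSearcher;

    // Hypothetical helper illustrating the deferred-cutoff pattern; not Mahout's mapper itself.
    class DeferredCutoffClusterer {
      private static final double INVALID_CUTOFF = -1.0; // stand-in for the driver's invalid-cutoff sentinel
      private static final int SAMPLE_SIZE = 100;        // made-up sample size

      private final UpdatableSearcher searcher;
      private final int numClusters;
      private final List<Centroid> sample = Lists.newArrayList();
      private StreamingKMeans clusterer;

      DeferredCutoffClusterer(UpdatableSearcher searcher, int numClusters, double configuredCutoff) {
        this.searcher = searcher;
        this.numClusters = numClusters;
        if (configuredCutoff != INVALID_CUTOFF) {
          // A usable cutoff was configured up front, so clustering can start immediately.
          clusterer = new StreamingKMeans(searcher, numClusters, configuredCutoff);
        }
      }

      void accept(Centroid point) {
        if (clusterer == null) {
          // No cutoff yet: buffer points until there is enough data to estimate one.
          sample.add(point);
          if (sample.size() >= SAMPLE_SIZE) {
            double cutoff = ClusteringUtils.estimateDistanceCutoff(sample, searcher.getDistanceMeasure());
            clusterer = new StreamingKMeans(searcher, numClusters, cutoff);
            for (Centroid buffered : sample) {
              clusterer.cluster(buffered);
            }
            sample.clear();
          }
          return;
        }
        clusterer.cluster(point);
      }
    }

A production version would also have to flush any points still buffered when the stream ends.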

Examples of org.apache.mahout.clustering.streaming.cluster.StreamingKMeans

    for (org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable> pair : mapDriver.run()) {
      mapperCentroids.add(pair.getSecond().getCentroid());
    }

    // Clusters the data using local batch StreamingKMeans.
    StreamingKMeans batchClusterer =
        new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
            mapDriver.getConfiguration().getInt("estimatedNumMapClusters", -1), DISTANCE_CUTOFF);
    batchClusterer.cluster(syntheticData.getFirst());
    List<Centroid> batchCentroids = Lists.newArrayList();
    for (Vector v : batchClusterer) {
      batchCentroids.add((Centroid) v);
    }

    // Clusters the data point by point using StreamingKMeans.
    StreamingKMeans perPointClusterer =
        new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
            (1 << NUM_DIMENSIONS) * (int)Math.log(NUM_DATA_POINTS), DISTANCE_CUTOFF);
    for (Centroid datapoint : syntheticData.getFirst()) {
      perPointClusterer.cluster(datapoint);
    }
    List<Centroid> perPointCentroids = Lists.newArrayList();
    for (Vector v : perPointClusterer) {
      perPointCentroids.add((Centroid) v);
    }
View Full Code Here
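
The test above compares handing the whole collection to cluster(Iterable) against calling cluster(Centroid) once per point, then reads the resulting centroids back by iterating over the clusterer. A standalone sketch of that comparison, using synthetic random data and illustrative ProjectionSearch parameters in place of the test fixture's configuration, might be:

    import java.util.List;
    import java.util.Random;

    import com.google.common.collect.Lists;

    import org.apache.mahout.clustering.streaming.cluster.StreamingKMeans;
    import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
    import org.apache.mahout.math.Centroid;
    import org.apache.mahout.math.DenseVector;
    import org.apache.mahout.math.Vector;
    import org.apache.mahout.math.neighborhood.ProjectionSearch;

    public class BatchVsPerPointSketch {
      public static void main(String[] args) {
        // Synthetic stand-in for the test's data: 1000 random 3-d points.
        List<Centroid> data = Lists.newArrayList();
        Random random = new Random(1);
        for (int i = 0; i < 1000; i++) {
          data.add(new Centroid(i, new DenseVector(new double[] {
              random.nextGaussian(), random.nextGaussian(), random.nextGaussian()}), 1));
        }

        int numClusters = 40;
        double distanceCutoff = 0.1; // illustrative; normally estimated from the data

        // Batch: hand the whole iterable to cluster().
        StreamingKMeans batch = new StreamingKMeans(
            new ProjectionSearch(new SquaredEuclideanDistanceMeasure(), 3, 2), numClusters, distanceCutoff);
        batch.cluster(data);

        // Per point: call cluster() once per Centroid.
        StreamingKMeans perPoint = new StreamingKMeans(
            new ProjectionSearch(new SquaredEuclideanDistanceMeasure(), 3, 2), numClusters, distanceCutoff);
        for (Centroid point : data) {
          perPoint.cluster(point);
        }

        // Either way, iterating the clusterer yields its current sketch centroids.
        System.out.printf("batch centroids: %d, per-point centroids: %d%n",
            countCentroids(batch), countCentroids(perPoint));
      }

      private static int countCentroids(StreamingKMeans clusterer) {
        int count = 0;
        for (Vector ignored : clusterer) {
          count++;
        }
        return count;
      }
    }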

Examples of org.apache.mahout.clustering.streaming.cluster.StreamingKMeans

        ReduceDriver.newReduceDriver(new StreamingKMeansReducer());
    Configuration configuration = reduceDriver.getConfiguration();
    configure(configuration);

    System.out.printf("%s reducer test\n", configuration.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));
    StreamingKMeans clusterer =
        new StreamingKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(configuration),
            (1 << NUM_DIMENSIONS) * (int)Math.log(NUM_DATA_POINTS), DISTANCE_CUTOFF);

    long start = System.currentTimeMillis();
    clusterer.cluster(syntheticData.getFirst());
    long end = System.currentTimeMillis();

    System.out.printf("%f [s]\n", (end - start) / 1000.0);
    List<CentroidWritable> reducerInputs = Lists.newArrayList();
    int postMapperTotalWeight = 0;
View Full Code Here
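
The tests above size the clusterer at (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS) clusters, i.e. a dimension-dependent base count scaled by the log of the data size, which matches the usual streaming k-means guidance of keeping on the order of k·log n sketch centroids. As a worked example with hypothetical constants (the real NUM_DIMENSIONS and NUM_DATA_POINTS are defined elsewhere in the test fixture):

    public class SketchClusterCount {
      public static void main(String[] args) {
        // Hypothetical values standing in for the test fixture's constants.
        int numDimensions = 4;
        int numDataPoints = 10000;
        // (1 << 4) * (int) Math.log(10000) = 16 * 9 = 144 sketch clusters.
        int numSketchClusters = (1 << numDimensions) * (int) Math.log(numDataPoints);
        System.out.println(numSketchClusters); // prints 144
      }
    }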

Examples of org.apache.mahout.clustering.streaming.cluster.StreamingKMeans

        log.info("Estimated Points: {}", estimatePoints.size());
      }
      estimateDistanceCutoff = ClusteringUtils.estimateDistanceCutoff(estimatePoints, searcher.getDistanceMeasure());
    }

    StreamingKMeans streamingKMeans = new StreamingKMeans(searcher, numClusters, estimateDistanceCutoff);

    // dataPointsIterator may already be exhausted if no distance cutoff estimate was provided
    // up front (it was consumed while estimating one), so recreate it for the clustering pass.
    if (!dataPointsIterator.hasNext()) {
      dataPointsIterator = dataPoints.iterator();
    }

    while (dataPointsIterator.hasNext()) {
      streamingKMeans.cluster(dataPointsIterator.next());
    }

    streamingKMeans.reindexCentroids();
    return streamingKMeans;
  }
View Full Code Here
