Package com.aamend.hadoop.clustering.cluster

Examples of com.aamend.hadoop.clustering.cluster.Cluster


    }

    @Override
    protected void map(Text key, CanopyWritable value, Context context) throws IOException, InterruptedException {

        Cluster cluster = value.get();
        int[] point = cluster.getCenter();
        boolean stronglyBound = false;
        for (Cluster canopy : canopies) {
            double dist = measure.distance(canopy.getCenter(), point);
            if (dist < t1) {
                KEY.set(Arrays.toString(canopy.getCenter()));
                Cluster newCluster;
                if (dist < t2) {
                    newCluster = new Canopy(canopy.getId(), point, value.get().getNum());
                    LOGGER.debug("Adding (T2) {} to Cluster {}", Arrays.toString(point),
                            Arrays.toString(canopy.getCenter()));
                } else {
                    newCluster = new Canopy(canopy.getId(), point, 0L);
                    LOGGER.debug("Adding (T1) {} to Cluster {}", Arrays.toString(point),
                            Arrays.toString(canopy.getCenter()));
                }

                context.write(KEY, new CanopyWritable(newCluster));
            }

            stronglyBound = stronglyBound || dist < t2;
        }
        if (!stronglyBound) {
            nextCanopyId++;
            Cluster canopy = new Canopy(nextCanopyId, point, value.get().getNum());
            canopies.add(canopy);
            LOGGER.debug("Creating a new Cluster {}", canopy.asFormattedString());
            KEY.set(Arrays.toString(canopy.getCenter()));
            context.write(KEY, value);
        }

    }
View Full Code Here


        boolean stronglyBound = false;
        for (Cluster canopy : canopies) {
            double dist = measure.distance(canopy.getCenter(), point);
            if (dist < t1) {
                KEY.set(Arrays.toString(canopy.getCenter()));
                Cluster newCluster;
                if (dist < t2) {
                    newCluster = new Canopy(canopy.getId(), point, 1L);
                    LOGGER.debug("Adding (T2) {} to Cluster center {}", Arrays.toString((int[]) value.get()),
                            Arrays.toString(canopy.getCenter()));
                } else {
                    newCluster = new Canopy(canopy.getId(), point, 0L);
                    LOGGER.debug("Adding (T1) {} to Cluster center {}", Arrays.toString((int[]) value.get()),
                            Arrays.toString(canopy.getCenter()));
                }
                context.write(KEY, new CanopyWritable(newCluster));
            }
            stronglyBound = stronglyBound || dist < t2;
        }
        if (!stronglyBound) {
            nextCanopyId++;
            Cluster canopy = new Canopy(nextCanopyId, point, 1L);
            LOGGER.debug("Creating a new Cluster {}", canopy.asFormattedString());
            canopies.add(canopy);
            KEY.set(Arrays.toString(canopy.getCenter()));
            context.write(KEY, new CanopyWritable(canopy));
        }

    }
View Full Code Here

        // Try to find a center that could minimize all data points
        List<int[]> points = new ArrayList<int[]>();

        long obs = 0L;
        Cluster clusterTemplate = null;
        for (CanopyWritable value : values) {
            if (clusterTemplate == null) {
                clusterTemplate = value.get();
            } else {
                obs += value.get().getNum();
                points.add(value.get().getCenter());
            }
        }

        // Increment number of observations for this cluster
        clusterTemplate.observe(obs);

        if (lastIteration) {
            if (clusterTemplate.getNum() < minObservations) {
                context.getCounter(COUNTER, COUNTER_REJECTED_CANOPY).increment(1L);
                return;
            }
        }

        LOGGER.info("Minimizing distance across {} data points in cluster center {}",
                points.size(), Arrays.toString(clusterTemplate.getCenter()));

        clusterTemplate.computeCenter(points, measure);
        nextCanopyId++;
        Cluster newCluster = new Canopy(nextCanopyId, clusterTemplate.getCenter(), clusterTemplate.getNum());
        context.getCounter(COUNTER, COUNTER_CANOPY).increment(1L);
        context.write(KEY, new CanopyWritable(newCluster));

    }
View Full Code Here

                                .newInstance(reader.getValueClass(), conf);

                int i = 0;
                while (reader.next(key, value)) {
                    i++;
                    Cluster cluster = value.get();
                    clusters.add(cluster);
                }

                IOUtils.closeStream(reader);
            }
View Full Code Here

            throws IOException, InterruptedException {

        // Evaluate each cluster's pdf() for that point (one value per cluster)
        double[] pdf = new double[clusters.size()];
        for (int i = 0; i < clusters.size(); i++) {
            Cluster cluster = clusters.get(i);
            pdf[i] = cluster.pdf((int[]) value.get(), measure);
        }

        // Get the cluster with the highest pdf value (greatest similarity) for that point
        double maxSimilarity = pdf[0];
        int maxSimilarityId = 0;
        for (int i = 1; i < pdf.length; i++) {
            if (pdf[i] > maxSimilarity) {
                maxSimilarity = pdf[i];
                maxSimilarityId = i;
            }
        }

        if (maxSimilarity < minSimilarity) {
            // Point could not be added to any cluster
            context.getCounter(COUNTER, COUNTER_NON_CLUSTERED).increment(1L);
            return;
        }

        // Point has been added to that cluster
        context.getCounter(COUNTER, COUNTER_CLUSTERED).increment(1L);
        Cluster cluster = clusters.get(maxSimilarityId);

        KEY.set(cluster.getId());
        context.write(KEY, new ObjectWritable(key));

    }
View Full Code Here

TOP

Related Classes of com.aamend.hadoop.clustering.cluster.Cluster

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.