Package org.apache.mahout.math

Examples of org.apache.mahout.math.VectorWritable


    FileSystem fs = FileSystem.get(conf);
    String vectorsFolder = inputDir + "/tfidf-vectors";
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(vectorsFolder + "/part-r-00000"), conf);
    List<Vector> points = new ArrayList<Vector>();
    Text key = new Text();
    VectorWritable value = new VectorWritable();
   
    while (reader.next(key, value)) {
      points.add(value.get());
    }
    System.out.println(points.size());
    reader.close();
    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(points, k);
    List<SoftCluster> clusters = new ArrayList<SoftCluster>();
View Full Code Here


    FileSystem fs = FileSystem.get(conf);
    String vectorsFolder = inputDir + "/tfidf-vectors";
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(vectorsFolder + "/part-00000"), conf);
    List<Vector> points = new ArrayList<Vector>();
    Text key = new Text();
    VectorWritable value = new VectorWritable();
   
    while (reader.next(key, value)) {
      points.add(value.get());
    }
    System.out.println(points.size());
    reader.close();
  
    List<Canopy> canopies = CanopyClusterer.createCanopies(points, new CosineDistanceMeasure(), 0.7, 0.5);
View Full Code Here

    FileSystem fs = FileSystem.get(conf);
    String vectorsFolder = inputDir + "/tfidf-vectors";
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(vectorsFolder + "/part-r-00000"), conf);
    List<Vector> points = new ArrayList<Vector>();
    Text key = new Text();
    VectorWritable value = new VectorWritable();
   
    while (reader.next(key, value)) {
      points.add(value.get());
    }
    System.out.println(points.size());
    reader.close();
    List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(points, k);
    List<Cluster> clusters = new ArrayList<Cluster>();
View Full Code Here

    // Two generateSamples calls append to sampleData.
    // NOTE(review): presumably 300 samples per call around (1,0) and (0,2)
    // with spreads 0.5 and 0.1 — confirm against RandomPointsUtil.
    RandomPointsUtil.generateSamples(sampleData, 300, 1, 0, 0.5);
    RandomPointsUtil.generateSamples(sampleData, 300, 0, 2, 0.1);

    // Wrap each sample Vector in a VectorWritable, the element type handed
    // to DirichletClusterer below.
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    for (Vector sd : sampleData) {
      points.add(new VectorWritable(sd));
    }

    // Build the clusterer over the wrapped points with a Gaussian cluster
    // distribution seeded by a 2-element DenseVector. The meaning of the
    // trailing numeric arguments is not visible in this excerpt.
    DirichletClusterer dc = new DirichletClusterer(points,
        new GaussianClusterDistribution(new VectorWritable(
            new DenseVector(2))), 1.0, 10, 2, 2);
    // NOTE(review): 20 is presumably the iteration count — confirm.
    List<Cluster[]> result = dc.cluster(20);
    // Print the id and center of every cluster in the last array of the result.
    for (Cluster cluster : result.get(result.size() - 1)) {
      System.out.println("Cluster id: " + cluster.getId() + " center: "
          + cluster.getCenter().asFormatString());
View Full Code Here

    // Write the contents of iter through writer, then close the writer
    // before the file is read back.
    writer.write(iter);
    writer.close();
   
    // Re-open the file at path and count how many (LongWritable,
    // VectorWritable) records it contains.
    // NOTE(review): seqReader is not closed within the visible lines —
    // confirm it is closed further down, otherwise the stream leaks.
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
    LongWritable key = new LongWritable();
    VectorWritable value = new VectorWritable();
    int count = 0;
    while (seqReader.next(key, value)){
      count++;
    }
    // Exactly 50 records are expected in the file.
    Assert.assertEquals(count + " does not equal: " + 50, 50, count);
View Full Code Here

   
    // Running index, used only to label each printed vector.
    int i = 0;
    for (Vector vector : iterable) {
      // Every iterated vector must be non-null; it is printed and then
      // wrapped in a VectorWritable and appended to sampleData.
      Assert.assertNotNull(vector);
      System.out.println("Vector[" + i++ + "]=" + formatVector(vector));
      sampleData.add(new VectorWritable(vector));
    }
  }
View Full Code Here

      // Report hits for model m; misses are derived as samples.size() - count.
      System.out.println("Model[" + m + "] had " + count + " hits (!) and " + (samples.size() - count)
                         + " misses (? in pdf order) during the last iteration:");
      // One MapElement per sample, pairing the model's pdf for that sample
      // with the entry of docs at the same index.
      MapElement[] map = new MapElement[samples.size()];
      // sort the samples by pdf
      for (int i = 0; i < samples.size(); i++) {
        VectorWritable sample = samples.get(i);
        map[i] = new MapElement(model.pdf(sample), docs[i]);
      }
      // Ordering comes from MapElement's own comparison.
      // NOTE(review): sort direction is not visible here — confirm.
      Arrays.sort(map);
      // now find the n=model.count() most likely docs and output them
      for (int i = 0; i < map.length; i++) {
View Full Code Here

      // Copy each remaining matrix entry into tmp, using the entry's column
      // as the index and its value as the element.
      while(it.hasNext()) {
        DistributedRowMatrix.MatrixEntryWritable e = it.next();
        tmp.setQuick(e.getCol(), e.getVal());
      }
      // Emit tmp for outRow, converted to a SequentialAccessSparseVector and
      // wrapped in a VectorWritable.
      SequentialAccessSparseVector outVector = new SequentialAccessSparseVector(tmp);
      out.collect(outRow, new VectorWritable(outVector));
    }
View Full Code Here

      testData.mkdir();
    }
    // Resolve the FileSystem from the URI of the "testdata" path.
    FileSystem fs = FileSystem.get(new Path("testdata").toUri(), conf);
    // Wrap every raw Vector in a VectorWritable for writing.
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    for (Vector v : raw)
      points.add(new VectorWritable(v));
    // The same points are written to two input files.
    ClusteringTestUtils.writePointsToFile(points, "testdata/file1", fs, conf);
    ClusteringTestUtils.writePointsToFile(points, "testdata/file2", fs, conf);
    // now run the Job
    MeanShiftCanopyJob.runJob("testdata", "output", EuclideanDistanceMeasure.class.getName(), 4, 1, 0.5, 10);
    // NOTE(review): `conf` is used above before this local declaration, so
    // those uses presumably refer to a field of the same name — confirm.
    JobConf conf = new JobConf(MeanShiftCanopyDriver.class);
View Full Code Here

      }
      // Apply every remaining row vector to accumulator via addTo.
      while(it.hasNext()) {
        Vector row = it.next().get();
        row.addTo(accumulator);
      }
      // Emit the accumulator for rowNum, copied into a
      // SequentialAccessSparseVector and wrapped in a VectorWritable.
      out.collect(rowNum, new VectorWritable(new SequentialAccessSparseVector(accumulator)));
    }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.VectorWritable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.