Package org.apache.mahout.math

Examples of org.apache.mahout.math.NamedVector


    @Override
    protected void reduce(LongWritable id, Iterable<VectorWritable> vectors,
            Context context) throws IOException, InterruptedException {
        for (VectorWritable nextVectorWritable : vectors) {
            NamedVector nextVectorizedDocument = (NamedVector) nextVectorWritable
                    .get();
            String docLabel = nextVectorizedDocument.getName();
            String[] parts = docLabel.split(",");
            String docName = parts[0];
            String label = parts[1];

            log.debug("Writing out " + vectorCount + "th vector of doc: "
                    + docName);
            context.write(
                    new Text(docName),
                    new VectorWritable(new NamedVector(nextVectorizedDocument
                            .getDelegate(), label)));
        }
    }
View Full Code Here


    String docLabel = ((NamedVector) nextVectorizedDocument).getName();
    String[] parts = docLabel.split(",");
    String docName = parts[0];
    String label = parts[1];

    writer.append(new Text(docName), new VectorWritable(new NamedVector(
      nextVectorizedDocument, label)));
      }
      log.info("Sequence file written to HDFS successfully. Docs written: " + index);
  } finally {
      IOUtils.closeStream(writer);
View Full Code Here

        Vector vector = new SequentialAccessSparseVector(cardinality);
        vector.assign(features);
        log.debug(String.format("Vector: label:%s Fields: %d", docLabel,
                vector.size()));

        return new NamedVector(vector, docLabel);
    }
View Full Code Here

    SequenceFile.Writer seqWriter =
        new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class, VectorWritable.class);
    IntWritable iw = new IntWritable();
    for (int i = 0; i < numEigenVectors; i++) {
      // Persist eigenvectors sorted by eigenvalues in descending order
      NamedVector v = new NamedVector(state.getRightSingularVector(numEigenVectors - 1 - i),
          "eigenVector" + i + ", eigenvalue = " + state.getSingularValue(numEigenVectors - 1 - i));
      Writable vw = new VectorWritable(v);
      iw.set(i);
      seqWriter.append(iw, vw);
    }
View Full Code Here

        name = indexReader.document(doc, idFieldSelector).get(idField);
      } else {
        name = String.valueOf(doc);
      }
      if (normPower == LuceneIterable.NO_NORMALIZING) {
        result = new NamedVector(result, name);
      } else {
        result = new NamedVector(result.normalize(normPower), name);
      }
      return result;
    } catch (IOException ioe) {
      throw new IllegalStateException(ioe);
    }
View Full Code Here

  }

  @Test
  public void testDirichlet2() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description =
        new DistributionDescription(GaussianClusterDistribution.class.getName(),
                                    RandomAccessSparseVector.class.getName(),
                                    null,
                                    prototype.getDelegate().size());
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output, description, 15, 10, 1.0, true, true, 0, true);
    // run ClusterDumper
    ClusterDumper clusterDumper =
        new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
View Full Code Here

  }

  @Test
  public void testDirichlet3() throws Exception {
    Path output = getTestTempDirPath("output");
    NamedVector prototype = (NamedVector) sampleData.get(0).get();
    DistributionDescription description =
        new DistributionDescription(DistanceMeasureClusterDistribution.class.getName(),
                                    RandomAccessSparseVector.class.getName(),
                                    ManhattanDistanceMeasure.class.getName(),
                                    prototype.getDelegate().size());
    Configuration conf = new Configuration();
    DirichletDriver.run(conf, getTestTempDirPath("testdata"), output, description, 15, 10, 1.0, true, true, 0, true);
    // run ClusterDumper
    ClusterDumper clusterDumper =
        new ClusterDumper(finalClusterPath(conf, output, 10), new Path(output, "clusteredPoints"));
View Full Code Here

    Configuration conf = new Configuration();
    Collection<Double> newEigenValues = new ArrayList<Double>();

    int i = 0;
    for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors, conf)) {
      NamedVector v = (NamedVector) value.get();
      eigenVectors.assignRow(i, v);
      log.info(v.getName());
      if(EigenVector.getCosAngleError(v.getName()) < 1e-3) {
        eigenvalues.add(EigenVector.getEigenValue(v.getName()));
      }
      i++;
    }
    assertEquals("number of clean eigenvectors", 23, i);

    i = 0;
    for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(cleanEigenvectors2, conf)) {
      NamedVector v = (NamedVector) value.get();
      log.info(v.getName());
      eigenVectors2.assignRow(i, v);
      newEigenValues.add(EigenVector.getEigenValue(v.getName()));
      i++;
    }

    Collection<Integer> oldEigensFound = new ArrayList<Integer>();
    for(int row = 0; row < eigenVectors.numRows(); row++) {
View Full Code Here

    Iterable<Vector> iterable = new LuceneIterable(reader, "id", "content", mapper);

    i = 0;
    for (Vector vector : iterable) {
      assertNotNull(vector);
      NamedVector namedVector;
      if (vector instanceof NamedVector) {
        //rename it for testing purposes
        namedVector = new NamedVector(((NamedVector) vector).getDelegate(), "P(" + i + ')');

      } else {
        namedVector = new NamedVector(vector, "P(" + i + ')');
      }
      System.out.println(AbstractCluster.formatVector(namedVector, termDictionary));
      sampleData.add(new VectorWritable(namedVector));
      i++;
    }
View Full Code Here

    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }

    if (namedVector) {
      vector = new NamedVector(vector, key.toString());
    }

    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if (vector.getNumNondefaultElements() > 0) {
      VectorWritable vectorWritable = new VectorWritable(vector);
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.NamedVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.