Package org.apache.mahout.math

Examples of org.apache.mahout.math.RandomAccessSparseVector


    if (!it.hasNext()) {
      return;
    }
    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);

      do {
        String term = (sf.getAttribute(TermAttribute.class)).term();
        if (term.length() > 0 && dictionary.containsKey(term)) { // ngram
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      } while (sf.incrementToken());

      sf.end();
      sf.close();
    } else {
      for (String term : value.getEntries()) {
        if (term.length() > 0 && dictionary.containsKey(term)) { // unigram
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      }
    }
    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }
   
    if (namedVector) {
      vector = new NamedVector(vector, key.toString());
    }
   
    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if (vector.getNumNondefaultElements() > 0) {
      VectorWritable vectorWritable = new VectorWritable(vector);
      context.write(key, vectorWritable);
    } else {
      context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
    }
View Full Code Here


      throws IOException, InterruptedException {
     
      // set up the return value and perform the computations
      double norm = vectorNorm(vector.get());
      Vector w = vector.get().assign(Functions.div(norm));
      RandomAccessSparseVector out = new RandomAccessSparseVector(w);
     
      // finally write the output
      context.write(row, new VectorWritable(out));
    }
View Full Code Here

  /**
   * Collects one user's item preferences into a single sparse vector and writes it out when
   * the vector holds at least {@code minPreferences} non-default elements.
   *
   * <p>NOTE(review): this excerpt ends before the method's closing brace, so anything that
   * follows the final {@code if} block is not visible here.
   *
   * @param userID key under which the user's vector is written
   * @param itemPrefs the user's preferences; each entry's {@code get()} supplies the item ID
   * @param context receives the output pair and the {@code Counters.USERS} increment
   * @throws IOException declared by the signature
   * @throws InterruptedException declared by the signature
   */
  @Override
  protected void reduce(VarLongWritable userID,
                        Iterable<VarLongWritable> itemPrefs,
                        Context context) throws IOException, InterruptedException {
    // Cardinality is Integer.MAX_VALUE; the second argument (100) is presumably an
    // initial-size hint -- confirm against the RandomAccessSparseVector constructor.
    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (VarLongWritable itemPref : itemPrefs) {
      // Map the item ID to an int vector index.
      int index = TasteHadoopUtils.idToIndex(itemPref.get());
      // Use the carried preference value for EntityPrefWritable entries; any other entry counts as 1.0.
      float value = itemPref instanceof EntityPrefWritable ? ((EntityPrefWritable) itemPref).getPrefValue() : 1.0f;
      userVector.set(index, value);
    }

    // Users below the minPreferences threshold produce no output and no counter update
    // in the visible code.
    if (userVector.getNumNondefaultElements() >= minPreferences) {
      VectorWritable vw = new VectorWritable(userVector);
      // Presumably selects a more compact, lower-precision encoding -- confirm in VectorWritable.
      vw.setWritesLaxPrecision(true);
      context.getCounter(Counters.USERS).increment(1);
      context.write(userID, vw);
    }
View Full Code Here

                    Reporter reporter) throws IOException {
      int row = r.get();
      Iterator<Vector.Element> it = v.get().iterateNonZero();
      while (it.hasNext()) {
        Vector.Element e = it.next();
        RandomAccessSparseVector tmp = new RandomAccessSparseVector(newNumCols, 1);
        tmp.setQuick(row, e.get());
        r.set(e.index());
        out.collect(r, new VectorWritable(tmp));
      }
    }
View Full Code Here

                       OutputCollector<IntWritable,VectorWritable> out,
                       Reporter reporter) throws IOException {
      if (!it.hasNext()) {
        return;
      }
      Vector accumulator = new RandomAccessSparseVector(it.next().get());
      while (it.hasNext()) {
        Vector row = it.next().get();
        accumulator.assign(row, Functions.PLUS);
      }
      out.collect(rowNum, new VectorWritable(new SequentialAccessSparseVector(accumulator)));
    }
View Full Code Here

  /**
   * Creates an empty vector of size {@code cardinality} for one input record and passes it,
   * together with the record's text, to {@code encoder.addToVector}.
   *
   * <p>NOTE(review): this excerpt ends right after the encoder call; how the vector is
   * emitted (if at all) is not visible here.
   *
   * @param key record key; its string form becomes the vector name when {@code namedVectors} is set
   * @param value record text handed to the encoder
   * @param context job context; not used in the visible portion
   * @throws IOException declared by the signature
   * @throws InterruptedException declared by the signature
   */
  protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    Vector vector;
    // Choose the sparse representation according to the sequentialVecs flag.
    if (sequentialVecs) {
      vector = new SequentialAccessSparseVector(cardinality);
    } else {
      vector = new RandomAccessSparseVector(cardinality);
    }
    // Optionally wrap the vector so it carries the key's string form as its name.
    if (namedVectors){
      vector = new NamedVector(vector, key.toString());
    }
    // The encoder receives the vector to fill; presumably it mutates it in place -- confirm in the encoder.
    encoder.addToVector(value.toString(), vector);
View Full Code Here

    if (!it.hasNext()) {
      return;
    }
    Vector value = it.next().get();
    Iterator<Vector.Element> it1 = value.iterateNonZero();
    Vector vector = new RandomAccessSparseVector((int) featureCount, value.getNumNondefaultElements());
    while (it1.hasNext()) {
      Vector.Element e = it1.next();
      if (!dictionary.containsKey(e.index())) {
        continue;
      }
      long df = dictionary.get(e.index());
      if (maxDf > -1 && df > maxDf) {
        continue;
      }
      if (df < minDf) {
        df = minDf;
      }
      vector.setQuick(e.index(), tfidf.calculate((int) e.get(), (int) df, (int) featureCount, (int) vectorCount));
    }
    if (sequentialAccess) {
      vector = new SequentialAccessSparseVector(vector);
    }
   
View Full Code Here

          Closeables.closeQuietly(iterator);
        }

        int outDim = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
        outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
                     ? new RandomAccessSparseVector(outDim, 10)
                     : new DenseVector(outDim);
      } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
      }
    }
View Full Code Here

    @Override
    public void configure(JobConf conf) {
      int outputDimension = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
      outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
                   ? new RandomAccessSparseVector(outputDimension, 10)
                   : new DenseVector(outputDimension);
    }
View Full Code Here

      IntWritable iw = new IntWritable();
      VectorWritable vw = new VectorWritable();
      int avgNZero = 40;
      int n = 4500000;
      for (int i = 1; i < n; i++) {
        Vector vector = new RandomAccessSparseVector(n);
        double nz = Math.round(avgNZero * (rnd.nextGaussian() + 1));
        if (nz < 0) {
          nz = 0;
        }
        for (int j = 1; j < nz; j++) {
          vector.set(rnd.nextInt(n), rnd.nextGaussian() * 25 + 3);
        }
        iw.set(i);
        vw.set(vector);
        w.append(iw, vw);
      }
View Full Code Here

TOP

Related Classes of org.apache.mahout.math.RandomAccessSparseVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.