Package de.jungblut.math.sparse

Examples of de.jungblut.math.sparse.SparseDoubleVector


  }

  @Override
  public DoubleVector predict(DoubleVector features) {
    if (features.isSparse()) {
      SparseDoubleVector tmp = new SparseDoubleVector(
          features.getDimension() + 1);
      tmp.set(0, 1d);
      Iterator<DoubleVectorElement> iterateNonZero = features.iterateNonZero();
      while (iterateNonZero.hasNext()) {
        DoubleVectorElement next = iterateNonZero.next();
        tmp.set(next.getIndex() + 1, next.getValue());
      }
      features = tmp;
    } else {
      features = new DenseDoubleVector(1d, features.toArray());
    }
View Full Code Here


     */
    @Override
    protected void reduce(Text key, Iterable<TextIntIntIntWritable> values,
        Context context) throws IOException, InterruptedException {

      SparseDoubleVector vector = new SparseDoubleVector(numTokens);
      for (TextIntIntIntWritable pair : values) {
        if (documentThreshold > pair.getSecond().get()) {
          double val = 0d;
          if (wordCount) {
            val = pair.getThird().get();
          } else {
            val = pair.getThird().get()
                * (FastMath.log(numDocs) - FastMath.log(pair.getSecond().get()));
          }
          vector.set(pair.getFourth().get(), val);
        }
      }

      context.write(key, new VectorWritable(vector));

View Full Code Here

      List<String[]> tokenizedDocuments, String[] dictionary) {

    List<DoubleVector> vectorList = new ArrayList<>(tokenizedDocuments.size());
    int oovIndex = Arrays.binarySearch(dictionary, OUT_OF_VOCABULARY);
    for (String[] arr : tokenizedDocuments) {
      DoubleVector vector = new SparseDoubleVector(dictionary.length);
      HashMultiset<String> set = HashMultiset.create(Arrays.asList(arr));
      for (String s : arr) {
        int foundIndex = Arrays.binarySearch(dictionary, s);
        // simply ignore tokens we don't know or that are spam
        if (foundIndex >= 0) {
          // the index is equal to its mapped dimension
          vector.set(foundIndex, set.count(s));
        } else if (oovIndex >= 0) {
          vector.set(oovIndex, 1);
        }
      }
      vectorList.add(vector);
    }
View Full Code Here

   */
  public static DoubleVector tfIdfVectorize(int numDocuments,
      String[] document, String[] dictionary, int[] termDocumentCount) {

    final int numTokens = dictionary.length;
    DoubleVector vector = new SparseDoubleVector(numTokens);
    HashMultiset<String> termFrequencySet = HashMultiset.create(Arrays
        .asList(document));

    int oovIndex = Arrays.binarySearch(dictionary, OUT_OF_VOCABULARY);
    double docLog = FastMath.log(numDocuments);

    for (String token : document) {
      int index = Arrays.binarySearch(dictionary, token);
      if (index >= 0) {
        double tfIdf = termFrequencySet.count(token)
            * (docLog - FastMath.log(termDocumentCount[index]));
        vector.set(index, tfIdf);
      } else {
        if (oovIndex >= 0) {
          vector.set(oovIndex, 1d);
        }
      }
    }
    return vector;
  }
View Full Code Here

      clz = 0;
    }
    if (binaryClassification) {
      return new DenseDoubleVector(new double[] { clz });
    } else {
      DoubleVector vec = outcomeDimension > 10 ? new SparseDoubleVector(
          outcomeDimension) : new DenseDoubleVector(outcomeDimension);
      vec.set(clz, 1);
      return vec;
    }
  }
View Full Code Here

    }
  }

  protected DoubleVector newInstance(DoubleVector v) {
    if (v.isSparse()) {
      return new SparseDoubleVector(v.getDimension());
    } else {
      return new DenseDoubleVector(v.getDimension());
    }
  }
View Full Code Here

    final int numRowIndices = in.readInt();
    for (int i = 0; i < numRowIndices; i++) {
      final int rowIndex = in.readInt();
      final int numColumns = in.readInt();
      DoubleVector row = new SparseDoubleVector(mat.getColumnCount());
      for (int j = 0; j < numColumns; j++) {
        row.set(in.readInt(), in.readDouble());
      }
      mat.setRowVector(rowIndex, row);
    }

    return mat;
View Full Code Here

    DoubleVector vector = null;
    switch (typeByte) {
      case SPARSE:
        int length = in.readInt();
        int dim = in.readInt();
        vector = new SparseDoubleVector(dim);
        for (int i = 0; i < length; i++) {
          int index = in.readInt();
          double value = in.readDouble();
          vector.set(index, value);
        }
View Full Code Here

        new CosineDistance());

    // we separate stuff in two dimensions each
    DoubleVector left = new SingleEntryDoubleVector(0d);
    DoubleVector right = new SingleEntryDoubleVector(1d);
    DoubleVector v1 = new SparseDoubleVector(4);
    v1.set(0, 1d);
    v1.set(1, 1d);

    DoubleVector v2 = new SparseDoubleVector(4);
    v2.set(2, 1d);
    v2.set(3, 2.5);

    DoubleVector v3 = new SparseDoubleVector(4);
    v3.set(0, 2d);
    v3.set(1, 2d);

    DoubleVector v4 = new SparseDoubleVector(4);
    v4.set(2, 0.5);
    v4.set(3, 1.5);

    DoubleVector[] trainingSet = new DoubleVector[] { v1, v2, v3, v4 };
    DoubleVector[] outcomeSet = new DoubleVector[] { left, right, left, right };

    neighbours.train(trainingSet, outcomeSet);

    DoubleVector predict = neighbours.predict(v4);
    assertEquals(right, predict);

    predict = neighbours.predict(v2);
    assertEquals(right, predict);

    predict = neighbours.predict(v1);
    assertEquals(left, predict);

    predict = neighbours.predict(v3);
    assertEquals(left, predict);

    // predict between, slightly to the right
    DoubleVector vx = new SparseDoubleVector(4);
    vx.set(1, 1d);
    vx.set(3, 2.5);

    predict = neighbours.predict(vx);
    assertEquals(right, predict);
  }
View Full Code Here

  @Test
  public void testVectorInvertedIndex() {

    InvertedIndex<DoubleVector, Integer> invIndex = InvertedIndex
        .createVectorIndex(new CosineDistance());
    DoubleVector v1 = new SparseDoubleVector(4);
    v1.set(1, 0.6931471805599453);
    v1.set(0, 1.3862943611198906);
    DoubleVector v2 = new SparseDoubleVector(4);
    v2.set(2, 0.6931471805599453);
    v2.set(1, 1.3862943611198906);

    invIndex.build(Arrays.asList(v1, v2));

    DoubleVector v3 = new SparseDoubleVector(4);
    v3.set(3, 0.2);
    v3.set(1, 1);
    List<DistanceResult<DoubleVector>> res = invIndex.query(v3);
    assertEquals(2, res.size());
    assertEquals(v2, res.get(0).get());
    assertEquals(v1, res.get(1).get());
View Full Code Here

TOP

Related Classes of de.jungblut.math.sparse.SparseDoubleVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.