Package org.apache.mahout.knn

Source Code of org.apache.mahout.knn.OverlapTest$Score

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.knn;

import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.knn.search.FastProjectionSearch;
import org.apache.mahout.knn.search.Searcher;
import org.apache.mahout.math.*;
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.math.function.VectorFunction;
import org.apache.mahout.math.random.Normal;
import org.apache.mahout.math.random.WeightedThing;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;

/**
* Measure speed and overlap for different search methods.
*/

public class OverlapTest {
  Logger log = LoggerFactory.getLogger(this.getClass());

  @Test
  public void testOverlap() {
    int dimension = 10;
    int numDataVectors = 1000000;
    int numQueries = 100;
    int depth = 2000;

    Matrix data = new DenseMatrix(numDataVectors, dimension);
    data.assign(new Normal());

    Matrix queries = new DenseMatrix(numQueries, dimension);
    queries.assign(new Normal());

    Vector queryNorms = queries.aggregateRows(new VectorFunction() {
      @Override
      public double apply(Vector f) {
        return f.getLengthSquared();
      }
    });

    Matrix scores = data.times(queries.transpose()).assign(Functions.mult(-2));
    for (MatrixSlice row : scores) {
      Vector v1 = data.viewRow(row.index());
      row.vector().assign(Functions.plus(v1.getLengthSquared()));
      row.vector().assign(queryNorms, Functions.PLUS);
    }
    log.warn("Reference scoring done");

    List<PriorityQueue<Score>> bestScores = Lists.newArrayList();
    for (int i = 0; i < numQueries; i++) {
      bestScores.add(new PriorityQueue<Score>(depth, Ordering.natural().reverse()));
    }

    for (MatrixSlice row : scores) {
      for (int i = 0; i < numQueries; i++) {
        final PriorityQueue<Score> q = bestScores.get(i);
        q.add(new Score(row.index(), row.vector().get(i)));
        if (q.size() > depth) {
          q.poll();
        }
      }
    }

    List<Set<Integer>> reference = Lists.newArrayList();
    List<Double> radius = Lists.newArrayList();
    for (MatrixSlice query : queries) {
      Set<Integer> x = Sets.newHashSet();
      double furthest = -Double.MAX_VALUE;
      for (Score best : bestScores.get(query.index())) {
        x.add(best.index);
        furthest = Math.max(best.score, furthest);
      }
      reference.add(x);
      radius.add(furthest);
    }
    log.warn("Reference data stored");
    log.warn("Starting add with speedup of {}", numDataVectors / (dimension * 2.0 * depth * 4.0));

    Searcher sut = new FastProjectionSearch(new SquaredEuclideanDistanceMeasure(), dimension * 2, depth * 4);
    sut.addAllMatrixSlicesAsWeightedVectors(data);
    log.warn("Added data with speedup of {}", numDataVectors / (dimension * 2.0 * depth * 4.0));

    long t0 = System.nanoTime();
    for (MatrixSlice query : queries) {
      List<WeightedThing<Vector>> r = sut.search(query.vector(), depth);
      Set<Integer> x = Sets.newHashSet();
      for (WeightedThing<Vector> vector : r) {
        x.add(((WeightedVector)vector.getValue()).getIndex());
      }
      double overlap = Sets.intersection(reference.get(query.index()), x).size() / (double) depth;
      System.out.printf("%4.1f %10.3f\n", 100 * overlap, r.get(depth - 1).getWeight() / radius.get(query.index()));
    }
    long t1 = System.nanoTime();
    log.warn("Done query time = {}", String.format("%.4f", (t1 - t0) / 1e9 / numQueries));
  }

  private static class Score implements Comparable<Score> {
    int index;
    double score;

    public Score(int index, double score) {
      this.index = index;
      this.score = score;
    }

    @Override
    public int compareTo(Score other) {
      return Double.compare(score, other.score);
    }
  }
}
TOP

Related Classes of org.apache.mahout.knn.OverlapTest$Score

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.