Package org.apache.mahout.utils

Source Code of org.apache.mahout.utils.TanimotoDistanceMeasure

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.utils;

import org.apache.mahout.matrix.Vector;

import java.util.HashSet;
import java.util.Set;


/**
* Tanimoto coefficient implementation.
*
* http://en.wikipedia.org/wiki/Jaccard_index
*/
public class TanimotoDistanceMeasure extends WeightedDistanceMeasure {


  /**
   * Calculates the distance between two vectors.
   *
   * ((a^2 + b^2 - ab) / ab) - 1;
   *
   * @param vector0
   * @param vector1
   * @return 0 for perfect match, > 0 for greater distance
   */
  @Override
  public double distance(Vector vector0, Vector vector1) {

    // this whole distance measurent thing
    // should be evaluated using an intermediate vector and BinaryFunction or something?
   
    Set<Integer> featuresSeen = new HashSet<Integer>((int)((vector0.size() + vector1.size()) * 0.75));

    double ab = 0.0;
    double a2 = 0.0;
    double b2 = 0.0;

    for (Vector.Element feature : vector0) {
      if (!featuresSeen.add(feature.index())) {

        double a = feature.get();

        double b = vector1.get(feature.index());

        Vector weights = getWeights();
        double weight = weights == null ? 1.0 : weights.get(feature.index());

        ab += a * b * weight;
        a2 += a * a * weight;
        b2 += b * b * weight;
      }
    }


    for (Vector.Element feature : vector1) {
      if (!featuresSeen.add(feature.index())) {

        double a = vector0.get(feature.index());

        double b = feature.get();

        Vector weights = getWeights();
        double weight = weights == null ? 1.0 : weights.get(feature.index());

        ab += a * b * weight;
        a2 += a * a * weight;
        b2 += b * b * weight;
      }
    }

    return ((a2 + b2 - ab) / ab) - 1.0;
  }

}
TOP

Related Classes of org.apache.mahout.utils.TanimotoDistanceMeasure

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.