Package org.apache.mahout.math

Source Code of org.apache.mahout.math.MurmurHash

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.math;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

/**
 * MurmurHash 2.0: a very fast, non-cryptographic hash suitable for general
 * hash-based lookup.  See http://murmurhash.googlepages.com/ for more details.
 *
 * <p>The C version of MurmurHash 2.0 found at that site was ported
 * to Java by Andrzej Bialecki (ab at getopt org).</p>
 *
 * <p>The {@link ByteBuffer} overloads consume the buffer: on return its position
 * equals its limit.  Multi-byte words are always read little-endian, whatever
 * byte order the buffer had on entry; that order is put back before returning.</p>
 */
public final class MurmurHash {

  /** Multiplier applied in every mixing step of the 32-bit hash. */
  private static final int MIX_32 = 0x5bd1e995;

  /** Right-shift distance used when mixing each 4-byte word of the 32-bit hash. */
  private static final int SHIFT_32 = 24;

  /** Multiplier applied in every mixing step of the 64-bit hash. */
  private static final long MIX_64 = 0xc6a4a7935bd1e995L;

  /** Right-shift distance used by the 64-bit hash, both per word and in the final mix. */
  private static final int SHIFT_64 = 47;

  private MurmurHash() {
    // static utility class; never instantiated
  }

  /**
   * Computes the 32-bit hash of an entire byte array.
   *
   * @param data The bytes to hash.
   * @param seed The seed for the hash.
   * @return The 32 bit hash of all bytes in {@code data}.
   */
  public static int hash(byte[] data, int seed) {
    final ByteBuffer wrapped = ByteBuffer.wrap(data);
    return hash(wrapped, seed);
  }

  /**
   * Computes the 32-bit hash of a slice of a byte array.
   *
   * @param data    The array holding the bytes to hash.
   * @param offset  Index of the first byte to hash.
   * @param length  Number of bytes to hash, starting at {@code offset}.
   * @param seed    The seed for the hash.
   * @return        The 32 bit hash of the selected bytes.
   */
  public static int hash(byte[] data, int offset, int length, int seed) {
    final ByteBuffer wrapped = ByteBuffer.wrap(data, offset, length);
    return hash(wrapped, seed);
  }

  /**
   * Computes the 32-bit hash of the bytes between a buffer's current position
   * and its limit.  The buffer is read to its limit; its byte order is the same
   * on return as it was on entry.
   *
   * @param buf    The buffer whose remaining bytes are hashed.
   * @param seed   The seed for the hash.
   * @return       The 32 bit murmur hash of the remaining bytes.
   */
  public static int hash(ByteBuffer buf, int seed) {
    // Words are always read little-endian; remember the caller's order to put it back.
    final ByteOrder callerOrder = buf.order();
    buf.order(ByteOrder.LITTLE_ENDIAN);

    // The input length is folded into the initial state.
    int state = seed ^ buf.remaining();

    // Mix every complete 4-byte word.
    for (int words = buf.remaining() >>> 2; words > 0; words--) {
      int word = buf.getInt() * MIX_32;
      word = (word ^ (word >>> SHIFT_32)) * MIX_32;
      state = (state * MIX_32) ^ word;
    }

    // Fold in the 1-3 trailing bytes, lowest-order byte first, zero-padded on top.
    if (buf.hasRemaining()) {
      int tail = 0;
      for (int shift = 0; buf.hasRemaining(); shift += Byte.SIZE) {
        tail |= (buf.get() & 0xFF) << shift;
      }
      state = (state ^ tail) * MIX_32;
    }

    // Final avalanche.
    state ^= state >>> 13;
    state *= MIX_32;
    state ^= state >>> 15;

    buf.order(callerOrder);
    return state;
  }


  /**
   * Computes the 64-bit hash of an entire byte array.
   *
   * @param data The bytes to hash.
   * @param seed The seed for the hash.
   * @return The 64 bit hash of all bytes in {@code data}.
   */
  public static long hash64A(byte[] data, int seed) {
    final ByteBuffer wrapped = ByteBuffer.wrap(data);
    return hash64A(wrapped, seed);
  }

  /**
   * Computes the 64-bit hash of a slice of a byte array.
   *
   * @param data    The array holding the bytes to hash.
   * @param offset  Index of the first byte to hash.
   * @param length  Number of bytes to hash, starting at {@code offset}.
   * @param seed    The seed for the hash.
   * @return        The 64 bit hash of the selected bytes.
   */
  public static long hash64A(byte[] data, int offset, int length, int seed) {
    final ByteBuffer wrapped = ByteBuffer.wrap(data, offset, length);
    return hash64A(wrapped, seed);
  }

  /**
   * Computes the 64-bit hash of the bytes between a buffer's current position
   * and its limit.  The buffer is read to its limit; its byte order is the same
   * on return as it was on entry.
   *
   * <p>The {@code int} seed is widened to {@code long} (sign-extended) before it
   * is combined with the length term.</p>
   *
   * @param buf    The buffer whose remaining bytes are hashed.
   * @param seed   The seed for the hash.
   * @return       The 64 bit murmur hash of the remaining bytes.
   */
  public static long hash64A(ByteBuffer buf, int seed) {
    // Words are always read little-endian; remember the caller's order to put it back.
    final ByteOrder callerOrder = buf.order();
    buf.order(ByteOrder.LITTLE_ENDIAN);

    // The input length (scaled by the multiplier) is folded into the initial state.
    long state = seed ^ (buf.remaining() * MIX_64);

    // Mix every complete 8-byte word.
    for (int words = buf.remaining() >>> 3; words > 0; words--) {
      long word = buf.getLong() * MIX_64;
      word = (word ^ (word >>> SHIFT_64)) * MIX_64;
      state = (state ^ word) * MIX_64;
    }

    // Fold in the 1-7 trailing bytes, lowest-order byte first, zero-padded on top.
    if (buf.hasRemaining()) {
      long tail = 0L;
      for (int shift = 0; buf.hasRemaining(); shift += Byte.SIZE) {
        tail |= (buf.get() & 0xFFL) << shift;
      }
      state = (state ^ tail) * MIX_64;
    }

    // Final avalanche.
    state ^= state >>> SHIFT_64;
    state *= MIX_64;
    state ^= state >>> SHIFT_64;

    buf.order(callerOrder);
    return state;
  }

}
TOP

Related Classes of org.apache.mahout.math.MurmurHash

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.