Package com.somethingsimilar.opposite_of_a_bloom_filter

Source Code of com.somethingsimilar.opposite_of_a_bloom_filter.OoaBFilter$Element

// Copyright 2012 Jeff Hodges and Jeff Smick. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package com.somethingsimilar.opposite_of_a_bloom_filter;

import java.math.RoundingMode;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.util.Arrays;

import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import com.google.common.math.IntMath;

/**
* OoaBFilter is used to filter out duplicate elements from a given dataset or stream. It is
* guaranteed to never return a false positive (that is, it will never say that an item has already
* been seen by the filter when it has not) but may return a false negative.
*
* The check is syncronized on the individual buffer. Depending on the dataset, hash function
* and size of the underlying array lock contention should be very low. Dataset/hash function
* combinations that cause many collisions will result in more contention.
*
* OoaBFilter is thread-safe.
*/
public class OoaBFilter {
  /**
   * The interface that must be implemented by an element to be filtered.
   */
  public interface Element {
    /**
     * Provide a ByteBuffer representation of the element.
     *
     * Ensure the buffer is rewound or the equality check will not work correctly.
     *
     * @return A ByteBuffer that represents the element.
     */
    public ByteBuffer getByteBuffer();
  }

  private static final HashFunction HASH_FUNC = Hashing.murmur3_32();
  private final int sizeMask;
  private final ByteBuffer[] array;
  private static final int MAX_SIZE = 1 << 30;

  /**
   * Constructs a OoaBFilter with an underlying array of the given size, rounded up to the next
   * power of two.
   *
   * This rounding occurs because the hashing is much faster on an array the size of a power of two.
   *
   * @param size The size of the underlying array.
   * @param bufSize The size of the buffers occupying each slot in the array.
   */
  public OoaBFilter(int size, int bufSize) {
    if (size <= 0) {
      throw new IllegalArgumentException("array size must be greater than zero, was " + size);
    }
    if (size > MAX_SIZE) {
      throw new IllegalArgumentException(
          "array size may not be larger than 2**31-1, but will be rounded to larger. was " + size);
    }
    // round to the next largest power of two
    int poweredSize = IntMath.pow(2, IntMath.log2(size, RoundingMode.CEILING));
    this.sizeMask = poweredSize - 1;
    this.array = new ByteBuffer[poweredSize];

    // pre-allocate a ByteBuffer for each slot in the array
    int i = 0;
    while (i < poweredSize) {
      array[i] = ByteBuffer.allocate(bufSize);
      i++;
    }
  }

  /**
   * Returns whether the given elemtn has been previously seen by this filter. That is, if a byte
   * buffer with the same bytes as elem has been passed to to this method before.
   *
   * This method may return false when it has seen an element before. This occurs if the element passed in
   * hashes to the same index in the underlying array as another element previously checked. On the
   * flip side, this method will never return true incorrectly.
   *
   * @param element The byte array that may have been previously seen.
   * @return Whether the element is contained in the OoaBFilter.
   */
  public boolean containsAndAdd(Element element) {
    ByteBuffer eBytes = element.getByteBuffer();
    HashCode code = HASH_FUNC.hashBytes(eBytes.array());
    int index = code.asInt() & sizeMask;

    boolean seen = true;
    ByteBuffer buffer = array[index];

    synchronized(buffer) {
      if (!buffer.equals(eBytes)) {
        seen = false;
        buffer.put(eBytes);
        buffer.rewind();
      }
    }

    return seen;
  }

  /**
   * Returns the size of the underlying array. Welp.
   *
   * @return The size of the underlying array.
   */
  public int getSize() {
    return array.length;
  }
}
TOP

Related Classes of com.somethingsimilar.opposite_of_a_bloom_filter.OoaBFilter$Element

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.