Examples of org.apache.hadoop.util.bloom.BloomFilter

org.apache.hadoop.util.bloom.BloomFilter
European Commission One-Lab Project 034819 (one-lab.org). @see Filter The general behavior of a filter. @see "Space/Time Trade-Offs in Hash Coding with Allowable Errors"

        ArgumentCaptor<BloomFilter> argument = ArgumentCaptor.forClass(BloomFilter.class);
        verify(context).write(
                any(),
                argument.capture());


        BloomFilter f=argument.getValue();
        assertFalse(f.membershipTest(BloomReducer.toKey("Michigan")));
        assertTrue(f.membershipTest(BloomReducer.toKey("New Jersey")));
        assertTrue(f.membershipTest(BloomReducer.toKey("New Mexico")));
        assertTrue(f.membershipTest(BloomReducer.toKey("Lady Gaga")));
        assertTrue(f.membershipTest(BloomReducer.toKey("Beyonce")));
        assertFalse(f.membershipTest(BloomReducer.toKey("Olivia Newton-John")));
    }

View Full Code Here

        assertFalse(f.membershipTest(BloomReducer.toKey("Olivia Newton-John")));
    }


    @Test
    public void justBloom() {
        BloomFilter f=new BloomFilter(100000,10, Hash.parseHashType("murmur"));
        f.add(new Key(new Text("New Jersey").getBytes()));
        assertTrue(f.membershipTest(new Key(new Text("New Jersey").getBytes())));
    }

View Full Code Here

        super.setup(context);
        Configuration c=context.getConfiguration();
        int vectorSize=c.getInt(VECTOR_SIZE,0);
        int nbHash=c.getInt(NB_HASH,0);
        String hashType=c.get(HASH_TYPE, "murmur");
        f=new BloomFilter(vectorSize,nbHash, Hash.parseHashType(hashType));
    }

View Full Code Here

    }
    
    /**
     * Runs the superclass initialization, then creates a new Bloom filter and
     * prepares the function stored in {@code keyToBytesFn}.
     */
    @Override
    public void initialize() {
      super.initialize();
      // New filter built from vectorSize and nbHash (declared outside this method)
      // with the Murmur hash constant.
      bloomFilter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
      // ptype is initialized with the configuration before being handed to
      // getKeyToBytesMapFn below.
      ptype.initialize(getConfiguration());
      keyToBytesFn = getKeyToBytesMapFn(ptype, getConfiguration());
    }

View Full Code Here

        iterable = sourceTarget.read(getConfiguration());
      } catch (IOException e) {
        throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
      }


      bloomFilter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
      for (BloomFilter subFilter : iterable) {
        bloomFilter.or(subFilter);
      }
    }

View Full Code Here

    // Calculate our vector size and optimal K value based on approximations
    int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
    int nbHash = getOptimalK(numMembers, vectorSize);


    // create new Bloom filter
    BloomFilter filter = new BloomFilter(vectorSize, nbHash,
        Hash.MURMUR_HASH);


    // Open file for read


    System.out.println("Training Bloom filter of size " + vectorSize
        + " with " + nbHash + " hash functions, " + numMembers
        + " approximate number of records, and " + falsePosRate
        + " false positive rate");


    String line = null;
    int numRecords = 0;
    for (FileStatus status : fs.listStatus(inputFile)) {
      BufferedReader rdr;
      // if file is gzipped, wrap it in a GZIPInputStream
      if (status.getPath().getName().endsWith(".gz")) {
        rdr = new BufferedReader(new InputStreamReader(
            new GZIPInputStream(fs.open(status.getPath()))));
      } else {
        rdr = new BufferedReader(new InputStreamReader(fs.open(status
            .getPath())));
      }


      System.out.println("Reading " + status.getPath());
      while ((line = rdr.readLine()) != null) {
        filter.add(new Key(line.getBytes()));
        ++numRecords;
      }


      rdr.close();
    }


    System.out.println("Trained Bloom filter with " + numRecords
        + " entries.");


    System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
    FSDataOutputStream strm = fs.create(bfFile);
    filter.write(strm);


    strm.flush();
    strm.close();


    System.out.println("Done training Bloom filter.");

View Full Code Here

            byte[] b;
            if (t.size() == 1) b = DataType.toBytes(t.get(0));
            else b = DataType.toBytes(t, DataType.TUPLE);


            Key k = new Key(b);
            filter = new BloomFilter(vSize, numHash, hType);
            filter.add(k);


            return TupleFactory.getInstance().newTuple(bloomOut());
        }

View Full Code Here

        }
        return list;
    }


    private void init() throws IOException {
        filter = new BloomFilter();
        String dir = "./" + getFilenameFromPath(bloomFile);
        String[] partFiles = new File(dir)
                .list(new FilenameFilter() {
                    @Override
                    public boolean accept(File current, String name) {

View Full Code Here

     * For testing only, do not use directly.
     */
    public void setFilter(DataByteArray dba) throws IOException {
        DataInputStream dis = new DataInputStream(new
            ByteArrayInputStream(dba.get()));
        filter = new BloomFilter();
        filter.readFields(dis);
    }

View Full Code Here

    String filterName = getConfiguration().get(CRUNCH_FILTER_NAME);
    emitter.emit(Pair.of(filterName, bloomFilter));
  }


  static BloomFilter initializeFilter(int size) {
    return new BloomFilter(size, 5, Hash.MURMUR_HASH);
  }

View Full Code Here

0 1 2 3

TOP

Related Classes of org.apache.hadoop.util.bloom.BloomFilter

brickhouse.udf.bloom.BloomFactory

cc.bf.NgramsToBloomFilter$BFBase

cc.bf.ReadNgram

com.cloudera.util.bloom.BloomSet

com.manning.hip.ch7.bloom.BloomFilterDumper

com.ontology2.bakemono.bloom.BloomReducer

com.ontology2.bakemono.bloom.BloomReducerTest

mrdp.appendixA.BloomFilterDriver

org.apache.crunch.contrib.bloomfilter.BloomFilterFn

org.apache.crunch.contrib.bloomfilter.BloomFiltersIT

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.