Package org.apache.hadoop.util.bloom

Examples of org.apache.hadoop.util.bloom.BloomFilter


        ArgumentCaptor<BloomFilter> argument = ArgumentCaptor.forClass(BloomFilter.class);
        verify(context).write(
                any(),
                argument.capture());

        BloomFilter f=argument.getValue();
        assertFalse(f.membershipTest(BloomReducer.toKey("Michigan")));
        assertTrue(f.membershipTest(BloomReducer.toKey("New Jersey")));
        assertTrue(f.membershipTest(BloomReducer.toKey("New Mexico")));
        assertTrue(f.membershipTest(BloomReducer.toKey("Lady Gaga")));
        assertTrue(f.membershipTest(BloomReducer.toKey("Beyonce")));
        assertFalse(f.membershipTest(BloomReducer.toKey("Olivia Newton-John")));
    }
View Full Code Here


        assertFalse(f.membershipTest(BloomReducer.toKey("Olivia Newton-John")));
    }

    @Test
    public void justBloom() {
        BloomFilter f=new BloomFilter(100000,10, Hash.parseHashType("murmur"));
        f.add(new Key(new Text("New Jersey").getBytes()));
        assertTrue(f.membershipTest(new Key(new Text("New Jersey").getBytes())));
    }
View Full Code Here

        super.setup(context);
        Configuration c=context.getConfiguration();
        int vectorSize=c.getInt(VECTOR_SIZE,0);
        int nbHash=c.getInt(NB_HASH,0);
        String hashType=c.get(HASH_TYPE, "murmur");
        f=new BloomFilter(vectorSize,nbHash, Hash.parseHashType(hashType));
    }
View Full Code Here

    }
   
    /**
     * Prepares this instance for use: delegates to the superclass, creates a
     * new murmur-hashed {@link BloomFilter} sized by {@code vectorSize} and
     * {@code nbHash}, initializes {@code ptype} with the current
     * configuration, and obtains the key-to-bytes mapping function.
     */
    @Override
    public void initialize() {
      super.initialize();
      // Start from an empty filter each time initialize() runs.
      bloomFilter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
      // ptype is initialized before it is handed to getKeyToBytesMapFn below.
      ptype.initialize(getConfiguration());
      keyToBytesFn = getKeyToBytesMapFn(ptype, getConfiguration());
    }
View Full Code Here

        iterable = sourceTarget.read(getConfiguration());
      } catch (IOException e) {
        throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
      }

      bloomFilter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
      for (BloomFilter subFilter : iterable) {
        bloomFilter.or(subFilter);
      }
    }
View Full Code Here

    // Calculate our vector size and optimal K value based on approximations
    int vectorSize = getOptimalBloomFilterSize(numMembers, falsePosRate);
    int nbHash = getOptimalK(numMembers, vectorSize);

    // create new Bloom filter
    BloomFilter filter = new BloomFilter(vectorSize, nbHash,
        Hash.MURMUR_HASH);

    // Open file for read

    System.out.println("Training Bloom filter of size " + vectorSize
        + " with " + nbHash + " hash functions, " + numMembers
        + " approximate number of records, and " + falsePosRate
        + " false positive rate");

    String line = null;
    int numRecords = 0;
    for (FileStatus status : fs.listStatus(inputFile)) {
      BufferedReader rdr;
      // if file is gzipped, wrap it in a GZIPInputStream
      if (status.getPath().getName().endsWith(".gz")) {
        rdr = new BufferedReader(new InputStreamReader(
            new GZIPInputStream(fs.open(status.getPath()))));
      } else {
        rdr = new BufferedReader(new InputStreamReader(fs.open(status
            .getPath())));
      }

      System.out.println("Reading " + status.getPath());
      while ((line = rdr.readLine()) != null) {
        filter.add(new Key(line.getBytes()));
        ++numRecords;
      }

      rdr.close();
    }

    System.out.println("Trained Bloom filter with " + numRecords
        + " entries.");

    System.out.println("Serializing Bloom filter to HDFS at " + bfFile);
    FSDataOutputStream strm = fs.create(bfFile);
    filter.write(strm);

    strm.flush();
    strm.close();

    System.out.println("Done training Bloom filter.");
View Full Code Here

            byte[] b;
            if (t.size() == 1) b = DataType.toBytes(t.get(0));
            else b = DataType.toBytes(t, DataType.TUPLE);

            Key k = new Key(b);
            filter = new BloomFilter(vSize, numHash, hType);
            filter.add(k);

            return TupleFactory.getInstance().newTuple(bloomOut());
        }
View Full Code Here

        }
        return list;
    }

    private void init() throws IOException {
        filter = new BloomFilter();
        String dir = "./" + getFilenameFromPath(bloomFile);
        String[] partFiles = new File(dir)
                .list(new FilenameFilter() {
                    @Override
                    public boolean accept(File current, String name) {
View Full Code Here

     * For testing only, do not use directly.
     */
    public void setFilter(DataByteArray dba) throws IOException {
        DataInputStream dis = new DataInputStream(new
            ByteArrayInputStream(dba.get()));
        filter = new BloomFilter();
        filter.readFields(dis);
    }
View Full Code Here

    String filterName = getConfiguration().get(CRUNCH_FILTER_NAME);
    emitter.emit(Pair.of(filterName, bloomFilter));
  }

  static BloomFilter initializeFilter(int size) {
    return new BloomFilter(size, 5, Hash.MURMUR_HASH);
  }
View Full Code Here

TOP

Related Classes of org.apache.hadoop.util.bloom.BloomFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.