Line data Source code
1 : // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 :
5 : #include "leveldb/filter_policy.h"
6 :
7 : #include "leveldb/slice.h"
8 : #include "util/hash.h"
9 :
10 : namespace leveldb {
11 :
12 : namespace {
13 : static uint32_t BloomHash(const Slice& key) {
14 35127 : return Hash(key.data(), key.size(), 0xbc9f1d34);
15 : }
16 :
17 244 : class BloomFilterPolicy : public FilterPolicy {
18 : private:
19 : size_t bits_per_key_;
20 : size_t k_;
21 :
22 : public:
23 : explicit BloomFilterPolicy(int bits_per_key)
24 488 : : bits_per_key_(bits_per_key) {
25 : // We intentionally round down to reduce probing cost a little bit
26 244 : k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
27 244 : if (k_ < 1) k_ = 1;
28 244 : if (k_ > 30) k_ = 30;
29 : }
30 :
31 282 : virtual const char* Name() const {
32 282 : return "leveldb.BuiltinBloomFilter2";
33 : }
34 :
35 517 : virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
36 : // Compute bloom filter size (in both bits and bytes)
37 517 : size_t bits = n * bits_per_key_;
38 :
39 : // For small n, we can see a very high false positive rate. Fix it
40 : // by enforcing a minimum bloom filter length.
41 517 : if (bits < 64) bits = 64;
42 :
43 517 : size_t bytes = (bits + 7) / 8;
44 517 : bits = bytes * 8;
45 :
46 517 : const size_t init_size = dst->size();
47 517 : dst->resize(init_size + bytes, 0);
48 517 : dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
49 517 : char* array = &(*dst)[init_size];
50 18754 : for (size_t i = 0; i < n; i++) {
51 : // Use double-hashing to generate a sequence of hash values.
52 : // See analysis in [Kirsch,Mitzenmacher 2006].
53 36474 : uint32_t h = BloomHash(keys[i]);
54 18237 : const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
55 127659 : for (size_t j = 0; j < k_; j++) {
56 109422 : const uint32_t bitpos = h % bits;
57 109422 : array[bitpos/8] |= (1 << (bitpos % 8));
58 109422 : h += delta;
59 : }
60 : }
61 517 : }
62 :
63 16890 : virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
64 16890 : const size_t len = bloom_filter.size();
65 16890 : if (len < 2) return false;
66 :
67 16890 : const char* array = bloom_filter.data();
68 16890 : const size_t bits = (len - 1) * 8;
69 :
70 : // Use the encoded k so that we can read filters generated by
71 : // bloom filters created using different parameters.
72 16890 : const size_t k = array[len-1];
73 16890 : if (k > 30) {
74 : // Reserved for potentially new encodings for short bloom filters.
75 : // Consider it a match.
76 : return true;
77 : }
78 :
79 16890 : uint32_t h = BloomHash(key);
80 16890 : const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
81 91969 : for (size_t j = 0; j < k; j++) {
82 80001 : const uint32_t bitpos = h % bits;
83 80001 : if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
84 75079 : h += delta;
85 : }
86 : return true;
87 : }
88 : };
89 : }
90 :
91 244 : const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
92 488 : return new BloomFilterPolicy(bits_per_key);
93 : }
94 :
95 : } // namespace leveldb
|