1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <stdio.h>
#include <inttypes.h>
class BloomFilter
{
private:
BloomFilter(const BloomFilter &);
BloomFilter& operator=(const BloomFilter &);
public:
BloomFilter(int size, int hashes, uint32_t *buf = NULL);
~BloomFilter();
bool check(const uint32_t *data, int len, bool add);
bool check(const char *data, int len, bool add);
bool check(const char *data, bool add);
private:
int _size;
int _hashes;
uint32_t *_buf;
bool _mine;
static const uint32_t MULT1 = 1500450271;
static const uint32_t MULT2 = 2860486313U;
uint32_t hash(const uint32_t *data, int len, uint32_t multiplier, uint32_t max);
uint32_t hash(const char *data, int len, uint32_t multiplier, uint32_t max);
uint32_t hash(const char *data, uint32_t multiplier, uint32_t max);
bool check(uint32_t hash1, uint32_t hash2, bool add);
bool isSet(uint32_t pos, bool set);
};
uint32_t
BloomFilter::hash(const uint32_t *data, int len, uint32_t multiplier, uint32_t max)
{
uint32_t val = 1;
for (int i = 0; i < len; i++) {
val = (multiplier * val + data[i]) % max;
}
return val;
}
uint32_t
BloomFilter::hash(const char *data, int len, uint32_t multiplier, uint32_t max)
{
uint32_t val = 1;
for (int i = 0; i < len; i++) {
val = (multiplier * val + data[i]) % max;
}
return val;
}
uint32_t
BloomFilter::hash(const char *data, uint32_t multiplier, uint32_t max)
{
uint32_t val = 1;
for (int i = 0; data[i]; i++) {
val = (multiplier * val + data[i]) % max;
}
return val;
}
bool
BloomFilter::check(const uint32_t *data, int len, bool add)
{
uint32_t hash1 = hash(data, len, MULT1, _size);
uint32_t hash2 = hash(data, len, MULT2, _size);
return check(hash1, hash2, add);
}
bool
BloomFilter::check(const char *data, int len, bool add)
{
uint32_t hash1 = hash(data, len, MULT1, _size);
uint32_t hash2 = hash(data, len, MULT2, _size);
return check(hash1, hash2, add);
}
bool
BloomFilter::check(const char *data, bool add)
{
uint32_t hash1 = hash(data, MULT1, _size);
uint32_t hash2 = hash(data, MULT2, _size);
return check(hash1, hash2, add);
}
bool
BloomFilter::check(uint32_t hash1, uint32_t hash2, bool add)
{
bool found = true;
for (int i = 0; i < _hashes; i++) {
hash1 = (hash1 + hash2) % _size;
hash2 = (hash2 + i) % _size;
if (!isSet(hash1, add)) {
if (!add) {
return false;
}
found = false;
}
}
return found;
}
bool
BloomFilter::isSet(uint32_t pos, bool add)
{
if ((_buf[pos >> 5] & (1 << (pos & 31))) == 0) {
if (add) {
_buf[pos >> 5] |= (1 << (pos & 31));
}
return false;
}
return true;
}
|