1//! @file filter.hpp
2//! @author ryftchen
3//! @brief The declarations (filter) in the data structure module.
4//! @version 0.1.0
5//! @copyright Copyright (c) 2022-2026 ryftchen. All rights reserved.
6
7#pragma once
8
9#include <memory>
10
11//! @brief The data structure module.
12namespace data_structure // NOLINT(modernize-concat-nested-namespaces)
13{
14//! @brief Filter-related functions in the data structure module.
15namespace filter
16{
17//! @brief Brief function description.
18//! @return function description (module_function)
19inline static const char* description() noexcept
20{
21 return "DS_FILTER";
22}
23extern const char* version() noexcept;
24
25//! @brief The Bloom filter. A space-efficient probabilistic data structure.
26class Bloom
27{
28public:
29 //! @brief Construct a new Bloom object.
30 //! @param capacity - expected number of elements in the filter
31 //! @param falsePositiveProb - desired false positive probability
32 //! @param hashSeed - hash seed
33 Bloom(const std::uint32_t capacity, const double falsePositiveProb, const std::uint32_t hashSeed);
34
35 //! @brief Insert a key into the filter.
36 //! @param key - key to hash
37 //! @param length - length of the key
38 //! @return success or failure
39 bool insert(const void* const key, const int length);
40 //! @brief Check whether a key may be in the filter.
41 //! @param key - key to hash
42 //! @param length - length of the key
43 //! @return may contain or not
44 bool mayContain(const void* const key, const int length);
45 //! @brief Clear the filter.
46 void clear();
47
48private:
49 //! @brief The capacity of the filter.
50 const std::uint32_t capacity{0};
51 //! @brief Number of bits.
52 const std::uint32_t filterBitsNum{0};
53 //! @brief Number of hash functions.
54 const std::uint32_t hashFuncNum{0};
55 //! @brief The hash seed.
56 const std::uint32_t hashSeed{0};
57 //! @brief Size of the filter in bytes.
58 const std::uint32_t filterSize{0};
59 //! @brief The filter data.
60 const std::unique_ptr<std::uint8_t[]> filter;
61 //! @brief The position of the hash values.
62 const std::unique_ptr<std::uint32_t[]> hashPos;
63 //! @brief Total number of entries.
64 std::uint32_t entries{0};
65 //! @brief Number of bits in a byte.
66 static constexpr std::uint8_t byteBits{8};
67
68 //! @brief Hash a key and calculate the positions in the filter.
69 //! @param key - key to hash
70 //! @param length - length of the key
71 void bloomHash(const void* const key, const int length);
72 //! @brief Set a bit in the filter.
73 //! @param filter - filter to set the bit in
74 //! @param pos - hash position to set the bit
75 static void setBit(std::uint8_t filter[], const std::uint32_t pos);
76 //! @brief Get a bit from the filter.
77 //! @param filter - filter to get the bit from
78 //! @param pos - hash position to get the bit
79 //! @return bit from the filter
80 static std::uint8_t getBit(const std::uint8_t filter[], const std::uint32_t pos);
81 //! @brief Calculate the parameter m.
82 //! @param n - expected number of elements in the filter
83 //! @param p - desired false positive probability
84 //! @return number of bits in the filter
85 static std::uint32_t calculateParamM(const std::uint32_t n, const double p);
86 //! @brief Calculate the parameter k.
87 //! @param m - number of bits in the filter
88 //! @param n - expected number of elements in the filter
89 //! @return number of hash functions to use
90 static std::uint32_t calculateParamK(const std::uint32_t m, const std::uint32_t n);
91};
92
93//! @brief The quotient filter. A space-efficient probabilistic data structure.
94class Quotient
95{
96public:
97 //! @brief Construct a new Quotient object.
98 //! @param qBits - number of quotient bits
99 //! @param rBits - number of reminder bits
100 //! @param hashSeed - hash seed
101 Quotient(const std::uint8_t qBits, const std::uint8_t rBits, const std::uint32_t hashSeed);
102
103 //! @brief Insert a key into the filter.
104 //! @param key - key to hash
105 //! @param length - length of the key
106 //! @return success or failure
107 bool insert(const void* const key, const int length);
108 //! @brief Check whether a key may be in the filter.
109 //! @param key - key to hash
110 //! @param length - length of the key
111 //! @return may contain or not
112 bool mayContain(const void* const key, const int length);
113 //! @brief Clear the filter.
114 void clear();
115 //! @brief Remove a key from the filter.
116 //! @param key - key to hash
117 //! @param length - length of the key
118 //! @return success or failure
119 bool remove(const void* const key, const int length);
120 //! @brief Merge other quotient filters into self.
121 //! @tparam QFs - type of other input quotient filters
122 //! @param qf - first input quotient filter
123 //! @param others - other input quotient filters
124 //! @return success or failure
125 template <typename... QFs>
126 bool merge(const Quotient& qf, const QFs&... others);
127
128private:
129 //! @brief Number of quotient bits.
130 const std::uint8_t qBits{0};
131 //! @brief Number of reminder bits.
132 const std::uint8_t rBits{0};
133 //! @brief Number of bits per element.
134 const std::uint8_t elemBits{0};
135 //! @brief Mask for extracting the index.
136 const std::uint64_t indexMask{0};
137 //! @brief Mask for extracting the reminder.
138 const std::uint64_t rMask{0};
139 //! @brief Mask for extracting the element.
140 const std::uint64_t elemMask{0};
141 //! @brief The capacity of the filter.
142 const std::uint64_t capacity{0};
143 //! @brief The hash seed.
144 const std::uint32_t hashSeed{0};
145 //! @brief Size of the filter in bytes.
146 const std::uint64_t filterSize{0};
147 //! @brief The filter data.
148 const std::unique_ptr<std::uint64_t[]> filter;
149 //! @brief Total number of entries.
150 std::uint64_t entries{0};
151
152 //! @brief The iterator for the filter.
153 struct Iterator
154 {
155 //! @brief Current index in the filter.
156 std::uint64_t index{0};
157 //! @brief Current quotient value.
158 std::uint64_t quotient{0};
159 //! @brief Number of visited elements.
160 std::uint64_t visited{0};
161 };
162
163 //! @brief Insert a hash value into the filter.
164 //! @param hash - hash value
165 //! @return success or failure
166 bool insert(const std::uint64_t hash);
167 //! @brief Insert an element into the filter at a given start index.
168 //! @param start - start index to insert the element
169 //! @param elem - element to insert
170 void insertAt(const std::uint64_t start, const std::uint64_t elem);
171 //! @brief Insert all the hash values from the other quotient filter.
172 //! @param qf - other quotient filter
173 void insertFrom(const Quotient& qf);
174 //! @brief Check whether it is compatible with the other quotient filter.
175 //! @param qf - other quotient filter
176 //! @return be compatible or not
177 [[nodiscard]] bool isCompatibleWith(const Quotient& qf) const;
178 //! @brief Check whether a hash value may be in the filter.
179 //! @param hash - hash value
180 //! @return may contain or not
181 bool mayContain(const std::uint64_t hash);
182 //! @brief Remove a hash value from the filter.
183 //! @param hash - hash value
184 //! @return success or failure
185 bool remove(const std::uint64_t hash);
186 //! @brief Remove an element from the filter at a given start index.
187 //! @param start - start index to remove the element
188 //! @param quot - quotient involving the element to be removed
189 void removeAt(const std::uint64_t start, const std::uint64_t quot);
190 //! @brief Hash a key for the filter.
191 //! @param key - key to hash
192 //! @param length - length of the key
193 //! @return hash value
194 std::uint64_t quotientHash(const void* const key, const int length) const;
195 //! @brief Convert the hash value into a quotient.
196 //! @param hash - hash value
197 //! @return quotient
198 [[nodiscard]] std::uint64_t hashToQuotient(const std::uint64_t hash) const;
199 //! @brief Convert the hash value into a reminder.
200 //! @param hash - hash value
201 //! @return reminder
202 [[nodiscard]] std::uint64_t hashToRemainder(const std::uint64_t hash) const;
203 //! @brief Find the start index of a run.
204 //! @param quot - quotient converted from hash value
205 //! @return start index of a run
206 [[nodiscard]] std::uint64_t findRunIndex(const std::uint64_t quot) const;
207
208 //! @brief Start iterating over the filter.
209 //! @param qf - quotient filter
210 //! @param iter - filter iterator
211 static void start(const Quotient& qf, Iterator& iter);
212 //! @brief Check whether the iterator has visited all entries.
213 //! @param qf - quotient filter
214 //! @param iter - filter iterator
215 //! @return be done or not
216 static bool done(const Quotient& qf, const Iterator& iter);
217 //! @brief Get the next hash value from the filter.
218 //! @param qf - quotient filter
219 //! @param iter - filter iterator
220 //! @return next hash value
221 static std::uint64_t next(const Quotient& qf, Iterator& iter);
222 //! @brief Set the element at a given index.
223 //! @param qf - quotient filter
224 //! @param index - index of the element
225 //! @param elem - element to set
226 static void setElement(Quotient& qf, const std::uint64_t index, std::uint64_t elem);
227 //! @brief Get the element at a given index.
228 //! @param qf - quotient filter
229 //! @param index - index of the element
230 //! @return element at the index
231 static std::uint64_t getElement(const Quotient& qf, const std::uint64_t index);
232 //! @brief Increase an index.
233 //! @param qf - quotient filter
234 //! @param index - index to increment
235 //! @return increased index
236 static std::uint64_t increase(const Quotient& qf, const std::uint64_t index);
237 //! @brief Decrease an index.
238 //! @param qf - quotient filter
239 //! @param index - index to decrement
240 //! @return decreased index
241 static std::uint64_t decrease(const Quotient& qf, const std::uint64_t index);
242 //! @brief Check whether an element is occupied.
243 //! @param elem - element to check
244 //! @return be occupied or not
245 static bool isOccupied(const std::uint64_t elem);
246 //! @brief Set an element as occupied.
247 //! @param elem - element to set
248 //! @return element with occupied status set
249 static std::uint64_t setOccupied(const std::uint64_t elem);
250 //! @brief Clear the occupied status of an element.
251 //! @param elem - element to clear
252 //! @return element with occupied status cleared
253 static std::uint64_t clearOccupied(const std::uint64_t elem);
254 //! @brief Check whether an element is a continuation.
255 //! @param elem - element to check
256 //! @return be a continuation or not
257 static bool isContinuation(const std::uint64_t elem);
258 //! @brief Set an element as a continuation.
259 //! @param elem - element to set
260 //! @return element with continuation status set
261 static std::uint64_t setContinuation(const std::uint64_t elem);
262 //! @brief Clear the continuation status of an element.
263 //! @param elem - element to clear
264 //! @return element with continuation status cleared
265 static std::uint64_t clearContinuation(const std::uint64_t elem);
266 //! @brief Check whether an element is shifted.
267 //! @param elem - element to check
268 //! @return be shifted or not
269 static bool isShifted(const std::uint64_t elem);
270 //! @brief Set an element as shifted.
271 //! @param elem - element to set
272 //! @return element with shifted status set
273 static std::uint64_t setShifted(const std::uint64_t elem);
274 //! @brief Clear the shifted status of an element.
275 //! @param elem - element to clear
276 //! @return element with shifted status cleared
277 static std::uint64_t clearShifted(const std::uint64_t elem);
278 //! @brief Get the remainder from an element.
279 //! @param elem - element to get the remainder from
280 //! @return remainder of the element
281 static std::uint64_t getRemainder(const std::uint64_t elem);
282 //! @brief Check whether an element is empty.
283 //! @param elem - element to check
284 //! @return be empty or not
285 static bool isEmptyElement(const std::uint64_t elem);
286 //! @brief Check whether an element is a cluster start.
287 //! @param elem - element to check
288 //! @return be a cluster start or not
289 static bool isClusterStart(const std::uint64_t elem);
290 //! @brief Check whether an element is a run start.
291 //! @param elem - element to check
292 //! @return be a run start or not
293 static bool isRunStart(const std::uint64_t elem);
294 //! @brief Get the low mask for a given number of bits.
295 //! @param n - number of bits
296 //! @return low mask
297 static std::uint64_t lowMask(const std::uint64_t n);
298 //! @brief Get the size of the filter in bytes.
299 //! @param q - number of quotient bits
300 //! @param r - number of reminder bits
301 //! @return size of the filter in bytes
302 static std::uint64_t filterSizeInBytes(const std::uint8_t q, const std::uint8_t r);
303};
304
305template <typename... QFs>
306bool Quotient::merge(const Quotient& qf, const QFs&... others)
307{
308 if (!isCompatibleWith(qf) || !(isCompatibleWith(qf: others) && ...))
309 {
310 return false;
311 }
312
313 insertFrom(qf);
314 (insertFrom(qf: others), ...);
315 return true;
316}
317} // namespace filter
318} // namespace data_structure
319