| 1 | //! @file filter.hpp |
| 2 | //! @author ryftchen |
| 3 | //! @brief The declarations (filter) in the data structure module. |
| 4 | //! @version 0.1.0 |
| 5 | //! @copyright Copyright (c) 2022-2026 ryftchen. All rights reserved. |
| 6 | |
| 7 | #pragma once |
| 8 | |
| 9 | #include <memory> |
| 10 | |
| 11 | //! @brief The data structure module. |
| 12 | namespace data_structure // NOLINT(modernize-concat-nested-namespaces) |
| 13 | { |
| 14 | //! @brief Filter-related functions in the data structure module. |
| 15 | namespace filter |
| 16 | { |
| 17 | //! @brief Brief function description. |
| 18 | //! @return function description (module_function) |
| 19 | inline static const char* description() noexcept |
| 20 | { |
| 21 | return "DS_FILTER" ; |
| 22 | } |
| 23 | extern const char* version() noexcept; |
| 24 | |
| 25 | //! @brief The Bloom filter. A space-efficient probabilistic data structure. |
| 26 | class Bloom |
| 27 | { |
| 28 | public: |
| 29 | //! @brief Construct a new Bloom object. |
| 30 | //! @param capacity - expected number of elements in the filter |
| 31 | //! @param falsePositiveProb - desired false positive probability |
| 32 | //! @param hashSeed - hash seed |
| 33 | Bloom(const std::uint32_t capacity, const double falsePositiveProb, const std::uint32_t hashSeed); |
| 34 | |
| 35 | //! @brief Insert a key into the filter. |
| 36 | //! @param key - key to hash |
| 37 | //! @param length - length of the key |
| 38 | //! @return success or failure |
| 39 | bool insert(const void* const key, const int length); |
| 40 | //! @brief Check whether a key may be in the filter. |
| 41 | //! @param key - key to hash |
| 42 | //! @param length - length of the key |
| 43 | //! @return may contain or not |
| 44 | bool mayContain(const void* const key, const int length); |
| 45 | //! @brief Clear the filter. |
| 46 | void clear(); |
| 47 | |
| 48 | private: |
| 49 | //! @brief The capacity of the filter. |
| 50 | const std::uint32_t capacity{0}; |
| 51 | //! @brief Number of bits. |
| 52 | const std::uint32_t filterBitsNum{0}; |
| 53 | //! @brief Number of hash functions. |
| 54 | const std::uint32_t hashFuncNum{0}; |
| 55 | //! @brief The hash seed. |
| 56 | const std::uint32_t hashSeed{0}; |
| 57 | //! @brief Size of the filter in bytes. |
| 58 | const std::uint32_t filterSize{0}; |
| 59 | //! @brief The filter data. |
| 60 | const std::unique_ptr<std::uint8_t[]> filter; |
| 61 | //! @brief The position of the hash values. |
| 62 | const std::unique_ptr<std::uint32_t[]> hashPos; |
| 63 | //! @brief Total number of entries. |
| 64 | std::uint32_t entries{0}; |
| 65 | //! @brief Number of bits in a byte. |
| 66 | static constexpr std::uint8_t byteBits{8}; |
| 67 | |
| 68 | //! @brief Hash a key and calculate the positions in the filter. |
| 69 | //! @param key - key to hash |
| 70 | //! @param length - length of the key |
| 71 | void bloomHash(const void* const key, const int length); |
| 72 | //! @brief Set a bit in the filter. |
| 73 | //! @param filter - filter to set the bit in |
| 74 | //! @param pos - hash position to set the bit |
| 75 | static void setBit(std::uint8_t filter[], const std::uint32_t pos); |
| 76 | //! @brief Get a bit from the filter. |
| 77 | //! @param filter - filter to get the bit from |
| 78 | //! @param pos - hash position to get the bit |
| 79 | //! @return bit from the filter |
| 80 | static std::uint8_t getBit(const std::uint8_t filter[], const std::uint32_t pos); |
| 81 | //! @brief Calculate the parameter m. |
| 82 | //! @param n - expected number of elements in the filter |
| 83 | //! @param p - desired false positive probability |
| 84 | //! @return number of bits in the filter |
| 85 | static std::uint32_t calculateParamM(const std::uint32_t n, const double p); |
| 86 | //! @brief Calculate the parameter k. |
| 87 | //! @param m - number of bits in the filter |
| 88 | //! @param n - expected number of elements in the filter |
| 89 | //! @return number of hash functions to use |
| 90 | static std::uint32_t calculateParamK(const std::uint32_t m, const std::uint32_t n); |
| 91 | }; |
| 92 | |
| 93 | //! @brief The quotient filter. A space-efficient probabilistic data structure. |
| 94 | class Quotient |
| 95 | { |
| 96 | public: |
| 97 | //! @brief Construct a new Quotient object. |
| 98 | //! @param qBits - number of quotient bits |
| 99 | //! @param rBits - number of reminder bits |
| 100 | //! @param hashSeed - hash seed |
| 101 | Quotient(const std::uint8_t qBits, const std::uint8_t rBits, const std::uint32_t hashSeed); |
| 102 | |
| 103 | //! @brief Insert a key into the filter. |
| 104 | //! @param key - key to hash |
| 105 | //! @param length - length of the key |
| 106 | //! @return success or failure |
| 107 | bool insert(const void* const key, const int length); |
| 108 | //! @brief Check whether a key may be in the filter. |
| 109 | //! @param key - key to hash |
| 110 | //! @param length - length of the key |
| 111 | //! @return may contain or not |
| 112 | bool mayContain(const void* const key, const int length); |
| 113 | //! @brief Clear the filter. |
| 114 | void clear(); |
| 115 | //! @brief Remove a key from the filter. |
| 116 | //! @param key - key to hash |
| 117 | //! @param length - length of the key |
| 118 | //! @return success or failure |
| 119 | bool remove(const void* const key, const int length); |
| 120 | //! @brief Merge other quotient filters into self. |
| 121 | //! @tparam QFs - type of other input quotient filters |
| 122 | //! @param qf - first input quotient filter |
| 123 | //! @param others - other input quotient filters |
| 124 | //! @return success or failure |
| 125 | template <typename... QFs> |
| 126 | bool merge(const Quotient& qf, const QFs&... others); |
| 127 | |
| 128 | private: |
| 129 | //! @brief Number of quotient bits. |
| 130 | const std::uint8_t qBits{0}; |
| 131 | //! @brief Number of reminder bits. |
| 132 | const std::uint8_t rBits{0}; |
| 133 | //! @brief Number of bits per element. |
| 134 | const std::uint8_t elemBits{0}; |
| 135 | //! @brief Mask for extracting the index. |
| 136 | const std::uint64_t indexMask{0}; |
| 137 | //! @brief Mask for extracting the reminder. |
| 138 | const std::uint64_t rMask{0}; |
| 139 | //! @brief Mask for extracting the element. |
| 140 | const std::uint64_t elemMask{0}; |
| 141 | //! @brief The capacity of the filter. |
| 142 | const std::uint64_t capacity{0}; |
| 143 | //! @brief The hash seed. |
| 144 | const std::uint32_t hashSeed{0}; |
| 145 | //! @brief Size of the filter in bytes. |
| 146 | const std::uint64_t filterSize{0}; |
| 147 | //! @brief The filter data. |
| 148 | const std::unique_ptr<std::uint64_t[]> filter; |
| 149 | //! @brief Total number of entries. |
| 150 | std::uint64_t entries{0}; |
| 151 | |
| 152 | //! @brief The iterator for the filter. |
| 153 | struct Iterator |
| 154 | { |
| 155 | //! @brief Current index in the filter. |
| 156 | std::uint64_t index{0}; |
| 157 | //! @brief Current quotient value. |
| 158 | std::uint64_t quotient{0}; |
| 159 | //! @brief Number of visited elements. |
| 160 | std::uint64_t visited{0}; |
| 161 | }; |
| 162 | |
| 163 | //! @brief Insert a hash value into the filter. |
| 164 | //! @param hash - hash value |
| 165 | //! @return success or failure |
| 166 | bool insert(const std::uint64_t hash); |
| 167 | //! @brief Insert an element into the filter at a given start index. |
| 168 | //! @param start - start index to insert the element |
| 169 | //! @param elem - element to insert |
| 170 | void insertAt(const std::uint64_t start, const std::uint64_t elem); |
| 171 | //! @brief Insert all the hash values from the other quotient filter. |
| 172 | //! @param qf - other quotient filter |
| 173 | void insertFrom(const Quotient& qf); |
| 174 | //! @brief Check whether it is compatible with the other quotient filter. |
| 175 | //! @param qf - other quotient filter |
| 176 | //! @return be compatible or not |
| 177 | [[nodiscard]] bool isCompatibleWith(const Quotient& qf) const; |
| 178 | //! @brief Check whether a hash value may be in the filter. |
| 179 | //! @param hash - hash value |
| 180 | //! @return may contain or not |
| 181 | bool mayContain(const std::uint64_t hash); |
| 182 | //! @brief Remove a hash value from the filter. |
| 183 | //! @param hash - hash value |
| 184 | //! @return success or failure |
| 185 | bool remove(const std::uint64_t hash); |
| 186 | //! @brief Remove an element from the filter at a given start index. |
| 187 | //! @param start - start index to remove the element |
| 188 | //! @param quot - quotient involving the element to be removed |
| 189 | void removeAt(const std::uint64_t start, const std::uint64_t quot); |
| 190 | //! @brief Hash a key for the filter. |
| 191 | //! @param key - key to hash |
| 192 | //! @param length - length of the key |
| 193 | //! @return hash value |
| 194 | std::uint64_t quotientHash(const void* const key, const int length) const; |
| 195 | //! @brief Convert the hash value into a quotient. |
| 196 | //! @param hash - hash value |
| 197 | //! @return quotient |
| 198 | [[nodiscard]] std::uint64_t hashToQuotient(const std::uint64_t hash) const; |
| 199 | //! @brief Convert the hash value into a reminder. |
| 200 | //! @param hash - hash value |
| 201 | //! @return reminder |
| 202 | [[nodiscard]] std::uint64_t hashToRemainder(const std::uint64_t hash) const; |
| 203 | //! @brief Find the start index of a run. |
| 204 | //! @param quot - quotient converted from hash value |
| 205 | //! @return start index of a run |
| 206 | [[nodiscard]] std::uint64_t findRunIndex(const std::uint64_t quot) const; |
| 207 | |
| 208 | //! @brief Start iterating over the filter. |
| 209 | //! @param qf - quotient filter |
| 210 | //! @param iter - filter iterator |
| 211 | static void start(const Quotient& qf, Iterator& iter); |
| 212 | //! @brief Check whether the iterator has visited all entries. |
| 213 | //! @param qf - quotient filter |
| 214 | //! @param iter - filter iterator |
| 215 | //! @return be done or not |
| 216 | static bool done(const Quotient& qf, const Iterator& iter); |
| 217 | //! @brief Get the next hash value from the filter. |
| 218 | //! @param qf - quotient filter |
| 219 | //! @param iter - filter iterator |
| 220 | //! @return next hash value |
| 221 | static std::uint64_t next(const Quotient& qf, Iterator& iter); |
| 222 | //! @brief Set the element at a given index. |
| 223 | //! @param qf - quotient filter |
| 224 | //! @param index - index of the element |
| 225 | //! @param elem - element to set |
| 226 | static void setElement(Quotient& qf, const std::uint64_t index, std::uint64_t elem); |
| 227 | //! @brief Get the element at a given index. |
| 228 | //! @param qf - quotient filter |
| 229 | //! @param index - index of the element |
| 230 | //! @return element at the index |
| 231 | static std::uint64_t getElement(const Quotient& qf, const std::uint64_t index); |
| 232 | //! @brief Increase an index. |
| 233 | //! @param qf - quotient filter |
| 234 | //! @param index - index to increment |
| 235 | //! @return increased index |
| 236 | static std::uint64_t increase(const Quotient& qf, const std::uint64_t index); |
| 237 | //! @brief Decrease an index. |
| 238 | //! @param qf - quotient filter |
| 239 | //! @param index - index to decrement |
| 240 | //! @return decreased index |
| 241 | static std::uint64_t decrease(const Quotient& qf, const std::uint64_t index); |
| 242 | //! @brief Check whether an element is occupied. |
| 243 | //! @param elem - element to check |
| 244 | //! @return be occupied or not |
| 245 | static bool isOccupied(const std::uint64_t elem); |
| 246 | //! @brief Set an element as occupied. |
| 247 | //! @param elem - element to set |
| 248 | //! @return element with occupied status set |
| 249 | static std::uint64_t setOccupied(const std::uint64_t elem); |
| 250 | //! @brief Clear the occupied status of an element. |
| 251 | //! @param elem - element to clear |
| 252 | //! @return element with occupied status cleared |
| 253 | static std::uint64_t clearOccupied(const std::uint64_t elem); |
| 254 | //! @brief Check whether an element is a continuation. |
| 255 | //! @param elem - element to check |
| 256 | //! @return be a continuation or not |
| 257 | static bool isContinuation(const std::uint64_t elem); |
| 258 | //! @brief Set an element as a continuation. |
| 259 | //! @param elem - element to set |
| 260 | //! @return element with continuation status set |
| 261 | static std::uint64_t setContinuation(const std::uint64_t elem); |
| 262 | //! @brief Clear the continuation status of an element. |
| 263 | //! @param elem - element to clear |
| 264 | //! @return element with continuation status cleared |
| 265 | static std::uint64_t clearContinuation(const std::uint64_t elem); |
| 266 | //! @brief Check whether an element is shifted. |
| 267 | //! @param elem - element to check |
| 268 | //! @return be shifted or not |
| 269 | static bool isShifted(const std::uint64_t elem); |
| 270 | //! @brief Set an element as shifted. |
| 271 | //! @param elem - element to set |
| 272 | //! @return element with shifted status set |
| 273 | static std::uint64_t setShifted(const std::uint64_t elem); |
| 274 | //! @brief Clear the shifted status of an element. |
| 275 | //! @param elem - element to clear |
| 276 | //! @return element with shifted status cleared |
| 277 | static std::uint64_t clearShifted(const std::uint64_t elem); |
| 278 | //! @brief Get the remainder from an element. |
| 279 | //! @param elem - element to get the remainder from |
| 280 | //! @return remainder of the element |
| 281 | static std::uint64_t getRemainder(const std::uint64_t elem); |
| 282 | //! @brief Check whether an element is empty. |
| 283 | //! @param elem - element to check |
| 284 | //! @return be empty or not |
| 285 | static bool isEmptyElement(const std::uint64_t elem); |
| 286 | //! @brief Check whether an element is a cluster start. |
| 287 | //! @param elem - element to check |
| 288 | //! @return be a cluster start or not |
| 289 | static bool isClusterStart(const std::uint64_t elem); |
| 290 | //! @brief Check whether an element is a run start. |
| 291 | //! @param elem - element to check |
| 292 | //! @return be a run start or not |
| 293 | static bool isRunStart(const std::uint64_t elem); |
| 294 | //! @brief Get the low mask for a given number of bits. |
| 295 | //! @param n - number of bits |
| 296 | //! @return low mask |
| 297 | static std::uint64_t lowMask(const std::uint64_t n); |
| 298 | //! @brief Get the size of the filter in bytes. |
| 299 | //! @param q - number of quotient bits |
| 300 | //! @param r - number of reminder bits |
| 301 | //! @return size of the filter in bytes |
| 302 | static std::uint64_t filterSizeInBytes(const std::uint8_t q, const std::uint8_t r); |
| 303 | }; |
| 304 | |
| 305 | template <typename... QFs> |
| 306 | bool Quotient::merge(const Quotient& qf, const QFs&... others) |
| 307 | { |
| 308 | if (!isCompatibleWith(qf) || !(isCompatibleWith(qf: others) && ...)) |
| 309 | { |
| 310 | return false; |
| 311 | } |
| 312 | |
| 313 | insertFrom(qf); |
| 314 | (insertFrom(qf: others), ...); |
| 315 | return true; |
| 316 | } |
| 317 | } // namespace filter |
| 318 | } // namespace data_structure |
| 319 | |