| 1 | //! @file filter.cpp |
| 2 | //! @author ryftchen |
| 3 | //! @brief The definitions (filter) in the data structure module. |
| 4 | //! @version 0.1.0 |
| 5 | //! @copyright Copyright (c) 2022-2025 ryftchen. All rights reserved. |
| 6 | |
| 7 | #include "filter.hpp" |
| 8 | |
| 9 | #include <cmath> |
| 10 | #include <cstring> |
| 11 | |
| 12 | namespace data_structure::filter |
| 13 | { |
| 14 | //! @brief Function version number. |
| 15 | //! @return version number (major.minor.patch) |
| 16 | const char* version() noexcept |
| 17 | { |
| 18 | static const char* const ver = "0.1.0" ; |
| 19 | return ver; |
| 20 | } |
| 21 | |
| 22 | // NOLINTBEGIN(readability-magic-numbers) |
| 23 | //! @brief The MurmurHash2 (64-bit) function. |
| 24 | //! @param key - key to hash |
| 25 | //! @param length - length of the key |
| 26 | //! @param seed - hash seed |
| 27 | //! @return hash value |
| 28 | static std::uint64_t murmurHash2X64(const void* const key, const int length, const std::uint32_t seed) noexcept |
| 29 | { |
| 30 | constexpr std::uint64_t mix = 0xC6A4A7935BD1E995; |
| 31 | constexpr int shift = 47; |
| 32 | const auto* data1 = static_cast<const std::uint64_t*>(key); |
| 33 | const std::uint64_t* end = data1 + (length / 8); |
| 34 | std::uint64_t hash = seed ^ (length * mix); |
| 35 | while (data1 != end) |
| 36 | { |
| 37 | std::uint64_t block = *data1++; |
| 38 | block *= mix; |
| 39 | block ^= block >> shift; |
| 40 | block *= mix; |
| 41 | |
| 42 | hash ^= block; |
| 43 | hash *= mix; |
| 44 | } |
| 45 | |
| 46 | const auto* data2 = |
| 47 | reinterpret_cast<const unsigned char*>(data1); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) |
| 48 | switch (length & 7) |
| 49 | { |
| 50 | case 7: |
| 51 | hash ^= static_cast<std::uint64_t>(data2[6]) << 48; |
| 52 | [[fallthrough]]; |
| 53 | case 6: |
| 54 | hash ^= static_cast<std::uint64_t>(data2[5]) << 40; |
| 55 | [[fallthrough]]; |
| 56 | case 5: |
| 57 | hash ^= static_cast<std::uint64_t>(data2[4]) << 32; |
| 58 | [[fallthrough]]; |
| 59 | case 4: |
| 60 | hash ^= static_cast<std::uint64_t>(data2[3]) << 24; |
| 61 | [[fallthrough]]; |
| 62 | case 3: |
| 63 | hash ^= static_cast<std::uint64_t>(data2[2]) << 16; |
| 64 | [[fallthrough]]; |
| 65 | case 2: |
| 66 | hash ^= static_cast<std::uint64_t>(data2[1]) << 8; |
| 67 | [[fallthrough]]; |
| 68 | case 1: |
| 69 | hash ^= static_cast<std::uint64_t>(data2[0]); |
| 70 | hash *= mix; |
| 71 | [[fallthrough]]; |
| 72 | default: |
| 73 | break; |
| 74 | }; |
| 75 | |
| 76 | hash ^= hash >> shift; |
| 77 | hash *= mix; |
| 78 | hash ^= hash >> shift; |
| 79 | return hash; |
| 80 | } |
| 81 | |
| 82 | Bloom::Bloom(const std::uint32_t capacity, const double falsePositiveProb, const std::uint32_t hashSeed) : |
| 83 | capacity{capacity}, |
| 84 | filterBitsNum{calculateParamM(n: capacity, p: falsePositiveProb)}, |
| 85 | hashFuncNum{calculateParamK(m: filterBitsNum, n: capacity)}, |
| 86 | hashSeed{hashSeed}, |
| 87 | filterSize{filterBitsNum / byteBits}, |
| 88 | filter{std::make_unique<std::uint8_t[]>(num: filterSize)}, |
| 89 | hashPos{std::make_unique<std::uint32_t[]>(num: hashFuncNum)} |
| 90 | { |
| 91 | std::memset(s: filter.get(), c: 0, n: filterSize * sizeof(std::uint8_t)); |
| 92 | std::memset(s: hashPos.get(), c: 0, n: hashFuncNum * sizeof(std::uint32_t)); |
| 93 | } |
| 94 | |
| 95 | bool Bloom::insert(const void* const key, const int length) |
| 96 | { |
| 97 | if (length <= 0) |
| 98 | { |
| 99 | return false; |
| 100 | } |
| 101 | |
| 102 | bloomHash(key, length); |
| 103 | for (std::uint32_t i = 0; i < hashFuncNum; ++i) |
| 104 | { |
| 105 | setBit(filter: filter.get(), pos: hashPos[i]); |
| 106 | } |
| 107 | ++entries; |
| 108 | return entries <= capacity; |
| 109 | } |
| 110 | |
| 111 | bool Bloom::mayContain(const void* const key, const int length) |
| 112 | { |
| 113 | if (!key || (length <= 0)) |
| 114 | { |
| 115 | return false; |
| 116 | } |
| 117 | |
| 118 | bloomHash(key, length); |
| 119 | for (std::uint32_t i = 0; i < hashFuncNum; ++i) |
| 120 | { |
| 121 | if (getBit(filter: filter.get(), pos: hashPos[i]) == 0) |
| 122 | { |
| 123 | return false; |
| 124 | } |
| 125 | } |
| 126 | return true; |
| 127 | } |
| 128 | |
| 129 | void Bloom::clear() |
| 130 | { |
| 131 | entries = 0; |
| 132 | std::memset(s: filter.get(), c: 0, n: filterSize * sizeof(std::uint8_t)); |
| 133 | std::memset(s: hashPos.get(), c: 0, n: hashFuncNum * sizeof(std::uint32_t)); |
| 134 | } |
| 135 | |
| 136 | void Bloom::bloomHash(const void* const key, const int length) |
| 137 | { |
| 138 | const std::uint64_t hash1 = murmurHash2X64(key, length, seed: hashSeed); |
| 139 | const std::uint64_t hash2 = murmurHash2X64(key, length, seed: static_cast<std::uint32_t>((hash1 >> 32) ^ hash1)); |
| 140 | for (std::uint32_t i = 0; i < hashFuncNum; ++i) |
| 141 | { |
| 142 | hashPos[i] = (hash1 + i * hash2) % filterBitsNum; |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | void Bloom::setBit(std::uint8_t filter[], const std::uint32_t pos) |
| 147 | { |
| 148 | filter[pos / byteBits] |= 1 << (pos % byteBits); |
| 149 | } |
| 150 | |
| 151 | std::uint8_t Bloom::getBit(const std::uint8_t filter[], const std::uint32_t pos) |
| 152 | { |
| 153 | return filter[pos / byteBits] & (1 << (pos % byteBits)); |
| 154 | } |
| 155 | |
| 156 | std::uint32_t Bloom::calculateParamM(const std::uint32_t n, const double p) |
| 157 | { |
| 158 | if ((n == 0) || (p <= 0.0) || (p >= 1.0)) |
| 159 | { |
| 160 | return 0; |
| 161 | } |
| 162 | |
| 163 | std::uint32_t m = std::ceil(x: -1.0 * n * std::log(x: p) / (std::numbers::ln2 * std::numbers::ln2)); |
| 164 | m = (m - m % 64) + 64; |
| 165 | return m; |
| 166 | } |
| 167 | |
| 168 | std::uint32_t Bloom::calculateParamK(const std::uint32_t m, const std::uint32_t n) |
| 169 | { |
| 170 | if ((m == 0) || (n == 0)) |
| 171 | { |
| 172 | return 0; |
| 173 | } |
| 174 | |
| 175 | const std::uint32_t k = std::round(x: std::numbers::ln2 * m / n); |
| 176 | return k; |
| 177 | } |
| 178 | |
| 179 | Quotient::Quotient(const std::uint8_t qBits, const std::uint8_t rBits, const std::uint32_t hashSeed) : |
| 180 | qBits{qBits}, |
| 181 | rBits{rBits}, |
| 182 | elemBits{static_cast<std::uint8_t>(rBits + 3)}, |
| 183 | indexMask{lowMask(n: qBits)}, |
| 184 | rMask{lowMask(n: rBits)}, |
| 185 | elemMask{lowMask(n: elemBits)}, |
| 186 | capacity{static_cast<std::uint64_t>(1 << qBits)}, |
| 187 | hashSeed{hashSeed}, |
| 188 | filterSize{filterSizeInBytes(q: qBits, r: rBits)}, |
| 189 | filter{std::make_unique<std::uint64_t[]>(num: filterSize)} |
| 190 | { |
| 191 | std::memset(s: filter.get(), c: 0, n: filterSize * sizeof(std::uint64_t)); |
| 192 | } |
| 193 | |
| 194 | bool Quotient::insert(const void* const key, const int length) |
| 195 | { |
| 196 | return (key && (length > 0)) && insert(hash: quotientHash(key, length)); |
| 197 | } |
| 198 | |
| 199 | bool Quotient::mayContain(const void* const key, const int length) |
| 200 | { |
| 201 | return (key && (length > 0)) && mayContain(hash: quotientHash(key, length)); |
| 202 | } |
| 203 | |
| 204 | bool Quotient::remove(const void* const key, const int length) |
| 205 | { |
| 206 | return (key && (length > 0)) && remove(hash: quotientHash(key, length)); |
| 207 | } |
| 208 | |
| 209 | void Quotient::clear() |
| 210 | { |
| 211 | entries = 0; |
| 212 | std::memset(s: filter.get(), c: 0, n: filterSize * sizeof(std::uint64_t)); |
| 213 | } |
| 214 | |
| 215 | bool Quotient::insert(const std::uint64_t hash) |
| 216 | { |
| 217 | if (entries >= capacity) |
| 218 | { |
| 219 | return false; |
| 220 | } |
| 221 | |
| 222 | const std::uint64_t hq = hashToQuotient(hash); |
| 223 | const std::uint64_t hr = hashToRemainder(hash); |
| 224 | const std::uint64_t hqElem = getElement(qf: *this, index: hq); |
| 225 | std::uint64_t entry = (hr << 3) & ~7; |
| 226 | if (isEmptyElement(elem: hqElem)) |
| 227 | { |
| 228 | setElement(qf&: *this, index: hq, elem: setOccupied(entry)); |
| 229 | ++entries; |
| 230 | return true; |
| 231 | } |
| 232 | if (!isOccupied(elem: hqElem)) |
| 233 | { |
| 234 | setElement(qf&: *this, index: hq, elem: setOccupied(hqElem)); |
| 235 | } |
| 236 | |
| 237 | const std::uint64_t start = findRunIndex(quot: hq); |
| 238 | std::uint64_t slot = start; |
| 239 | if (isOccupied(elem: hqElem)) |
| 240 | { |
| 241 | do |
| 242 | { |
| 243 | const std::uint64_t rem = getRemainder(elem: getElement(qf: *this, index: slot)); |
| 244 | if (rem == hr) |
| 245 | { |
| 246 | return true; |
| 247 | } |
| 248 | if (rem > hr) |
| 249 | { |
| 250 | break; |
| 251 | } |
| 252 | slot = increase(qf: *this, index: slot); |
| 253 | } |
| 254 | while (isContinuation(elem: getElement(qf: *this, index: slot))); |
| 255 | |
| 256 | if (slot == start) |
| 257 | { |
| 258 | const std::uint64_t oldHead = getElement(qf: *this, index: start); |
| 259 | setElement(qf&: *this, index: start, elem: setContinuation(oldHead)); |
| 260 | } |
| 261 | else |
| 262 | { |
| 263 | entry = setContinuation(entry); |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | if (slot != hq) |
| 268 | { |
| 269 | entry = setShifted(entry); |
| 270 | } |
| 271 | |
| 272 | insertAt(start: slot, elem: entry); |
| 273 | ++entries; |
| 274 | return true; |
| 275 | } |
| 276 | |
| 277 | void Quotient::insertAt(const std::uint64_t start, const std::uint64_t elem) |
| 278 | { |
| 279 | bool isEmpty = false; |
| 280 | std::uint64_t slot = start; |
| 281 | std::uint64_t currElem = elem; |
| 282 | do |
| 283 | { |
| 284 | std::uint64_t prevElem = getElement(qf: *this, index: slot); |
| 285 | isEmpty = isEmptyElement(elem: prevElem); |
| 286 | if (!isEmpty) |
| 287 | { |
| 288 | prevElem = setShifted(prevElem); |
| 289 | if (isOccupied(elem: prevElem)) |
| 290 | { |
| 291 | currElem = setOccupied(currElem); |
| 292 | prevElem = clearOccupied(elem: prevElem); |
| 293 | } |
| 294 | } |
| 295 | setElement(qf&: *this, index: slot, elem: currElem); |
| 296 | currElem = prevElem; |
| 297 | slot = increase(qf: *this, index: slot); |
| 298 | } |
| 299 | while (!isEmpty); |
| 300 | } |
| 301 | |
| 302 | void Quotient::insertFrom(const Quotient& qf) |
| 303 | { |
| 304 | Iterator iter{}; |
| 305 | start(qf, iter); |
| 306 | while (!done(qf, iter)) |
| 307 | { |
| 308 | insert(hash: next(qf, iter)); |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | bool Quotient::isCompatibleWith(const Quotient& qf) const |
| 313 | { |
| 314 | return (qf.qBits == qBits) && (qf.rBits == rBits) && (qf.hashSeed == hashSeed); |
| 315 | } |
| 316 | |
| 317 | bool Quotient::mayContain(const std::uint64_t hash) |
| 318 | { |
| 319 | const std::uint64_t hq = hashToQuotient(hash); |
| 320 | const std::uint64_t hr = hashToRemainder(hash); |
| 321 | const std::uint64_t hqElem = getElement(qf: *this, index: hq); |
| 322 | if (!isOccupied(elem: hqElem)) |
| 323 | { |
| 324 | return false; |
| 325 | } |
| 326 | |
| 327 | std::uint64_t slot = findRunIndex(quot: hq); |
| 328 | do |
| 329 | { |
| 330 | const std::uint64_t rem = getRemainder(elem: getElement(qf: *this, index: slot)); |
| 331 | if (rem == hr) |
| 332 | { |
| 333 | return true; |
| 334 | } |
| 335 | if (rem > hr) |
| 336 | { |
| 337 | return false; |
| 338 | } |
| 339 | slot = increase(qf: *this, index: slot); |
| 340 | } |
| 341 | while (isContinuation(elem: getElement(qf: *this, index: slot))); |
| 342 | return false; |
| 343 | } |
| 344 | |
| 345 | bool Quotient::remove(const std::uint64_t hash) |
| 346 | { |
| 347 | const std::uint64_t highBits = hash >> (qBits + rBits); |
| 348 | if (highBits) |
| 349 | { |
| 350 | return false; |
| 351 | } |
| 352 | |
| 353 | const std::uint64_t hq = hashToQuotient(hash); |
| 354 | const std::uint64_t hr = hashToRemainder(hash); |
| 355 | std::uint64_t hqElem = getElement(qf: *this, index: hq); |
| 356 | if (!isOccupied(elem: hqElem) || (entries == 0)) |
| 357 | { |
| 358 | return true; |
| 359 | } |
| 360 | |
| 361 | std::uint64_t slot = findRunIndex(quot: hq); |
| 362 | std::uint64_t rem = 0; |
| 363 | do |
| 364 | { |
| 365 | rem = getRemainder(elem: getElement(qf: *this, index: slot)); |
| 366 | if (rem >= hr) |
| 367 | { |
| 368 | break; |
| 369 | } |
| 370 | slot = increase(qf: *this, index: slot); |
| 371 | } |
| 372 | while (isContinuation(elem: getElement(qf: *this, index: slot))); |
| 373 | if (rem != hr) |
| 374 | { |
| 375 | return true; |
| 376 | } |
| 377 | |
| 378 | const std::uint64_t entryToRemove = (slot == hq) ? hqElem : getElement(qf: *this, index: slot); |
| 379 | const bool toReplaceRunStart = isRunStart(elem: entryToRemove); |
| 380 | if (toReplaceRunStart) |
| 381 | { |
| 382 | const std::uint64_t nextElem = getElement(qf: *this, index: increase(qf: *this, index: slot)); |
| 383 | if (!isContinuation(elem: nextElem)) |
| 384 | { |
| 385 | hqElem = clearOccupied(elem: hqElem); |
| 386 | setElement(qf&: *this, index: hq, elem: hqElem); |
| 387 | } |
| 388 | } |
| 389 | |
| 390 | removeAt(start: slot, quot: hq); |
| 391 | if (toReplaceRunStart) |
| 392 | { |
| 393 | const std::uint64_t nextElem = getElement(qf: *this, index: slot); |
| 394 | std::uint64_t updatedNextElem = isContinuation(elem: nextElem) ? clearContinuation(elem: nextElem) : nextElem; |
| 395 | if ((slot == hq) && isRunStart(elem: updatedNextElem)) |
| 396 | { |
| 397 | updatedNextElem = clearShifted(elem: updatedNextElem); |
| 398 | } |
| 399 | if (updatedNextElem != nextElem) |
| 400 | { |
| 401 | setElement(qf&: *this, index: slot, elem: updatedNextElem); |
| 402 | } |
| 403 | } |
| 404 | --entries; |
| 405 | return true; |
| 406 | } |
| 407 | |
| 408 | void Quotient::removeAt(const std::uint64_t start, const std::uint64_t quot) |
| 409 | { |
| 410 | const std::uint64_t orig = start; |
| 411 | std::uint64_t slot = start; |
| 412 | std::uint64_t currElem = getElement(qf: *this, index: slot); |
| 413 | std::uint64_t scanPos = increase(qf: *this, index: slot); |
| 414 | std::uint64_t quotient = quot; |
| 415 | for (;;) |
| 416 | { |
| 417 | const std::uint64_t nextElem = getElement(qf: *this, index: scanPos); |
| 418 | if (isEmptyElement(elem: nextElem) || isClusterStart(elem: nextElem) || (scanPos == orig)) |
| 419 | { |
| 420 | setElement(qf&: *this, index: slot, elem: 0); |
| 421 | return; |
| 422 | } |
| 423 | |
| 424 | const bool isCurrOccupied = isOccupied(elem: currElem); |
| 425 | std::uint64_t updatedNextElem = nextElem; |
| 426 | if (isRunStart(elem: nextElem)) |
| 427 | { |
| 428 | do |
| 429 | { |
| 430 | quotient = increase(qf: *this, index: quotient); |
| 431 | } |
| 432 | while (!isOccupied(elem: getElement(qf: *this, index: quotient))); |
| 433 | |
| 434 | if (isCurrOccupied && (slot == quotient)) |
| 435 | { |
| 436 | updatedNextElem = clearShifted(elem: nextElem); |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | setElement(qf&: *this, index: slot, elem: isCurrOccupied ? setOccupied(updatedNextElem) : clearOccupied(elem: updatedNextElem)); |
| 441 | slot = scanPos; |
| 442 | scanPos = increase(qf: *this, index: scanPos); |
| 443 | currElem = nextElem; |
| 444 | } |
| 445 | } |
| 446 | |
| 447 | std::uint64_t Quotient::quotientHash(const void* const key, const int length) const |
| 448 | { |
| 449 | return murmurHash2X64(key, length, seed: hashSeed) & lowMask(n: qBits + rBits); |
| 450 | } |
| 451 | |
| 452 | std::uint64_t Quotient::hashToQuotient(const std::uint64_t hash) const |
| 453 | { |
| 454 | return (hash >> rBits) & indexMask; |
| 455 | } |
| 456 | |
| 457 | std::uint64_t Quotient::hashToRemainder(const std::uint64_t hash) const |
| 458 | { |
| 459 | return hash & rMask; |
| 460 | } |
| 461 | |
| 462 | std::uint64_t Quotient::findRunIndex(const std::uint64_t quot) const |
| 463 | { |
| 464 | std::uint64_t start = quot; |
| 465 | while (isShifted(elem: getElement(qf: *this, index: start))) |
| 466 | { |
| 467 | start = decrease(qf: *this, index: start); |
| 468 | } |
| 469 | |
| 470 | std::uint64_t slot = start; |
| 471 | while (start != quot) |
| 472 | { |
| 473 | do |
| 474 | { |
| 475 | slot = increase(qf: *this, index: slot); |
| 476 | } |
| 477 | while (isContinuation(elem: getElement(qf: *this, index: slot))); |
| 478 | |
| 479 | do |
| 480 | { |
| 481 | start = increase(qf: *this, index: start); |
| 482 | } |
| 483 | while (!isOccupied(elem: getElement(qf: *this, index: start))); |
| 484 | } |
| 485 | return slot; |
| 486 | } |
| 487 | |
| 488 | void Quotient::start(const Quotient& qf, Iterator& iter) |
| 489 | { |
| 490 | iter.visited = qf.entries; |
| 491 | if (qf.entries == 0) |
| 492 | { |
| 493 | return; |
| 494 | } |
| 495 | |
| 496 | std::uint64_t start = 0; |
| 497 | for (start = 0; start < qf.capacity; ++start) |
| 498 | { |
| 499 | if (isClusterStart(elem: getElement(qf, index: start))) |
| 500 | { |
| 501 | break; |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | iter.visited = 0; |
| 506 | iter.index = start; |
| 507 | } |
| 508 | |
| 509 | bool Quotient::done(const Quotient& qf, const Iterator& iter) |
| 510 | { |
| 511 | return qf.entries == iter.visited; |
| 512 | } |
| 513 | |
| 514 | std::uint64_t Quotient::next(const Quotient& qf, Iterator& iter) |
| 515 | { |
| 516 | while (!done(qf, iter)) |
| 517 | { |
| 518 | const std::uint64_t entry = getElement(qf, index: iter.index); |
| 519 | if (isClusterStart(elem: entry)) |
| 520 | { |
| 521 | iter.quotient = iter.index; |
| 522 | } |
| 523 | else if (isRunStart(elem: entry)) |
| 524 | { |
| 525 | std::uint64_t quotient = iter.quotient; |
| 526 | do |
| 527 | { |
| 528 | quotient = increase(qf, index: quotient); |
| 529 | } |
| 530 | while (!isOccupied(elem: getElement(qf, index: quotient))); |
| 531 | iter.quotient = quotient; |
| 532 | } |
| 533 | |
| 534 | iter.index = increase(qf, index: iter.index); |
| 535 | if (!isEmptyElement(elem: entry)) |
| 536 | { |
| 537 | const std::uint64_t quotient = iter.quotient; |
| 538 | const std::uint64_t rem = getRemainder(elem: entry); |
| 539 | const std::uint64_t hash = (quotient << qf.rBits) | rem; |
| 540 | ++iter.visited; |
| 541 | return hash; |
| 542 | } |
| 543 | } |
| 544 | return 0; |
| 545 | } |
| 546 | |
| 547 | void Quotient::setElement(Quotient& qf, const std::uint64_t index, std::uint64_t elem) |
| 548 | { |
| 549 | const std::size_t bitPos = qf.elemBits * index; |
| 550 | const std::size_t slotPos = bitPos % 64; |
| 551 | const int spillBits = (slotPos + qf.elemBits) - 64; |
| 552 | std::size_t tabPos = bitPos / 64; |
| 553 | |
| 554 | elem &= qf.elemMask; |
| 555 | qf.filter[tabPos] &= ~(qf.elemMask << slotPos); |
| 556 | qf.filter[tabPos] |= elem << slotPos; |
| 557 | if (spillBits > 0) |
| 558 | { |
| 559 | ++tabPos; |
| 560 | qf.filter[tabPos] &= ~lowMask(n: spillBits); |
| 561 | qf.filter[tabPos] |= elem >> (qf.elemBits - spillBits); |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | std::uint64_t Quotient::getElement(const Quotient& qf, const std::uint64_t index) |
| 566 | { |
| 567 | const std::size_t bitPos = qf.elemBits * index; |
| 568 | const std::size_t slotPos = bitPos % 64; |
| 569 | const int spillBits = (slotPos + qf.elemBits) - 64; |
| 570 | std::size_t tabPos = bitPos / 64; |
| 571 | std::uint64_t elem = (qf.filter[tabPos] >> slotPos) & qf.elemMask; |
| 572 | |
| 573 | if (spillBits > 0) |
| 574 | { |
| 575 | ++tabPos; |
| 576 | const std::uint64_t x = qf.filter[tabPos] & lowMask(n: spillBits); |
| 577 | elem |= x << (qf.elemBits - spillBits); |
| 578 | } |
| 579 | return elem; |
| 580 | } |
| 581 | |
| 582 | std::uint64_t Quotient::increase(const Quotient& qf, const std::uint64_t index) |
| 583 | { |
| 584 | return (index + 1) & qf.indexMask; |
| 585 | } |
| 586 | |
| 587 | std::uint64_t Quotient::decrease(const Quotient& qf, const std::uint64_t index) |
| 588 | { |
| 589 | return (index - 1) & qf.indexMask; |
| 590 | } |
| 591 | |
| 592 | bool Quotient::isOccupied(const std::uint64_t elem) |
| 593 | { |
| 594 | return (elem & 1) != 0; |
| 595 | } |
| 596 | |
| 597 | std::uint64_t Quotient::setOccupied(const std::uint64_t elem) |
| 598 | { |
| 599 | return elem | 1; |
| 600 | } |
| 601 | |
| 602 | std::uint64_t Quotient::clearOccupied(const std::uint64_t elem) |
| 603 | { |
| 604 | return elem & ~1; |
| 605 | } |
| 606 | |
| 607 | bool Quotient::isContinuation(const std::uint64_t elem) |
| 608 | { |
| 609 | return (elem & 2) != 0; |
| 610 | } |
| 611 | |
| 612 | std::uint64_t Quotient::setContinuation(const std::uint64_t elem) |
| 613 | { |
| 614 | return elem | 2; |
| 615 | } |
| 616 | |
| 617 | std::uint64_t Quotient::clearContinuation(const std::uint64_t elem) |
| 618 | { |
| 619 | return elem & ~2; |
| 620 | } |
| 621 | |
| 622 | bool Quotient::isShifted(const std::uint64_t elem) |
| 623 | { |
| 624 | return (elem & 4) != 0; |
| 625 | } |
| 626 | |
| 627 | std::uint64_t Quotient::setShifted(const std::uint64_t elem) |
| 628 | { |
| 629 | return elem | 4; |
| 630 | } |
| 631 | |
| 632 | std::uint64_t Quotient::clearShifted(const std::uint64_t elem) |
| 633 | { |
| 634 | return elem & ~4; |
| 635 | } |
| 636 | |
| 637 | std::uint64_t Quotient::getRemainder(const std::uint64_t elem) |
| 638 | { |
| 639 | return elem >> 3; |
| 640 | } |
| 641 | |
| 642 | bool Quotient::isEmptyElement(const std::uint64_t elem) |
| 643 | { |
| 644 | return (elem & 7) == 0; |
| 645 | } |
| 646 | |
| 647 | bool Quotient::isClusterStart(const std::uint64_t elem) |
| 648 | { |
| 649 | return isOccupied(elem) && !isContinuation(elem) && !isShifted(elem); |
| 650 | } |
| 651 | |
| 652 | bool Quotient::isRunStart(const std::uint64_t elem) |
| 653 | { |
| 654 | return !isContinuation(elem) && (isOccupied(elem) || isShifted(elem)); |
| 655 | } |
| 656 | |
| 657 | std::uint64_t Quotient::lowMask(const std::uint64_t n) |
| 658 | { |
| 659 | return (1ULL << n) - 1ULL; |
| 660 | } |
| 661 | |
| 662 | std::uint64_t Quotient::filterSizeInBytes(const std::uint8_t q, const std::uint8_t r) |
| 663 | { |
| 664 | if ((q == 0) || (r == 0) || ((q + r) > 64)) |
| 665 | { |
| 666 | return 0; |
| 667 | } |
| 668 | |
| 669 | const std::uint64_t bits = static_cast<std::uint64_t>(1 << q) * (r + 3); |
| 670 | const std::uint64_t bytes = bits / 8; |
| 671 | return (bits % 8) ? (bytes + 1) : bytes; |
| 672 | } |
| 673 | // NOLINTEND(readability-magic-numbers) |
| 674 | } // namespace data_structure::filter |
| 675 | |