/root/bitcoin/src/minisketch/src/false_positives.h
| Line | Count | Source | 
| 1 |  | /********************************************************************** | 
| 2 |  |  * Copyright (c) 2020 Pieter Wuille, Greg Maxwell, Gleb Naumenko      * | 
| 3 |  |  * Distributed under the MIT software license, see the accompanying   * | 
| 4 |  |  * file LICENSE or http://www.opensource.org/licenses/mit-license.php.* | 
| 5 |  |  **********************************************************************/ | 
| 6 |  |  | 
| 7 |  | #ifndef _MINISKETCH_FALSE_POSITIVES_H_ | 
| 8 |  | #define _MINISKETCH_FALSE_POSITIVES_H_ | 
| 9 |  |  | 
| 10 |  | #include "util.h" | 
| 11 |  |  | 
| 12 |  | #include "int_utils.h" | 
| 13 |  |  | 
| 14 |  | #include <stdint.h> | 
| 15 |  |  | 
| 16 |  | namespace { | 
| 17 |  |  | 
| 18 |  | /** Compute floor(log2(x!)), exactly up to x=57; an underestimate up to x=2^32-1. */ | 
| 19 | 0 | uint64_t Log2Factorial(uint32_t x) { | 
| 20 |  |     //! Values of floor(106*log2(1 + i/32)) for i=0..31 | 
| 21 | 0 |     static constexpr uint8_t T[32] = { | 
| 22 | 0 |         0, 4, 9, 13, 18, 22, 26, 30, 34, 37, 41, 45, 48, 52, 55, 58, 62, 65, 68, | 
| 23 | 0 |         71, 74, 77, 80, 82, 85, 88, 90, 93, 96, 98, 101, 103 | 
| 24 | 0 |     }; | 
| 25 | 0 |     int bits = CountBits(x, 32); | 
| 26 |  |     // Compute an (under)estimate of floor(106*log2(x)). | 
| 27 |  |     // This works by relying on floor(log2(x)) = countbits(x)-1, and adding | 
| 28 |  |     // precision using the top 6 bits of x (the highest one of which is always | 
| 29 |  |     // one). | 
| 30 | 0 |     unsigned l2_106 = 106 * (bits - 1) + T[((x << (32 - bits)) >> 26) & 31]; | 
| 31 |  |     // Based on Stirling approximation for log2(x!): | 
| 32 |  |     //   log2(x!) = log(x!) / log(2) | 
| 33 |  |     //            = ((x + 1/2) * log(x) - x + log(2*pi)/2 + ...) / log(2) | 
| 34 |  |     //            = (x + 1/2) * log2(x) - x/log(2) + log2(2*pi)/2 + ... | 
| 35 |  |     //            = 1/2*(2*x+1)*log2(x) - (1/log(2))*x + log2(2*pi)/2 + ... | 
| 36 |  |     //            = 1/212*(2*x+1)*(106*log2(x)) + (-1/log(2))*x + log2(2*pi)/2 + ... | 
| 37 |  |     // where 418079/88632748 is exactly 1/212 | 
| 38 |  |     //       -127870026/88632748 is slightly less than -1/log(2) | 
| 39 |  |     //       117504694/88632748 is less than log2(2*pi)/2 | 
| 40 |  |     // A correction term is only needed for x < 3. | 
| 41 |  |     // | 
| 42 |  |     // See doc/log2_factorial.sage for how these constants were obtained. | 
| 43 | 0 |     return (418079 * (2 * uint64_t{x} + 1) * l2_106 - 127870026 * uint64_t{x} + 117504694 + 88632748 * (x < 3)) / 88632748; | 
| 44 | 0 | } | 
| 45 |  |  | 
| 46 |  | /** Compute floor(log2(2^(bits * capacity) / sum((2^bits - 1) choose k, k=0..capacity))), for bits>1 | 
| 47 |  |  * | 
| 48 |  |  * See doc/gen_basefpbits.sage for how the tables were obtained. */ | 
| 49 | 0 | uint64_t BaseFPBits(uint32_t bits, uint32_t capacity) { | 
| 50 |  |     // Correction table for low bits/capacities | 
| 51 | 0 |     static constexpr uint8_t ADD5[] = {1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 12}; | 
| 52 | 0 |     static constexpr uint8_t ADD6[] = {1, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8, 10, 10, 11, 12, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 24, 24, 24, 24}; | 
| 53 | 0 |     static constexpr uint8_t ADD7[] = {1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 7, 8, 9, 9, 9, 10, 11, 11, 12, 12, 13, 13, 15, 15, 15, 16, 17, 17, 18, 19, 20, 20}; | 
| 54 | 0 |     static constexpr uint8_t ADD8[] = {1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 5, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9}; | 
| 55 | 0 |     static constexpr uint8_t ADD9[] = {1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 3, 3, 3, 3, 4, 3, 3, 4, 4, 4, 4}; | 
| 56 |  | 
 | 
| 57 | 0 |     if (capacity == 0) return 0; | 
| 58 | 0 |     uint64_t ret = 0; | 
| 59 | 0 |     if (bits < 32 && capacity >= (1U << bits)) { | 
| 60 | 0 |         ret = uint64_t{bits} * (capacity - (1U << bits) + 1); | 
| 61 | 0 |         capacity = (1U << bits) - 1; | 
| 62 | 0 |     } | 
| 63 | 0 |     ret += Log2Factorial(capacity); | 
| 64 | 0 |     switch (bits) { | 
| 65 | 0 |         case 2: return ret + (capacity <= 2 ? 0 : 1); | 
| 66 | 0 |         case 3: return ret + (capacity <= 2 ? 0 : (0x2a5 >> 2 * (capacity - 3)) & 3); | 
| 67 | 0 |         case 4: return ret + (capacity <= 3 ? 0 : (0xb6d91a449 >> 3 * (capacity - 4)) & 7); | 
| 68 | 0 |         case 5: return ret + (capacity <= 4 ? 0 : ADD5[capacity - 5]); | 
| 69 | 0 |         case 6: return ret + (capacity <= 4 ? 0 : capacity > 54 ? 25 : ADD6[capacity - 5]); | 
| 70 | 0 |         case 7: return ret + (capacity <= 4 ? 0 : capacity > 57 ? 21 : ADD7[capacity - 5]); | 
| 71 | 0 |         case 8: return ret + (capacity <= 9 ? 0 : capacity > 56 ? 10 : ADD8[capacity - 10]); | 
| 72 | 0 |         case 9: return ret + (capacity <= 11 ? 0 : capacity > 54 ? 5 : ADD9[capacity - 12]); | 
| 73 | 0 |         case 10: return ret + (capacity <= 21 ? 0 : capacity > 50 ? 2 : (0x1a6665545555041 >> 2 * (capacity - 22)) & 3); | 
| 74 | 0 |         case 11: return ret + (capacity <= 21 ? 0 : capacity > 45 ? 1 : (0x5b3dc1 >> (capacity - 22)) & 1); | 
| 75 | 0 |         case 12: return ret + (capacity <= 21 ? 0 : capacity > 57 ? 0 : (0xe65522041 >> (capacity - 22)) & 1); | 
| 76 | 0 |         case 13: return ret + (capacity <= 27 ? 0 : capacity > 55 ? 0 : (0x8904081 >> (capacity - 28)) & 1); | 
| 77 | 0 |         case 14: return ret + (capacity <= 47 ? 0 : capacity > 48 ? 0 : 1); | 
| 78 | 0 |         default: return ret; | 
| 79 | 0 |     } | 
| 80 | 0 | } | 
| 81 |  |  | 
| 82 | 0 | size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) { | 
| 83 | 0 |     if (bits == 0) return 0; | 
| 84 | 0 |     if (max_elements > 0xffffffff) return max_elements; | 
| 85 | 0 |     uint64_t base_fpbits = BaseFPBits(bits, static_cast<uint32_t>(max_elements)); | 
| 86 |  |     // The fpbits provided by the base max_elements==capacity case are sufficient. | 
| 87 | 0 |     if (base_fpbits >= fpbits) return max_elements; | 
| 88 |  |     // Otherwise, increment capacity by ceil(fpbits / bits) beyond that. | 
| 89 | 0 |     return max_elements + (fpbits - base_fpbits + bits - 1) / bits; | 
| 90 | 0 | } | 
| 91 |  |  | 
| 92 | 0 | size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) { | 
| 93 | 0 |     if (bits == 0) return 0; | 
| 94 | 0 |     if (capacity > 0xffffffff) return capacity; | 
| 95 |  |     // Start with max_elements=capacity, and decrease max_elements until the corresponding capacity is capacity. | 
| 96 | 0 |     size_t max_elements = capacity; | 
| 97 | 0 |     while (true) { | 
| 98 | 0 |         size_t capacity_for_max_elements = ComputeCapacity(bits, max_elements, fpbits); | 
| 99 | 0 |         CHECK_SAFE(capacity_for_max_elements >= capacity); | 
| 100 | 0 |         if (capacity_for_max_elements <= capacity) return max_elements; | 
| 101 | 0 |         size_t adjust = capacity_for_max_elements - capacity; | 
| 102 |  |         // Decrementing max_elements by N will at most decrement the corresponding capacity by N. | 
| 103 |  |         // As the observed capacity is adjust too high, we can safely decrease max_elements by adjust. | 
| 104 |  |         // If that brings us into negative max_elements territory, no solution exists and we return 0. | 
| 105 | 0 |         if (max_elements < adjust) return 0; | 
| 106 | 0 |         max_elements -= adjust; | 
| 107 | 0 |     } | 
| 108 | 0 | } | 
| 109 |  |  | 
| 110 |  | }  // namespace | 
| 111 |  |  | 
| 112 |  | #endif |