/root/bitcoin/src/bech32.cpp
| Line | Count | Source | 
| 1 |  | // Copyright (c) 2017, 2021 Pieter Wuille | 
| 2 |  | // Copyright (c) 2021-present The Bitcoin Core developers | 
| 3 |  | // Distributed under the MIT software license, see the accompanying | 
| 4 |  | // file COPYING or http://www.opensource.org/licenses/mit-license.php. | 
| 5 |  |  | 
| 6 |  | #include <bech32.h> | 
| 7 |  | #include <util/vector.h> | 
| 8 |  |  | 
| 9 |  | #include <array> | 
| 10 |  | #include <cassert> | 
| 11 |  | #include <numeric> | 
| 12 |  | #include <optional> | 
| 13 |  |  | 
| 14 |  | namespace bech32 | 
| 15 |  | { | 
| 16 |  |  | 
| 17 |  | namespace | 
| 18 |  | { | 
| 19 |  |  | 
| 20 |  | typedef std::vector<uint8_t> data; | 
| 21 |  |  | 
| 22 |  | /** The Bech32 and Bech32m character set for encoding. */ | 
| 23 |  | const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; | 
| 24 |  |  | 
| 25 |  | /** The Bech32 and Bech32m character set for decoding. */ | 
| 26 |  | const int8_t CHARSET_REV[128] = { | 
| 27 |  |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
| 28 |  |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
| 29 |  |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
| 30 |  |     15, -1, 10, 17, 21, 20, 26, 30,  7,  5, -1, -1, -1, -1, -1, -1, | 
| 31 |  |     -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1, | 
| 32 |  |      1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1, | 
| 33 |  |     -1, 29, -1, 24, 13, 25,  9,  8, 23, -1, 18, 22, 31, 27, 19, -1, | 
| 34 |  |      1,  0,  3, 16, 11, 28, 12, 14,  6,  4,  2, -1, -1, -1, -1, -1 | 
| 35 |  | }; | 
| 36 |  |  | 
| 37 |  | /** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32) | 
| 38 |  |  * The defining polynomial of the extension is x^2 + 9x + 23. | 
| 39 |  |  * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024), | 
| 40 |  |  * that is, a generator of the field. Every non-zero element of the field can then be represented | 
| 41 |  |  * as (e)^k for some power k. | 
| 42 |  |  * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024). | 
| 43 |  |  * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so | 
| 44 |  |  * GF1024_LOG[GF1024_EXP[k]] == k. | 
| 45 |  |  * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */ | 
| 46 |  | constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables() | 
| 47 | 0 | { | 
| 48 | 0 |     // Build table for GF(32). | 
| 49 | 0 |     // We use these tables to perform arithmetic in GF(32) below, when constructing the | 
| 50 | 0 |     // tables for GF(1024). | 
| 51 | 0 |     std::array<int8_t, 31> GF32_EXP{}; | 
| 52 | 0 |     std::array<int8_t, 32> GF32_LOG{}; | 
| 53 | 0 | 
 | 
| 54 | 0 |     // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1. | 
| 55 | 0 |     // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001. | 
| 56 | 0 |     const int fmod = 41; | 
| 57 | 0 | 
 | 
| 58 | 0 |     // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is, | 
| 59 | 0 |     // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial | 
| 60 | 0 |     // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod). | 
| 61 | 0 |     // For example, 00001 = 1 is the multiplicative identity. | 
| 62 | 0 |     GF32_EXP[0] = 1; | 
| 63 | 0 |     GF32_LOG[0] = -1; | 
| 64 | 0 |     GF32_LOG[1] = 0; | 
| 65 | 0 |     int v = 1; | 
| 66 | 0 |     for (int i = 1; i < 31; ++i) { | 
| 67 | 0 |         // Multiplication by x is the same as shifting left by 1, as | 
| 68 | 0 |         // every coefficient of the polynomial is moved up one place. | 
| 69 | 0 |         v = v << 1; | 
| 70 | 0 |         // If the polynomial now has an x^5 term, we subtract fmod from it | 
| 71 | 0 |         // to remain working modulo fmod. Subtraction is the same as XOR in characteristic | 
| 72 | 0 |         // 2 fields. | 
| 73 | 0 |         if (v & 32) v ^= fmod; | 
| 74 | 0 |         GF32_EXP[i] = v; | 
| 75 | 0 |         GF32_LOG[v] = i; | 
| 76 | 0 |     } | 
| 77 | 0 | 
 | 
| 78 | 0 |     // Build table for GF(1024) | 
| 79 | 0 |     std::array<int16_t, 1023> GF1024_EXP{}; | 
| 80 | 0 |     std::array<int16_t, 1024> GF1024_LOG{}; | 
| 81 | 0 | 
 | 
| 82 | 0 |     GF1024_EXP[0] = 1; | 
| 83 | 0 |     GF1024_LOG[0] = -1; | 
| 84 | 0 |     GF1024_LOG[1] = 0; | 
| 85 | 0 | 
 | 
| 86 | 0 |     // Each element v of GF(1024) is encoded as a 10 bit integer in the following way: | 
| 87 | 0 |     // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)). | 
| 88 | 0 |     // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element | 
| 89 | 0 |     // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v), | 
| 90 | 0 |     // we compute (e)*(v) by multiplying in the following way: | 
| 91 | 0 |     // | 
| 92 | 0 |     // v0' = 23*v1 | 
| 93 | 0 |     // v1' = 9*v1 + v0 | 
| 94 | 0 |     // e*v = v1' || v0' | 
| 95 | 0 |     // | 
| 96 | 0 |     // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32) | 
| 97 | 0 |     // is done using the log/exp tables: | 
| 98 | 0 |     // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ] | 
| 99 | 0 |     // for non-zero a and b. | 
| 100 | 0 | 
 | 
| 101 | 0 |     v = 1; | 
| 102 | 0 |     for (int i = 1; i < 1023; ++i) { | 
| 103 | 0 |         int v0 = v & 31; | 
| 104 | 0 |         int v1 = v >> 5; | 
| 105 | 0 | 
 | 
| 106 | 0 |         int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0; | 
| 107 | 0 |         int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0; | 
| 108 | 0 | 
 | 
| 109 | 0 |         v = v1n << 5 | v0n; | 
| 110 | 0 |         GF1024_EXP[i] = v; | 
| 111 | 0 |         GF1024_LOG[v] = i; | 
| 112 | 0 |     } | 
| 113 | 0 | 
 | 
| 114 | 0 |     return std::make_pair(GF1024_EXP, GF1024_LOG); | 
| 115 | 0 | } | 
| 116 |  |  | 
| 117 |  | constexpr auto tables = GenerateGFTables(); | 
| 118 |  | constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first; | 
| 119 |  | constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second; | 
| 120 |  |  | 
| 121 |  | /* Determine the final constant to use for the specified encoding. */ | 
| 122 | 0 | uint32_t EncodingConstant(Encoding encoding) { | 
| 123 | 0 |     assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M); | 
| 124 | 0 |     return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3; | 
| 125 | 0 | } | 
| 126 |  |  | 
| 127 |  | /** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to | 
| 128 |  |  *  make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher | 
| 129 |  |  *  bits correspond to earlier values. */ | 
| 130 |  | uint32_t PolyMod(const data& v) | 
| 131 | 0 | { | 
| 132 |  |     // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an | 
| 133 |  |     // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) = | 
| 134 |  |     // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that | 
| 135 |  |     // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...]. | 
| 136 |  |  | 
| 137 |  |     // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of | 
| 138 |  |     // v(x) mod g(x), where g(x) is the Bech32 generator, | 
| 139 |  |     // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way | 
| 140 |  |     // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a | 
| 141 |  |     // window of 1023 characters. Among the various possible BCH codes, one was selected to in | 
| 142 |  |     // fact guarantee detection of up to 4 errors within a window of 89 characters. | 
| 143 |  |  | 
| 144 |  |     // Note that the coefficients are elements of GF(32), here represented as decimal numbers | 
| 145 |  |     // between {}. In this finite field, addition is just XOR of the corresponding numbers. For | 
| 146 |  |     // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires | 
| 147 |  |     // treating the bits of values themselves as coefficients of a polynomial over a smaller field, | 
| 148 |  |     // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} = | 
| 149 |  |     // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a | 
| 150 |  |     // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}. | 
| 151 |  |  | 
| 152 |  |     // During the course of the loop below, `c` contains the bitpacked coefficients of the | 
| 153 |  |     // polynomial constructed from just the values of v that were processed so far, mod g(x). In | 
| 154 |  |     // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of | 
| 155 |  |     // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value | 
| 156 |  |     // for `c`. | 
| 157 |  |  | 
| 158 |  |     // The following Sage code constructs the generator used: | 
| 159 |  |     // | 
| 160 |  |     // B = GF(2) # Binary field | 
| 161 |  |     // BP.<b> = B[] # Polynomials over the binary field | 
| 162 |  |     // F_mod = b**5 + b**3 + 1 | 
| 163 |  |     // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition | 
| 164 |  |     // FP.<x> = F[] # Polynomials over GF(32) | 
| 165 |  |     // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23) | 
| 166 |  |     // E.<e> = F.extension(E_mod) # GF(1024) extension field definition | 
| 167 |  |     // for p in divisors(E.order() - 1): # Verify e has order 1023. | 
| 168 |  |     //    assert((e**p == 1) == (p % 1023 == 0)) | 
| 169 |  |     // G = lcm([(e**i).minpoly() for i in range(997,1000)]) | 
| 170 |  |     // print(G) # Print out the generator | 
| 171 |  |     // | 
| 172 |  |     // It demonstrates that g(x) is the least common multiple of the minimal polynomials | 
| 173 |  |     // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024). | 
| 174 |  |     // That guarantees it is, in fact, the generator of a primitive BCH code with cycle | 
| 175 |  |     // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details. | 
| 176 |  | 
 | 
| 177 | 0 |     uint32_t c = 1; | 
| 178 | 0 |     for (const auto v_i : v) { | 
| 179 |  |         // We want to update `c` to correspond to a polynomial with one extra term. If the initial | 
| 180 |  |         // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to | 
| 181 |  |         // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to | 
| 182 |  |         // process. Simplifying: | 
| 183 |  |         // c'(x) = (f(x) * x + v_i) mod g(x) | 
| 184 |  |         //         ((f(x) mod g(x)) * x + v_i) mod g(x) | 
| 185 |  |         //         (c(x) * x + v_i) mod g(x) | 
| 186 |  |         // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute | 
| 187 |  |         // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x) | 
| 188 |  |         //       = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x) | 
| 189 |  |         //       = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i | 
| 190 |  |         // If we call (x^6 mod g(x)) = k(x), this can be written as | 
| 191 |  |         // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x) | 
| 192 |  |  | 
| 193 |  |         // First, determine the value of c0: | 
| 194 | 0 |         uint8_t c0 = c >> 25; | 
| 195 |  |  | 
| 196 |  |         // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i: | 
| 197 | 0 |         c = ((c & 0x1ffffff) << 5) ^ v_i; | 
| 198 |  |  | 
| 199 |  |         // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be | 
| 200 |  |         // computed using the following Sage code (continuing the code above): | 
| 201 |  |         // | 
| 202 |  |         // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers. | 
| 203 |  |         //     v = 0 | 
| 204 |  |         //     for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)): | 
| 205 |  |         //         v = v*32 + coef.integer_representation() | 
| 206 |  |         //     print("0x%x" % v) | 
| 207 |  |         // | 
| 208 | 0 |         if (c0 & 1)  c ^= 0x3b6a57b2; //     k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18} | 
| 209 | 0 |         if (c0 & 2)  c ^= 0x26508e6d; //  {2}k(x) = {19}x^5 +  {5}x^4 +     x^3 +  {3}x^2 + {19}x + {13} | 
| 210 | 0 |         if (c0 & 4)  c ^= 0x1ea119fa; //  {4}k(x) = {15}x^5 + {10}x^4 +  {2}x^3 +  {6}x^2 + {15}x + {26} | 
| 211 | 0 |         if (c0 & 8)  c ^= 0x3d4233dd; //  {8}k(x) = {30}x^5 + {20}x^4 +  {4}x^3 + {12}x^2 + {30}x + {29} | 
| 212 | 0 |         if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 +     x^4 +  {8}x^3 + {24}x^2 + {21}x + {19} | 
| 213 |  | 
 | 
| 214 | 0 |     } | 
| 215 | 0 |     return c; | 
| 216 | 0 | } | 
| 217 |  |  | 
| 218 |  | /** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the | 
| 219 |  |  * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999. | 
| 220 |  |  * | 
| 221 |  |  * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed | 
| 222 |  |  * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid | 
| 223 |  |  * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all | 
| 224 |  |  * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j). | 
| 225 |  |  * | 
| 226 |  |  * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6 | 
| 227 |  |  * | 
| 228 |  |  * To compute R((e)^j), we are really computing: | 
| 229 |  |  * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6 | 
| 230 |  |  * | 
| 231 |  |  * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed. | 
| 232 |  |  * But even more than that, we can consider each coefficient as a bit-string. | 
| 233 |  |  * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then: | 
| 234 |  |  * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j) | 
| 235 |  |  * where all the (2^i*(e)^j) are constants and can be precomputed. | 
| 236 |  |  * | 
| 237 |  |  * Then we just add each of these corresponding constants to our final value based on the | 
| 238 |  |  * bit values b_i. This is exactly what is done in the Syndrome function below. | 
| 239 |  |  */ | 
| 240 | 0 | constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() { | 
| 241 | 0 |     std::array<uint32_t, 25> SYNDROME_CONSTS{}; | 
| 242 | 0 |     for (int k = 1; k < 6; ++k) { | 
| 243 | 0 |         for (int shift = 0; shift < 5; ++shift) { | 
| 244 | 0 |             int16_t b = GF1024_LOG.at(size_t{1} << shift); | 
| 245 | 0 |             int16_t c0 = GF1024_EXP.at((997*k + b) % 1023); | 
| 246 | 0 |             int16_t c1 = GF1024_EXP.at((998*k + b) % 1023); | 
| 247 | 0 |             int16_t c2 = GF1024_EXP.at((999*k + b) % 1023); | 
| 248 | 0 |             uint32_t c = c2 << 20 | c1 << 10 | c0; | 
| 249 | 0 |             int ind = 5*(k-1) + shift; | 
| 250 | 0 |             SYNDROME_CONSTS[ind] = c; | 
| 251 | 0 |         } | 
| 252 | 0 |     } | 
| 253 | 0 |     return SYNDROME_CONSTS; | 
| 254 | 0 | } | 
| 255 |  | constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants(); | 
| 256 |  |  | 
| 257 |  | /** | 
| 258 |  |  * Syndrome returns the three values s_997, s_998, and s_999 described above, | 
| 259 |  |  * packed into a 30-bit integer, where each group of 10 bits encodes one value. | 
| 260 |  |  */ | 
| 261 | 0 | uint32_t Syndrome(const uint32_t residue) { | 
| 262 |  |     // low is the first 5 bits, corresponding to the r6 in the residue | 
| 263 |  |     // (the constant term of the polynomial). | 
| 264 | 0 |     uint32_t low = residue & 0x1f; | 
| 265 |  |  | 
| 266 |  |     // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional. | 
| 267 | 0 |     uint32_t result = low ^ (low << 10) ^ (low << 20); | 
| 268 |  |  | 
| 269 |  |     // Then for each following bit, we add the corresponding precomputed constant if the bit is 1. | 
| 270 |  |     // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10 | 
| 271 |  |     // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in | 
| 272 |  |     // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999) | 
| 273 |  |     // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field. | 
| 274 | 0 |     for (int i = 0; i < 25; ++i) { | 
| 275 | 0 |         result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0); | 
| 276 | 0 |     } | 
| 277 | 0 |     return result; | 
| 278 | 0 | } | 
| 279 |  |  | 
| 280 |  | /** Convert to lower case. */ | 
| 281 |  | inline unsigned char LowerCase(unsigned char c) | 
| 282 | 0 | { | 
| 283 | 0 |     return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c; | 
| 284 | 0 | } | 
| 285 |  |  | 
| 286 |  | /** Return indices of invalid characters in a Bech32 string. */ | 
| 287 |  | bool CheckCharacters(const std::string& str, std::vector<int>& errors) | 
| 288 | 0 | { | 
| 289 | 0 |     bool lower = false, upper = false; | 
| 290 | 0 |     for (size_t i = 0; i < str.size(); ++i) { | 
| 291 | 0 |         unsigned char c{(unsigned char)(str[i])}; | 
| 292 | 0 |         if (c >= 'a' && c <= 'z') { | 
| 293 | 0 |             if (upper) { | 
| 294 | 0 |                 errors.push_back(i); | 
| 295 | 0 |             } else { | 
| 296 | 0 |                 lower = true; | 
| 297 | 0 |             } | 
| 298 | 0 |         } else if (c >= 'A' && c <= 'Z') { | 
| 299 | 0 |             if (lower) { | 
| 300 | 0 |                 errors.push_back(i); | 
| 301 | 0 |             } else { | 
| 302 | 0 |                 upper = true; | 
| 303 | 0 |             } | 
| 304 | 0 |         } else if (c < 33 || c > 126) { | 
| 305 | 0 |             errors.push_back(i); | 
| 306 | 0 |         } | 
| 307 | 0 |     } | 
| 308 | 0 |     return errors.empty(); | 
| 309 | 0 | } | 
| 310 |  |  | 
| 311 |  | std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values) | 
| 312 | 0 | { | 
| 313 | 0 |     data ret; | 
| 314 | 0 |     ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE); | 
| 315 |  |  | 
| 316 |  |     /** Expand a HRP for use in checksum computation. */ | 
| 317 | 0 |     for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5); | 
| 318 | 0 |     ret.push_back(0); | 
| 319 | 0 |     for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f); | 
| 320 |  | 
 | 
| 321 | 0 |     ret.insert(ret.end(), values.begin(), values.end()); | 
| 322 |  | 
 | 
| 323 | 0 |     return ret; | 
| 324 | 0 | } | 
| 325 |  |  | 
| 326 |  | /** Verify a checksum. */ | 
| 327 |  | Encoding VerifyChecksum(const std::string& hrp, const data& values) | 
| 328 | 0 | { | 
| 329 |  |     // PolyMod computes what value to xor into the final values to make the checksum 0. However, | 
| 330 |  |     // if we required that the checksum was 0, it would be the case that appending a 0 to a valid | 
| 331 |  |     // list of values would result in a new valid list. For that reason, Bech32 requires the | 
| 332 |  |     // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See | 
| 333 |  |     // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details. | 
| 334 | 0 |     auto enc = PreparePolynomialCoefficients(hrp, values); | 
| 335 | 0 |     const uint32_t check = PolyMod(enc); | 
| 336 | 0 |     if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32; | 
| 337 | 0 |     if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M; | 
| 338 | 0 |     return Encoding::INVALID; | 
| 339 | 0 | } | 
| 340 |  |  | 
| 341 |  | /** Create a checksum. */ | 
| 342 |  | data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values) | 
| 343 | 0 | { | 
| 344 | 0 |     auto enc = PreparePolynomialCoefficients(hrp, values); | 
| 345 | 0 |     enc.insert(enc.end(), CHECKSUM_SIZE, 0x00); | 
| 346 | 0 |     uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes. | 
| 347 | 0 |     data ret(CHECKSUM_SIZE); | 
| 348 | 0 |     for (size_t i = 0; i < CHECKSUM_SIZE; ++i) { | 
| 349 |  |         // Convert the 5-bit groups in mod to checksum values. | 
| 350 | 0 |         ret[i] = (mod >> (5 * (5 - i))) & 31; | 
| 351 | 0 |     } | 
| 352 | 0 |     return ret; | 
| 353 | 0 | } | 
| 354 |  |  | 
| 355 |  | } // namespace | 
| 356 |  |  | 
| 357 |  | /** Encode a Bech32 or Bech32m string. */ | 
| 358 | 0 | std::string Encode(Encoding encoding, const std::string& hrp, const data& values) { | 
| 359 |  |     // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder | 
| 360 |  |     // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the | 
| 361 |  |     // result will always be invalid. | 
| 362 | 0 |     for (const char& c : hrp) assert(c < 'A' || c > 'Z'); | 
| 363 |  |  | 
| 364 | 0 |     std::string ret; | 
| 365 | 0 |     ret.reserve(hrp.size() + 1 + values.size() + CHECKSUM_SIZE); | 
| 366 | 0 |     ret += hrp; | 
| 367 | 0 |     ret += SEPARATOR; | 
| 368 | 0 |     for (const uint8_t& i : values) ret += CHARSET[i]; | 
| 369 | 0 |     for (const uint8_t& i : CreateChecksum(encoding, hrp, values)) ret += CHARSET[i]; | 
| 370 | 0 |     return ret; | 
| 371 | 0 | } | 
| 372 |  |  | 
| 373 |  | /** Decode a Bech32 or Bech32m string. */ | 
| 374 | 0 | DecodeResult Decode(const std::string& str, CharLimit limit) { | 
| 375 | 0 |     std::vector<int> errors; | 
| 376 | 0 |     if (!CheckCharacters(str, errors)) return {}; | 
| 377 | 0 |     size_t pos = str.rfind(SEPARATOR); | 
| 378 | 0 |     if (str.size() > limit) return {}; | 
| 379 | 0 |     if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) { | 
| 380 | 0 |         return {}; | 
| 381 | 0 |     } | 
| 382 | 0 |     data values(str.size() - 1 - pos); | 
| 383 | 0 |     for (size_t i = 0; i < str.size() - 1 - pos; ++i) { | 
| 384 | 0 |         unsigned char c = str[i + pos + 1]; | 
| 385 | 0 |         int8_t rev = CHARSET_REV[c]; | 
| 386 |  | 
 | 
| 387 | 0 |         if (rev == -1) { | 
| 388 | 0 |             return {}; | 
| 389 | 0 |         } | 
| 390 | 0 |         values[i] = rev; | 
| 391 | 0 |     } | 
| 392 | 0 |     std::string hrp; | 
| 393 | 0 |     hrp.reserve(pos); | 
| 394 | 0 |     for (size_t i = 0; i < pos; ++i) { | 
| 395 | 0 |         hrp += LowerCase(str[i]); | 
| 396 | 0 |     } | 
| 397 | 0 |     Encoding result = VerifyChecksum(hrp, values); | 
| 398 | 0 |     if (result == Encoding::INVALID) return {}; | 
| 399 | 0 |     return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)}; | 
| 400 | 0 | } | 
| 401 |  |  | 
| 402 |  | /** Find index of an incorrect character in a Bech32 string. */ | 
| 403 | 0 | std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) { | 
| 404 | 0 |     std::vector<int> error_locations{}; | 
| 405 |  | 
 | 
| 406 | 0 |     if (str.size() > limit) { | 
| 407 | 0 |         error_locations.resize(str.size() - limit); | 
| 408 | 0 |         std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit)); | 
| 409 | 0 |         return std::make_pair("Bech32 string too long", std::move(error_locations)); | 
| 410 | 0 |     } | 
| 411 |  |  | 
| 412 | 0 |     if (!CheckCharacters(str, error_locations)){ | 
| 413 | 0 |         return std::make_pair("Invalid character or mixed case", std::move(error_locations)); | 
| 414 | 0 |     } | 
| 415 |  |  | 
| 416 | 0 |     size_t pos = str.rfind(SEPARATOR); | 
| 417 | 0 |     if (pos == str.npos) { | 
| 418 | 0 |         return std::make_pair("Missing separator", std::vector<int>{}); | 
| 419 | 0 |     } | 
| 420 | 0 |     if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) { | 
| 421 | 0 |         error_locations.push_back(pos); | 
| 422 | 0 |         return std::make_pair("Invalid separator position", std::move(error_locations)); | 
| 423 | 0 |     } | 
| 424 |  |  | 
| 425 | 0 |     std::string hrp; | 
| 426 | 0 |     hrp.reserve(pos); | 
| 427 | 0 |     for (size_t i = 0; i < pos; ++i) { | 
| 428 | 0 |         hrp += LowerCase(str[i]); | 
| 429 | 0 |     } | 
| 430 |  | 
 | 
| 431 | 0 |     size_t length = str.size() - 1 - pos; // length of data part | 
| 432 | 0 |     data values(length); | 
| 433 | 0 |     for (size_t i = pos + 1; i < str.size(); ++i) { | 
| 434 | 0 |         unsigned char c = str[i]; | 
| 435 | 0 |         int8_t rev = CHARSET_REV[c]; | 
| 436 | 0 |         if (rev == -1) { | 
| 437 | 0 |             error_locations.push_back(i); | 
| 438 | 0 |             return std::make_pair("Invalid Base 32 character", std::move(error_locations)); | 
| 439 | 0 |         } | 
| 440 | 0 |         values[i - pos - 1] = rev; | 
| 441 | 0 |     } | 
| 442 |  |  | 
| 443 |  |     // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors | 
| 444 |  |     // We can't simply use the segwit version, because that may be one of the errors | 
| 445 | 0 |     std::optional<Encoding> error_encoding; | 
| 446 | 0 |     for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) { | 
| 447 | 0 |         std::vector<int> possible_errors; | 
| 448 |  |         // Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial | 
| 449 |  |         // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x). | 
| 450 | 0 |         auto enc = PreparePolynomialCoefficients(hrp, values); | 
| 451 | 0 |         uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding); | 
| 452 |  |  | 
| 453 |  |         // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding | 
| 454 |  |         // constant) should be 0 - hence 0 indicates there are no errors present. | 
| 455 | 0 |         if (residue != 0) { | 
| 456 |  |             // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid | 
| 457 |  |             // codeword (a multiple of G(x)), and E encodes the errors. | 
| 458 | 0 |             uint32_t syn = Syndrome(residue); | 
| 459 |  |  | 
| 460 |  |             // Unpack the three 10-bit syndrome values | 
| 461 | 0 |             int s0 = syn & 0x3FF; | 
| 462 | 0 |             int s1 = (syn >> 10) & 0x3FF; | 
| 463 | 0 |             int s2 = syn >> 20; | 
| 464 |  |  | 
| 465 |  |             // Get the discrete logs of these values in GF1024 for more efficient computation | 
| 466 | 0 |             int l_s0 = GF1024_LOG.at(s0); | 
| 467 | 0 |             int l_s1 = GF1024_LOG.at(s1); | 
| 468 | 0 |             int l_s2 = GF1024_LOG.at(s2); | 
| 469 |  |  | 
| 470 |  |             // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1 | 
| 471 |  |             // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1) | 
| 472 |  |             // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too. | 
| 473 |  |             // Hence, s1^2 == s0*s2, which is exactly the condition we check first: | 
| 474 | 0 |             if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) { | 
| 475 |  |                 // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023) | 
| 476 | 0 |                 size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive | 
| 477 |  |                 // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1, | 
| 478 |  |                 // so 1/((e)^997) = (e)^(1023-997). | 
| 479 | 0 |                 int l_e1 = l_s0 + (1023 - 997) * p1; | 
| 480 |  |                 // Finally, some sanity checks on the result: | 
| 481 |  |                 // - The error position should be within the length of the data | 
| 482 |  |                 // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements | 
| 483 |  |                 // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)). | 
| 484 | 0 |                 if (p1 < length && !(l_e1 % 33)) { | 
| 485 |  |                     // Polynomials run from highest power to lowest, so the index p1 is from the right. | 
| 486 |  |                     // We don't return e1 because it is dangerous to suggest corrections to the user, | 
| 487 |  |                     // the user should check the address themselves. | 
| 488 | 0 |                     possible_errors.push_back(str.size() - p1 - 1); | 
| 489 | 0 |                 } | 
| 490 |  |             // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2. | 
| 491 | 0 |             } else { | 
| 492 |  |                 // For all possible first error positions p1 | 
| 493 | 0 |                 for (size_t p1 = 0; p1 < length; ++p1) { | 
| 494 |  |                     // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so | 
| 495 |  |                     // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2. | 
| 496 |  |                     // | 
| 497 |  |                     // Consider s2 + s1*(e)^p1 | 
| 498 |  |                     //          = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 | 
| 499 |  |                     //          = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 | 
| 500 |  |                     //    (Because we are working in characteristic 2.) | 
| 501 |  |                     //          = e2*(e)^(998*p2) ((e)^p2 + (e)^p1) | 
| 502 |  |                     // | 
| 503 | 0 |                     int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023)); | 
| 504 | 0 |                     if (s2_s1p1 == 0) continue; | 
| 505 | 0 |                     int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1); | 
| 506 |  |  | 
| 507 |  |                     // Similarly, s1 + s0*(e)^p1 | 
| 508 |  |                     //          = e2*(e)^(997*p2) ((e)^p2 + (e)^p1) | 
| 509 | 0 |                     int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023)); | 
| 510 | 0 |                     if (s1_s0p1 == 0) continue; | 
| 511 | 0 |                     int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1); | 
| 512 |  |  | 
| 513 |  |                     // So, putting these together, we can compute the second error position as | 
| 514 |  |                     // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1) | 
| 515 |  |                     // p2 = log((e)^p2) | 
| 516 | 0 |                     size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023; | 
| 517 |  |  | 
| 518 |  |                     // Sanity checks that p2 is a valid position and not the same as p1 | 
| 519 | 0 |                     if (p2 >= length || p1 == p2) continue; | 
| 520 |  |  | 
| 521 |  |                     // Now we want to compute the error values e1 and e2. | 
| 522 |  |                     // Similar to above, we compute s1 + s0*(e)^p2 | 
| 523 |  |                     //          = e1*(e)^(997*p1) ((e)^p1 + (e)^p2) | 
| 524 | 0 |                     int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023)); | 
| 525 | 0 |                     if (s1_s0p2 == 0) continue; | 
| 526 | 0 |                     int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2); | 
| 527 |  |  | 
| 528 |  |                     // And compute (the log of) 1/((e)^p1 + (e)^p2)) | 
| 529 | 0 |                     int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2)); | 
| 530 |  |  | 
| 531 |  |                     // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2))) | 
| 532 |  |                     //         = e2*(e)^(997*p2) | 
| 533 |  |                     // Then recover e2 by dividing by (e)^(997*p2) | 
| 534 | 0 |                     int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2; | 
| 535 |  |                     // Check that e2 is in GF(32) | 
| 536 | 0 |                     if (l_e2 % 33) continue; | 
| 537 |  |  | 
| 538 |  |                     // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2))) | 
| 539 |  |                     //         = e1*(e)^(997*p1) | 
| 540 |  |                     // So recover e1 by dividing by (e)^(997*p1) | 
| 541 | 0 |                     int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1; | 
| 542 |  |                     // Check that e1 is in GF(32) | 
| 543 | 0 |                     if (l_e1 % 33) continue; | 
| 544 |  |  | 
| 545 |  |                     // Again, we do not return e1 or e2 for safety. | 
| 546 |  |                     // Order the error positions from the left of the string and return them | 
| 547 | 0 |                     if (p1 > p2) { | 
| 548 | 0 |                         possible_errors.push_back(str.size() - p1 - 1); | 
| 549 | 0 |                         possible_errors.push_back(str.size() - p2 - 1); | 
| 550 | 0 |                     } else { | 
| 551 | 0 |                         possible_errors.push_back(str.size() - p2 - 1); | 
| 552 | 0 |                         possible_errors.push_back(str.size() - p1 - 1); | 
| 553 | 0 |                     } | 
| 554 | 0 |                     break; | 
| 555 | 0 |                 } | 
| 556 | 0 |             } | 
| 557 | 0 |         } else { | 
| 558 |  |             // No errors | 
| 559 | 0 |             return std::make_pair("", std::vector<int>{}); | 
| 560 | 0 |         } | 
| 561 |  |  | 
| 562 | 0 |         if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) { | 
| 563 | 0 |             error_locations = std::move(possible_errors); | 
| 564 | 0 |             if (!error_locations.empty()) error_encoding = encoding; | 
| 565 | 0 |         } | 
| 566 | 0 |     } | 
| 567 | 0 |     std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum" | 
| 568 | 0 |                               : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum" | 
| 569 | 0 |                               : "Invalid checksum"; | 
| 570 |  | 
 | 
| 571 | 0 |     return std::make_pair(error_message, std::move(error_locations)); | 
| 572 | 0 | } | 
| 573 |  |  | 
| 574 |  | } // namespace bech32 |