/root/bitcoin/src/bech32.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2017, 2021 Pieter Wuille |
2 | | // Copyright (c) 2021-2022 The Bitcoin Core developers |
3 | | // Distributed under the MIT software license, see the accompanying |
4 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
5 | | |
6 | | #include <bech32.h> |
7 | | #include <util/vector.h> |
8 | | |
9 | | #include <array> |
10 | | #include <assert.h> |
11 | | #include <numeric> |
12 | | #include <optional> |
13 | | |
14 | | namespace bech32 |
15 | | { |
16 | | |
17 | | namespace |
18 | | { |
19 | | |
20 | | typedef std::vector<uint8_t> data; |
21 | | |
22 | | /** The Bech32 and Bech32m character set for encoding. */ |
23 | | const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; |
24 | | |
25 | | /** The Bech32 and Bech32m character set for decoding. */ |
26 | | const int8_t CHARSET_REV[128] = { |
27 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
28 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
29 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
30 | | 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1, |
31 | | -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, |
32 | | 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1, |
33 | | -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1, |
34 | | 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1 |
35 | | }; |
36 | | |
37 | | /** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32) |
38 | | * The defining polynomial of the extension is x^2 + 9x + 23. |
39 | | * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024), |
40 | | * that is, a generator of the field. Every non-zero element of the field can then be represented |
41 | | * as (e)^k for some power k. |
42 | | * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024). |
43 | | * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so |
44 | | * GF1024_LOG[GF1024_EXP[k]] == k. |
45 | | * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */ |
46 | | constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables() |
47 | 0 | { |
48 | 0 | // Build table for GF(32). |
49 | 0 | // We use these tables to perform arithmetic in GF(32) below, when constructing the |
50 | 0 | // tables for GF(1024). |
51 | 0 | std::array<int8_t, 31> GF32_EXP{}; |
52 | 0 | std::array<int8_t, 32> GF32_LOG{}; |
53 | 0 |
|
54 | 0 | // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1. |
55 | 0 | // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001. |
56 | 0 | const int fmod = 41; |
57 | 0 |
|
58 | 0 | // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is, |
59 | 0 | // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial |
60 | 0 | // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod). |
61 | 0 | // For example, 00001 = 1 is the multiplicative identity. |
62 | 0 | GF32_EXP[0] = 1; |
63 | 0 | GF32_LOG[0] = -1; |
64 | 0 | GF32_LOG[1] = 0; |
65 | 0 | int v = 1; |
66 | 0 | for (int i = 1; i < 31; ++i) { |
67 | 0 | // Multiplication by x is the same as shifting left by 1, as |
68 | 0 | // every coefficient of the polynomial is moved up one place. |
69 | 0 | v = v << 1; |
70 | 0 | // If the polynomial now has an x^5 term, we subtract fmod from it |
71 | 0 | // to remain working modulo fmod. Subtraction is the same as XOR in characteristic |
72 | 0 | // 2 fields. |
73 | 0 | if (v & 32) v ^= fmod; |
74 | 0 | GF32_EXP[i] = v; |
75 | 0 | GF32_LOG[v] = i; |
76 | 0 | } |
77 | 0 |
|
78 | 0 | // Build table for GF(1024) |
79 | 0 | std::array<int16_t, 1023> GF1024_EXP{}; |
80 | 0 | std::array<int16_t, 1024> GF1024_LOG{}; |
81 | 0 |
|
82 | 0 | GF1024_EXP[0] = 1; |
83 | 0 | GF1024_LOG[0] = -1; |
84 | 0 | GF1024_LOG[1] = 0; |
85 | 0 |
|
86 | 0 | // Each element v of GF(1024) is encoded as a 10 bit integer in the following way: |
87 | 0 | // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)). |
88 | 0 | // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element |
89 | 0 | // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v), |
90 | 0 | // we compute (e)*(v) by multiplying in the following way: |
91 | 0 | // |
92 | 0 | // v0' = 23*v1 |
93 | 0 | // v1' = 9*v1 + v0 |
94 | 0 | // e*v = v1' || v0' |
95 | 0 | // |
96 | 0 | // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32) |
97 | 0 | // is done using the log/exp tables: |
98 | 0 | // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ] |
99 | 0 | // for non-zero a and b. |
100 | 0 |
|
101 | 0 | v = 1; |
102 | 0 | for (int i = 1; i < 1023; ++i) { |
103 | 0 | int v0 = v & 31; |
104 | 0 | int v1 = v >> 5; |
105 | 0 |
|
106 | 0 | int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0; |
107 | 0 | int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0; |
108 | 0 |
|
109 | 0 | v = v1n << 5 | v0n; |
110 | 0 | GF1024_EXP[i] = v; |
111 | 0 | GF1024_LOG[v] = i; |
112 | 0 | } |
113 | 0 |
|
114 | 0 | return std::make_pair(GF1024_EXP, GF1024_LOG); |
115 | 0 | } |
116 | | |
117 | | constexpr auto tables = GenerateGFTables(); |
118 | | constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first; |
119 | | constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second; |
120 | | |
121 | | /* Determine the final constant to use for the specified encoding. */ |
122 | 0 | uint32_t EncodingConstant(Encoding encoding) { |
123 | 0 | assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M); |
124 | 0 | return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3; |
125 | 0 | } |
126 | | |
127 | | /** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to |
128 | | * make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher |
129 | | * bits correspond to earlier values. */ |
130 | | uint32_t PolyMod(const data& v) |
131 | 0 | { |
132 | | // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an |
133 | | // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) = |
134 | | // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that |
135 | | // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...]. |
136 | | |
137 | | // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of |
138 | | // v(x) mod g(x), where g(x) is the Bech32 generator, |
139 | | // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way |
140 | | // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a |
141 | | // window of 1023 characters. Among the various possible BCH codes, one was selected to in |
142 | | // fact guarantee detection of up to 4 errors within a window of 89 characters. |
143 | | |
144 | | // Note that the coefficients are elements of GF(32), here represented as decimal numbers |
145 | | // between {}. In this finite field, addition is just XOR of the corresponding numbers. For |
146 | | // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires |
147 | | // treating the bits of values themselves as coefficients of a polynomial over a smaller field, |
148 | | // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} = |
149 | | // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a |
150 | | // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}. |
151 | | |
152 | | // During the course of the loop below, `c` contains the bitpacked coefficients of the |
153 | | // polynomial constructed from just the values of v that were processed so far, mod g(x). In |
154 | | // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of |
155 | | // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value |
156 | | // for `c`. |
157 | | |
158 | | // The following Sage code constructs the generator used: |
159 | | // |
160 | | // B = GF(2) # Binary field |
161 | | // BP.<b> = B[] # Polynomials over the binary field |
162 | | // F_mod = b**5 + b**3 + 1 |
163 | | // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition |
164 | | // FP.<x> = F[] # Polynomials over GF(32) |
165 | | // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23) |
166 | | // E.<e> = F.extension(E_mod) # GF(1024) extension field definition |
167 | | // for p in divisors(E.order() - 1): # Verify e has order 1023. |
168 | | // assert((e**p == 1) == (p % 1023 == 0)) |
169 | | // G = lcm([(e**i).minpoly() for i in range(997,1000)]) |
170 | | // print(G) # Print out the generator |
171 | | // |
172 | | // It demonstrates that g(x) is the least common multiple of the minimal polynomials |
173 | | // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024). |
174 | | // That guarantees it is, in fact, the generator of a primitive BCH code with cycle |
175 | | // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details. |
176 | |
|
177 | 0 | uint32_t c = 1; |
178 | 0 | for (const auto v_i : v) { |
179 | | // We want to update `c` to correspond to a polynomial with one extra term. If the initial |
180 | | // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to |
181 | | // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to |
182 | | // process. Simplifying: |
183 | | // c'(x) = (f(x) * x + v_i) mod g(x) |
184 | | // ((f(x) mod g(x)) * x + v_i) mod g(x) |
185 | | // (c(x) * x + v_i) mod g(x) |
186 | | // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute |
187 | | // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x) |
188 | | // = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x) |
189 | | // = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i |
190 | | // If we call (x^6 mod g(x)) = k(x), this can be written as |
191 | | // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x) |
192 | | |
193 | | // First, determine the value of c0: |
194 | 0 | uint8_t c0 = c >> 25; |
195 | | |
196 | | // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i: |
197 | 0 | c = ((c & 0x1ffffff) << 5) ^ v_i; |
198 | | |
199 | | // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be |
200 | | // computed using the following Sage code (continuing the code above): |
201 | | // |
202 | | // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers. |
203 | | // v = 0 |
204 | | // for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)): |
205 | | // v = v*32 + coef.integer_representation() |
206 | | // print("0x%x" % v) |
207 | | // |
208 | 0 | if (c0 & 1) c ^= 0x3b6a57b2; // k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18} |
209 | 0 | if (c0 & 2) c ^= 0x26508e6d; // {2}k(x) = {19}x^5 + {5}x^4 + x^3 + {3}x^2 + {19}x + {13} |
210 | 0 | if (c0 & 4) c ^= 0x1ea119fa; // {4}k(x) = {15}x^5 + {10}x^4 + {2}x^3 + {6}x^2 + {15}x + {26} |
211 | 0 | if (c0 & 8) c ^= 0x3d4233dd; // {8}k(x) = {30}x^5 + {20}x^4 + {4}x^3 + {12}x^2 + {30}x + {29} |
212 | 0 | if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 + x^4 + {8}x^3 + {24}x^2 + {21}x + {19} |
213 | |
|
214 | 0 | } |
215 | 0 | return c; |
216 | 0 | } |
217 | | |
218 | | /** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the |
219 | | * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999. |
220 | | * |
221 | | * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed |
222 | | * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid |
223 | | * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all |
224 | | * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j). |
225 | | * |
226 | | * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6 |
227 | | * |
228 | | * To compute R((e)^j), we are really computing: |
229 | | * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6 |
230 | | * |
231 | | * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed. |
232 | | * But even more than that, we can consider each coefficient as a bit-string. |
233 | | * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then: |
234 | | * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j) |
235 | | * where all the (2^i*(e)^j) are constants and can be precomputed. |
236 | | * |
237 | | * Then we just add each of these corresponding constants to our final value based on the |
238 | | * bit values b_i. This is exactly what is done in the Syndrome function below. |
239 | | */ |
240 | 0 | constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() { |
241 | 0 | std::array<uint32_t, 25> SYNDROME_CONSTS{}; |
242 | 0 | for (int k = 1; k < 6; ++k) { |
243 | 0 | for (int shift = 0; shift < 5; ++shift) { |
244 | 0 | int16_t b = GF1024_LOG.at(size_t{1} << shift); |
245 | 0 | int16_t c0 = GF1024_EXP.at((997*k + b) % 1023); |
246 | 0 | int16_t c1 = GF1024_EXP.at((998*k + b) % 1023); |
247 | 0 | int16_t c2 = GF1024_EXP.at((999*k + b) % 1023); |
248 | 0 | uint32_t c = c2 << 20 | c1 << 10 | c0; |
249 | 0 | int ind = 5*(k-1) + shift; |
250 | 0 | SYNDROME_CONSTS[ind] = c; |
251 | 0 | } |
252 | 0 | } |
253 | 0 | return SYNDROME_CONSTS; |
254 | 0 | } |
255 | | constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants(); |
256 | | |
257 | | /** |
258 | | * Syndrome returns the three values s_997, s_998, and s_999 described above, |
259 | | * packed into a 30-bit integer, where each group of 10 bits encodes one value. |
260 | | */ |
261 | 0 | uint32_t Syndrome(const uint32_t residue) { |
262 | | // low is the first 5 bits, corresponding to the r6 in the residue |
263 | | // (the constant term of the polynomial). |
264 | 0 | uint32_t low = residue & 0x1f; |
265 | | |
266 | | // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional. |
267 | 0 | uint32_t result = low ^ (low << 10) ^ (low << 20); |
268 | | |
269 | | // Then for each following bit, we add the corresponding precomputed constant if the bit is 1. |
270 | | // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10 |
271 | | // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in |
272 | | // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999) |
273 | | // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field. |
274 | 0 | for (int i = 0; i < 25; ++i) { |
275 | 0 | result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0); |
276 | 0 | } |
277 | 0 | return result; |
278 | 0 | } |
279 | | |
280 | | /** Convert to lower case. */ |
281 | | inline unsigned char LowerCase(unsigned char c) |
282 | 0 | { |
283 | 0 | return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c; |
284 | 0 | } |
285 | | |
286 | | /** Return indices of invalid characters in a Bech32 string. */ |
287 | | bool CheckCharacters(const std::string& str, std::vector<int>& errors) |
288 | 0 | { |
289 | 0 | bool lower = false, upper = false; |
290 | 0 | for (size_t i = 0; i < str.size(); ++i) { |
291 | 0 | unsigned char c{(unsigned char)(str[i])}; |
292 | 0 | if (c >= 'a' && c <= 'z') { |
293 | 0 | if (upper) { |
294 | 0 | errors.push_back(i); |
295 | 0 | } else { |
296 | 0 | lower = true; |
297 | 0 | } |
298 | 0 | } else if (c >= 'A' && c <= 'Z') { |
299 | 0 | if (lower) { |
300 | 0 | errors.push_back(i); |
301 | 0 | } else { |
302 | 0 | upper = true; |
303 | 0 | } |
304 | 0 | } else if (c < 33 || c > 126) { |
305 | 0 | errors.push_back(i); |
306 | 0 | } |
307 | 0 | } |
308 | 0 | return errors.empty(); |
309 | 0 | } |
310 | | |
311 | | std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values) |
312 | 0 | { |
313 | 0 | data ret; |
314 | 0 | ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE); |
315 | | |
316 | | /** Expand a HRP for use in checksum computation. */ |
317 | 0 | for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5); |
318 | 0 | ret.push_back(0); |
319 | 0 | for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f); |
320 | |
|
321 | 0 | ret.insert(ret.end(), values.begin(), values.end()); |
322 | |
|
323 | 0 | return ret; |
324 | 0 | } |
325 | | |
326 | | /** Verify a checksum. */ |
327 | | Encoding VerifyChecksum(const std::string& hrp, const data& values) |
328 | 0 | { |
329 | | // PolyMod computes what value to xor into the final values to make the checksum 0. However, |
330 | | // if we required that the checksum was 0, it would be the case that appending a 0 to a valid |
331 | | // list of values would result in a new valid list. For that reason, Bech32 requires the |
332 | | // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See |
333 | | // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details. |
334 | 0 | auto enc = PreparePolynomialCoefficients(hrp, values); |
335 | 0 | const uint32_t check = PolyMod(enc); |
336 | 0 | if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32; |
337 | 0 | if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M; |
338 | 0 | return Encoding::INVALID; |
339 | 0 | } |
340 | | |
341 | | /** Create a checksum. */ |
342 | | data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values) |
343 | 0 | { |
344 | 0 | auto enc = PreparePolynomialCoefficients(hrp, values); |
345 | 0 | enc.insert(enc.end(), CHECKSUM_SIZE, 0x00); |
346 | 0 | uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes. |
347 | 0 | data ret(CHECKSUM_SIZE); |
348 | 0 | for (size_t i = 0; i < CHECKSUM_SIZE; ++i) { |
349 | | // Convert the 5-bit groups in mod to checksum values. |
350 | 0 | ret[i] = (mod >> (5 * (5 - i))) & 31; |
351 | 0 | } |
352 | 0 | return ret; |
353 | 0 | } |
354 | | |
355 | | } // namespace |
356 | | |
357 | | /** Encode a Bech32 or Bech32m string. */ |
358 | 0 | std::string Encode(Encoding encoding, const std::string& hrp, const data& values) { |
359 | | // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder |
360 | | // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the |
361 | | // result will always be invalid. |
362 | 0 | for (const char& c : hrp) assert(c < 'A' || c > 'Z'); |
363 | | |
364 | 0 | std::string ret; |
365 | 0 | ret.reserve(hrp.size() + 1 + values.size() + CHECKSUM_SIZE); |
366 | 0 | ret += hrp; |
367 | 0 | ret += '1'; |
368 | 0 | for (const uint8_t& i : values) ret += CHARSET[i]; |
369 | 0 | for (const uint8_t& i : CreateChecksum(encoding, hrp, values)) ret += CHARSET[i]; |
370 | 0 | return ret; |
371 | 0 | } |
372 | | |
373 | | /** Decode a Bech32 or Bech32m string. */ |
374 | 0 | DecodeResult Decode(const std::string& str, CharLimit limit) { |
375 | 0 | std::vector<int> errors; |
376 | 0 | if (!CheckCharacters(str, errors)) return {}; |
377 | 0 | size_t pos = str.rfind('1'); |
378 | 0 | if (str.size() > limit) return {}; |
379 | 0 | if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) { |
380 | 0 | return {}; |
381 | 0 | } |
382 | 0 | data values(str.size() - 1 - pos); |
383 | 0 | for (size_t i = 0; i < str.size() - 1 - pos; ++i) { |
384 | 0 | unsigned char c = str[i + pos + 1]; |
385 | 0 | int8_t rev = CHARSET_REV[c]; |
386 | |
|
387 | 0 | if (rev == -1) { |
388 | 0 | return {}; |
389 | 0 | } |
390 | 0 | values[i] = rev; |
391 | 0 | } |
392 | 0 | std::string hrp; |
393 | 0 | hrp.reserve(pos); |
394 | 0 | for (size_t i = 0; i < pos; ++i) { |
395 | 0 | hrp += LowerCase(str[i]); |
396 | 0 | } |
397 | 0 | Encoding result = VerifyChecksum(hrp, values); |
398 | 0 | if (result == Encoding::INVALID) return {}; |
399 | 0 | return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)}; |
400 | 0 | } |
401 | | |
402 | | /** Find index of an incorrect character in a Bech32 string. */ |
403 | 0 | std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) { |
404 | 0 | std::vector<int> error_locations{}; |
405 | |
|
406 | 0 | if (str.size() > limit) { |
407 | 0 | error_locations.resize(str.size() - limit); |
408 | 0 | std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit)); |
409 | 0 | return std::make_pair("Bech32 string too long", std::move(error_locations)); |
410 | 0 | } |
411 | | |
412 | 0 | if (!CheckCharacters(str, error_locations)){ |
413 | 0 | return std::make_pair("Invalid character or mixed case", std::move(error_locations)); |
414 | 0 | } |
415 | | |
416 | 0 | size_t pos = str.rfind('1'); |
417 | 0 | if (pos == str.npos) { |
418 | 0 | return std::make_pair("Missing separator", std::vector<int>{}); |
419 | 0 | } |
420 | 0 | if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) { |
421 | 0 | error_locations.push_back(pos); |
422 | 0 | return std::make_pair("Invalid separator position", std::move(error_locations)); |
423 | 0 | } |
424 | | |
425 | 0 | std::string hrp; |
426 | 0 | hrp.reserve(pos); |
427 | 0 | for (size_t i = 0; i < pos; ++i) { |
428 | 0 | hrp += LowerCase(str[i]); |
429 | 0 | } |
430 | |
|
431 | 0 | size_t length = str.size() - 1 - pos; // length of data part |
432 | 0 | data values(length); |
433 | 0 | for (size_t i = pos + 1; i < str.size(); ++i) { |
434 | 0 | unsigned char c = str[i]; |
435 | 0 | int8_t rev = CHARSET_REV[c]; |
436 | 0 | if (rev == -1) { |
437 | 0 | error_locations.push_back(i); |
438 | 0 | return std::make_pair("Invalid Base 32 character", std::move(error_locations)); |
439 | 0 | } |
440 | 0 | values[i - pos - 1] = rev; |
441 | 0 | } |
442 | | |
443 | | // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors |
444 | | // We can't simply use the segwit version, because that may be one of the errors |
445 | 0 | std::optional<Encoding> error_encoding; |
446 | 0 | for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) { |
447 | 0 | std::vector<int> possible_errors; |
448 | | // Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial |
449 | | // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x). |
450 | 0 | auto enc = PreparePolynomialCoefficients(hrp, values); |
451 | 0 | uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding); |
452 | | |
453 | | // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding |
454 | | // constant) should be 0 - hence 0 indicates there are no errors present. |
455 | 0 | if (residue != 0) { |
456 | | // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid |
457 | | // codeword (a multiple of G(x)), and E encodes the errors. |
458 | 0 | uint32_t syn = Syndrome(residue); |
459 | | |
460 | | // Unpack the three 10-bit syndrome values |
461 | 0 | int s0 = syn & 0x3FF; |
462 | 0 | int s1 = (syn >> 10) & 0x3FF; |
463 | 0 | int s2 = syn >> 20; |
464 | | |
465 | | // Get the discrete logs of these values in GF1024 for more efficient computation |
466 | 0 | int l_s0 = GF1024_LOG.at(s0); |
467 | 0 | int l_s1 = GF1024_LOG.at(s1); |
468 | 0 | int l_s2 = GF1024_LOG.at(s2); |
469 | | |
470 | | // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1 |
471 | | // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1) |
472 | | // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too. |
473 | | // Hence, s1^2 == s0*s2, which is exactly the condition we check first: |
474 | 0 | if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) { |
475 | | // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023) |
476 | 0 | size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive |
477 | | // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1, |
478 | | // so 1/((e)^997) = (e)^(1023-997). |
479 | 0 | int l_e1 = l_s0 + (1023 - 997) * p1; |
480 | | // Finally, some sanity checks on the result: |
481 | | // - The error position should be within the length of the data |
482 | | // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements |
483 | | // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)). |
484 | 0 | if (p1 < length && !(l_e1 % 33)) { |
485 | | // Polynomials run from highest power to lowest, so the index p1 is from the right. |
486 | | // We don't return e1 because it is dangerous to suggest corrections to the user, |
487 | | // the user should check the address themselves. |
488 | 0 | possible_errors.push_back(str.size() - p1 - 1); |
489 | 0 | } |
490 | | // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2. |
491 | 0 | } else { |
492 | | // For all possible first error positions p1 |
493 | 0 | for (size_t p1 = 0; p1 < length; ++p1) { |
494 | | // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so |
495 | | // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2. |
496 | | // |
497 | | // Consider s2 + s1*(e)^p1 |
498 | | // = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 |
499 | | // = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1 |
500 | | // (Because we are working in characteristic 2.) |
501 | | // = e2*(e)^(998*p2) ((e)^p2 + (e)^p1) |
502 | | // |
503 | 0 | int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023)); |
504 | 0 | if (s2_s1p1 == 0) continue; |
505 | 0 | int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1); |
506 | | |
507 | | // Similarly, s1 + s0*(e)^p1 |
508 | | // = e2*(e)^(997*p2) ((e)^p2 + (e)^p1) |
509 | 0 | int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023)); |
510 | 0 | if (s1_s0p1 == 0) continue; |
511 | 0 | int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1); |
512 | | |
513 | | // So, putting these together, we can compute the second error position as |
514 | | // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1) |
515 | | // p2 = log((e)^p2) |
516 | 0 | size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023; |
517 | | |
518 | | // Sanity checks that p2 is a valid position and not the same as p1 |
519 | 0 | if (p2 >= length || p1 == p2) continue; |
520 | | |
521 | | // Now we want to compute the error values e1 and e2. |
522 | | // Similar to above, we compute s1 + s0*(e)^p2 |
523 | | // = e1*(e)^(997*p1) ((e)^p1 + (e)^p2) |
524 | 0 | int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023)); |
525 | 0 | if (s1_s0p2 == 0) continue; |
526 | 0 | int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2); |
527 | | |
528 | | // And compute (the log of) 1/((e)^p1 + (e)^p2)) |
529 | 0 | int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2)); |
530 | | |
531 | | // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2))) |
532 | | // = e2*(e)^(997*p2) |
533 | | // Then recover e2 by dividing by (e)^(997*p2) |
534 | 0 | int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2; |
535 | | // Check that e2 is in GF(32) |
536 | 0 | if (l_e2 % 33) continue; |
537 | | |
538 | | // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2))) |
539 | | // = e1*(e)^(997*p1) |
540 | | // So recover e1 by dividing by (e)^(997*p1) |
541 | 0 | int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1; |
542 | | // Check that e1 is in GF(32) |
543 | 0 | if (l_e1 % 33) continue; |
544 | | |
545 | | // Again, we do not return e1 or e2 for safety. |
546 | | // Order the error positions from the left of the string and return them |
547 | 0 | if (p1 > p2) { |
548 | 0 | possible_errors.push_back(str.size() - p1 - 1); |
549 | 0 | possible_errors.push_back(str.size() - p2 - 1); |
550 | 0 | } else { |
551 | 0 | possible_errors.push_back(str.size() - p2 - 1); |
552 | 0 | possible_errors.push_back(str.size() - p1 - 1); |
553 | 0 | } |
554 | 0 | break; |
555 | 0 | } |
556 | 0 | } |
557 | 0 | } else { |
558 | | // No errors |
559 | 0 | return std::make_pair("", std::vector<int>{}); |
560 | 0 | } |
561 | | |
562 | 0 | if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) { |
563 | 0 | error_locations = std::move(possible_errors); |
564 | 0 | if (!error_locations.empty()) error_encoding = encoding; |
565 | 0 | } |
566 | 0 | } |
567 | 0 | std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum" |
568 | 0 | : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum" |
569 | 0 | : "Invalid checksum"; |
570 | |
|
571 | 0 | return std::make_pair(error_message, std::move(error_locations)); |
572 | 0 | } |
573 | | |
574 | | } // namespace bech32 |