/Users/mcomp/contrib/bitcoin/src/test/util/cluster_linearize.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) The Bitcoin Core developers |
2 | | // Distributed under the MIT software license, see the accompanying |
3 | | // file COPYING or http://www.opensource.org/licenses/mit-license.php. |
4 | | |
5 | | #ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H |
6 | | #define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H |
7 | | |
8 | | #include <cluster_linearize.h> |
9 | | #include <serialize.h> |
10 | | #include <span.h> |
11 | | #include <streams.h> |
12 | | #include <util/bitset.h> |
13 | | #include <util/feefrac.h> |
14 | | |
15 | | #include <stdint.h> |
16 | | #include <numeric> |
17 | | #include <vector> |
18 | | #include <utility> |
19 | | |
20 | | namespace { |
21 | | |
22 | | using namespace cluster_linearize; |
23 | | |
24 | | using TestBitSet = BitSet<32>; |
25 | | |
26 | | /** A formatter for a bespoke serialization for acyclic DepGraph objects. |
27 | | * |
28 | | * The serialization format outputs information about transactions in a topological order (parents |
29 | | * before children), together with position information so transactions can be moved back to their |
30 | | * correct position on deserialization. |
31 | | * |
32 | | * - For each transaction t in the DepGraph (in some topological order); |
33 | | * - The size: VARINT(t.size), which cannot be 0. |
34 | | * - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned. |
35 | | * - For each direct dependency: |
36 | | * - VARINT(skip) |
37 | | * - The position of t in the cluster: VARINT(skip) |
38 | | * - The end of the graph: VARINT(0) |
39 | | * |
40 | | * The list of skip values encodes the dependencies of t, as well as its position in the cluster. |
41 | | * Each skip value is the number of possibilities that were available, but were not taken. These |
42 | | * possibilities are, in order: |
43 | | * - For each previous transaction in the graph, in reverse serialization order, whether it is a |
44 | | * direct parent of t (but excluding transactions which are already implied to be dependencies |
45 | | * by parent relations that were serialized before it). |
46 | | * - The various insertion positions in the cluster, from the very end of the cluster, to the |
47 | | * front. |
48 | | * - The appending of 1, 2, 3, ... holes at the end of the cluster, followed by appending the new |
49 | | * transaction. |
50 | | * |
51 | | * Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,_,G,E,_,D |
52 | | * (where _ represent holes; unused positions within the DepGraph) but serialized in order |
53 | | * A,B,C,D,E,F,G, because that happens to be a topological ordering. By the time G gets serialized, |
54 | | * what has been serialized already represents the cluster F,A,C,B,_,E,_,D (in that order). G has B |
55 | | * and E as direct parents, and E depends on C. |
56 | | * |
57 | | * In this case, the possibilities are, in order: |
58 | | * - [ ] the dependency G->F |
59 | | * - [X] the dependency G->E |
60 | | * - [ ] the dependency G->D |
61 | | * - [X] the dependency G->B |
62 | | * - [ ] the dependency G->A |
63 | | * - [ ] put G at the end of the cluster |
64 | | * - [ ] put G before D |
65 | | * - [ ] put G before the hole before D |
66 | | * - [X] put G before E |
67 | | * - [ ] put G before the hole before E |
68 | | * - [ ] put G before B |
69 | | * - [ ] put G before C |
70 | | * - [ ] put G before A |
71 | | * - [ ] put G before F |
72 | | * - [ ] add 1 hole at the end of the cluster, followed by G |
73 | | * - [ ] add 2 holes at the end of the cluster, followed by G |
74 | | * - [ ] add ... |
75 | | * |
76 | | * The skip values in this case are 1 (G->F), 1 (G->D), 4 (G->A, G at end, G before D, G before |
77 | | * hole). No skip after 4 is needed (or permitted), because there can only be one position for G. |
78 | | * Also note that G->C is not included in the list of possibilities, as it is implied by the |
79 | | * included G->E and E->C that came before it. On deserialization, if the last skip value was 8 or |
80 | | * larger (putting G before the beginning of the cluster), it is interpreted as wrapping around |
81 | | * back to the end. |
82 | | * |
83 | | * |
84 | | * Rationale: |
85 | | * - Why VARINTs? They are flexible enough to represent large numbers where needed, but more |
86 | | * compact for smaller numbers. The serialization format is designed so that simple structures |
87 | | * involve smaller numbers, so smaller size maps to simpler graphs. |
88 | | * - Why use SignedToUnsigned? It results in small unsigned values for signed values with small |
89 | | * absolute value. This way we can encode negative fees in graphs, but still let small negative |
90 | | * numbers have small encodings. |
91 | | * - Why are the parents emitted in reverse order compared to the transactions themselves? This |
92 | | * naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies. |
93 | | * - Why encode skip values and not a bitmask to convey the list positions? It turns out that the |
94 | | * most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per |
95 | | * transaction. The current encoding uses ~1 byte per transaction for dependencies in this case, |
96 | | * while a bitmask would require ~N/2 bits per transaction. |
97 | | */ |
98 | | |
99 | | struct DepGraphFormatter |
100 | | { |
101 | | /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */ |
102 | | [[maybe_unused]] static uint64_t SignedToUnsigned(int64_t x) noexcept |
103 | 0 | { |
104 | 0 | if (x < 0) { |
105 | 0 | return 2 * uint64_t(-(x + 1)) + 1; |
106 | 0 | } else { |
107 | 0 | return 2 * uint64_t(x); |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */ |
112 | | [[maybe_unused]] static int64_t UnsignedToSigned(uint64_t x) noexcept |
113 | 0 | { |
114 | 0 | if (x & 1) { |
115 | 0 | return -int64_t(x / 2) - 1; |
116 | 0 | } else { |
117 | 0 | return int64_t(x / 2); |
118 | 0 | } |
119 | 0 | } |
120 | | |
121 | | template <typename Stream, typename SetType> |
122 | | static void Ser(Stream& s, const DepGraph<SetType>& depgraph) |
123 | 0 | { |
124 | | /** Construct a topological order to serialize the transactions in. */ |
125 | 0 | std::vector<DepGraphIndex> topo_order; |
126 | 0 | topo_order.reserve(depgraph.TxCount()); |
127 | 0 | for (auto i : depgraph.Positions()) topo_order.push_back(i); |
128 | 0 | std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) { |
129 | 0 | auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count(); |
130 | 0 | if (anc_a != anc_b) return anc_a < anc_b; |
131 | 0 | return a < b; |
132 | 0 | }); |
133 | | |
134 | | /** Which positions (incl. holes) the deserializer already knows when it has deserialized |
135 | | * what has been serialized here so far. */ |
136 | 0 | SetType done; |
137 | | |
138 | | // Loop over the transactions in topological order. |
139 | 0 | for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { |
140 | | /** Which depgraph index we are currently writing. */ |
141 | 0 | DepGraphIndex idx = topo_order[topo_idx]; |
142 | | // Write size, which must be larger than 0. |
143 | 0 | s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED); |
144 | | // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative). |
145 | 0 | s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee)); |
146 | | // Write dependency information. |
147 | 0 | SetType written_parents; |
148 | 0 | uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over. |
149 | 0 | for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { |
150 | | /** Which depgraph index we are currently considering as parent of idx. */ |
151 | 0 | DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; |
152 | | // Ignore transactions which are already known to be ancestors. |
153 | 0 | if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue; |
154 | 0 | if (depgraph.Ancestors(idx)[dep_idx]) { |
155 | | // When an actual parent is encountered, encode how many non-parents were skipped |
156 | | // before it. |
157 | 0 | s << VARINT(diff); |
158 | 0 | diff = 0; |
159 | 0 | written_parents.Set(dep_idx); |
160 | 0 | } else { |
161 | | // When a non-parent is encountered, increment the skip counter. |
162 | 0 | ++diff; |
163 | 0 | } |
164 | 0 | } |
165 | | // Write position information. |
166 | 0 | auto add_holes = SetType::Fill(idx) - done - depgraph.Positions(); |
167 | 0 | if (add_holes.None()) { |
168 | | // The new transaction is to be inserted N positions back from the end of the |
169 | | // cluster. Emit N to indicate that that many insertion choices are skipped. |
170 | 0 | auto skips = (done - SetType::Fill(idx)).Count(); |
171 | 0 | s << VARINT(diff + skips); |
172 | 0 | } else { |
173 | | // The new transaction is to be appended at the end of the cluster, after N holes. |
174 | | // Emit current_cluster_size + N, to indicate all insertion choices are skipped, |
175 | | // plus N possibilities for the number of holes. |
176 | 0 | s << VARINT(diff + done.Count() + add_holes.Count()); |
177 | 0 | done |= add_holes; |
178 | 0 | } |
179 | 0 | done.Set(idx); |
180 | 0 | } |
181 | | |
182 | | // Output a final 0 to denote the end of the graph. |
183 | 0 | s << uint8_t{0}; |
184 | 0 | } |
185 | | |
186 | | template <typename Stream, typename SetType> |
187 | | void Unser(Stream& s, DepGraph<SetType>& depgraph) |
188 | 0 | { |
189 | | /** The dependency graph which we deserialize into first, with transactions in |
190 | | * topological serialization order, not original cluster order. */ |
191 | 0 | DepGraph<SetType> topo_depgraph; |
192 | | /** Mapping from serialization order to cluster order, used later to reconstruct the |
193 | | * cluster order. */ |
194 | 0 | std::vector<DepGraphIndex> reordering; |
195 | | /** How big the entries vector in the reconstructed depgraph will be (including holes). */ |
196 | 0 | DepGraphIndex total_size{0}; |
197 | | |
198 | | // Read transactions in topological order. |
199 | 0 | while (true) { |
200 | 0 | FeeFrac new_feerate; //!< The new transaction's fee and size. |
201 | 0 | SetType new_ancestors; //!< The new transaction's ancestors (excluding itself). |
202 | 0 | uint64_t diff{0}; //!< How many potential parents/insertions we have to skip. |
203 | 0 | bool read_error{false}; |
204 | 0 | try { |
205 | | // Read size. Size 0 signifies the end of the DepGraph. |
206 | 0 | int32_t size; |
207 | 0 | s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED); |
208 | 0 | size &= 0x3FFFFF; // Enough for size up to 4M. |
209 | 0 | static_assert(0x3FFFFF >= 4000000); |
210 | 0 | if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break; |
211 | | // Read fee, encoded as an unsigned varint (odd=negative, even=non-negative). |
212 | 0 | uint64_t coded_fee; |
213 | 0 | s >> VARINT(coded_fee); |
214 | 0 | coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC. |
215 | 0 | static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000); |
216 | 0 | new_feerate = {UnsignedToSigned(coded_fee), size}; |
217 | | // Read dependency information. |
218 | 0 | auto topo_idx = reordering.size(); |
219 | 0 | s >> VARINT(diff); |
220 | 0 | for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { |
221 | | /** Which topo_depgraph index we are currently considering as parent of topo_idx. */ |
222 | 0 | DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist; |
223 | | // Ignore transactions which are already known ancestors of topo_idx. |
224 | 0 | if (new_ancestors[dep_topo_idx]) continue; |
225 | 0 | if (diff == 0) { |
226 | | // When the skip counter has reached 0, add an actual dependency. |
227 | 0 | new_ancestors |= topo_depgraph.Ancestors(dep_topo_idx); |
228 | | // And read the number of skips after it. |
229 | 0 | s >> VARINT(diff); |
230 | 0 | } else { |
231 | | // Otherwise, dep_topo_idx is not a parent. Decrement and continue. |
232 | 0 | --diff; |
233 | 0 | } |
234 | 0 | } |
235 | 0 | } catch (const std::ios_base::failure&) { |
236 | | // Continue even if a read error was encountered. |
237 | 0 | read_error = true; |
238 | 0 | } |
239 | | // Construct a new transaction whenever we made it past the new_feerate construction. |
240 | 0 | if (new_feerate.IsEmpty()) break; |
241 | 0 | assert(reordering.size() < SetType::Size()); |
242 | 0 | auto topo_idx = topo_depgraph.AddTransaction(new_feerate); |
243 | 0 | topo_depgraph.AddDependencies(new_ancestors, topo_idx); |
244 | 0 | if (total_size < SetType::Size()) { |
245 | | // Normal case. |
246 | 0 | diff %= SetType::Size(); |
247 | 0 | if (diff <= total_size) { |
248 | | // Insert the new transaction at distance diff back from the end. |
249 | 0 | for (auto& pos : reordering) { |
250 | 0 | pos += (pos >= total_size - diff); |
251 | 0 | } |
252 | 0 | reordering.push_back(total_size++ - diff); |
253 | 0 | } else { |
254 | | // Append diff - total_size holes at the end, plus the new transaction. |
255 | 0 | total_size = diff; |
256 | 0 | reordering.push_back(total_size++); |
257 | 0 | } |
258 | 0 | } else { |
259 | | // In case total_size == SetType::Size, it is not possible to insert the new |
260 | | // transaction without exceeding SetType's size. Instead, interpret diff as an |
261 | | // index into the holes, and overwrite a position there. This branch is never used |
262 | | // when deserializing the output of the serializer, but gives meaning to otherwise |
263 | | // invalid input. |
264 | 0 | diff %= (SetType::Size() - reordering.size()); |
265 | 0 | SetType holes = SetType::Fill(SetType::Size()); |
266 | 0 | for (auto pos : reordering) holes.Reset(pos); |
267 | 0 | for (auto pos : holes) { |
268 | 0 | if (diff == 0) { |
269 | 0 | reordering.push_back(pos); |
270 | 0 | break; |
271 | 0 | } |
272 | 0 | --diff; |
273 | 0 | } |
274 | 0 | } |
275 | | // Stop if a read error was encountered during deserialization. |
276 | 0 | if (read_error) break; |
277 | 0 | } |
278 | | |
279 | | // Construct the original cluster order depgraph. |
280 | 0 | depgraph = DepGraph(topo_depgraph, reordering, total_size); |
281 | 0 | } |
282 | | }; |
283 | | |
284 | | /** Perform a sanity/consistency check on a DepGraph. */ |
285 | | template<typename SetType> |
286 | | void SanityCheck(const DepGraph<SetType>& depgraph) |
287 | 0 | { |
288 | | // Verify Positions and PositionRange consistency. |
289 | 0 | DepGraphIndex num_positions{0}; |
290 | 0 | DepGraphIndex position_range{0}; |
291 | 0 | for (DepGraphIndex i : depgraph.Positions()) { |
292 | 0 | ++num_positions; |
293 | 0 | position_range = i + 1; |
294 | 0 | } |
295 | 0 | assert(num_positions == depgraph.TxCount()); |
296 | 0 | assert(position_range == depgraph.PositionRange()); |
297 | 0 | assert(position_range >= num_positions); |
298 | 0 | assert(position_range <= SetType::Size()); |
299 | | // Consistency check between ancestors internally. |
300 | 0 | for (DepGraphIndex i : depgraph.Positions()) { |
301 | | // Transactions include themselves as ancestors. |
302 | 0 | assert(depgraph.Ancestors(i)[i]); |
303 | | // If a is an ancestor of b, then b's ancestors must include all of a's ancestors. |
304 | 0 | for (auto a : depgraph.Ancestors(i)) { |
305 | 0 | assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a))); |
306 | 0 | } |
307 | 0 | } |
308 | | // Consistency check between ancestors and descendants. |
309 | 0 | for (DepGraphIndex i : depgraph.Positions()) { |
310 | 0 | for (DepGraphIndex j : depgraph.Positions()) { |
311 | 0 | assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]); |
312 | 0 | } |
313 | | // No transaction is a parent or child of itself. |
314 | 0 | auto parents = depgraph.GetReducedParents(i); |
315 | 0 | auto children = depgraph.GetReducedChildren(i); |
316 | 0 | assert(!parents[i]); |
317 | 0 | assert(!children[i]); |
318 | | // Parents of a transaction do not have ancestors inside those parents (except itself). |
319 | | // Note that even the transaction itself may be missing (if it is part of a cycle). |
320 | 0 | for (auto parent : parents) { |
321 | 0 | assert((depgraph.Ancestors(parent) & parents).IsSubsetOf(SetType::Singleton(parent))); |
322 | 0 | } |
323 | | // Similar for children and descendants. |
324 | 0 | for (auto child : children) { |
325 | 0 | assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child))); |
326 | 0 | } |
327 | 0 | } |
328 | 0 | if (depgraph.IsAcyclic()) { |
329 | | // If DepGraph is acyclic, serialize + deserialize must roundtrip. |
330 | 0 | std::vector<unsigned char> ser; |
331 | 0 | VectorWriter writer(ser, 0); |
332 | 0 | writer << Using<DepGraphFormatter>(depgraph); |
333 | 0 | SpanReader reader(ser); |
334 | 0 | DepGraph<TestBitSet> decoded_depgraph; |
335 | 0 | reader >> Using<DepGraphFormatter>(decoded_depgraph); |
336 | 0 | assert(depgraph == decoded_depgraph); |
337 | 0 | assert(reader.empty()); |
338 | | // It must also deserialize correctly without the terminal 0 byte (as the deserializer |
339 | | // will upon EOF still return what it read so far). |
340 | 0 | assert(ser.size() >= 1 && ser.back() == 0); |
341 | 0 | ser.pop_back(); |
342 | 0 | reader = SpanReader{ser}; |
343 | 0 | decoded_depgraph = {}; |
344 | 0 | reader >> Using<DepGraphFormatter>(decoded_depgraph); |
345 | 0 | assert(depgraph == decoded_depgraph); |
346 | 0 | assert(reader.empty()); |
347 | | |
348 | | // In acyclic graphs, the union of parents with parents of parents etc. yields the |
349 | | // full ancestor set (and similar for children and descendants). |
350 | 0 | std::vector<SetType> parents(depgraph.PositionRange()), children(depgraph.PositionRange()); |
351 | 0 | for (DepGraphIndex i : depgraph.Positions()) { |
352 | 0 | parents[i] = depgraph.GetReducedParents(i); |
353 | 0 | children[i] = depgraph.GetReducedChildren(i); |
354 | 0 | } |
355 | 0 | for (auto i : depgraph.Positions()) { |
356 | | // Initialize the set of ancestors with just the current transaction itself. |
357 | 0 | SetType ancestors = SetType::Singleton(i); |
358 | | // Iteratively add parents of all transactions in the ancestor set to itself. |
359 | 0 | while (true) { |
360 | 0 | const auto old_ancestors = ancestors; |
361 | 0 | for (auto j : ancestors) ancestors |= parents[j]; |
362 | | // Stop when no more changes are being made. |
363 | 0 | if (old_ancestors == ancestors) break; |
364 | 0 | } |
365 | 0 | assert(ancestors == depgraph.Ancestors(i)); |
366 | | |
367 | | // Initialize the set of descendants with just the current transaction itself. |
368 | 0 | SetType descendants = SetType::Singleton(i); |
369 | | // Iteratively add children of all transactions in the descendant set to itself. |
370 | 0 | while (true) { |
371 | 0 | const auto old_descendants = descendants; |
372 | 0 | for (auto j : descendants) descendants |= children[j]; |
373 | | // Stop when no more changes are being made. |
374 | 0 | if (old_descendants == descendants) break; |
375 | 0 | } |
376 | 0 | assert(descendants == depgraph.Descendants(i)); |
377 | 0 | } |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | | /** Perform a sanity check on a linearization. */ |
382 | | template<typename SetType> |
383 | | void SanityCheck(const DepGraph<SetType>& depgraph, std::span<const DepGraphIndex> linearization) |
384 | 0 | { |
385 | | // Check completeness. |
386 | 0 | assert(linearization.size() == depgraph.TxCount()); |
387 | 0 | TestBitSet done; |
388 | 0 | for (auto i : linearization) { |
389 | | // Check transaction position is in range. |
390 | 0 | assert(depgraph.Positions()[i]); |
391 | | // Check topology and lack of duplicates. |
392 | 0 | assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i)); |
393 | 0 | done.Set(i); |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | | } // namespace |
398 | | |
399 | | #endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H |