Coverage Report

Created: 2025-04-14 16:24

/Users/mcomp/contrib/bitcoin/src/test/util/cluster_linearize.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
6
#define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
7
8
#include <cluster_linearize.h>
9
#include <serialize.h>
10
#include <span.h>
11
#include <streams.h>
12
#include <util/bitset.h>
13
#include <util/feefrac.h>
14
15
#include <stdint.h>
16
#include <numeric>
17
#include <vector>
18
#include <utility>
19
20
namespace {
21
22
using namespace cluster_linearize;
23
24
using TestBitSet = BitSet<32>;
25
26
/** A formatter for a bespoke serialization for acyclic DepGraph objects.
27
 *
28
 * The serialization format outputs information about transactions in a topological order (parents
29
 * before children), together with position information so transactions can be moved back to their
30
 * correct position on deserialization.
31
 *
32
 * - For each transaction t in the DepGraph (in some topological order);
33
 *   - The size: VARINT(t.size), which cannot be 0.
34
 *   - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned.
35
 *   - For each direct dependency:
36
 *     - VARINT(skip)
37
 *   - The position of t in the cluster: VARINT(skip)
38
 * - The end of the graph: VARINT(0)
39
 *
40
 * The list of skip values encodes the dependencies of t, as well as its position in the cluster.
41
 * Each skip value is the number of possibilities that were available, but were not taken. These
42
 * possibilities are, in order:
43
 * - For each previous transaction in the graph, in reverse serialization order, whether it is a
44
 *   direct parent of t (but excluding transactions which are already implied to be dependencies
45
 *   by parent relations that were serialized before it).
46
 * - The various insertion positions in the cluster, from the very end of the cluster, to the
47
 *   front.
48
 * - The appending of 1, 2, 3, ... holes at the end of the cluster, followed by appending the new
49
 *   transaction.
50
 *
51
 * Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,_,G,E,_,D
52
 * (where _ represent holes; unused positions within the DepGraph) but serialized in order
53
 * A,B,C,D,E,F,G, because that happens to be a topological ordering. By the time G gets serialized,
54
 * what has been serialized already represents the cluster F,A,C,B,_,E,_,D (in that order). G has B
55
 * and E as direct parents, and E depends on C.
56
 *
57
 * In this case, the possibilities are, in order:
58
 * - [ ] the dependency G->F
59
 * - [X] the dependency G->E
60
 * - [ ] the dependency G->D
61
 * - [X] the dependency G->B
62
 * - [ ] the dependency G->A
63
 * - [ ] put G at the end of the cluster
64
 * - [ ] put G before D
65
 * - [ ] put G before the hole before D
66
 * - [X] put G before E
67
 * - [ ] put G before the hole before E
68
 * - [ ] put G before B
69
 * - [ ] put G before C
70
 * - [ ] put G before A
71
 * - [ ] put G before F
72
 * - [ ] add 1 hole at the end of the cluster, followed by G
73
 * - [ ] add 2 holes at the end of the cluster, followed by G
74
 * - [ ] add ...
75
 *
76
 * The skip values in this case are 1 (G->F), 1 (G->D), 4 (G->A, G at end, G before D, G before
77
 * hole). No skip after 4 is needed (or permitted), because there can only be one position for G.
78
 * Also note that G->C is not included in the list of possibilities, as it is implied by the
79
 * included G->E and E->C that came before it. On deserialization, if the last skip value was 8 or
80
 * larger (putting G before the beginning of the cluster), it is interpreted as wrapping around
81
 * back to the end.
82
 *
83
 *
84
 * Rationale:
85
 * - Why VARINTs? They are flexible enough to represent large numbers where needed, but more
86
 *   compact for smaller numbers. The serialization format is designed so that simple structures
87
 *   involve smaller numbers, so smaller size maps to simpler graphs.
88
 * - Why use SignedToUnsigned? It results in small unsigned values for signed values with small
89
 *   absolute value. This way we can encode negative fees in graphs, but still let small negative
90
 *   numbers have small encodings.
91
 * - Why are the parents emitted in reverse order compared to the transactions themselves? This
92
 *   naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies.
93
 * - Why encode skip values and not a bitmask to convey the list positions? It turns out that the
94
 *   most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per
95
 *   transaction. The current encoding uses ~1 byte per transaction for dependencies in this case,
96
 *   while a bitmask would require ~N/2 bits per transaction.
97
 */
98
99
struct DepGraphFormatter
100
{
101
    /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */
102
    [[maybe_unused]] static uint64_t SignedToUnsigned(int64_t x) noexcept
103
0
    {
104
0
        if (x < 0) {
105
0
            return 2 * uint64_t(-(x + 1)) + 1;
106
0
        } else {
107
0
            return 2 * uint64_t(x);
108
0
        }
109
0
    }
110
111
    /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */
112
    [[maybe_unused]] static int64_t UnsignedToSigned(uint64_t x) noexcept
113
0
    {
114
0
        if (x & 1) {
115
0
            return -int64_t(x / 2) - 1;
116
0
        } else {
117
0
            return int64_t(x / 2);
118
0
        }
119
0
    }
120
121
    template <typename Stream, typename SetType>
122
    static void Ser(Stream& s, const DepGraph<SetType>& depgraph)
123
0
    {
124
        /** Construct a topological order to serialize the transactions in. */
125
0
        std::vector<DepGraphIndex> topo_order;
126
0
        topo_order.reserve(depgraph.TxCount());
127
0
        for (auto i : depgraph.Positions()) topo_order.push_back(i);
128
0
        std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) {
129
0
            auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count();
130
0
            if (anc_a != anc_b) return anc_a < anc_b;
131
0
            return a < b;
132
0
        });
133
134
        /** Which positions (incl. holes) the deserializer already knows when it has deserialized
135
         *  what has been serialized here so far. */
136
0
        SetType done;
137
138
        // Loop over the transactions in topological order.
139
0
        for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) {
140
            /** Which depgraph index we are currently writing. */
141
0
            DepGraphIndex idx = topo_order[topo_idx];
142
            // Write size, which must be larger than 0.
143
0
            s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED);
144
            // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative).
145
0
            s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee));
146
            // Write dependency information.
147
0
            SetType written_parents;
148
0
            uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over.
149
0
            for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
150
                /** Which depgraph index we are currently considering as parent of idx. */
151
0
                DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist];
152
                // Ignore transactions which are already known to be ancestors.
153
0
                if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue;
154
0
                if (depgraph.Ancestors(idx)[dep_idx]) {
155
                    // When an actual parent is encountered, encode how many non-parents were skipped
156
                    // before it.
157
0
                    s << VARINT(diff);
158
0
                    diff = 0;
159
0
                    written_parents.Set(dep_idx);
160
0
                } else {
161
                    // When a non-parent is encountered, increment the skip counter.
162
0
                    ++diff;
163
0
                }
164
0
            }
165
            // Write position information.
166
0
            auto add_holes = SetType::Fill(idx) - done - depgraph.Positions();
167
0
            if (add_holes.None()) {
168
                // The new transaction is to be inserted N positions back from the end of the
169
                // cluster. Emit N to indicate that that many insertion choices are skipped.
170
0
                auto skips = (done - SetType::Fill(idx)).Count();
171
0
                s << VARINT(diff + skips);
172
0
            } else {
173
                // The new transaction is to be appended at the end of the cluster, after N holes.
174
                // Emit current_cluster_size + N, to indicate all insertion choices are skipped,
175
                // plus N possibilities for the number of holes.
176
0
                s << VARINT(diff + done.Count() + add_holes.Count());
177
0
                done |= add_holes;
178
0
            }
179
0
            done.Set(idx);
180
0
        }
181
182
        // Output a final 0 to denote the end of the graph.
183
0
        s << uint8_t{0};
184
0
    }
185
186
    template <typename Stream, typename SetType>
187
    void Unser(Stream& s, DepGraph<SetType>& depgraph)
188
0
    {
189
        /** The dependency graph which we deserialize into first, with transactions in
190
         *  topological serialization order, not original cluster order. */
191
0
        DepGraph<SetType> topo_depgraph;
192
        /** Mapping from serialization order to cluster order, used later to reconstruct the
193
         *  cluster order. */
194
0
        std::vector<DepGraphIndex> reordering;
195
        /** How big the entries vector in the reconstructed depgraph will be (including holes). */
196
0
        DepGraphIndex total_size{0};
197
198
        // Read transactions in topological order.
199
0
        while (true) {
200
0
            FeeFrac new_feerate; //!< The new transaction's fee and size.
201
0
            SetType new_ancestors; //!< The new transaction's ancestors (excluding itself).
202
0
            uint64_t diff{0}; //!< How many potential parents/insertions we have to skip.
203
0
            bool read_error{false};
204
0
            try {
205
                // Read size. Size 0 signifies the end of the DepGraph.
206
0
                int32_t size;
207
0
                s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED);
208
0
                size &= 0x3FFFFF; // Enough for size up to 4M.
209
0
                static_assert(0x3FFFFF >= 4000000);
210
0
                if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break;
211
                // Read fee, encoded as an unsigned varint (odd=negative, even=non-negative).
212
0
                uint64_t coded_fee;
213
0
                s >> VARINT(coded_fee);
214
0
                coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC.
215
0
                static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000);
216
0
                new_feerate = {UnsignedToSigned(coded_fee), size};
217
                // Read dependency information.
218
0
                auto topo_idx = reordering.size();
219
0
                s >> VARINT(diff);
220
0
                for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
221
                    /** Which topo_depgraph index we are currently considering as parent of topo_idx. */
222
0
                    DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist;
223
                    // Ignore transactions which are already known ancestors of topo_idx.
224
0
                    if (new_ancestors[dep_topo_idx]) continue;
225
0
                    if (diff == 0) {
226
                        // When the skip counter has reached 0, add an actual dependency.
227
0
                        new_ancestors |= topo_depgraph.Ancestors(dep_topo_idx);
228
                        // And read the number of skips after it.
229
0
                        s >> VARINT(diff);
230
0
                    } else {
231
                        // Otherwise, dep_topo_idx is not a parent. Decrement and continue.
232
0
                        --diff;
233
0
                    }
234
0
                }
235
0
            } catch (const std::ios_base::failure&) {
236
                // Continue even if a read error was encountered.
237
0
                read_error = true;
238
0
            }
239
            // Construct a new transaction whenever we made it past the new_feerate construction.
240
0
            if (new_feerate.IsEmpty()) break;
241
0
            assert(reordering.size() < SetType::Size());
242
0
            auto topo_idx = topo_depgraph.AddTransaction(new_feerate);
243
0
            topo_depgraph.AddDependencies(new_ancestors, topo_idx);
244
0
            if (total_size < SetType::Size()) {
245
                // Normal case.
246
0
                diff %= SetType::Size();
247
0
                if (diff <= total_size) {
248
                    // Insert the new transaction at distance diff back from the end.
249
0
                    for (auto& pos : reordering) {
250
0
                        pos += (pos >= total_size - diff);
251
0
                    }
252
0
                    reordering.push_back(total_size++ - diff);
253
0
                } else {
254
                    // Append diff - total_size holes at the end, plus the new transaction.
255
0
                    total_size = diff;
256
0
                    reordering.push_back(total_size++);
257
0
                }
258
0
            } else {
259
                // In case total_size == SetType::Size, it is not possible to insert the new
260
                // transaction without exceeding SetType's size. Instead, interpret diff as an
261
                // index into the holes, and overwrite a position there. This branch is never used
262
                // when deserializing the output of the serializer, but gives meaning to otherwise
263
                // invalid input.
264
0
                diff %= (SetType::Size() - reordering.size());
265
0
                SetType holes = SetType::Fill(SetType::Size());
266
0
                for (auto pos : reordering) holes.Reset(pos);
267
0
                for (auto pos : holes) {
268
0
                    if (diff == 0) {
269
0
                        reordering.push_back(pos);
270
0
                        break;
271
0
                    }
272
0
                    --diff;
273
0
                }
274
0
            }
275
            // Stop if a read error was encountered during deserialization.
276
0
            if (read_error) break;
277
0
        }
278
279
        // Construct the original cluster order depgraph.
280
0
        depgraph = DepGraph(topo_depgraph, reordering, total_size);
281
0
    }
282
};
283
284
/** Perform a sanity/consistency check on a DepGraph. */
285
template<typename SetType>
286
void SanityCheck(const DepGraph<SetType>& depgraph)
287
0
{
288
    // Verify Positions and PositionRange consistency.
289
0
    DepGraphIndex num_positions{0};
290
0
    DepGraphIndex position_range{0};
291
0
    for (DepGraphIndex i : depgraph.Positions()) {
292
0
        ++num_positions;
293
0
        position_range = i + 1;
294
0
    }
295
0
    assert(num_positions == depgraph.TxCount());
296
0
    assert(position_range == depgraph.PositionRange());
297
0
    assert(position_range >= num_positions);
298
0
    assert(position_range <= SetType::Size());
299
    // Consistency check between ancestors internally.
300
0
    for (DepGraphIndex i : depgraph.Positions()) {
301
        // Transactions include themselves as ancestors.
302
0
        assert(depgraph.Ancestors(i)[i]);
303
        // If a is an ancestor of b, then b's ancestors must include all of a's ancestors.
304
0
        for (auto a : depgraph.Ancestors(i)) {
305
0
            assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a)));
306
0
        }
307
0
    }
308
    // Consistency check between ancestors and descendants.
309
0
    for (DepGraphIndex i : depgraph.Positions()) {
310
0
        for (DepGraphIndex j : depgraph.Positions()) {
311
0
            assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]);
312
0
        }
313
        // No transaction is a parent or child of itself.
314
0
        auto parents = depgraph.GetReducedParents(i);
315
0
        auto children = depgraph.GetReducedChildren(i);
316
0
        assert(!parents[i]);
317
0
        assert(!children[i]);
318
        // Parents of a transaction do not have ancestors inside those parents (except itself).
319
        // Note that even the transaction itself may be missing (if it is part of a cycle).
320
0
        for (auto parent : parents) {
321
0
            assert((depgraph.Ancestors(parent) & parents).IsSubsetOf(SetType::Singleton(parent)));
322
0
        }
323
        // Similar for children and descendants.
324
0
        for (auto child : children) {
325
0
            assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child)));
326
0
        }
327
0
    }
328
0
    if (depgraph.IsAcyclic()) {
329
        // If DepGraph is acyclic, serialize + deserialize must roundtrip.
330
0
        std::vector<unsigned char> ser;
331
0
        VectorWriter writer(ser, 0);
332
0
        writer << Using<DepGraphFormatter>(depgraph);
333
0
        SpanReader reader(ser);
334
0
        DepGraph<TestBitSet> decoded_depgraph;
335
0
        reader >> Using<DepGraphFormatter>(decoded_depgraph);
336
0
        assert(depgraph == decoded_depgraph);
337
0
        assert(reader.empty());
338
        // It must also deserialize correctly without the terminal 0 byte (as the deserializer
339
        // will upon EOF still return what it read so far).
340
0
        assert(ser.size() >= 1 && ser.back() == 0);
341
0
        ser.pop_back();
342
0
        reader = SpanReader{ser};
343
0
        decoded_depgraph = {};
344
0
        reader >> Using<DepGraphFormatter>(decoded_depgraph);
345
0
        assert(depgraph == decoded_depgraph);
346
0
        assert(reader.empty());
347
348
        // In acyclic graphs, the union of parents with parents of parents etc. yields the
349
        // full ancestor set (and similar for children and descendants).
350
0
        std::vector<SetType> parents(depgraph.PositionRange()), children(depgraph.PositionRange());
351
0
        for (DepGraphIndex i : depgraph.Positions()) {
352
0
            parents[i] = depgraph.GetReducedParents(i);
353
0
            children[i] = depgraph.GetReducedChildren(i);
354
0
        }
355
0
        for (auto i : depgraph.Positions()) {
356
            // Initialize the set of ancestors with just the current transaction itself.
357
0
            SetType ancestors = SetType::Singleton(i);
358
            // Iteratively add parents of all transactions in the ancestor set to itself.
359
0
            while (true) {
360
0
                const auto old_ancestors = ancestors;
361
0
                for (auto j : ancestors) ancestors |= parents[j];
362
                // Stop when no more changes are being made.
363
0
                if (old_ancestors == ancestors) break;
364
0
            }
365
0
            assert(ancestors == depgraph.Ancestors(i));
366
367
            // Initialize the set of descendants with just the current transaction itself.
368
0
            SetType descendants = SetType::Singleton(i);
369
            // Iteratively add children of all transactions in the descendant set to itself.
370
0
            while (true) {
371
0
                const auto old_descendants = descendants;
372
0
                for (auto j : descendants) descendants |= children[j];
373
                // Stop when no more changes are being made.
374
0
                if (old_descendants == descendants) break;
375
0
            }
376
0
            assert(descendants == depgraph.Descendants(i));
377
0
        }
378
0
    }
379
0
}
380
381
/** Perform a sanity check on a linearization. */
382
template<typename SetType>
383
void SanityCheck(const DepGraph<SetType>& depgraph, std::span<const DepGraphIndex> linearization)
384
0
{
385
    // Check completeness.
386
0
    assert(linearization.size() == depgraph.TxCount());
387
0
    TestBitSet done;
388
0
    for (auto i : linearization) {
389
        // Check transaction position is in range.
390
0
        assert(depgraph.Positions()[i]);
391
        // Check topology and lack of duplicates.
392
0
        assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i));
393
0
        done.Set(i);
394
0
    }
395
0
}
396
397
} // namespace
398
399
#endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H