Coverage Report

Created: 2025-10-29 16:48

/root/bitcoin/src/crypto/muhash.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2017-present The Bitcoin Core developers
2
// Distributed under the MIT software license, see the accompanying
3
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4
5
#ifndef BITCOIN_CRYPTO_MUHASH_H
6
#define BITCOIN_CRYPTO_MUHASH_H
7
8
#include <serialize.h>
9
10
#include <cstddef>
11
#include <cstdint>
12
#include <span>
13
14
class uint256;
15
16
class Num3072
17
{
18
private:
19
    void FullReduce();
20
    bool IsOverflow() const;
21
    Num3072 GetInverse() const;
22
23
public:
24
    static constexpr size_t BYTE_SIZE = 384;
25
26
#ifdef __SIZEOF_INT128__
27
    typedef unsigned __int128 double_limb_t;
28
    typedef signed __int128 signed_double_limb_t;
29
    typedef uint64_t limb_t;
30
    typedef int64_t signed_limb_t;
31
    static constexpr int LIMBS = 48;
32
    static constexpr int SIGNED_LIMBS = 50;
33
    static constexpr int LIMB_SIZE = 64;
34
    static constexpr int SIGNED_LIMB_SIZE = 62;
35
#else
36
    typedef uint64_t double_limb_t;
37
    typedef int64_t signed_double_limb_t;
38
    typedef uint32_t limb_t;
39
    typedef int32_t signed_limb_t;
40
    static constexpr int LIMBS = 96;
41
    static constexpr int SIGNED_LIMBS = 103;
42
    static constexpr int LIMB_SIZE = 32;
43
    static constexpr int SIGNED_LIMB_SIZE = 30;
44
#endif
45
    limb_t limbs[LIMBS];
46
47
    // Sanity check for Num3072 constants
48
    static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits");
49
    static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t");
50
    static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect");
51
    static_assert(SIGNED_LIMB_SIZE * SIGNED_LIMBS > 3072, "SIGNED_LIMBS * SIGNED_LIMB_SIZE is too small");
52
    static_assert(3072 / SIGNED_LIMB_SIZE == SIGNED_LIMBS - 1, "Bit 3072 must land in top signed limb");
53
54
    // Hard coded values in MuHash3072 constructor and Finalize
55
    static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t");
56
57
    void Multiply(const Num3072& a);
58
    void Divide(const Num3072& a);
59
    void SetToOne();
60
    void ToBytes(unsigned char (&out)[BYTE_SIZE]);
61
62
0
    Num3072() { this->SetToOne(); };
63
    Num3072(const unsigned char (&data)[BYTE_SIZE]);
64
65
    SERIALIZE_METHODS(Num3072, obj)
66
0
    {
67
0
        for (auto& limb : obj.limbs) {
68
0
            READWRITE(limb);
69
0
        }
70
0
    }
Unexecuted instantiation: _ZN7Num307216SerializationOpsI10DataStreamS_17ActionUnserializeEEvRT0_RT_T1_
Unexecuted instantiation: _ZN7Num307216SerializationOpsI10DataStreamKS_15ActionSerializeEEvRT0_RT_T1_
71
};
72
73
/** A class representing MuHash sets
74
 *
75
 * MuHash is a hashing algorithm that supports adding set elements in any
76
 * order but also deleting in any order. As a result, it can maintain a
77
 * running sum for a set of data as a whole, and add/remove when data
78
 * is added to or removed from it. A downside of MuHash is that computing
79
 * an inverse is relatively expensive. This is solved by representing
80
 * the running value as a fraction, and multiplying added elements into
81
 * the numerator and removed elements into the denominator. Only when the
82
 * final hash is desired, a single modular inverse and multiplication is
83
 * needed to combine the two. The combination is also run on serialization
84
 * to allow for space-efficient storage on disk.
85
 *
86
 * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
87
 * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
88
 * all of this is perfectly parallellizable: each thread can process an
89
 * arbitrary subset of the update operations, allowing them to be
90
 * efficiently combined later.
91
 *
92
 * MuHash does not support checking if an element is already part of the
93
 * set. That is why this class does not enforce the use of a set as the
94
 * data it represents because there is no efficient way to do so.
95
 * It is possible to add elements more than once and also to remove
96
 * elements that have not been added before. However, this implementation
97
 * is intended to represent a set of elements.
98
 *
99
 * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
100
 * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
101
 */
102
class MuHash3072
103
{
104
private:
105
    Num3072 m_numerator;
106
    Num3072 m_denominator;
107
108
    Num3072 ToNum3072(std::span<const unsigned char> in);
109
110
public:
111
    /* The empty set. */
112
0
    MuHash3072() noexcept = default;
113
114
    /* A singleton with variable sized data in it. */
115
    explicit MuHash3072(std::span<const unsigned char> in) noexcept;
116
117
    /* Insert a single piece of data into the set. */
118
    MuHash3072& Insert(std::span<const unsigned char> in) noexcept;
119
120
    /* Remove a single piece of data from the set. */
121
    MuHash3072& Remove(std::span<const unsigned char> in) noexcept;
122
123
    /* Multiply (resulting in a hash for the union of the sets) */
124
    MuHash3072& operator*=(const MuHash3072& mul) noexcept;
125
126
    /* Divide (resulting in a hash for the difference of the sets) */
127
    MuHash3072& operator/=(const MuHash3072& div) noexcept;
128
129
    /* Finalize into a 32-byte hash. Does not change this object's value. */
130
    void Finalize(uint256& out) noexcept;
131
132
    SERIALIZE_METHODS(MuHash3072, obj)
133
0
    {
134
0
        READWRITE(obj.m_numerator);
135
0
        READWRITE(obj.m_denominator);
136
0
    }
Unexecuted instantiation: _ZN10MuHash307216SerializationOpsI10DataStreamS_17ActionUnserializeEEvRT0_RT_T1_
Unexecuted instantiation: _ZN10MuHash307216SerializationOpsI10DataStreamKS_15ActionSerializeEEvRT0_RT_T1_
137
};
138
139
#endif // BITCOIN_CRYPTO_MUHASH_H