Skip to content

Commit 55cf34a

Browse files
committed
Merge bitcoin/bitcoin#30047: refactor: Model the bech32 charlimit as an Enum
7f3f6c6 refactor: replace hardcoded numbers (Lőrinc)
5676aec refactor: Model the bech32 charlimit as an Enum (josibake)

Pull request description:

Broken out from #28122

---

Bech32(m) was defined with a 90 character limit so that certain guarantees for error detection could be made for segwit addresses (see https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#checksum-design). However, there is nothing about the encoding scheme itself that requires a limit of 90 and in practice bech32(m) is being used without the 90 char limit (e.g. lightning invoices, silent payments). Further, increasing the character limit doesn't do away with error detection, it simply changes the guarantee.

The primary motivation for this change is for being able to parse BIP352 v0 silent payment addresses (see bitcoin/bitcoin@622c7a9), which require up to 118 characters. In addition to BIP352, modeling the character limit as an enum allows us to easily support new address types that use bech32m and specify their own character limit.

ACKs for top commit:
paplorinc: re-ACK 7f3f6c6
achow101: ACK 7f3f6c6
theuni: utACK 7f3f6c6

Tree-SHA512: 9c793d657448c1f795093b9f7d4d6dfa431598f48d54e1c899a69fb2f43aeb68b40ca2ff08864eefeeb6627d4171877234b5df0056ff2a2b84415bc3558bd280
2 parents 09fe143 + 7f3f6c6 commit 55cf34a

File tree

2 files changed

+41
-27
lines changed

2 files changed

+41
-27
lines changed

src/bech32.cpp

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ namespace
1919

2020
typedef std::vector<uint8_t> data;
2121

22+
/** The Bech32 and Bech32m checksum size */
23+
constexpr size_t CHECKSUM_SIZE = 6;
24+
2225
/** The Bech32 and Bech32m character set for encoding. */
2326
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
2427

@@ -308,18 +311,18 @@ bool CheckCharacters(const std::string& str, std::vector<int>& errors)
308311
return errors.empty();
309312
}
310313

311-
/** Expand a HRP for use in checksum computation. */
312-
data ExpandHRP(const std::string& hrp)
314+
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
313315
{
314316
data ret;
315-
ret.reserve(hrp.size() + 90);
316-
ret.resize(hrp.size() * 2 + 1);
317-
for (size_t i = 0; i < hrp.size(); ++i) {
318-
unsigned char c = hrp[i];
319-
ret[i] = c >> 5;
320-
ret[i + hrp.size() + 1] = c & 0x1f;
321-
}
322-
ret[hrp.size()] = 0;
317+
ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE);
318+
319+
/** Expand a HRP for use in checksum computation. */
320+
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
321+
ret.push_back(0);
322+
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
323+
324+
ret.insert(ret.end(), values.begin(), values.end());
325+
323326
return ret;
324327
}
325328

@@ -331,7 +334,8 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
331334
// list of values would result in a new valid list. For that reason, Bech32 requires the
332335
// resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
333336
// https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
334-
const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values));
337+
auto enc = PreparePolynomialCoefficients(hrp, values);
338+
const uint32_t check = PolyMod(enc);
335339
if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
336340
if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
337341
return Encoding::INVALID;
@@ -340,11 +344,11 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
340344
/** Create a checksum. */
341345
data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
342346
{
343-
data enc = Cat(ExpandHRP(hrp), values);
344-
enc.resize(enc.size() + 6); // Append 6 zeroes
347+
auto enc = PreparePolynomialCoefficients(hrp, values);
348+
enc.insert(enc.end(), CHECKSUM_SIZE, 0x00);
345349
uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
346-
data ret(6);
347-
for (size_t i = 0; i < 6; ++i) {
350+
data ret(CHECKSUM_SIZE);
351+
for (size_t i = 0; i < CHECKSUM_SIZE; ++i) {
348352
// Convert the 5-bit groups in mod to checksum values.
349353
ret[i] = (mod >> (5 * (5 - i))) & 31;
350354
}
@@ -370,11 +374,12 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values
370374
}
371375

372376
/** Decode a Bech32 or Bech32m string. */
373-
DecodeResult Decode(const std::string& str) {
377+
DecodeResult Decode(const std::string& str, CharLimit limit) {
374378
std::vector<int> errors;
375379
if (!CheckCharacters(str, errors)) return {};
376380
size_t pos = str.rfind('1');
377-
if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
381+
if (str.size() > limit) return {};
382+
if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
378383
return {};
379384
}
380385
data values(str.size() - 1 - pos);
@@ -393,16 +398,16 @@ DecodeResult Decode(const std::string& str) {
393398
}
394399
Encoding result = VerifyChecksum(hrp, values);
395400
if (result == Encoding::INVALID) return {};
396-
return {result, std::move(hrp), data(values.begin(), values.end() - 6)};
401+
return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
397402
}
398403

399404
/** Find index of an incorrect character in a Bech32 string. */
400-
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
405+
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) {
401406
std::vector<int> error_locations{};
402407

403-
if (str.size() > 90) {
404-
error_locations.resize(str.size() - 90);
405-
std::iota(error_locations.begin(), error_locations.end(), 90);
408+
if (str.size() > limit) {
409+
error_locations.resize(str.size() - limit);
410+
std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit));
406411
return std::make_pair("Bech32 string too long", std::move(error_locations));
407412
}
408413

@@ -414,7 +419,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
414419
if (pos == str.npos) {
415420
return std::make_pair("Missing separator", std::vector<int>{});
416421
}
417-
if (pos == 0 || pos + 7 > str.size()) {
422+
if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
418423
error_locations.push_back(pos);
419424
return std::make_pair("Invalid separator position", std::move(error_locations));
420425
}
@@ -441,9 +446,10 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
441446
std::optional<Encoding> error_encoding;
442447
for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
443448
std::vector<int> possible_errors;
444-
// Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
449+
// Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
445450
// over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
446-
uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
451+
auto enc = PreparePolynomialCoefficients(hrp, values);
452+
uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
447453

448454
// All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
449455
// constant) should be 0 - hence 0 indicates there are no errors present.

src/bech32.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ enum class Encoding {
2828
BECH32M, //!< Bech32m encoding as defined in BIP350
2929
};
3030

31+
/** Character limits for Bech32(m) encoded strings. Character limits are how we provide error location guarantees.
32+
* These values should never exceed 2^31 - 1 (max value for a 32-bit int), since there are places where we may need to
33+
* convert the CharLimit::VALUE to an int. In practice, this should never happen since this CharLimit applies to an address encoding
34+
* and we would never encode an address with such a massive value */
35+
enum CharLimit : size_t {
36+
BECH32 = 90, //!< BIP173/350 imposed character limit for Bech32(m) encoded addresses. This guarantees finding up to 4 errors.
37+
};
38+
3139
/** Encode a Bech32 or Bech32m string. If hrp contains uppercase characters, this will cause an
3240
* assertion error. Encoding must be one of BECH32 or BECH32M. */
3341
std::string Encode(Encoding encoding, const std::string& hrp, const std::vector<uint8_t>& values);
@@ -43,10 +51,10 @@ struct DecodeResult
4351
};
4452

4553
/** Decode a Bech32 or Bech32m string. */
46-
DecodeResult Decode(const std::string& str);
54+
DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::BECH32);
4755

4856
/** Return the positions of errors in a Bech32 string. */
49-
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str);
57+
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::BECH32);
5058

5159
} // namespace bech32
5260

0 commit comments

Comments
 (0)