From 5a400eb5230c6c507bb77a2cf4b9590d0d455a39 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Tue, 10 Jun 2025 14:15:57 +0200 Subject: [PATCH 01/28] Add Bytes.splice, an in-place variant of Bytes.slice --- .changeset/afraid-chicken-attack.md | 5 +++++ contracts/utils/Bytes.sol | 29 +++++++++++++++++++++++++++++ test/utils/Bytes.test.js | 8 +++++--- 3 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 .changeset/afraid-chicken-attack.md diff --git a/.changeset/afraid-chicken-attack.md b/.changeset/afraid-chicken-attack.md new file mode 100644 index 00000000000..9898087c0b5 --- /dev/null +++ b/.changeset/afraid-chicken-attack.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Bytes`: Add `splice(bytes,uint256)` and `splice(bytes,uint256,uint256)`, two "in place" variants of the existing slice functions diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index 1234b845513..cd8c7b41fa2 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -99,6 +99,35 @@ library Bytes { return result; } + /** + * @dev In place slice: moves the content of `buffer`, from `start` (included) to the end of `buffer` to the start of that buffer. + * + * NOTE: This function modifies the provided buffer in place. If you need to preserve the original buffer, use {slice} instead + */ + function splice(bytes memory buffer, uint256 start) internal pure returns (bytes memory) { + return splice(buffer, start, buffer.length); + } + + /** + * @dev In place slice: moves the content of `buffer`, from `start` (included) to end (excluded) to the start of that buffer. + * + * NOTE: This function modifies the provided buffer in place. 
If you need to preserve the original buffer, use {slice} instead + */ + function splice(bytes memory buffer, uint256 start, uint256 end) internal pure returns (bytes memory) { + // sanitize + uint256 length = buffer.length; + end = Math.min(end, length); + start = Math.min(start, end); + + // allocate and copy + assembly ("memory-safe") { + mcopy(add(buffer, 0x20), add(add(buffer, 0x20), start), sub(end, start)) + mstore(buffer, sub(end, start)) + } + + return buffer; + } + /** * @dev Reads a bytes32 from a bytes array without bounds checking. * diff --git a/test/utils/Bytes.test.js b/test/utils/Bytes.test.js index 52a1ae95e77..80caa7f8faa 100644 --- a/test/utils/Bytes.test.js +++ b/test/utils/Bytes.test.js @@ -56,8 +56,8 @@ describe('Bytes', function () { }); }); - describe('slice', function () { - describe('slice(bytes, uint256)', function () { + describe('slice & splice', function () { + describe('slice(bytes, uint256) & splice(bytes, uint256)', function () { for (const [descr, start] of Object.entries({ 'start = 0': 0, 'start within bound': 10, @@ -66,11 +66,12 @@ describe('Bytes', function () { it(descr, async function () { const result = ethers.hexlify(lorem.slice(start)); expect(await this.mock.$slice(lorem, start)).to.equal(result); + expect(await this.mock.$splice(lorem, start)).to.equal(result); }); } }); - describe('slice(bytes, uint256, uint256)', function () { + describe('slice(bytes, uint256, uint256) & splice(bytes, uint256, uint256)', function () { for (const [descr, [start, end]] of Object.entries({ 'start = 0': [0, 42], 'start and end within bound': [17, 42], @@ -81,6 +82,7 @@ describe('Bytes', function () { it(descr, async function () { const result = ethers.hexlify(lorem.slice(start, end)); expect(await this.mock.$slice(lorem, start, ethers.Typed.uint256(end))).to.equal(result); + expect(await this.mock.$splice(lorem, start, ethers.Typed.uint256(end))).to.equal(result); }); } }); From 65292d506c68ce441250ac5ba1fdfc0acfa97aa5 Mon Sep 17 00:00:00 
2001 From: Hadrien Croubois Date: Fri, 20 Jun 2025 23:15:00 +0200 Subject: [PATCH 02/28] Add Base58 library --- .changeset/loose-lamps-bake.md | 5 ++ contracts/utils/Base58.sol | 135 +++++++++++++++++++++++++++++++++ test/utils/Base58.t.sol | 16 ++++ test/utils/Base58.test.js | 26 +++++++ test/utils/Base64.test.js | 2 +- 5 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 .changeset/loose-lamps-bake.md create mode 100644 contracts/utils/Base58.sol create mode 100644 test/utils/Base58.t.sol create mode 100644 test/utils/Base58.test.js diff --git a/.changeset/loose-lamps-bake.md b/.changeset/loose-lamps-bake.md new file mode 100644 index 00000000000..bc4703817ae --- /dev/null +++ b/.changeset/loose-lamps-bake.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Base58`: Add a library for encoding and decoding bytes buffers into base58 strings. diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol new file mode 100644 index 00000000000..426d701bd98 --- /dev/null +++ b/contracts/utils/Base58.sol @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.24; + +import {Bytes} from "./Bytes.sol"; + +/** + * @dev Provides a set of functions to operate with Base58 strings. + * + * Based on the original https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[implementation of storyicon] (MIT). 
+ */ +library Base58 { + using Bytes for bytes; + + string internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + + function encode(bytes memory data) internal pure returns (string memory) { + return string(_encode(data)); + } + + function decode(string memory data) internal pure returns (bytes memory) { + return _decode(bytes(data)); + } + + function _encode(bytes memory data) private pure returns (bytes memory) { + unchecked { + uint256 dataCLZ = _countLeading(data, 0x00); + uint256 slotLength = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; + + bytes memory slot = new bytes(slotLength); + uint256 end = slotLength; + for (uint256 i = 0; i < data.length; i++) { + uint256 ptr = slotLength; + for (uint256 carry = _mload8i(data, i); ptr > end || carry != 0; --ptr) { + carry += 256 * _mload8i(slot, ptr - 1); + _mstore8i(slot, ptr - 1, uint8(carry % 58)); + carry /= 58; + } + end = ptr; + } + + uint256 slotCLZ = _countLeading(slot, 0x00); + uint256 resultLength = slotLength + dataCLZ - slotCLZ; + + bytes memory cache = bytes(_TABLE); + for (uint256 i = 0; i < resultLength; ++i) { + uint256 idx = _mload8i(slot, i + slotCLZ - dataCLZ); + bytes1 c = _mload8(cache, idx); + _mstore8(slot, i, c); + } + + assembly ("memory-safe") { + mstore(slot, resultLength) + } + + return slot; + } + } + + function _decode(bytes memory data) private pure returns (bytes memory) { + unchecked { + uint256 b58Length = data.length; + + uint256 size = 2 * ((b58Length * 8351) / 6115 + 1); + bytes memory binu = new bytes(size); + + bytes memory cache = bytes(_TABLE); + uint32[] memory outi = new uint32[]((b58Length + 3) / 4); + for (uint256 i = 0; i < data.length; i++) { + bytes1 r = _mload8(data, i); + uint256 c = cache.indexOf(r); // can we avoid the loop here ? 
+ require(c != type(uint256).max, "invalid base58 digit"); + for (uint256 k = outi.length; k > 0; --k) { + uint256 t = uint64(outi[k - 1]) * 58 + c; + c = t >> 32; + outi[k - 1] = uint32(t & 0xffffffff); + } + } + + uint256 ptr = 0; + uint256 mask = ((b58Length - 1) % 4) + 1; + for (uint256 j = 0; j < outi.length; ++j) { + while (mask > 0) { + --mask; + _mstore8(binu, ptr, bytes1(uint8(outi[j] >> (8 * mask)))); + ptr++; + } + mask = 4; + } + + uint256 dataCLZ = _countLeading(data, 0x31); + for (uint256 msb = dataCLZ; msb < binu.length; ++msb) { + if (_mload8(binu, msb) != 0x00) { + return binu.slice(msb - dataCLZ, ptr); + } + } + return binu.slice(0, ptr); + } + } + + function _mload8(bytes memory buffer, uint256 offset) private pure returns (bytes1 value) { + // This is not memory safe in the general case, but all calls to this private function are within bounds. + assembly ("memory-safe") { + value := mload(add(add(buffer, 0x20), offset)) + } + } + + function _mload8i(bytes memory buffer, uint256 offset) private pure returns (uint8 value) { + // This is not memory safe in the general case, but all calls to this private function are within bounds. + assembly ("memory-safe") { + value := shr(248, mload(add(add(buffer, 0x20), offset))) + } + } + + function _mstore8(bytes memory buffer, uint256 offset, bytes1 value) private pure { + // This is not memory safe in the general case, but all calls to this private function are within bounds. + assembly ("memory-safe") { + mstore8(add(add(buffer, 0x20), offset), shr(248, value)) + } + } + + function _mstore8i(bytes memory buffer, uint256 offset, uint8 value) private pure { + // This is not memory safe in the general case, but all calls to this private function are within bounds. 
+ assembly ("memory-safe") { + mstore8(add(add(buffer, 0x20), offset), value) + } + } + + function _countLeading(bytes memory buffer, bytes1 el) private pure returns (uint256) { + uint256 length = buffer.length; + uint256 i = 0; + while (i < length && _mload8(buffer, i) == el) ++i; + return i; + } +} diff --git a/test/utils/Base58.t.sol b/test/utils/Base58.t.sol new file mode 100644 index 00000000000..7b3372c8d06 --- /dev/null +++ b/test/utils/Base58.t.sol @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: MIT + +pragma solidity ^0.8.20; + +import {Test} from "forge-std/Test.sol"; +import {Base58} from "@openzeppelin/contracts/utils/Base58.sol"; + +contract Base58Test is Test { + function testEncodeDecodeEmpty() external pure { + assertEq(Base58.decode(Base58.encode("")), ""); + } + + function testEncodeDecode(bytes memory input) external pure { + assertEq(Base58.decode(Base58.encode(input)), input); + } +} diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js new file mode 100644 index 00000000000..d0a9b6923a0 --- /dev/null +++ b/test/utils/Base58.test.js @@ -0,0 +1,26 @@ +const { ethers } = require('hardhat'); +const { expect } = require('chai'); +const { loadFixture } = require('@nomicfoundation/hardhat-network-helpers'); + +async function fixture() { + const mock = await ethers.deployContract('$Base58'); + return { mock }; +} + +describe('Base58', function () { + beforeEach(async function () { + Object.assign(this, await loadFixture(fixture)); + }); + + describe('base58', function () { + for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) // 512 runs out of gas + it(`Encode/Decode buffer of length ${length}`, async function () { + const buffer = ethers.randomBytes(length); + const hex = ethers.hexlify(buffer); + const b58 = ethers.encodeBase58(buffer); + + expect(await this.mock.$encode(hex)).to.equal(b58); + expect(await this.mock.$decode(b58)).to.equal(hex); + }); + }); +}); diff --git a/test/utils/Base64.test.js b/test/utils/Base64.test.js index 
5c427466671..0e3885cbae4 100644 --- a/test/utils/Base64.test.js +++ b/test/utils/Base64.test.js @@ -11,7 +11,7 @@ async function fixture() { return { mock }; } -describe('Strings', function () { +describe('Base64', function () { beforeEach(async function () { Object.assign(this, await loadFixture(fixture)); }); From 99a1835157175d31c1e8eb0d78f20c74507fb96c Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Fri, 20 Jun 2025 23:17:51 +0200 Subject: [PATCH 03/28] docs --- contracts/mocks/Stateless.sol | 1 + contracts/utils/README.adoc | 3 +++ 2 files changed, 4 insertions(+) diff --git a/contracts/mocks/Stateless.sol b/contracts/mocks/Stateless.sol index 97e79085bfb..1cc0ccdb6f8 100644 --- a/contracts/mocks/Stateless.sol +++ b/contracts/mocks/Stateless.sol @@ -7,6 +7,7 @@ pragma solidity ^0.8.26; import {Address} from "../utils/Address.sol"; import {Arrays} from "../utils/Arrays.sol"; import {AuthorityUtils} from "../access/manager/AuthorityUtils.sol"; +import {Base58} from "../utils/Base58.sol"; import {Base64} from "../utils/Base64.sol"; import {BitMaps} from "../utils/structs/BitMaps.sol"; import {Blockhash} from "../utils/Blockhash.sol"; diff --git a/contracts/utils/README.adoc b/contracts/utils/README.adoc index 8640e56fa51..0149c9019a7 100644 --- a/contracts/utils/README.adoc +++ b/contracts/utils/README.adoc @@ -24,6 +24,7 @@ Miscellaneous contracts and libraries containing utility functions you can use t * {Create2}: Wrapper around the https://blog.openzeppelin.com/getting-the-most-out-of-create2/[`CREATE2` EVM opcode] for safe use without having to deal with low-level assembly. * {Address}: Collection of functions for overloading Solidity's https://docs.soliditylang.org/en/latest/types.html#address[`address`] type. * {Arrays}: Collection of functions that operate on https://docs.soliditylang.org/en/latest/types.html#arrays[`arrays`]. + * {Base58}: On-chain base58 encoding and decoding. 
* {Base64}: On-chain base64 and base64URL encoding according to https://datatracker.ietf.org/doc/html/rfc4648[RFC-4648]. * {Bytes}: Common operations on bytes objects. * {Calldata}: Helpers for manipulating calldata. @@ -105,6 +106,8 @@ Ethereum contracts have no native concept of an interface, so applications must {{Arrays}} +{{Base58}} + {{Base64}} {{Bytes}} From 88c03e7929d121039ed2c6c84d28c8e04ad508eb Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 15:28:41 +0200 Subject: [PATCH 04/28] Add Bytes.countConsecutive and Bytes.countLeading --- .changeset/thirty-pugs-pick.md | 5 ++++ contracts/utils/Base58.sol | 45 ++++++++++++---------------------- contracts/utils/Bytes.sol | 33 +++++++++++++++++++++++++ test/utils/Base58.t.sol | 8 ++++++ 4 files changed, 62 insertions(+), 29 deletions(-) create mode 100644 .changeset/thirty-pugs-pick.md diff --git a/.changeset/thirty-pugs-pick.md b/.changeset/thirty-pugs-pick.md new file mode 100644 index 00000000000..b86eac613ac --- /dev/null +++ b/.changeset/thirty-pugs-pick.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`Bytes`: add `countLeading` and `countConsecutive` diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 426d701bd98..4959dda2ee7 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -2,6 +2,7 @@ pragma solidity ^0.8.24; +import {SafeCast} from "./math/SafeCast.sol"; import {Bytes} from "./Bytes.sol"; /** @@ -10,6 +11,7 @@ import {Bytes} from "./Bytes.sol"; * Based on the original https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[implementation of storyicon] (MIT). 
*/ library Base58 { + using SafeCast for bool; using Bytes for bytes; string internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; @@ -24,13 +26,13 @@ library Base58 { function _encode(bytes memory data) private pure returns (bytes memory) { unchecked { - uint256 dataCLZ = _countLeading(data, 0x00); - uint256 slotLength = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; + uint256 dataCLZ = data.countLeading(0x00); + uint256 length = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; + bytes memory slot = new bytes(length); - bytes memory slot = new bytes(slotLength); - uint256 end = slotLength; + uint256 end = length; for (uint256 i = 0; i < data.length; i++) { - uint256 ptr = slotLength; + uint256 ptr = length; for (uint256 carry = _mload8i(data, i); ptr > end || carry != 0; --ptr) { carry += 256 * _mload8i(slot, ptr - 1); _mstore8i(slot, ptr - 1, uint8(carry % 58)); @@ -39,18 +41,14 @@ library Base58 { end = ptr; } - uint256 slotCLZ = _countLeading(slot, 0x00); - uint256 resultLength = slotLength + dataCLZ - slotCLZ; + uint256 slotCLZ = slot.countLeading(0x00); + length -= slotCLZ - dataCLZ; + slot.splice(slotCLZ - dataCLZ); bytes memory cache = bytes(_TABLE); - for (uint256 i = 0; i < resultLength; ++i) { - uint256 idx = _mload8i(slot, i + slotCLZ - dataCLZ); - bytes1 c = _mload8(cache, idx); - _mstore8(slot, i, c); - } - - assembly ("memory-safe") { - mstore(slot, resultLength) + for (uint256 i = 0; i < length; ++i) { + // equivalent to `slot[i] = TABLE[slot[i]];` + _mstore8(slot, i, _mload8(cache, _mload8i(slot, i))); } return slot; @@ -88,13 +86,9 @@ library Base58 { mask = 4; } - uint256 dataCLZ = _countLeading(data, 0x31); - for (uint256 msb = dataCLZ; msb < binu.length; ++msb) { - if (_mload8(binu, msb) != 0x00) { - return binu.slice(msb - dataCLZ, ptr); - } - } - return binu.slice(0, ptr); + uint256 dataCLZ = data.countLeading(0x31); + uint256 msb = binu.countConsecutive(dataCLZ, 0x00); + return binu.splice(msb 
* (dataCLZ + msb < binu.length).toUint(), ptr); } } @@ -125,11 +119,4 @@ library Base58 { mstore8(add(add(buffer, 0x20), offset), value) } } - - function _countLeading(bytes memory buffer, bytes1 el) private pure returns (uint256) { - uint256 length = buffer.length; - uint256 i = 0; - while (i < length && _mload8(buffer, i) == el) ++i; - return i; - } } diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index cd8c7b41fa2..a3c0a1e2851 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -68,6 +68,39 @@ library Bytes { } } + /** + * @dev Count number of occurrences of `search` at the beginning of `buffer`. + */ + function countLeading(bytes memory buffer, bytes1 search) public pure returns (uint256) { + return countConsecutive(buffer, 0, search); + } + + /** + * @dev Count number of occurrences of `search` in `buffer`, starting from position `offset`. + */ + function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) public pure returns (uint256 i) { + assembly ("memory-safe") { + let chunk + let length := sub(mload(buffer), offset) + for { + i := 0 + } lt(i, length) { + i := add(i, 1) + } { + // every 32 bytes, load a new chunk + if iszero(mod(i, 0x20)) { + chunk := mload(add(buffer, add(0x20, add(offset, i)))) + } + // if the first byte of the chunk does not match the search element, exit + if shr(248, xor(chunk, search)) { + break + } + // shift chunk + chunk := shl(8, chunk) + } + } + } + /** * @dev Copies the content of `buffer`, from `start` (included) to the end of `buffer` into a new bytes object in * memory. 
diff --git a/test/utils/Base58.t.sol b/test/utils/Base58.t.sol index 7b3372c8d06..89b2923c169 100644 --- a/test/utils/Base58.t.sol +++ b/test/utils/Base58.t.sol @@ -10,6 +10,14 @@ contract Base58Test is Test { assertEq(Base58.decode(Base58.encode("")), ""); } + function testEncodeDecodeZeros() external pure { + bytes memory zeros = hex"0000000000000000"; + assertEq(Base58.decode(Base58.encode(zeros)), zeros); + + bytes memory almostZeros = hex"00000000a400000000"; + assertEq(Base58.decode(Base58.encode(almostZeros)), almostZeros); + } + function testEncodeDecode(bytes memory input) external pure { assertEq(Base58.decode(Base58.encode(input)), input); } From a3c4667f4f4a11033355c6b7291ef8d0e760fb35 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 15:34:04 +0200 Subject: [PATCH 05/28] fix --- contracts/utils/Bytes.sol | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index a3c0a1e2851..b09737dd957 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -71,14 +71,14 @@ library Bytes { /** * @dev Count number of occurrences of `search` at the beginning of `buffer`. */ - function countLeading(bytes memory buffer, bytes1 search) public pure returns (uint256) { + function countLeading(bytes memory buffer, bytes1 search) internal pure returns (uint256) { return countConsecutive(buffer, 0, search); } /** * @dev Count number of occurrences of `search` in `buffer`, starting from position `offset`. 
*/ - function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) public pure returns (uint256 i) { + function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) internal pure returns (uint256 i) { assembly ("memory-safe") { let chunk let length := sub(mload(buffer), offset) From 41b586bec1f5dc0291b8489188af07bcbab43f86 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 16:12:43 +0200 Subject: [PATCH 06/28] efficient decoding --- contracts/utils/Base58.sol | 37 ++++++++++++++++++++++++------------- test/utils/Base58.test.js | 27 +++++++++++++++++++-------- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 4959dda2ee7..b70d9d97de3 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -14,7 +14,11 @@ library Base58 { using SafeCast for bool; using Bytes for bytes; - string internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + error InvalidBase56Digit(uint8); + + bytes internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + bytes internal constant _LOOKUP_TABLE = + hex"000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff161718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f30313233343536373839"; function encode(bytes memory data) internal pure returns (string memory) { return string(_encode(data)); @@ -45,7 +49,7 @@ library Base58 { length -= slotCLZ - dataCLZ; slot.splice(slotCLZ - dataCLZ); - bytes memory cache = bytes(_TABLE); + bytes memory cache = _TABLE; for (uint256 i = 0; i < length; ++i) { // equivalent to `slot[i] = TABLE[slot[i]];` _mstore8(slot, i, _mload8(cache, _mload8i(slot, i))); @@ -62,22 +66,29 @@ library Base58 { uint256 size = 2 * ((b58Length * 8351) / 6115 + 1); bytes memory binu = new bytes(size); - bytes memory cache = bytes(_TABLE); - uint32[] memory outi = new uint32[]((b58Length + 3) / 4); - for 
(uint256 i = 0; i < data.length; i++) { - bytes1 r = _mload8(data, i); - uint256 c = cache.indexOf(r); // can we avoid the loop here ? - require(c != type(uint256).max, "invalid base58 digit"); - for (uint256 k = outi.length; k > 0; --k) { - uint256 t = uint64(outi[k - 1]) * 58 + c; - c = t >> 32; - outi[k - 1] = uint32(t & 0xffffffff); + bytes memory cache = _LOOKUP_TABLE; + uint256 outiLength = (b58Length + 3) / 4; + // Note: allocating uint32[] would be enough, but solidity doesn't pack memory. + uint256[] memory outi = new uint256[](outiLength); + for (uint256 i = 0; i < data.length; ++i) { + // get b58 char + uint8 chr = _mload8i(data, i); + require(chr > 48 && chr < 123, InvalidBase56Digit(chr)); + + // decode b58 char + uint256 carry = _mload8i(cache, chr - 49); + require(carry < 58, InvalidBase56Digit(chr)); + + for (uint256 j = outiLength; j > 0; --j) { + uint256 value = carry + 58 * outi[j - 1]; + carry = value >> 32; + outi[j - 1] = value & 0xffffffff; } } uint256 ptr = 0; uint256 mask = ((b58Length - 1) % 4) + 1; - for (uint256 j = 0; j < outi.length; ++j) { + for (uint256 j = 0; j < outiLength; ++j) { while (mask > 0) { --mask; _mstore8(binu, ptr, bytes1(uint8(outi[j] >> (8 * mask)))); diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index d0a9b6923a0..216eb36ca3e 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -13,14 +13,25 @@ describe('Base58', function () { }); describe('base58', function () { - for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) // 512 runs out of gas - it(`Encode/Decode buffer of length ${length}`, async function () { - const buffer = ethers.randomBytes(length); - const hex = ethers.hexlify(buffer); - const b58 = ethers.encodeBase58(buffer); + describe('encode/decode', function () { + for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) // 512 runs out of gas + it(`buffer of length ${length}`, async function () { + const buffer = ethers.randomBytes(length); + const hex = 
ethers.hexlify(buffer); + const b58 = ethers.encodeBase58(buffer); - expect(await this.mock.$encode(hex)).to.equal(b58); - expect(await this.mock.$decode(b58)).to.equal(hex); - }); + expect(await this.mock.$encode(hex)).to.equal(b58); + expect(await this.mock.$decode(b58)).to.equal(hex); + }); + }); + + describe('decode invalid format', function () { + for (const chr of ['I', '-', '~']) + it(`Invalid base58 char ${chr}`, async function () { + await expect(this.mock.$decode(`VYRWKp${chr}pnN7`)) + .to.be.revertedWithCustomError(this.mock, 'InvalidBase56Digit') + .withArgs(chr.codePointAt(0)); + }); + }); }); }); From c6d6bdd13f9b9f034afb842fe0bc79fe2174dc7a Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 16:26:55 +0200 Subject: [PATCH 07/28] coverage --- contracts/utils/Bytes.sol | 7 +++++-- test/utils/Base58.test.js | 4 ++-- test/utils/Base64.test.js | 12 ++++++------ test/utils/Bytes.test.js | 29 +++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index b09737dd957..768cb7f03db 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -79,12 +79,15 @@ library Bytes { * @dev Count number of occurrences of `search` in `buffer`, starting from position `offset`. 
*/ function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) internal pure returns (uint256 i) { + uint256 length = buffer.length; + if (offset > length) return 0; + assembly ("memory-safe") { let chunk - let length := sub(mload(buffer), offset) + let end := sub(length, offset) for { i := 0 - } lt(i, length) { + } lt(i, end) { i := add(i, 1) } { // every 32 bytes, load a new chunk diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index 216eb36ca3e..66ead7fd923 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -20,8 +20,8 @@ describe('Base58', function () { const hex = ethers.hexlify(buffer); const b58 = ethers.encodeBase58(buffer); - expect(await this.mock.$encode(hex)).to.equal(b58); - expect(await this.mock.$decode(b58)).to.equal(hex); + await expect(this.mock.$encode(hex)).to.eventually.equal(b58); + await expect(this.mock.$decode(b58)).to.eventually.equal(hex); }); }); diff --git a/test/utils/Base64.test.js b/test/utils/Base64.test.js index 0e3885cbae4..008b6b634db 100644 --- a/test/utils/Base64.test.js +++ b/test/utils/Base64.test.js @@ -27,8 +27,8 @@ describe('Base64', function () { ]) it(title, async function () { const buffer = Buffer.from(input, 'ascii'); - expect(await this.mock.$encode(buffer)).to.equal(ethers.encodeBase64(buffer)); - expect(await this.mock.$encode(buffer)).to.equal(expected); + await expect(this.mock.$encode(buffer)).to.eventually.equal(ethers.encodeBase64(buffer)); + await expect(this.mock.$encode(buffer)).to.eventually.equal(expected); }); }); @@ -43,8 +43,8 @@ describe('Base64', function () { ]) it(title, async function () { const buffer = Buffer.from(input, 'ascii'); - expect(await this.mock.$encodeURL(buffer)).to.equal(base64toBase64Url(ethers.encodeBase64(buffer))); - expect(await this.mock.$encodeURL(buffer)).to.equal(expected); + await expect(this.mock.$encodeURL(buffer)).to.eventually.equal(base64toBase64Url(ethers.encodeBase64(buffer))); + await 
expect(this.mock.$encodeURL(buffer)).to.eventually.equal(expected); }); }); @@ -53,7 +53,7 @@ describe('Base64', function () { const buffer32 = ethers.id('example'); const buffer31 = buffer32.slice(0, -2); - expect(await mock.encode(buffer31)).to.equal(ethers.encodeBase64(buffer31)); - expect(await mock.encode(buffer32)).to.equal(ethers.encodeBase64(buffer32)); + await expect(mock.encode(buffer31)).to.eventually.equal(ethers.encodeBase64(buffer31)); + await expect(mock.encode(buffer32)).to.eventually.equal(ethers.encodeBase64(buffer32)); }); }); diff --git a/test/utils/Bytes.test.js b/test/utils/Bytes.test.js index 80caa7f8faa..07682aec852 100644 --- a/test/utils/Bytes.test.js +++ b/test/utils/Bytes.test.js @@ -56,6 +56,35 @@ describe('Bytes', function () { }); }); + describe('countConsecutive', function () { + it('empty buffer', async function () { + await expect(this.mock.$countConsecutive('0x', 0, '0x00')).to.eventually.equal(0); + }); + + it('no occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4f678', 0, '0x00')).to.eventually.equal(0); + await expect(this.mock.$countConsecutive('0x000000', 0, '0x01')).to.eventually.equal(0); + }); + + it('single occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4f678', 0, '0xa4')).to.eventually.equal(1); + await expect(this.mock.$countConsecutive('0xa4f678', 1, '0xf6')).to.eventually.equal(1); + await expect(this.mock.$countConsecutive('0xa4f678', 2, '0x78')).to.eventually.equal(1); + }); + + it('multiple occurrence', async function () { + await expect(this.mock.$countConsecutive('0xa4a4f6f6f6f678', 0, '0xa4')).to.eventually.equal(2); + await expect(this.mock.$countConsecutive('0xa4a4f6f6f6f678', 2, '0xf6')).to.eventually.equal(4); + await expect(this.mock.$countConsecutive('0x78787878787878', 0, '0x78')).to.eventually.equal(7); + await expect(this.mock.$countConsecutive('0x78787878787878', 3, '0x78')).to.eventually.equal(4); + }); + + it('out of bound offset', async 
function () { + await expect(this.mock.$countConsecutive('0x000000', 3, '0x00')).to.eventually.equal(0); + await expect(this.mock.$countConsecutive('0x000000', 42, '0x00')).to.eventually.equal(0); + }); + }); + describe('slice & splice', function () { describe('slice(bytes, uint256) & splice(bytes, uint256)', function () { for (const [descr, start] of Object.entries({ From 48bf13b52b4cdbab543e70bed61855e3981ce7cb Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 16:28:30 +0200 Subject: [PATCH 08/28] Update thirty-pugs-pick.md --- .changeset/thirty-pugs-pick.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/thirty-pugs-pick.md b/.changeset/thirty-pugs-pick.md index b86eac613ac..955c449b620 100644 --- a/.changeset/thirty-pugs-pick.md +++ b/.changeset/thirty-pugs-pick.md @@ -2,4 +2,4 @@ 'openzeppelin-solidity': minor --- -`Bytes`: add `countLeading` and `countConsecutive` +`Bytes`: Add `countLeading` and `countConsecutive` From eebd51e3cee71e4e1fc75658d5d107492d361acb Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 17:23:01 +0200 Subject: [PATCH 09/28] docs --- contracts/utils/Base58.sol | 9 +++++++++ contracts/utils/Base64.sol | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index b70d9d97de3..521344ab4cc 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -16,14 +16,23 @@ library Base58 { error InvalidBase56Digit(uint8); + /** + * @dev Base58 encoding and decoding tables + */ bytes internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; bytes internal constant _LOOKUP_TABLE = hex"000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff161718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f30313233343536373839"; + /** + * @dev Encode a `bytes` buffer as a Base58 `string`. 
+ */ function encode(bytes memory data) internal pure returns (string memory) { return string(_encode(data)); } + /** + * @dev Decode a Base58 `string` into a `bytes` buffer. + */ function decode(string memory data) internal pure returns (bytes memory) { return _decode(bytes(data)); } diff --git a/contracts/utils/Base64.sol b/contracts/utils/Base64.sol index c6ee6a524aa..1870cff00cd 100644 --- a/contracts/utils/Base64.sol +++ b/contracts/utils/Base64.sol @@ -8,21 +8,21 @@ pragma solidity ^0.8.20; */ library Base64 { /** - * @dev Base64 Encoding/Decoding Table + * @dev Base64 encoding table * See sections 4 and 5 of https://datatracker.ietf.org/doc/html/rfc4648 */ string internal constant _TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; string internal constant _TABLE_URL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; /** - * @dev Converts a `bytes` to its Bytes64 `string` representation. + * @dev Converts a `bytes` to its Base64 `string` representation. */ function encode(bytes memory data) internal pure returns (string memory) { return _encode(data, _TABLE, true); } /** - * @dev Converts a `bytes` to its Bytes64Url `string` representation. + * @dev Converts a `bytes` to its Base64Url `string` representation. * Output is not padded with `=` as specified in https://www.rfc-editor.org/rfc/rfc4648[rfc4648]. 
*/ function encodeURL(bytes memory data) internal pure returns (string memory) { From 296a87ea10611ec09b373bb370cdac6326309288 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 17:25:21 +0200 Subject: [PATCH 10/28] pragma --- contracts/utils/Base58.sol | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 521344ab4cc..fd56f83c6a0 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT -pragma solidity ^0.8.24; +pragma solidity ^0.8.26; import {SafeCast} from "./math/SafeCast.sol"; import {Bytes} from "./Bytes.sol"; From 8c94acc39c3ea8940f4bd84e3ee516ea9feb07ad Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 17:26:09 +0200 Subject: [PATCH 11/28] pragma --- test/utils/Base58.t.sol | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/utils/Base58.t.sol b/test/utils/Base58.t.sol index 89b2923c169..f75a2d99436 100644 --- a/test/utils/Base58.t.sol +++ b/test/utils/Base58.t.sol @@ -1,6 +1,6 @@ // SPDX-License-Identifier: MIT -pragma solidity ^0.8.20; +pragma solidity ^0.8.26; import {Test} from "forge-std/Test.sol"; import {Base58} from "@openzeppelin/contracts/utils/Base58.sol"; From d09ebfaec0696432202663630a8dbdd24600d4ff Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sat, 21 Jun 2025 17:44:13 +0200 Subject: [PATCH 12/28] coverage --- test/utils/Base58.test.js | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index 66ead7fd923..d0401103038 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -14,15 +14,20 @@ describe('Base58', function () { describe('base58', function () { describe('encode/decode', function () { - for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) // 512 runs out of gas - it(`buffer of length ${length}`, async function () { - const 
buffer = ethers.randomBytes(length); - const hex = ethers.hexlify(buffer); - const b58 = ethers.encodeBase58(buffer); + // length 512 runs out of gas. + // this checks are very slow when running coverage, causing CI to timeout. + for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) + it( + [length > 32 && '[skip-on-coverage]', `buffer of length ${length}`].filter(Boolean).join(' '), + async function () { + const buffer = ethers.randomBytes(length); + const hex = ethers.hexlify(buffer); + const b58 = ethers.encodeBase58(buffer); - await expect(this.mock.$encode(hex)).to.eventually.equal(b58); - await expect(this.mock.$decode(b58)).to.eventually.equal(hex); - }); + await expect(this.mock.$encode(hex)).to.eventually.equal(b58); + await expect(this.mock.$decode(b58)).to.eventually.equal(hex); + }, + ); }); describe('decode invalid format', function () { From a25bd1109cd9233c2ae5d2ab2bbe2c11f58246a2 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sun, 22 Jun 2025 14:58:32 +0200 Subject: [PATCH 13/28] rewrite _encode in assembly --- contracts/utils/Base58.sol | 122 ++++++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 22 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index fd56f83c6a0..db89258651d 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -37,34 +37,112 @@ library Base58 { return _decode(bytes(data)); } - function _encode(bytes memory data) private pure returns (bytes memory) { - unchecked { - uint256 dataCLZ = data.countLeading(0x00); - uint256 length = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; - bytes memory slot = new bytes(length); - - uint256 end = length; - for (uint256 i = 0; i < data.length; i++) { - uint256 ptr = length; - for (uint256 carry = _mload8i(data, i); ptr > end || carry != 0; --ptr) { - carry += 256 * _mload8i(slot, ptr - 1); - _mstore8i(slot, ptr - 1, uint8(carry % 58)); - carry /= 58; + function _encode(bytes memory data) private pure 
returns (bytes memory encoded) { + // For reference, solidity implementation + // unchecked { + // uint256 dataCLZ = data.countLeading(0x00); + // uint256 length = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; + // encoded = new bytes(length); + // uint256 end = length; + // for (uint256 i = 0; i < data.length; ++i) { + // uint256 ptr = length; + // for (uint256 carry = uint8(data[i]); ptr > end || carry != 0; --ptr) { + // carry += 256 * uint8(encoded[ptr - 1]); + // encoded[ptr - 1] = bytes1(uint8(carry % 58)); + // carry /= 58; + // } + // end = ptr; + // } + // uint256 encodedCLZ = encoded.countLeading(0x00); + // length -= encodedCLZ - dataCLZ; + // encoded.splice(encodedCLZ - dataCLZ); + // for (uint256 i = 0; i < length; ++i) { + // encoded[i] = _TABLE[uint8(encoded[i])]; + // } + // } + + // Assembly is ~50% cheaper for buffers of size 32. + assembly ("memory-safe") { + function clzBytes(ptr, length) -> i { + let chunk + for { + i := 0 + } lt(i, length) { + i := add(i, 1) + } { + // Every 32 bytes, load a new chunk + if iszero(mod(i, 0x20)) { + chunk := mload(add(ptr, i)) + } + // If the first byte of the chunk is not zero, break + if shr(248, chunk) { + break + } + // Shift chunk + chunk := shl(8, chunk) + } + } + + encoded := mload(0x40) + let dataLength := mload(data) + + // Count number of zero bytes at the beginning of `data` + let dataCLZ := clzBytes(add(data, 0x20), dataLength) + + // Initial encoding + let slotLength := add(add(dataCLZ, div(mul(sub(dataLength, dataCLZ), 8351), 6115)), 1) + + // Zero the encoded buffer + for { + let i := 0 + } lt(i, slotLength) { + i := add(i, 0x20) + } { + mstore(add(add(encoded, 0x20), i), 0) + } + + // Build the "slots" + for { + let i := 0 + let end := slotLength + } lt(i, dataLength) { + i := add(i, 1) + } { + let ptr := slotLength + for { + let carry := shr(248, mload(add(add(data, 0x20), i))) + } or(carry, lt(end, ptr)) { + ptr := sub(ptr, 1) + carry := div(carry, 58) + } { + carry := add(carry, 
mul(256, shr(248, mload(add(add(encoded, 0x1f), ptr))))) + mstore8(add(add(encoded, 0x1f), ptr), mod(carry, 58)) } - end = ptr; + end := ptr } - uint256 slotCLZ = slot.countLeading(0x00); - length -= slotCLZ - dataCLZ; - slot.splice(slotCLZ - dataCLZ); + // Count number of zero bytes at the beginning of slots + let slotCLZ := clzBytes(add(encoded, 0x20), slotLength) + + // Update length + let offset := sub(slotCLZ, dataCLZ) + let encodedLength := sub(slotLength, offset) + + // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. + mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") + mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") - bytes memory cache = _TABLE; - for (uint256 i = 0; i < length; ++i) { - // equivalent to `slot[i] = TABLE[slot[i]];` - _mstore8(slot, i, _mload8(cache, _mload8i(slot, i))); + for { + let i := 0 + } lt(i, encodedLength) { + i := add(i, 1) + } { + mstore8(add(add(encoded, 0x20), i), mload(shr(248, mload(add(add(encoded, 0x20), add(offset, i)))))) } - return slot; + // Store length and allocate memory + mstore(encoded, encodedLength) + mstore(0x40, add(add(encoded, 0x20), encodedLength)) } } From a4ce8c89db8b463382e7aa64394a0a93ff0c517c Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sun, 22 Jun 2025 21:44:23 +0200 Subject: [PATCH 14/28] more inline documentation --- contracts/utils/Base58.sol | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index db89258651d..1061c33292a 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -40,8 +40,8 @@ library Base58 { function _encode(bytes memory data) private pure returns (bytes memory encoded) { // For reference, solidity implementation // unchecked { - // uint256 dataCLZ = data.countLeading(0x00); - // uint256 length = dataCLZ + ((data.length - dataCLZ) * 8351) / 6115 + 1; + // uint256 dataLeadingZeros = data.countLeading(0x00); 
+ // uint256 length = dataLeadingZeros + ((data.length - dataLeadingZeros) * 8351) / 6115 + 1; // encoded = new bytes(length); // uint256 end = length; // for (uint256 i = 0; i < data.length; ++i) { @@ -54,8 +54,8 @@ library Base58 { // end = ptr; // } // uint256 encodedCLZ = encoded.countLeading(0x00); - // length -= encodedCLZ - dataCLZ; - // encoded.splice(encodedCLZ - dataCLZ); + // length -= encodedCLZ - dataLeadingZeros; + // encoded.splice(encodedCLZ - dataLeadingZeros); // for (uint256 i = 0; i < length; ++i) { // encoded[i] = _TABLE[uint8(encoded[i])]; // } @@ -86,11 +86,12 @@ library Base58 { encoded := mload(0x40) let dataLength := mload(data) - // Count number of zero bytes at the beginning of `data` - let dataCLZ := clzBytes(add(data, 0x20), dataLength) + // Count number of zero bytes at the beginning of `data`. These are encoded using the same number of '1's + // at then beginning of the encoded string. + let dataLeadingZeros := clzBytes(add(data, 0x20), dataLength) - // Initial encoding - let slotLength := add(add(dataCLZ, div(mul(sub(dataLength, dataCLZ), 8351), 6115)), 1) + // Initial encoding length: 100% of zero bytes (zero prefix) + 138% of non zero bytes + 1 + let slotLength := add(add(div(mul(sub(dataLength, dataLeadingZeros), 138), 100), dataLeadingZeros), 1) // Zero the encoded buffer for { @@ -121,17 +122,19 @@ library Base58 { end := ptr } - // Count number of zero bytes at the beginning of slots - let slotCLZ := clzBytes(add(encoded, 0x20), slotLength) + // Count number of zero bytes at the beginning of slots. This is a pointer to the first non zero slot that + // contains the base58 data. This base58 data span over `slotLength-slotLeadingZeros` bytes. + let slotLeadingZeros := clzBytes(add(encoded, 0x20), slotLength) - // Update length - let offset := sub(slotCLZ, dataCLZ) + // Update length: `slotLength-slotLeadingZeros` of non-zero data plus `dataLeadingZeros` of zero prefix. 
+ let offset := sub(slotLeadingZeros, dataLeadingZeros) let encodedLength := sub(slotLength, offset) // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") + // For each slot, use the table to obtain the corresponding base58 "digit". for { let i := 0 } lt(i, encodedLength) { @@ -140,7 +143,7 @@ library Base58 { mstore8(add(add(encoded, 0x20), i), mload(shr(248, mload(add(add(encoded, 0x20), add(offset, i)))))) } - // Store length and allocate memory + // Store length and allocate (reserve) memory mstore(encoded, encodedLength) mstore(0x40, add(add(encoded, 0x20), encodedLength)) } @@ -184,9 +187,9 @@ library Base58 { mask = 4; } - uint256 dataCLZ = data.countLeading(0x31); - uint256 msb = binu.countConsecutive(dataCLZ, 0x00); - return binu.splice(msb * (dataCLZ + msb < binu.length).toUint(), ptr); + uint256 dataLeadingZeros = data.countLeading(0x31); + uint256 msb = binu.countConsecutive(dataLeadingZeros, 0x00); + return binu.splice(msb * (dataLeadingZeros + msb < binu.length).toUint(), ptr); } } From 7474f2a5f93833bab696db4ee97f0e6419bf62da Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sun, 22 Jun 2025 22:13:01 +0200 Subject: [PATCH 15/28] test vectors --- test/utils/Base58.test.js | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index d0401103038..2bb99220f9b 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -13,7 +13,7 @@ describe('Base58', function () { }); describe('base58', function () { - describe('encode/decode', function () { + describe('encode/decode random buffers', function () { // length 512 runs out of gas. // this checks are very slow when running coverage, causing CI to timeout. 
for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384]) @@ -30,6 +30,24 @@ describe('Base58', function () { ); }); + describe('test vectors', function () { + for (const { raw, b58 } of [ + { raw: 'Hello World!', b58: '2NEpo7TZRRrLZSi2U' }, + { + raw: 'The quick brown fox jumps over the lazy dog.', + b58: 'USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z', + }, + { raw: '0x0000287fb4cd', b58: '11233QC4' }, + ]) + it(raw, async function () { + const buffer = (ethers.isHexString(raw) ? ethers.getBytes : ethers.toUtf8Bytes)(raw); + const hex = ethers.hexlify(buffer); + + await expect(this.mock.$encode(hex)).to.eventually.equal(b58); + await expect(this.mock.$decode(b58)).to.eventually.equal(hex); + }); + }); + describe('decode invalid format', function () { for (const chr of ['I', '-', '~']) it(`Invalid base58 char ${chr}`, async function () { From bef2e4f3a9299cfeac5b2a0db51d49020a7ab2a3 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sun, 22 Jun 2025 22:20:03 +0200 Subject: [PATCH 16/28] document --- contracts/utils/Base58.sol | 5 +++-- test/utils/Base58.test.js | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 1061c33292a..6d1118da143 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -8,7 +8,7 @@ import {Bytes} from "./Bytes.sol"; /** * @dev Provides a set of functions to operate with Base58 strings. * - * Based on the original https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[implementation of storyicon] (MIT). + * Based on https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[storyicon's implementation] (MIT). 
*/ library Base58 { using SafeCast for bool; @@ -17,7 +17,8 @@ library Base58 { error InvalidBase56Digit(uint8); /** - * @dev Base58 encoding and decoding tables + * @dev Base58 encoding & decoding tables + * See sections 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 */ bytes internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; bytes internal constant _LOOKUP_TABLE = diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index 2bb99220f9b..4ed3938ce09 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -30,6 +30,8 @@ describe('Base58', function () { ); }); + // Tests case from section 5 of the (no longer active) Base58 Encoding Scheme RFC + // https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 describe('test vectors', function () { for (const { raw, b58 } of [ { raw: 'Hello World!', b58: '2NEpo7TZRRrLZSi2U' }, From ce1c5adabd03d8a69436a04e2dcc340b208768f0 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Sun, 22 Jun 2025 22:28:06 +0200 Subject: [PATCH 17/28] remove auxiliary utils --- contracts/utils/Base58.sol | 36 ++++-------------------------------- 1 file changed, 4 insertions(+), 32 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 6d1118da143..aff224f7746 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -163,11 +163,11 @@ library Base58 { uint256[] memory outi = new uint256[](outiLength); for (uint256 i = 0; i < data.length; ++i) { // get b58 char - uint8 chr = _mload8i(data, i); + uint8 chr = uint8(data[i]); require(chr > 48 && chr < 123, InvalidBase56Digit(chr)); // decode b58 char - uint256 carry = _mload8i(cache, chr - 49); + uint256 carry = uint8(cache[chr - 49]); require(carry < 58, InvalidBase56Digit(chr)); for (uint256 j = outiLength; j > 0; --j) { @@ -182,8 +182,8 @@ library Base58 { for (uint256 j = 0; j < outiLength; ++j) { while (mask > 0) { --mask; - _mstore8(binu, ptr, 
bytes1(uint8(outi[j] >> (8 * mask)))); - ptr++; + binu[ptr] = bytes1(uint8(outi[j] >> (8 * mask))); + ++ptr; } mask = 4; } @@ -193,32 +193,4 @@ library Base58 { return binu.splice(msb * (dataLeadingZeros + msb < binu.length).toUint(), ptr); } } - - function _mload8(bytes memory buffer, uint256 offset) private pure returns (bytes1 value) { - // This is not memory safe in the general case, but all calls to this private function are within bounds. - assembly ("memory-safe") { - value := mload(add(add(buffer, 0x20), offset)) - } - } - - function _mload8i(bytes memory buffer, uint256 offset) private pure returns (uint8 value) { - // This is not memory safe in the general case, but all calls to this private function are within bounds. - assembly ("memory-safe") { - value := shr(248, mload(add(add(buffer, 0x20), offset))) - } - } - - function _mstore8(bytes memory buffer, uint256 offset, bytes1 value) private pure { - // This is not memory safe in the general case, but all calls to this private function are within bounds. - assembly ("memory-safe") { - mstore8(add(add(buffer, 0x20), offset), shr(248, value)) - } - } - - function _mstore8i(bytes memory buffer, uint256 offset, uint8 value) private pure { - // This is not memory safe in the general case, but all calls to this private function are within bounds. 
- assembly ("memory-safe") { - mstore8(add(add(buffer, 0x20), offset), value) - } - } } From c33e933ab32ac5bd2fb8724e586ee0334f49bea4 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Mon, 23 Jun 2025 15:09:24 +0200 Subject: [PATCH 18/28] mload is actually cheaper than jump --- contracts/utils/Base58.sol | 16 ++-------------- contracts/utils/Bytes.sol | 22 ++++------------------ 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index aff224f7746..d6dbd683645 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -65,23 +65,11 @@ library Base58 { // Assembly is ~50% cheaper for buffers of size 32. assembly ("memory-safe") { function clzBytes(ptr, length) -> i { - let chunk for { i := 0 - } lt(i, length) { + } and(iszero(shr(248, mload(add(ptr, i)))), lt(i, length)) { i := add(i, 1) - } { - // Every 32 bytes, load a new chunk - if iszero(mod(i, 0x20)) { - chunk := mload(add(ptr, i)) - } - // If the first byte of the chunk is not zero, break - if shr(248, chunk) { - break - } - // Shift chunk - chunk := shl(8, chunk) - } + } {} } encoded := mload(0x40) diff --git a/contracts/utils/Bytes.sol b/contracts/utils/Bytes.sol index 768cb7f03db..b5d2f81c3aa 100644 --- a/contracts/utils/Bytes.sol +++ b/contracts/utils/Bytes.sol @@ -79,28 +79,14 @@ library Bytes { * @dev Count number of occurrences of `search` in `buffer`, starting from position `offset`. 
*/ function countConsecutive(bytes memory buffer, uint256 offset, bytes1 search) internal pure returns (uint256 i) { - uint256 length = buffer.length; - if (offset > length) return 0; - + uint256 length = Math.saturatingSub(buffer.length, offset); assembly ("memory-safe") { - let chunk - let end := sub(length, offset) for { + let ptr := add(add(buffer, 0x20), offset) i := 0 - } lt(i, end) { + } and(iszero(shr(248, xor(mload(add(ptr, i)), search))), lt(i, length)) { i := add(i, 1) - } { - // every 32 bytes, load a new chunk - if iszero(mod(i, 0x20)) { - chunk := mload(add(buffer, add(0x20, add(offset, i)))) - } - // if the first byte of the chunk does not match the search element, exit - if shr(248, xor(chunk, search)) { - break - } - // shift chunk - chunk := shl(8, chunk) - } + } {} } } From 855a1c68402253341ca9c1e2311f76697958b376 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Mon, 23 Jun 2025 23:39:21 +0200 Subject: [PATCH 19/28] up --- contracts/utils/Base58.sol | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index d6dbd683645..6c5136ff83c 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -67,7 +67,7 @@ library Base58 { function clzBytes(ptr, length) -> i { for { i := 0 - } and(iszero(shr(248, mload(add(ptr, i)))), lt(i, length)) { + } and(iszero(byte(0, mload(add(ptr, i)))), lt(i, length)) { i := add(i, 1) } {} } @@ -100,12 +100,12 @@ library Base58 { } { let ptr := slotLength for { - let carry := shr(248, mload(add(add(data, 0x20), i))) + let carry := byte(0, mload(add(add(data, 0x20), i))) } or(carry, lt(end, ptr)) { ptr := sub(ptr, 1) carry := div(carry, 58) } { - carry := add(carry, mul(256, shr(248, mload(add(add(encoded, 0x1f), ptr))))) + carry := add(carry, mul(256, byte(0, mload(add(add(encoded, 0x1f), ptr))))) mstore8(add(add(encoded, 0x1f), ptr), mod(carry, 58)) } end := ptr @@ -129,7 +129,7 @@ library Base58 { } lt(i, encodedLength) { i 
:= add(i, 1) } { - mstore8(add(add(encoded, 0x20), i), mload(shr(248, mload(add(add(encoded, 0x20), add(offset, i)))))) + mstore8(add(add(encoded, 0x20), i), mload(byte(0, mload(add(add(encoded, 0x20), add(offset, i)))))) } // Store length and allocate (reserve) memory From ec641c78449ffe2a89ba54cafd43b39d11505ccc Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Wed, 25 Jun 2025 10:51:55 +0200 Subject: [PATCH 20/28] Update contracts/utils/Base58.sol Co-authored-by: Vectorized --- contracts/utils/Base58.sol | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 6c5136ff83c..78c674f87e1 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -83,13 +83,7 @@ library Base58 { let slotLength := add(add(div(mul(sub(dataLength, dataLeadingZeros), 138), 100), dataLeadingZeros), 1) // Zero the encoded buffer - for { - let i := 0 - } lt(i, slotLength) { - i := add(i, 0x20) - } { - mstore(add(add(encoded, 0x20), i), 0) - } + calldatacopy(add(encoded, 0x20), calldatasize(), slotLength) // Build the "slots" for { From 7429bccd9f3707cd7f44765acde2e9c302619098 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 07:49:18 +0200 Subject: [PATCH 21/28] up --- contracts/utils/Base58.sol | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 78c674f87e1..2d02be7639d 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -65,9 +65,10 @@ library Base58 { // Assembly is ~50% cheaper for buffers of size 32. 
assembly ("memory-safe") { function clzBytes(ptr, length) -> i { + // for continues while `i < length` = 1 (true) and the byte at `ptr+1` to be 0 for { i := 0 - } and(iszero(byte(0, mload(add(ptr, i)))), lt(i, length)) { + } lt(byte(0, mload(add(ptr, i))), lt(i, length)) { i := add(i, 1) } {} } @@ -79,8 +80,8 @@ library Base58 { // at then beginning of the encoded string. let dataLeadingZeros := clzBytes(add(data, 0x20), dataLength) - // Initial encoding length: 100% of zero bytes (zero prefix) + 138% of non zero bytes + 1 - let slotLength := add(add(div(mul(sub(dataLength, dataLeadingZeros), 138), 100), dataLeadingZeros), 1) + // Initial encoding length: 100% of zero bytes (zero prefix) + ~137% of non zero bytes + 1 + let slotLength := add(add(div(mul(sub(dataLength, dataLeadingZeros), 8351), 6115), dataLeadingZeros), 32) // Zero the encoded buffer calldatacopy(add(encoded, 0x20), calldatasize(), slotLength) @@ -120,6 +121,13 @@ library Base58 { // For each slot, use the table to obtain the corresponding base58 "digit". for { let i := 0 + } lt(i, dataLeadingZeros) { + i := add(i, 32) + } { + mstore(add(add(encoded, 0x20), i), "11111111111111111111111111111111") + } + for { + let i := dataLeadingZeros } lt(i, encodedLength) { i := add(i, 1) } { From 45edb7683ec2c32ed3efd64e3f2a2758a0e6e632 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 10:56:42 +0200 Subject: [PATCH 22/28] do base58 arithmetics in chunks of 248 bits --- contracts/utils/Base58.sol | 134 +++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 57 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 2d02be7639d..faeebcff27a 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -62,81 +62,101 @@ library Base58 { // } // } - // Assembly is ~50% cheaper for buffers of size 32. 
- assembly ("memory-safe") { - function clzBytes(ptr, length) -> i { - // for continues while `i < length` = 1 (true) and the byte at `ptr+1` to be 0 - for { - i := 0 - } lt(byte(0, mload(add(ptr, i))), lt(i, length)) { - i := add(i, 1) - } {} - } - - encoded := mload(0x40) - let dataLength := mload(data) + uint256 dataLength = data.length; + if (dataLength == 0) return ""; + assembly ("memory-safe") { // Count number of zero bytes at the beginning of `data`. These are encoded using the same number of '1's // at then beginning of the encoded string. - let dataLeadingZeros := clzBytes(add(data, 0x20), dataLength) - - // Initial encoding length: 100% of zero bytes (zero prefix) + ~137% of non zero bytes + 1 - let slotLength := add(add(div(mul(sub(dataLength, dataLeadingZeros), 8351), 6115), dataLeadingZeros), 32) - - // Zero the encoded buffer - calldatacopy(add(encoded, 0x20), calldatasize(), slotLength) + let dataLeadingZeros := 0 + for {} lt(byte(0, mload(add(add(data, 0x20), dataLeadingZeros))), lt(dataLeadingZeros, dataLength)) {} { + dataLeadingZeros := add(dataLeadingZeros, 1) + } - // Build the "slots" + // Start the output offset by an over-estimate of the length. + let overEstimatedSlotLength := add( + dataLeadingZeros, + div(mul(sub(dataLength, dataLeadingZeros), 8351), 6115) + ) + // `scratch` this is going to be our workspace. Be leave enough room on the left to store length + encoded data. + let scratch := add(mload(0x40), add(overEstimatedSlotLength, 0x21)) + + // Cut the input buffer in section (limbs) of 31 bytes (248 bits) + let limbs := scratch + let ptr := limbs for { - let i := 0 - let end := slotLength + // first section is possibly smaller than 31 bytes + let i := mod(dataLength, 31) + // unfold first loop, with a different shift. 
+ if i { + mstore(ptr, shr(mul(sub(32, i), 8), mload(add(data, 0x20)))) + ptr := add(ptr, 0x20) + } } lt(i, dataLength) { - i := add(i, 1) + ptr := add(ptr, 0x20) // next limb + i := add(i, 31) // move in buffer } { - let ptr := slotLength - for { - let carry := byte(0, mload(add(add(data, 0x20), i))) - } or(carry, lt(end, ptr)) { - ptr := sub(ptr, 1) - carry := div(carry, 58) - } { - carry := add(carry, mul(256, byte(0, mload(add(add(encoded, 0x1f), ptr))))) - mstore8(add(add(encoded, 0x1f), ptr), mod(carry, 58)) - } - end := ptr + // Load 32 bytes from the input buffer and shift to only keep the 31 leftmost. + mstore(ptr, shr(8, mload(add(add(data, 0x20), i)))) } - // Count number of zero bytes at the beginning of slots. This is a pointer to the first non zero slot that - // contains the base58 data. This base58 data span over `slotLength-slotLeadingZeros` bytes. - let slotLeadingZeros := clzBytes(add(encoded, 0x20), slotLength) - - // Update length: `slotLength-slotLeadingZeros` of non-zero data plus `dataLeadingZeros` of zero prefix. - let offset := sub(slotLeadingZeros, dataLeadingZeros) - let encodedLength := sub(slotLength, offset) - // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") - // For each slot, use the table to obtain the corresponding base58 "digit". + // Put sentinel after limbs for faster looping. Since limbs are 248bits, 0xFF..FF + // cannot be confused with an actual limb. + mstore(ptr, not(0)) + + // Encoding the "data" part of the result. + // `encoded` point the the left part of the encoded string. 
we start from scratch, which means we have + // overEstimatedSlotLength bytes to work with before hitting the FMP for { - let i := 0 - } lt(i, dataLeadingZeros) { - i := add(i, 32) - } { - mstore(add(add(encoded, 0x20), i), "11111111111111111111111111111111") + encoded := scratch + } 1 {} { + // find location of the first non-zero limb + let i := limbs + for {} iszero(mload(i)) { + i := add(i, 0x20) + } {} + + // if that is the sentinel limb (0xFF..FF), we are done + if iszero(not(mload(i))) { + break + } + + // base 58 arithmetics on the 248bits limbs + let carry := 0 + for { + i := limbs + } lt(i, ptr) { + i := add(i, 0x20) + } { + let acc := add(shl(248, carry), mload(i)) + mstore(i, div(acc, 58)) + carry := mod(acc, 58) + } + + encoded := sub(encoded, 1) + mstore8(encoded, mload(carry)) } + + // Write the data leading zeros at the left of the encoded. + // Write the data leading zeros at the left of the encoded. + // This will spill to the left into the "length" of the buffer. for { - let i := dataLeadingZeros - } lt(i, encodedLength) { - i := add(i, 1) - } { - mstore8(add(add(encoded, 0x20), i), mload(byte(0, mload(add(add(encoded, 0x20), add(offset, i)))))) + let j := 0 + } lt(j, dataLeadingZeros) {} { + j := add(j, 0x20) + mstore(sub(encoded, j), "11111111111111111111111111111111") } - // Store length and allocate (reserve) memory - mstore(encoded, encodedLength) - mstore(0x40, add(add(encoded, 0x20), encodedLength)) + // Move encoded pointer to account for dataLeadingZeros + encoded := sub(encoded, add(dataLeadingZeros, 0x20)) + + // // Store length and allocate (reserve) memory + mstore(encoded, sub(scratch, add(encoded, 0x20))) + mstore(0x40, scratch) } } From 20f36118feb418353437d7c9611bb4c3110646c4 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 11:09:59 +0200 Subject: [PATCH 23/28] update --- contracts/utils/Base58.sol | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git 
a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index faeebcff27a..a192a0c5ede 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -78,12 +78,11 @@ library Base58 { dataLeadingZeros, div(mul(sub(dataLength, dataLeadingZeros), 8351), 6115) ) - // `scratch` this is going to be our workspace. Be leave enough room on the left to store length + encoded data. + // This is going to be our "scratch" workspace. Be leave enough room on the left to store length + encoded data. let scratch := add(mload(0x40), add(overEstimatedSlotLength, 0x21)) - // Cut the input buffer in section (limbs) of 31 bytes (248 bits) - let limbs := scratch - let ptr := limbs + // Cut the input buffer in section (limbs) of 31 bytes (248 bits). Store in scratch. + let ptr := scratch for { // first section is possibly smaller than 31 bytes let i := mod(dataLength, 31) @@ -104,31 +103,25 @@ library Base58 { mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") - // Put sentinel after limbs for faster looping. Since limbs are 248bits, 0xFF..FF - // cannot be confused with an actual limb. - mstore(ptr, not(0)) - // Encoding the "data" part of the result. // `encoded` point the the left part of the encoded string. 
we start from scratch, which means we have // overEstimatedSlotLength bytes to work with before hitting the FMP for { encoded := scratch } 1 {} { - // find location of the first non-zero limb - let i := limbs - for {} iszero(mload(i)) { + // check if there are non-zero limbs remaining + let i := scratch + for {} and(iszero(mload(i)), lt(i, ptr)) { i := add(i, 0x20) } {} - - // if that is the sentinel limb (0xFF..FF), we are done - if iszero(not(mload(i))) { + if eq(i, ptr) { break } // base 58 arithmetics on the 248bits limbs let carry := 0 for { - i := limbs + i := scratch } lt(i, ptr) { i := add(i, 0x20) } { @@ -137,11 +130,11 @@ library Base58 { carry := mod(acc, 58) } + // encoded carry using base58 table, and add it to the output encoded := sub(encoded, 1) mstore8(encoded, mload(carry)) } - // Write the data leading zeros at the left of the encoded. // Write the data leading zeros at the left of the encoded. // This will spill to the left into the "length" of the buffer. for { @@ -154,7 +147,7 @@ library Base58 { // Move encoded pointer to account for dataLeadingZeros encoded := sub(encoded, add(dataLeadingZeros, 0x20)) - // // Store length and allocate (reserve) memory + // Store length and allocate (reserve) memory up to scratch. 
mstore(encoded, sub(scratch, add(encoded, 0x20))) mstore(0x40, scratch) } From 8e60a996cc9c0967c45224d10b4cf0bc9906091e Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 18:59:09 +0200 Subject: [PATCH 24/28] codespell --- contracts/utils/Base58.sol | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index a192a0c5ede..47467cbe507 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -16,11 +16,6 @@ library Base58 { error InvalidBase56Digit(uint8); - /** - * @dev Base58 encoding & decoding tables - * See sections 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 - */ - bytes internal constant _TABLE = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; bytes internal constant _LOOKUP_TABLE = hex"000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff161718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f30313233343536373839"; @@ -39,29 +34,6 @@ library Base58 { } function _encode(bytes memory data) private pure returns (bytes memory encoded) { - // For reference, solidity implementation - // unchecked { - // uint256 dataLeadingZeros = data.countLeading(0x00); - // uint256 length = dataLeadingZeros + ((data.length - dataLeadingZeros) * 8351) / 6115 + 1; - // encoded = new bytes(length); - // uint256 end = length; - // for (uint256 i = 0; i < data.length; ++i) { - // uint256 ptr = length; - // for (uint256 carry = uint8(data[i]); ptr > end || carry != 0; --ptr) { - // carry += 256 * uint8(encoded[ptr - 1]); - // encoded[ptr - 1] = bytes1(uint8(carry % 58)); - // carry /= 58; - // } - // end = ptr; - // } - // uint256 encodedCLZ = encoded.countLeading(0x00); - // length -= encodedCLZ - dataLeadingZeros; - // encoded.splice(encodedCLZ - dataLeadingZeros); - // for (uint256 i = 0; i < length; ++i) { - // encoded[i] = _TABLE[uint8(encoded[i])]; - // } - // } - uint256 dataLength = 
data.length; if (dataLength == 0) return ""; @@ -100,6 +72,7 @@ library Base58 { } // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. + // See sections 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") @@ -118,7 +91,7 @@ library Base58 { break } - // base 58 arithmetics on the 248bits limbs + // base 58 arithmetic on the 248bits limbs let carry := 0 for { i := scratch From dd8e895d54a8f5890b1eaf46d5c3101bf934104d Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 19:43:28 +0200 Subject: [PATCH 25/28] decode assembly --- contracts/utils/Base58.sol | 204 +++++++++++++++++++++---------------- 1 file changed, 119 insertions(+), 85 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 47467cbe507..09cd56cf370 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -1,86 +1,74 @@ // SPDX-License-Identifier: MIT -pragma solidity ^0.8.26; - -import {SafeCast} from "./math/SafeCast.sol"; -import {Bytes} from "./Bytes.sol"; +pragma solidity ^0.8.20; /** * @dev Provides a set of functions to operate with Base58 strings. * - * Based on https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[storyicon's implementation] (MIT). + * Initially based on https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[storyicon's implementation] (MIT). + * Based on the updated and improved https://github.com/Vectorized/solady/blob/main/src/utils/Base58.sol[Vectorized version] (MIT). 
*/ library Base58 { - using SafeCast for bool; - using Bytes for bytes; - - error InvalidBase56Digit(uint8); - - bytes internal constant _LOOKUP_TABLE = - hex"000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff161718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f30313233343536373839"; - /** * @dev Encode a `bytes` buffer as a Base58 `string`. */ - function encode(bytes memory data) internal pure returns (string memory) { - return string(_encode(data)); + function encode(bytes memory input) internal pure returns (string memory) { + return string(_encode(input)); } /** * @dev Decode a Base58 `string` into a `bytes` buffer. */ - function decode(string memory data) internal pure returns (bytes memory) { - return _decode(bytes(data)); + function decode(string memory input) internal pure returns (bytes memory) { + return _decode(bytes(input)); } - function _encode(bytes memory data) private pure returns (bytes memory encoded) { - uint256 dataLength = data.length; - if (dataLength == 0) return ""; + function _encode(bytes memory input) private pure returns (bytes memory output) { + uint256 inputLength = input.length; + if (inputLength == 0) return ""; assembly ("memory-safe") { - // Count number of zero bytes at the beginning of `data`. These are encoded using the same number of '1's + // Count number of zero bytes at the beginning of `input`. These are encoded using the same number of '1's // at then beginning of the encoded string. - let dataLeadingZeros := 0 - for {} lt(byte(0, mload(add(add(data, 0x20), dataLeadingZeros))), lt(dataLeadingZeros, dataLength)) {} { - dataLeadingZeros := add(dataLeadingZeros, 1) + let inputLeadingZeros := 0 + for {} lt(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), lt(inputLeadingZeros, inputLength)) {} { + inputLeadingZeros := add(inputLeadingZeros, 1) } // Start the output offset by an over-estimate of the length. 
- let overEstimatedSlotLength := add( - dataLeadingZeros, - div(mul(sub(dataLength, dataLeadingZeros), 8351), 6115) - ) - // This is going to be our "scratch" workspace. Be leave enough room on the left to store length + encoded data. - let scratch := add(mload(0x40), add(overEstimatedSlotLength, 0x21)) + let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 8351), 6115)) + + // This is going to be our "scratch" workspace. Be leave enough room on the left to store length + encoded input. + let scratch := add(mload(0x40), add(outputLengthEstim, 0x21)) // Cut the input buffer in section (limbs) of 31 bytes (248 bits). Store in scratch. let ptr := scratch for { // first section is possibly smaller than 31 bytes - let i := mod(dataLength, 31) + let i := mod(inputLength, 31) // unfold first loop, with a different shift. if i { - mstore(ptr, shr(mul(sub(32, i), 8), mload(add(data, 0x20)))) + mstore(ptr, shr(mul(sub(32, i), 8), mload(add(input, 0x20)))) ptr := add(ptr, 0x20) } - } lt(i, dataLength) { + } lt(i, inputLength) { ptr := add(ptr, 0x20) // next limb i := add(i, 31) // move in buffer } { // Load 32 bytes from the input buffer and shift to only keep the 31 leftmost. - mstore(ptr, shr(8, mload(add(add(data, 0x20), i)))) + mstore(ptr, shr(8, mload(add(add(input, 0x20), i)))) } // Store the encoding table. This overlaps with the FMP that we are going to reset later anyway. - // See sections 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 + // See sections 2 of https://datatracker.ietf.org/doc/html/draft-msporny-base58-03 mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY") mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz") - // Encoding the "data" part of the result. - // `encoded` point the the left part of the encoded string. we start from scratch, which means we have - // overEstimatedSlotLength bytes to work with before hitting the FMP + // Encoding the "input" part of the result. 
+ // `output` point the the left part of the encoded string. we start from scratch, which means we have + // outputLengthEstim bytes to work with before hitting the FMP for { - encoded := scratch + output := scratch } 1 {} { // check if there are non-zero limbs remaining let i := scratch @@ -103,70 +91,116 @@ library Base58 { carry := mod(acc, 58) } - // encoded carry using base58 table, and add it to the output - encoded := sub(encoded, 1) - mstore8(encoded, mload(carry)) + // encode carry using base58 table, and add it to the output + output := sub(output, 1) + mstore8(output, mload(carry)) } - // Write the data leading zeros at the left of the encoded. + // Write the input leading zeros at the left of the encoded. // This will spill to the left into the "length" of the buffer. for { - let j := 0 - } lt(j, dataLeadingZeros) {} { - j := add(j, 0x20) - mstore(sub(encoded, j), "11111111111111111111111111111111") + let i := 0 + } lt(i, inputLeadingZeros) {} { + i := add(i, 0x20) + mstore(sub(output, i), "11111111111111111111111111111111") } - // Move encoded pointer to account for dataLeadingZeros - encoded := sub(encoded, add(dataLeadingZeros, 0x20)) + // Move output pointer to account for inputLeadingZeros + output := sub(output, add(inputLeadingZeros, 0x20)) // Store length and allocate (reserve) memory up to scratch. - mstore(encoded, sub(scratch, add(encoded, 0x20))) + mstore(output, sub(scratch, add(output, 0x20))) mstore(0x40, scratch) } } - function _decode(bytes memory data) private pure returns (bytes memory) { - unchecked { - uint256 b58Length = data.length; - - uint256 size = 2 * ((b58Length * 8351) / 6115 + 1); - bytes memory binu = new bytes(size); - - bytes memory cache = _LOOKUP_TABLE; - uint256 outiLength = (b58Length + 3) / 4; - // Note: allocating uint32[] would be enough, but solidity doesn't pack memory. 
- uint256[] memory outi = new uint256[](outiLength); - for (uint256 i = 0; i < data.length; ++i) { - // get b58 char - uint8 chr = uint8(data[i]); - require(chr > 48 && chr < 123, InvalidBase56Digit(chr)); - - // decode b58 char - uint256 carry = uint8(cache[chr - 49]); - require(carry < 58, InvalidBase56Digit(chr)); - - for (uint256 j = outiLength; j > 0; --j) { - uint256 value = carry + 58 * outi[j - 1]; - carry = value >> 32; - outi[j - 1] = value & 0xffffffff; + function _decode(bytes memory input) private pure returns (bytes memory output) { + uint256 inputLength = input.length; + if (inputLength == 0) return ""; + + /// @solidity memory-safe-assembly + assembly { + let inputLeadingZeros := 0 // Number of leading '1' in `input`. + // Count leading zeros. In base58, zeros are represented using '1' (chr(49)). + for {} and( + eq(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), 49), + lt(inputLeadingZeros, inputLength) + ) {} { + inputLeadingZeros := add(inputLeadingZeros, 1) + } + + // Start the output offset by an over-estimate of the length. + let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 6115), 8351)) + + // This is going to be our "scratch" workspace. Be leave enough room on the left to store length + encoded input. + let scratch := add(mload(0x40), add(outputLengthEstim, 0x21)) + + // Store the decoding table. This overlaps with the FMP that we are going to reset later anyway. 
+ mstore(0x2a, 0x30313233343536373839) + mstore(0x20, 0x1718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f) + mstore(0x00, 0x000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff16) + + // decode function + function decodeChr(chr) -> decoded { + if or(lt(chr, 49), gt(chr, 122)) { + revert(0, 0) + } + decoded := byte(0, mload(sub(chr, 49))) + if gt(decoded, 57) { + revert(0, 0) } } - uint256 ptr = 0; - uint256 mask = ((b58Length - 1) % 4) + 1; - for (uint256 j = 0; j < outiLength; ++j) { - while (mask > 0) { - --mask; - binu[ptr] = bytes1(uint8(outi[j] >> (8 * mask))); - ++ptr; + // Decode each char of the input string, and stored that in section (limbs) of 31 bytes. Store in scratch. + let ptr := scratch + let mask := shr(8, not(0)) + for { + let j := 0 + } lt(j, inputLength) { + j := add(j, 1) + } { + // for each char, decode it ... + let carry := decodeChr(byte(0, mload(add(add(input, 0x20), j)))) + // ... and add it to the limbs starting a `scratch` + for { + let i := scratch + } lt(i, ptr) { + i := add(i, 0x20) + } { + let acc := add(carry, mul(58, mload(i))) + mstore(i, and(mask, acc)) + carry := shr(248, acc) } - mask = 4; + // If the char just read result in a leftover carry, extend the limbs with the new value + if carry { + mstore(ptr, carry) + ptr := add(ptr, 0x20) + } + } + + // Copy and compact the uint248 limbs + remove any zeros at the beginning. 
+ output := scratch + for { + let i := scratch + } lt(i, ptr) { + i := add(i, 0x20) + } { + output := sub(output, 31) + mstore(sub(output, 1), mload(i)) + } + for {} lt(byte(0, mload(output)), lt(output, scratch)) {} { + output := add(output, 1) } - uint256 dataLeadingZeros = data.countLeading(0x31); - uint256 msb = binu.countConsecutive(dataLeadingZeros, 0x00); - return binu.splice(msb * (dataLeadingZeros + msb < binu.length).toUint(), ptr); + // Add the zeros that were encoded in the input (prefix '1's) + calldatacopy(sub(output, inputLeadingZeros), calldatasize(), inputLeadingZeros) + + // Move output pointer to account for inputLeadingZeros + output := sub(output, add(inputLeadingZeros, 0x20)) + + // Store length and allocate (reserve) memory up to scratch. + mstore(output, sub(scratch, add(output, 0x20))) + mstore(0x40, scratch) } } } From 45f04b4c800e784e0fcf26c778251f6ecd729e57 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 22:11:17 +0200 Subject: [PATCH 26/28] char valdity filter --- contracts/utils/Base58.sol | 24 ++++++++++++------------ test/utils/Base58.test.js | 4 +--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 09cd56cf370..1f3c031e051 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -9,6 +9,8 @@ pragma solidity ^0.8.20; * Based on the updated and improved https://github.com/Vectorized/solady/blob/main/src/utils/Base58.sol[Vectorized version] (MIT). */ library Base58 { + error InvalidBase56Digit(uint8); + /** * @dev Encode a `bytes` buffer as a Base58 `string`. 
*/ @@ -115,6 +117,8 @@ library Base58 { } function _decode(bytes memory input) private pure returns (bytes memory output) { + bytes4 errorSelector = InvalidBase56Digit.selector; + uint256 inputLength = input.length; if (inputLength == 0) return ""; @@ -140,17 +144,6 @@ library Base58 { mstore(0x20, 0x1718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f) mstore(0x00, 0x000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff16) - // decode function - function decodeChr(chr) -> decoded { - if or(lt(chr, 49), gt(chr, 122)) { - revert(0, 0) - } - decoded := byte(0, mload(sub(chr, 49))) - if gt(decoded, 57) { - revert(0, 0) - } - } - // Decode each char of the input string, and stored that in section (limbs) of 31 bytes. Store in scratch. let ptr := scratch let mask := shr(8, not(0)) @@ -160,7 +153,14 @@ library Base58 { j := add(j, 1) } { // for each char, decode it ... - let carry := decodeChr(byte(0, mload(add(add(input, 0x20), j)))) + let c := sub(byte(0, mload(add(add(input, 0x20), j))), 49) + if iszero(and(shl(c, 1), 0x3fff7ff03ffbeff01ff)) { + mstore(0, errorSelector) + mstore(4, add(c, 49)) + revert(0, 0x24) + } + let carry := byte(0, mload(c)) + // ... 
and add it to the limbs starting a `scratch` for { let i := scratch diff --git a/test/utils/Base58.test.js b/test/utils/Base58.test.js index 4ed3938ce09..bb19fa90250 100644 --- a/test/utils/Base58.test.js +++ b/test/utils/Base58.test.js @@ -53,9 +53,7 @@ describe('Base58', function () { describe('decode invalid format', function () { for (const chr of ['I', '-', '~']) it(`Invalid base58 char ${chr}`, async function () { - await expect(this.mock.$decode(`VYRWKp${chr}pnN7`)) - .to.be.revertedWithCustomError(this.mock, 'InvalidBase56Digit') - .withArgs(chr.codePointAt(0)); + await expect(this.mock.$decode(`VYRWKp${chr}pnN7`)).to.be.reverted; }); }); }); From da84743637bc8f47395c94cd8bc7820aec3b9b37 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Thu, 26 Jun 2025 22:24:05 +0200 Subject: [PATCH 27/28] slither --- contracts/utils/Base58.sol | 1 + 1 file changed, 1 insertion(+) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 1f3c031e051..5823ca8e1ff 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -154,6 +154,7 @@ library Base58 { } { // for each char, decode it ... let c := sub(byte(0, mload(add(add(input, 0x20), j))), 49) + /// slither-disable-next-line incorrect-shift if iszero(and(shl(c, 1), 0x3fff7ff03ffbeff01ff)) { mstore(0, errorSelector) mstore(4, add(c, 49)) From c80f693125b5723ab6e4396b20f1d69def20dd25 Mon Sep 17 00:00:00 2001 From: Hadrien Croubois Date: Fri, 27 Jun 2025 09:57:57 +0200 Subject: [PATCH 28/28] slither --- contracts/utils/Base58.sol | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contracts/utils/Base58.sol b/contracts/utils/Base58.sol index 5823ca8e1ff..42a59902ff5 100644 --- a/contracts/utils/Base58.sol +++ b/contracts/utils/Base58.sol @@ -154,7 +154,7 @@ library Base58 { } { // for each char, decode it ... 
let c := sub(byte(0, mload(add(add(input, 0x20), j))), 49) - /// slither-disable-next-line incorrect-shift + // slither-disable-next-line incorrect-shift if iszero(and(shl(c, 1), 0x3fff7ff03ffbeff01ff)) { mstore(0, errorSelector) mstore(4, add(c, 49))