Skip to content

Commit 520965e

Browse files
l0rincmaflckoryanofskymartinustheuni
committed
optimization: bulk serialization reads in UndoRead, ReadBlock
The obfuscation (XOR) operations are currently done byte-by-byte during serialization. Buffering the reads will enable batching the obfuscation operations later. Different operating systems handle file caching differently, so reading larger batches (and processing them from memory) is measurably faster, likely because of fewer native fread calls and reduced lock contention. Note that `ReadRawBlock` doesn't need buffering since it already reads the whole block directly. Unlike `ReadBlockUndo`, the new `ReadBlock` implementation delegates to `ReadRawBlock`, which uses more memory than a buffered alternative but results in slightly simpler code and a small performance increase (~0.4%). This approach also clearly documents that `ReadRawBlock` is a logical subset of `ReadBlock` functionality. The current implementation, which iterates over a fixed-size buffer, provides a more general alternative to Cory Fields' solution of reading the entire block size in advance. Buffer sizes were selected based on benchmarking to ensure the buffered reader produces performance similar to reading the whole block into memory. Smaller buffers were slower, while larger ones showed diminishing returns. ------ > macOS Sequoia 15.3.1 > C++ compiler .......................... Clang 19.1.7 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=10000 Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,271,441.67 | 440.25 | 0.1% | 11.00 | `ReadBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,738,971.29 | 575.05 | 0.2% | 10.97 | `ReadBlockBench` ------ > Ubuntu 24.04.2 LTS > C++ compiler .......................... GNU 13.3.0 > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=20000 Before: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 6,895,987.11 | 145.01 | 0.0% | 71,055,269.86 | 23,977,374.37 | 2.963 | 5,074,828.78 | 0.4% | 22.00 | `ReadBlockBench` After: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 5,771,882.71 | 173.25 | 0.0% | 65,741,889.82 | 20,453,232.33 | 3.214 | 3,971,321.75 | 0.3% | 22.01 | `ReadBlockBench` Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> Co-authored-by: Ryan Ofsky <ryan@ofsky.org> Co-authored-by: Martin Leitner-Ankerl <martin.ankerl@gmail.com> Co-authored-by: Cory Fields <cory-nospam-@coryfields.com>
1 parent 056cb3c commit 520965e

File tree

3 files changed

+105
-7
lines changed

3 files changed

+105
-7
lines changed

src/node/blockstorage.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -660,11 +660,12 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
660660
const FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())};
661661

662662
// Open history file to read
663-
AutoFile filein{OpenUndoFile(pos, true)};
664-
if (filein.IsNull()) {
663+
AutoFile file{OpenUndoFile(pos, true)};
664+
if (file.IsNull()) {
665665
LogError("OpenUndoFile failed for %s while reading block undo", pos.ToString());
666666
return false;
667667
}
668+
BufferedReader filein{std::move(file)};
668669

669670
try {
670671
// Read block
@@ -996,15 +997,14 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
996997
block.SetNull();
997998

998999
// Open history file to read
999-
AutoFile filein{OpenBlockFile(pos, /*fReadOnly=*/true)};
1000-
if (filein.IsNull()) {
1001-
LogError("OpenBlockFile failed for %s while reading block", pos.ToString());
1000+
std::vector<uint8_t> block_data;
1001+
if (!ReadRawBlock(block_data, pos)) {
10021002
return false;
10031003
}
10041004

10051005
try {
10061006
// Read block
1007-
filein >> TX_WITH_WITNESS(block);
1007+
SpanReader{block_data} >> TX_WITH_WITNESS(block);
10081008
} catch (const std::exception& e) {
10091009
LogError("Deserialize or I/O error - %s at %s while reading block", e.what(), pos.ToString());
10101010
return false;

src/streams.h

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ class AutoFile
467467
}
468468
};
469469

470+
using DataBuffer = std::vector<std::byte>;
471+
470472
/** Wrapper around an AutoFile& that implements a ring buffer to
471473
* deserialize from. It guarantees the ability to rewind a given number of bytes.
472474
*
@@ -481,7 +483,7 @@ class BufferedFile
481483
uint64_t m_read_pos{0}; //!< how many bytes have been read from this
482484
uint64_t nReadLimit; //!< up to which position we're allowed to read
483485
uint64_t nRewind; //!< how many bytes we guarantee to rewind
484-
std::vector<std::byte> vchBuf; //!< the buffer
486+
DataBuffer vchBuf;
485487

486488
//! read data from the source to fill the buffer
487489
bool Fill() {
@@ -614,4 +616,46 @@ class BufferedFile
614616
}
615617
};
616618

619+
/**
620+
* Wrapper that buffers reads from an underlying stream.
621+
* Requires underlying stream to support read() and detail_fread() calls
622+
* to support fixed-size and variable-sized reads, respectively.
623+
*/
624+
template <typename S>
625+
class BufferedReader
626+
{
627+
S& m_src;
628+
DataBuffer m_buf;
629+
size_t m_buf_pos;
630+
631+
public:
632+
//! Requires stream ownership to prevent leaving the stream at an unexpected position after buffered reads.
633+
explicit BufferedReader(S&& stream LIFETIMEBOUND, size_t size = 1 << 16)
634+
requires std::is_rvalue_reference_v<S&&>
635+
: m_src{stream}, m_buf(size), m_buf_pos{size} {}
636+
637+
void read(std::span<std::byte> dst)
638+
{
639+
if (const auto available{std::min(dst.size(), m_buf.size() - m_buf_pos)}) {
640+
std::copy_n(m_buf.begin() + m_buf_pos, available, dst.begin());
641+
m_buf_pos += available;
642+
dst = dst.subspan(available);
643+
}
644+
if (dst.size()) {
645+
assert(m_buf_pos == m_buf.size());
646+
m_src.read(dst);
647+
648+
m_buf_pos = 0;
649+
m_buf.resize(m_src.detail_fread(m_buf));
650+
}
651+
}
652+
653+
template <typename T>
654+
BufferedReader& operator>>(T&& obj)
655+
{
656+
Unserialize(*this, obj);
657+
return *this;
658+
}
659+
};
660+
617661
#endif // BITCOIN_STREAMS_H

src/test/streams_tests.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// Distributed under the MIT software license, see the accompanying
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

5+
#include <flatfile.h>
6+
#include <node/blockstorage.h>
57
#include <streams.h>
68
#include <test/util/random.h>
79
#include <test/util/setup_common.h>
@@ -553,6 +555,58 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
553555
fs::remove(streams_test_filename);
554556
}
555557

558+
BOOST_AUTO_TEST_CASE(buffered_reader_matches_autofile_random_content)
559+
{
560+
const size_t file_size{1 + m_rng.randrange<size_t>(1 << 17)};
561+
const size_t buf_size{1 + m_rng.randrange(file_size)};
562+
const FlatFilePos pos{0, 0};
563+
564+
const FlatFileSeq test_file{m_args.GetDataDirBase(), "buffered_file_test_random", node::BLOCKFILE_CHUNK_SIZE};
565+
const std::vector obfuscation{m_rng.randbytes<std::byte>(8)};
566+
567+
// Write out the file with random content
568+
{
569+
AutoFile{test_file.Open(pos, /*read_only=*/false), obfuscation}.write(m_rng.randbytes<std::byte>(file_size));
570+
}
571+
BOOST_CHECK_EQUAL(fs::file_size(test_file.FileName(pos)), file_size);
572+
573+
{
574+
AutoFile direct_file{test_file.Open(pos, /*read_only=*/true), obfuscation};
575+
576+
AutoFile buffered_file{test_file.Open(pos, /*read_only=*/true), obfuscation};
577+
BufferedReader buffered_reader{std::move(buffered_file), buf_size};
578+
579+
for (size_t total_read{0}; total_read < file_size;) {
580+
const size_t read{Assert(std::min(1 + m_rng.randrange(m_rng.randbool() ? buf_size : 2 * buf_size), file_size - total_read))};
581+
582+
DataBuffer direct_file_buffer{read};
583+
direct_file.read(direct_file_buffer);
584+
585+
DataBuffer buffered_buffer{read};
586+
buffered_reader.read(buffered_buffer);
587+
588+
BOOST_CHECK_EQUAL_COLLECTIONS(
589+
direct_file_buffer.begin(), direct_file_buffer.end(),
590+
buffered_buffer.begin(), buffered_buffer.end()
591+
);
592+
593+
total_read += read;
594+
}
595+
596+
{
597+
DataBuffer excess_byte{1};
598+
BOOST_CHECK_EXCEPTION(direct_file.read(excess_byte), std::ios_base::failure, HasReason{"end of file"});
599+
}
600+
601+
{
602+
DataBuffer excess_byte{1};
603+
BOOST_CHECK_EXCEPTION(buffered_reader.read(excess_byte), std::ios_base::failure, HasReason{"end of file"});
604+
}
605+
}
606+
607+
fs::remove(test_file.FileName(pos));
608+
}
609+
556610
BOOST_AUTO_TEST_CASE(streams_hashed)
557611
{
558612
DataStream stream{};

0 commit comments

Comments
 (0)