Skip to content

Commit 3ff67f7

Browse files
committed
Merge bitcoin/bitcoin#19690: util: improve FindByte() performance
72efc26 util: improve streams.h:FindByte() performance (Larry Ruane) 604df63 [bench] add streams findbyte (gzhao408) Pull request description: This PR is strictly a performance improvement; there is no functional change. The `CBufferedFile::FindByte()` method searches for the next occurrence of the given byte in the file. Currently, this is done by explicitly inspecting each byte in turn. This PR takes advantage of `std::find()` to do the same more efficiently, improving its CPU runtime by a factor of about 25 in typical use. ACKs for top commit: achow101: re-ACK 72efc26 stickies-v: re-ACK 72efc26 Tree-SHA512: ddf0bff335cc8aa34f911aa4e0558fa77ce35d963d602e4ab1c63090b4a386faf074548daf06ee829c7f2c760d06eed0125cf4c34e981c6129cea1804eb3b719
2 parents e0a70c5 + 72efc26 commit 3ff67f7

File tree

6 files changed

+50
-8
lines changed

6 files changed

+50
-8
lines changed

src/Makefile.bench.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ bench_bench_bitcoin_SOURCES = \
4747
bench/rollingbloom.cpp \
4848
bench/rpc_blockchain.cpp \
4949
bench/rpc_mempool.cpp \
50+
bench/streams_findbyte.cpp \
5051
bench/strencodings.cpp \
5152
bench/util_time.cpp \
5253
bench/verify_script.cpp

src/bench/streams_findbyte.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) 2023 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <bench/bench.h>
6+
7+
#include <util/fs.h>
8+
#include <streams.h>
9+
10+
static void FindByte(benchmark::Bench& bench)
11+
{
12+
// Setup
13+
FILE* file = fsbridge::fopen("streams_tmp", "w+b");
14+
const size_t file_size = 200;
15+
uint8_t data[file_size] = {0};
16+
data[file_size-1] = 1;
17+
fwrite(&data, sizeof(uint8_t), file_size, file);
18+
rewind(file);
19+
CBufferedFile bf(file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size, 0, 0);
20+
21+
bench.run([&] {
22+
bf.SetPos(0);
23+
bf.FindByte(std::byte(1));
24+
});
25+
26+
// Cleanup
27+
bf.fclose();
28+
fs::remove("streams_tmp");
29+
}
30+
31+
BENCHMARK(FindByte, benchmark::PriorityLevel::HIGH);

src/streams.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -756,15 +756,25 @@ class CBufferedFile
756756
}
757757

758758
//! search for a given byte in the stream, and remain positioned on it
759-
void FindByte(uint8_t ch)
759+
void FindByte(std::byte byte)
760760
{
761+
// For best performance, avoid mod operation within the loop.
762+
size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))};
761763
while (true) {
762-
if (m_read_pos == nSrcPos)
764+
if (m_read_pos == nSrcPos) {
765+
// No more bytes available; read from the file into the buffer,
766+
// setting nSrcPos to one beyond the end of the new data.
767+
// Throws exception if end-of-file reached.
763768
Fill();
764-
if (vchBuf[m_read_pos % vchBuf.size()] == std::byte{ch}) {
765-
break;
766769
}
767-
m_read_pos++;
770+
const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)};
771+
const auto it_start{vchBuf.begin() + buf_offset};
772+
const auto it_find{std::find(it_start, it_start + len, byte)};
773+
const size_t inc{size_t(std::distance(it_start, it_find))};
774+
m_read_pos += inc;
775+
if (inc < len) break;
776+
buf_offset += inc;
777+
if (buf_offset >= vchBuf.size()) buf_offset = 0;
768778
}
769779
}
770780
};

src/test/fuzz/buffered_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ FUZZ_TARGET(buffered_file)
5353
return;
5454
}
5555
try {
56-
opt_buffered_file->FindByte(fuzzed_data_provider.ConsumeIntegral<uint8_t>());
56+
opt_buffered_file->FindByte(std::byte(fuzzed_data_provider.ConsumeIntegral<uint8_t>()));
5757
} catch (const std::ios_base::failure&) {
5858
}
5959
},

src/test/streams_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
463463
size_t find = currentPos + InsecureRandRange(8);
464464
if (find >= fileSize)
465465
find = fileSize - 1;
466-
bf.FindByte(uint8_t(find));
466+
bf.FindByte(std::byte(find));
467467
// The value at each offset is the offset.
468468
BOOST_CHECK_EQUAL(bf.GetPos(), find);
469469
currentPos = find;

src/validation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4565,7 +4565,7 @@ void Chainstate::LoadExternalBlockFile(
45654565
try {
45664566
// locate a header
45674567
unsigned char buf[CMessageHeader::MESSAGE_START_SIZE];
4568-
blkdat.FindByte(params.MessageStart()[0]);
4568+
blkdat.FindByte(std::byte(params.MessageStart()[0]));
45694569
nRewind = blkdat.GetPos() + 1;
45704570
blkdat >> buf;
45714571
if (memcmp(buf, params.MessageStart(), CMessageHeader::MESSAGE_START_SIZE)) {

0 commit comments

Comments
 (0)