
Commit 7d0cd72

refactor: dispatch load to 16 threads when installing snapshot (#15381)
To reduce the delay caused by installing a snapshot on startup. The major workload is deserializing the data, and this commit dispatches that part of the job to 16 threads.
1 parent 1660104 commit 7d0cd72
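The change replaces a single-threaded read/parse/import loop with a producer, worker-pool, consumer pipeline built on the ordq crate: a blocking reader batches snapshot lines into chunks, 16 workers deserialize the chunks in parallel, and one import thread drains the results and feeds them to the importer. Below is a minimal, self-contained sketch of that shape. The ordq calls (ordq::new, Work, send, close, recv) mirror the diff further down; ParseChunk and serde_json::Value are stand-ins for the commit's Deserializer and RaftStoreEntry, and the sketch assumes ordq and serde_json as dependencies.

use std::io;
use std::iter::repeat_with;

/// Worker: turns a chunk of JSON lines into parsed values.
/// (serde_json::Value stands in for the real RaftStoreEntry.)
struct ParseChunk;

impl ordq::Work for ParseChunk {
    type I = Vec<String>;
    type O = Result<Vec<serde_json::Value>, io::Error>;

    fn run(&mut self, lines: Self::I) -> Self::O {
        lines
            .into_iter()
            .map(|l| {
                serde_json::from_str(&l)
                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
            })
            .collect()
    }
}

fn main() -> Result<(), io::Error> {
    // Bounded queue of 1024 pending chunks, 16 parallel workers.
    let (tx, rx) = ordq::new(1024, repeat_with(|| ParseChunk).take(16));

    // Consumer thread: drains worker results as they become available.
    let import_th = std::thread::spawn(move || -> Result<usize, io::Error> {
        let mut n = 0;
        while let Some(res) = rx.recv() {
            let values = res.map_err(|e| io::Error::new(io::ErrorKind::Other, e))??;
            n += values.len();
        }
        Ok(n)
    });

    // Producer: feed chunks of lines to the worker pool.
    let chunks = vec![
        vec![r#"{"k":"a","v":1}"#.to_string()],
        vec![r#"{"k":"b","v":2}"#.to_string()],
    ];
    for chunk in chunks {
        tx.send(chunk)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
    }
    // Closing the sender lets the workers and the consumer finish.
    tx.close();

    let imported = import_th
        .join()
        .map_err(|_| io::Error::new(io::ErrorKind::Other, "import thread panicked"))??;
    println!("imported {imported} entries");
    Ok(())
}

Sending multi-line chunks rather than individual lines keeps queue traffic small relative to the deserialization work each item carries, which is presumably why the commit batches 1024 lines per work item.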

File tree

4 files changed, +80 -29 lines changed


Cargo.lock

Lines changed: 16 additions & 4 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 6 additions & 3 deletions
@@ -142,7 +142,6 @@ byteorder = "1.4.3"
 chrono = { version = "0.4.31", features = ["serde"] }
 chrono-tz = { version = "0.8", features = ["serde"] }
 clap = { version = "4.4.2", features = ["derive"] }
-dashmap = "5.4.0"
 derive_more = "0.99.17"
 enumflags2 = { version = "0.7.7", features = ["serde"] }
 ethnum = { git = "https://github.com/ariesdevil/ethnum-rs", rev = "4cb05f1" }
@@ -157,9 +156,7 @@ match-template = "0.0.1"
 metrics = "0.20.1"
 minitrace = { version = "0.6", features = ["enable"] }
 mysql_async = { version = "0.33", default-features = false, features = ["rustls-tls"] }
-once_cell = "1.15.0"
 ordered-float = { version = "4.1.0", default-features = false }
-parking_lot = "0.12.1"
 poem = { version = "~1.3.57", features = ["rustls", "multipart", "compression"] }
 prometheus-client = "0.22"
 rand = { version = "0.8.5", features = ["small_rng"] }
@@ -182,6 +179,12 @@ uuid = { version = "1.1.2", features = ["serde", "v4"] }
 walkdir = "2.3.2"
 derive-visitor = "0.3.0"
 
+# Synchronization
+dashmap = "5.4.0"
+once_cell = "1.15.0"
+ordq = "0.2.0"
+parking_lot = "0.12.1"
+
 # Future and async
 futures = "0.3.24"
 futures-async-stream = { version = "0.2.7" }

src/meta/raft-store/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -37,10 +37,12 @@ futures = { workspace = true }
 futures-async-stream = { workspace = true }
 futures-util = { workspace = true }
 hostname = "0.3.1"
+itertools = { workspace = true }
 log = { workspace = true }
 maplit = "1.0.2"
 minitrace = { workspace = true }
 num = "0.4.0"
+ordq = { workspace = true }
 semver = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
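Besides ordq, the crate now also depends on itertools, whose Itertools::chunks is what groups the snapshot's lines into fixed-size batches before they are handed to the worker pool (1024 lines per batch in the loader below). A small stand-alone sketch of that batching step, with an in-memory Cursor standing in for the snapshot file:

use std::io;
use std::io::BufRead;
use std::io::Cursor;

use itertools::Itertools;

fn main() -> io::Result<()> {
    // Stand-in for the snapshot file: one JSON record per line.
    let data = Cursor::new("{\"a\":1}\n{\"b\":2}\n{\"c\":3}\n");

    // BufRead::lines yields io::Result<String>; chunks(2) groups the lines
    // lazily, without reading the whole input into memory first.
    let lines = data.lines();
    let batches = lines.chunks(2);
    for batch in &batches {
        // Collecting a batch propagates the first I/O error in it, if any.
        let batch: Vec<String> = batch.collect::<Result<_, io::Error>>()?;
        println!("batch of {}: {:?}", batch.len(), batch);
    }
    Ok(())
}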

src/meta/raft-store/src/sm_v002/sm_v002.rs

Lines changed: 56 additions & 22 deletions
@@ -15,6 +15,7 @@
 use std::fmt::Debug;
 use std::future;
 use std::io;
+use std::iter::repeat_with;
 use std::sync::Arc;
 
 use databend_common_meta_kvapi::kvapi;
@@ -38,6 +39,7 @@ use databend_common_meta_types::UpsertKV;
 use futures::Stream;
 use futures_util::StreamExt;
 use futures_util::TryStreamExt;
+use itertools::Itertools;
 use log::debug;
 use log::info;
 use log::warn;
@@ -150,36 +152,51 @@ impl SMV002 {
 
         let mut importer = sm_v002::SMV002::new_importer();
 
-        // AsyncBufReadExt::lines() is a bit slow.
-        //
-        // let br = BufReader::with_capacity(16 * 1024 * 1024, data);
-        // let mut lines = AsyncBufReadExt::lines(br);
-        // while let Some(l) = lines.next_line().await? {
-        //     let ent: RaftStoreEntry = serde_json::from_str(&l)
-        //         .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
-        //     importer.import(ent)?;
-        // }
-
         let f = data.into_std().await;
 
         let h = databend_common_base::runtime::spawn_blocking(move || {
-            let mut br = std::io::BufReader::with_capacity(16 * 1024 * 1024, f);
-            let mut line_buf = String::with_capacity(4 * 1024);
-
-            loop {
-                line_buf.clear();
-                let n_read = std::io::BufRead::read_line(&mut br, &mut line_buf)?;
-                if n_read == 0 {
-                    break;
+            // Create a worker pool to deserialize the entries.
+
+            let queue_depth = 1024;
+            let n_workers = 16;
+            let (tx, rx) = ordq::new(queue_depth, repeat_with(|| Deserializer).take(n_workers));
+
+            // Spawn a thread to import the deserialized entries.
+
+            let import_th = databend_common_base::runtime::Thread::spawn(move || {
+                while let Some(res) = rx.recv() {
+                    let entries: Result<Vec<RaftStoreEntry>, io::Error> =
+                        res.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
+
+                    let entries = entries?;
+
+                    for ent in entries {
+                        importer.import(ent)?;
+                    }
                 }
 
-                let ent: RaftStoreEntry = serde_json::from_str(&line_buf)
-                    .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+                let level_data = importer.commit();
+                Ok::<_, io::Error>(level_data)
+            });
 
-                importer.import(ent)?;
+            // Feed input strings to the worker pool.
+            {
+                let mut br = io::BufReader::with_capacity(16 * 1024 * 1024, f);
+                let lines = io::BufRead::lines(&mut br);
+                for c in &lines.into_iter().chunks(1024) {
+                    let chunk = c.collect::<Result<Vec<_>, _>>()?;
+                    tx.send(chunk)
+                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
+                }
+
+                // drop `tx` to notify the worker threads to exit.
+                tx.close()
             }
 
-            let level_data = importer.commit();
+            let level_data = import_th
+                .join()
+                .map_err(|_e| io::Error::new(io::ErrorKind::Other, "import thread failure"))??;
+
             Ok::<_, io::Error>(level_data)
         });
 
@@ -493,3 +510,20 @@ impl SMV002 {
         Ok(())
     }
 }
+
+struct Deserializer;
+
+impl ordq::Work for Deserializer {
+    type I = Vec<String>;
+    type O = Result<Vec<RaftStoreEntry>, io::Error>;
+
+    fn run(&mut self, strings: Self::I) -> Self::O {
+        let mut res = Vec::with_capacity(strings.len());
+        for s in strings {
+            let ent: RaftStoreEntry = serde_json::from_str(&s)
+                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
+            res.push(ent);
+        }
+        Ok(res)
+    }
+}

0 commit comments
