Skip to content

Commit d9951b7

Browse files
Optimize goto binary exporting in cprover_bindings (#4148)
Motivated by the finding that the [`write_goto_binary_file`](https://github.com/model-checking/kani/blob/1b25a1853fab3262380e9baefdfeb929edd53fd1/cprover_bindings/src/irep/goto_binary_serde.rs#L97-L103) function is responsible for around half of our compiler's codegen time, this PR does a collection of low-hanging fruit optimizations for the way we export goto files in the `cprover_bindings` crate. This includes: - Switching `IrepNumbering` caches to use the faster [`FxHashMap`](https://nnethercote.github.io/2021/12/08/a-brutally-effective-hash-function-in-rust.html) (as used in the rust compiler) - Using `.entry().or_insert()` to initialize the irep cache - This avoids having to hash separately for a `.get()` and `.insert()` (as the compiler seemed to have not optimized them together) - Making `IrepId` strings copy-on-write - This avoids unnecessary string allocations by allowing us to construct `InternedString`s directly from borrowed string literals when possible Initial testing (on [`perf/format`](https://github.com/model-checking/kani/blob/main/tests/perf/format/src/lib.rs) & [`perf/vec/vec`](https://github.com/model-checking/kani/blob/main/tests/perf/vec/vec/src/main.rs)) suggests that these changes make our compiler ~13% faster, but I'm also working on comparing performance on the whole std project for the future. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
1 parent 5b6700e commit d9951b7

File tree

4 files changed

+95
-30
lines changed

4 files changed

+95
-30
lines changed

Cargo.lock

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,12 @@ dependencies = [
190190
"which",
191191
]
192192

193+
[[package]]
194+
name = "byteorder"
195+
version = "1.5.0"
196+
source = "registry+https://github.com/rust-lang/crates.io-index"
197+
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
198+
193199
[[package]]
194200
name = "bytes"
195201
version = "1.10.1"
@@ -406,6 +412,7 @@ dependencies = [
406412
name = "cprover_bindings"
407413
version = "0.63.0"
408414
dependencies = [
415+
"fxhash",
409416
"lazy_static",
410417
"linear-map",
411418
"memuse",
@@ -681,6 +688,15 @@ dependencies = [
681688
"percent-encoding",
682689
]
683690

691+
[[package]]
692+
name = "fxhash"
693+
version = "0.2.1"
694+
source = "registry+https://github.com/rust-lang/crates.io-index"
695+
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
696+
dependencies = [
697+
"byteorder",
698+
]
699+
684700
[[package]]
685701
name = "getopts"
686702
version = "0.2.21"

cprover_bindings/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ serde = {version = "1", features = ["derive"]}
2020
string-interner = "0.19"
2121
tracing = "0.1"
2222
linear-map = {version = "1.2", features = ["serde_impl"]}
23+
fxhash = "0.2.1"
2324

2425
[dev-dependencies]
2526
serde_test = "1"

cprover_bindings/src/irep/goto_binary_serde.rs

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@
8080
8181
use crate::irep::{Irep, IrepId, Symbol, SymbolTable};
8282
use crate::{InternString, InternedString};
83+
#[cfg(not(test))]
84+
use fxhash::FxHashMap;
85+
#[cfg(test)]
8386
use std::collections::HashMap;
8487
use std::fs::File;
8588
use std::hash::Hash;
@@ -199,7 +202,28 @@ impl IrepNumberingInv {
199202
}
200203
}
201204

205+
#[cfg(not(test))]
206+
/// A numbering of [InternedString], [IrepId] and [Irep] based on their contents.
207+
/// Note that using [FxHashMap] makes our caches faster, but we still have to use
208+
/// the default [HashMap] in a test context as only it implements the
209+
/// `memuse::DynamicUsage` trait we need for memory profiling under test.
210+
struct IrepNumbering {
211+
/// Map from [InternedString] to their unique numbers.
212+
string_cache: FxHashMap<InternedString, usize>,
213+
214+
/// Inverse string cache.
215+
inv_string_cache: Vec<NumberedString>,
216+
217+
/// Map from [IrepKey] to their unique numbers.
218+
cache: FxHashMap<IrepKey, usize>,
219+
220+
/// Inverse cache, allows to get a NumberedIrep from its unique number.
221+
inv_cache: IrepNumberingInv,
222+
}
223+
224+
#[cfg(test)]
202225
/// A numbering of [InternedString], [IrepId] and [Irep] based on their contents.
226+
/// See above for explanation of why this definition is only used in a test context.
203227
struct IrepNumbering {
204228
/// Map from [InternedString] to their unique numbers.
205229
string_cache: HashMap<InternedString, usize>,
@@ -214,16 +238,31 @@ struct IrepNumbering {
214238
inv_cache: IrepNumberingInv,
215239
}
216240

241+
#[cfg(not(test))]
217242
impl IrepNumbering {
218243
fn new() -> Self {
219244
IrepNumbering {
220-
string_cache: HashMap::new(),
245+
string_cache: FxHashMap::default(),
221246
inv_string_cache: Vec::new(),
222-
cache: HashMap::new(),
247+
cache: FxHashMap::default(),
223248
inv_cache: IrepNumberingInv::new(),
224249
}
225250
}
251+
}
226252

253+
#[cfg(test)]
254+
impl IrepNumbering {
255+
fn new() -> Self {
256+
IrepNumbering {
257+
string_cache: HashMap::default(),
258+
inv_string_cache: Vec::new(),
259+
cache: HashMap::default(),
260+
inv_cache: IrepNumberingInv::new(),
261+
}
262+
}
263+
}
264+
265+
impl IrepNumbering {
227266
/// Returns a [NumberedString] from its number if it exists, None otherwise.
228267
fn numbered_string_from_number(&mut self, string_number: usize) -> Option<NumberedString> {
229268
self.inv_string_cache.get(string_number).copied()
@@ -248,7 +287,7 @@ impl IrepNumbering {
248287
/// Turns a [IrepId] to a [NumberedString]. The [IrepId] gets the number of its
249288
/// string representation.
250289
fn number_irep_id(&mut self, irep_id: &IrepId) -> NumberedString {
251-
self.number_string(&irep_id.to_string().intern())
290+
self.number_string(&irep_id.to_string_cow().intern())
252291
}
253292

254293
/// Turns an [Irep] into a [NumberedIrep]. The [Irep] is recursively traversed
@@ -264,20 +303,18 @@ impl IrepNumbering {
264303
.map(|(key, value)| (self.number_irep_id(key).number, self.number_irep(value).number))
265304
.collect();
266305
let key = IrepKey::new(id, &sub, &named_sub);
267-
self.get_or_insert(&key)
306+
self.get_or_insert(key)
268307
}
269308

270309
/// Gets the existing [NumberedIrep] from the [IrepKey] or inserts a fresh
271310
/// one and returns it.
272-
fn get_or_insert(&mut self, key: &IrepKey) -> NumberedIrep {
273-
if let Some(number) = self.cache.get(key) {
274-
// Return the NumberedIrep from the inverse cache
275-
return self.inv_cache.index[*number];
276-
}
277-
// This is where the key gets its unique number assigned.
278-
let number = self.inv_cache.add_key(key);
279-
self.cache.insert(key.clone(), number);
280-
self.inv_cache.index[number]
311+
fn get_or_insert(&mut self, key: IrepKey) -> NumberedIrep {
312+
let number = self.cache.entry(key).or_insert_with_key(|key| {
313+
// This is where the key gets its unique number assigned.
314+
self.inv_cache.add_key(key)
315+
});
316+
317+
self.inv_cache.index[*number]
281318
}
282319

283320
/// Returns the unique number of the `id` field of the given [NumberedIrep].
@@ -794,7 +831,7 @@ where
794831
let key = IrepKey::new(id, &sub, &named_sub);
795832

796833
// Insert key in the numbering
797-
let numbered = self.numbering.get_or_insert(&key);
834+
let numbered = self.numbering.get_or_insert(key);
798835

799836
// Map number from the binary to new number
800837
self.add_irep_mapping(irep_number, numbered.number);

cprover_bindings/src/irep/irep_id.rs

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::cbmc_string::InternedString;
88
use crate::utils::NumUtils;
99
use num::bigint::{BigInt, BigUint, Sign};
1010

11-
use std::fmt::Display;
11+
use std::borrow::Cow;
1212

1313
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
1414
pub enum IrepId {
@@ -877,22 +877,34 @@ impl IrepId {
877877
}
878878
}
879879

880-
impl Display for IrepId {
881-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
880+
// Implementing `ToString` rather than `Display` because display only has the interface
881+
// to write to a formatter which seems to be slower when you just need the string.
882+
#[allow(clippy::to_string_trait_impl)]
883+
impl ToString for IrepId {
884+
fn to_string(&self) -> String {
885+
self.to_string_cow().into_owned()
886+
}
887+
}
888+
889+
impl IrepId {
890+
pub fn to_string_cow(&self) -> Cow<str> {
891+
match self.to_owned_string() {
892+
Some(owned) => Cow::Owned(owned),
893+
None => Cow::Borrowed(self.to_static_string()),
894+
}
895+
}
896+
897+
fn to_owned_string(&self) -> Option<String> {
882898
match self {
883-
IrepId::FreeformString(s) => {
884-
return write!(f, "{s}");
885-
}
886-
IrepId::FreeformInteger(i) => {
887-
return write!(f, "{i}");
888-
}
889-
IrepId::FreeformBitPattern(i) => {
890-
return write!(f, "{i:X}");
891-
}
892-
_ => {}
899+
IrepId::FreeformString(s) => Some(s.to_string()),
900+
IrepId::FreeformInteger(i) => Some(i.to_string()),
901+
IrepId::FreeformBitPattern(i) => Some(format!("{i:X}")),
902+
_ => None,
893903
}
904+
}
894905

895-
let s = match self {
906+
fn to_static_string(&self) -> &'static str {
907+
match self {
896908
IrepId::FreeformString(_)
897909
| IrepId::FreeformInteger(_)
898910
| IrepId::FreeformBitPattern { .. } => unreachable!(),
@@ -1719,8 +1731,7 @@ impl Display for IrepId {
17191731
IrepId::VectorGt => "vector->",
17201732
IrepId::VectorLt => "vector-<",
17211733
IrepId::FloatbvRoundToIntegral => "floatbv_round_to_integral",
1722-
};
1723-
write!(f, "{s}")
1734+
}
17241735
}
17251736
}
17261737

0 commit comments

Comments
 (0)