Skip to content

Commit 783de7f

Browse files
committed
Adds fixes to avoid heap allocations
As per feedback from @zslayton. Renames `PestToElement` to `TryPestToElement` which captures the fallible conversion (e.g. from `str` slices) and makes `PestToElement` be an infallible version of the API. This is a more correct factoring, but also needed to avoid the intermediate copy for `Vec<AstNode>` conversion. Changes `AstRule` structure generation to use an array as it has a fixed number of fields. Adds `smallvec` to make it easier to model dynamically sized yet stack allocated fields for sub-structures for `Expr` conversion. Adjusts the doc test to be more illustrative.
1 parent 2807cdb commit 783de7f

File tree

2 files changed

+96
-74
lines changed

2 files changed

+96
-74
lines changed

pestion/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ thiserror = "~1.0.25"
2424
pest = "~2.1.3"
2525
pest_meta = "~2.1.3"
2626
ion-rs = "~0.6.0"
27+
smallvec = "~1.6.1"
2728

2829
[dev-dependencies]
2930
rstest = "~0.10.0"

pestion/src/lib.rs

Lines changed: 95 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,30 @@
99
//! ```
1010
//! use pestion::*;
1111
//! use ion_rs::value::*;
12+
//! use ion_rs::value::reader::*;
1213
//!
1314
//! fn main() -> PestionResult<()> {
1415
//! // parse a Pest grammar and convert it to Ion element
15-
//! let element = r#"a = @{ "a" | "b" ~ "c" }"#.pest_to_element()?;
16+
//! let actual = r#"a = @{ "a" | "b" ~ "c" }"#.try_pest_to_element()?;
1617
//!
17-
//! // the grammar is a struct containing a field for each rule
18-
//! let a_rule = element
19-
//! .as_struct()
20-
//! .and_then(|s| s.get("a"))
21-
//! .and_then(|r| r.as_struct()).unwrap();
18+
//! // here is the equivalent Ion element
19+
//! let ion_text = r#"{
20+
//! a: {
21+
//! type: atomic,
22+
//! expression:
23+
//! (choice
24+
//! (string exact "a")
25+
//! (sequence
26+
//! (string exact "b")
27+
//! (string exact "c")
28+
//! )
29+
//! )
30+
//! }
31+
//! }"#;
32+
//! let expected = element_reader().read_one(ion_text.as_bytes())?;
2233
//!
23-
//! // The '@' in the start of the rule means it is atomic
24-
//! assert_eq!("atomic", a_rule.get("type").and_then(|t| t.as_str()).unwrap());
25-
//!
26-
//! // the first node in the expression tree is a `choice` operator
27-
//! assert_eq!(
28-
//! "choice",
29-
//! a_rule
30-
//! .get("expression")
31-
//! .and_then(|e| e.as_sequence())
32-
//! .and_then(|s| s.get(0))
33-
//! .and_then(|h| h.as_str()).unwrap()
34-
//! );
34+
//! // these should be equivalent
35+
//! assert_eq!(expected, actual);
3536
//!
3637
//! Ok(())
3738
//! }
@@ -49,20 +50,35 @@ use ion_rs::value::{Builder, Element};
4950
use pest::Parser;
5051
use pest_meta::ast::{Expr, Rule as AstRule, RuleType as AstRuleType, RuleType};
5152
use pest_meta::parser::{consume_rules, PestParser, Rule};
53+
use smallvec::{smallvec, SmallVec};
5254

53-
/// Converts a representation of a Pest grammar (or part of a grammar) into Ion [`Element`].
54-
pub trait PestToElement {
55+
/// Converts representation of a Pest grammar (or part of a grammar) into Ion [`Element`].
56+
pub trait TryPestToElement {
5557
type Element: Element;
5658

5759
/// Converts this into [`Element`] which may imply parsing Pest syntax.
58-
fn pest_to_element(&self) -> PestionResult<Self::Element>;
60+
///
61+
/// This returns `Err` if the the conversion fails. For example, this can happen if the
62+
/// source is not a valid Pest grammar.
63+
fn try_pest_to_element(&self) -> PestionResult<Self::Element>;
64+
}
65+
66+
/// Infallible conversion of a Pest grammar (or part of a grammar) into Ion [`Element`].
67+
pub trait PestToElement {
68+
type Element: Element;
69+
70+
/// Converts this into an [`Element`] representation.
71+
///
72+
/// This operation cannot fail and therefore it is implied that it represents some
73+
/// well formed Pest grammar or component thereof.
74+
fn pest_to_element(&self) -> Self::Element;
5975
}
6076

61-
impl PestToElement for &str {
77+
impl TryPestToElement for &str {
6278
type Element = OwnedElement;
6379

6480
/// Parses a `str` slice as a Pest grammar and serializes the AST into [`Element`].
65-
fn pest_to_element(&self) -> PestionResult<Self::Element> {
81+
fn try_pest_to_element(&self) -> PestionResult<Self::Element> {
6682
let pairs = PestParser::parse(Rule::grammar_rules, *self)?;
6783
let ast = match consume_rules(pairs) {
6884
Ok(ast) => ast,
@@ -77,22 +93,21 @@ impl PestToElement for &str {
7793
}
7894
};
7995

80-
ast.pest_to_element()
96+
Ok(ast.pest_to_element())
8197
}
8298
}
8399

84100
impl PestToElement for Vec<AstRule> {
85101
type Element = OwnedElement;
86102

87103
/// Converts a body of rules into a `struct` that has a rule for each field.
88-
fn pest_to_element(&self) -> PestionResult<Self::Element> {
89-
let mut fields = vec![];
90-
for rule in self.iter() {
104+
fn pest_to_element(&self) -> Self::Element {
105+
let fields = self.iter().map(|rule| {
91106
let rule_name = text_token(rule.name.clone());
92-
let rule_value = rule.pest_to_element()?;
93-
fields.push((rule_name, rule_value));
94-
}
95-
Ok(Self::Element::new_struct(fields))
107+
let rule_value = rule.pest_to_element();
108+
(rule_name, rule_value)
109+
});
110+
Self::Element::new_struct(fields)
96111
}
97112
}
98113

@@ -101,20 +116,20 @@ impl PestToElement for AstRule {
101116

102117
/// Converts a Pest Rule into a `struct` that has the field for [`RuleType`] as a symbol
103118
/// and a field for the [`Expr`].
104-
fn pest_to_element(&self) -> PestionResult<Self::Element> {
105-
let fields = vec![
106-
(text_token("type"), self.ty.pest_to_element()?),
107-
(text_token("expression"), self.expr.pest_to_element()?),
108-
];
109-
Ok(Self::Element::new_struct(fields))
119+
fn pest_to_element(&self) -> Self::Element {
120+
let fields = std::array::IntoIter::new([
121+
(text_token("type"), self.ty.pest_to_element()),
122+
(text_token("expression"), self.expr.pest_to_element()),
123+
]);
124+
Self::Element::new_struct(fields)
110125
}
111126
}
112127

113128
impl PestToElement for AstRuleType {
114129
type Element = OwnedElement;
115130

116131
/// Serializes the enum into a symbolic value.
117-
fn pest_to_element(&self) -> PestionResult<Self::Element> {
132+
fn pest_to_element(&self) -> Self::Element {
118133
let sym_tok = text_token(match self {
119134
RuleType::Normal => "normal",
120135
RuleType::Silent => "silent",
@@ -123,94 +138,100 @@ impl PestToElement for AstRuleType {
123138
RuleType::NonAtomic => "non_atomic",
124139
});
125140

126-
Ok(sym_tok.into())
141+
sym_tok.into()
127142
}
128143
}
129144

130145
impl PestToElement for Expr {
131146
type Element = OwnedElement;
132147

133148
/// Generates a `sexp` representation of the rule expression.
134-
fn pest_to_element(&self) -> PestionResult<Self::Element> {
149+
fn pest_to_element(&self) -> Self::Element {
135150
use OwnedValue::*;
136151

137-
let element = Self::Element::new_sexp(match self.clone() {
138-
Expr::Str(text) => vec![
152+
const STACK_LEN: usize = 4;
153+
let values: SmallVec<[_; STACK_LEN]> = match self.clone() {
154+
Expr::Str(text) => smallvec![
139155
text_token("string").into(),
140156
text_token("exact").into(),
141157
String(text).into(),
142158
],
143-
Expr::Insens(text) => vec![
159+
Expr::Insens(text) => smallvec![
144160
text_token("string").into(),
145161
text_token("insensitive").into(),
146162
String(text).into(),
147163
],
148-
Expr::Range(begin, end) => vec![
164+
Expr::Range(begin, end) => smallvec![
149165
text_token("character_range").into(),
150166
String(begin).into(),
151167
String(end).into(),
152168
],
153-
Expr::Ident(name) => vec![text_token("identifier").into(), String(name).into()],
154-
Expr::PosPred(expr) => vec![
169+
Expr::Ident(name) => smallvec![text_token("identifier").into(), String(name).into()],
170+
Expr::PosPred(expr) => smallvec![
155171
text_token("predicate").into(),
156172
text_token("positive").into(),
157-
expr.pest_to_element()?,
173+
expr.pest_to_element(),
158174
],
159-
Expr::NegPred(expr) => vec![
175+
Expr::NegPred(expr) => smallvec![
160176
text_token("predicate").into(),
161177
text_token("negative").into(),
162-
expr.pest_to_element()?,
178+
expr.pest_to_element(),
163179
],
164-
Expr::Seq(left, right) => vec![
180+
Expr::Seq(left, right) => smallvec![
165181
text_token("sequence").into(),
166-
left.pest_to_element()?,
167-
right.pest_to_element()?,
182+
left.pest_to_element(),
183+
right.pest_to_element(),
168184
],
169-
Expr::Choice(left, right) => vec![
185+
Expr::Choice(left, right) => smallvec![
170186
text_token("choice").into(),
171-
left.pest_to_element()?,
172-
right.pest_to_element()?,
187+
left.pest_to_element(),
188+
right.pest_to_element(),
173189
],
174-
Expr::Opt(expr) => vec![text_token("optional").into(), expr.pest_to_element()?],
175-
Expr::Rep(expr) => vec![
190+
Expr::Opt(expr) => {
191+
smallvec![text_token("optional").into(), expr.pest_to_element()]
192+
}
193+
Expr::Rep(expr) => smallvec![
176194
text_token("repeat_min").into(),
177195
0.into(),
178-
expr.pest_to_element()?,
196+
expr.pest_to_element(),
179197
],
180-
Expr::RepOnce(expr) => vec![
198+
Expr::RepOnce(expr) => smallvec![
181199
text_token("repeat_min").into(),
182200
1.into(),
183-
expr.pest_to_element()?,
201+
expr.pest_to_element(),
184202
],
185-
Expr::RepMin(expr, min) => vec![
203+
Expr::RepMin(expr, min) => smallvec![
186204
text_token("repeat_min").into(),
187205
(min as i64).into(),
188-
expr.pest_to_element()?,
206+
expr.pest_to_element(),
189207
],
190-
Expr::RepMax(expr, max) => vec![
208+
Expr::RepMax(expr, max) => smallvec![
191209
text_token("repeat_max").into(),
192210
(max as i64).into(),
193-
expr.pest_to_element()?,
211+
expr.pest_to_element(),
194212
],
195-
Expr::RepExact(expr, exact) => vec![
213+
Expr::RepExact(expr, exact) => smallvec![
196214
text_token("repeat_range").into(),
197215
(exact as i64).into(),
198216
(exact as i64).into(),
199-
expr.pest_to_element()?,
217+
expr.pest_to_element(),
200218
],
201-
Expr::RepMinMax(expr, min, max) => vec![
219+
Expr::RepMinMax(expr, min, max) => smallvec![
202220
text_token("repeat_range").into(),
203221
(min as i64).into(),
204222
(max as i64).into(),
205-
expr.pest_to_element()?,
223+
expr.pest_to_element(),
206224
],
207225
// TODO implement these
208226
Expr::Skip(_) => unimplemented!(),
209227
Expr::Push(_) => unimplemented!(),
210228
Expr::PeekSlice(_, _) => unimplemented!(),
211-
});
229+
};
230+
assert!(values.len() <= STACK_LEN);
231+
232+
let element = Self::Element::new_sexp(values);
212233

213-
Ok(element)
234+
element
214235
}
215236
}
216237

@@ -403,10 +424,10 @@ mod tests {
403424
)]
404425
fn good<T, S>(#[case] input: T, #[case] ion_literal: S) -> PestionResult<()>
405426
where
406-
T: PestToElement<Element = OwnedElement> + Debug,
427+
T: TryPestToElement<Element = OwnedElement> + Debug,
407428
S: AsRef<str>,
408429
{
409-
let actual = input.pest_to_element()?;
430+
let actual = input.try_pest_to_element()?;
410431
let expected = element_reader().read_one(ion_literal.as_ref().as_bytes())?;
411432

412433
const BUF_SIZE: usize = 16 * 1024 * 1024;
@@ -431,8 +452,8 @@ mod tests {
431452
#[case::empty_rule(r#"a = {}"#)]
432453
#[case::self_reference(r#"a = { a }"#)]
433454
#[case::double_rule(r#"a = { "a" }\n a = { "b" }"#)]
434-
fn pest_errors<T: PestToElement>(#[case] input: T) -> PestionResult<()> {
435-
match input.pest_to_element() {
455+
fn pest_errors<T: TryPestToElement>(#[case] input: T) -> PestionResult<()> {
456+
match input.try_pest_to_element() {
436457
Err(PestionError::Pest(_)) => {}
437458
something => {
438459
unreachable!("Got result we did not expect: {:?}", something);

0 commit comments

Comments
 (0)