Skip to content

Commit 1701949

Browse files
committed
Use a builder to build up the indicator while compiling the table
1 parent f4c93dc commit 1701949

File tree

2 files changed

+115
-46
lines changed

2 files changed

+115
-46
lines changed

src/translator.rs

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use trie::Trie;
66
use crate::parser::{AnchoredRule, Attribute, Braille, Direction, Rule, dots_to_unicode, fallback};
77

88
use self::trie::Boundary;
9-
use indication::{Indication, NumericIndicator};
9+
use indication::{Indication, NumericIndicator, NumericIndicatorBuilder};
1010

1111
mod boundaries;
1212
mod indication;
@@ -181,7 +181,7 @@ pub struct TranslationTable {
181181
character_attributes: CharacterAttributes,
182182
translations: Trie,
183183
match_patterns: MatchPatterns,
184-
indicator_signs: IndicatorSigns,
184+
numeric_indicator: NumericIndicator,
185185
direction: Direction,
186186
}
187187

@@ -195,17 +195,20 @@ impl TranslationTable {
195195
let mut character_attributes = CharacterAttributes::new();
196196
let mut translations = Trie::new();
197197
let mut match_patterns = MatchPatterns::new();
198-
let mut indicator_signs = IndicatorSigns::new();
198+
let mut numeric_indicator_builder = NumericIndicatorBuilder::new();
199199

200200
let rules: Vec<AnchoredRule> = rules
201201
.into_iter()
202202
.filter(|r| r.rule.is_direction(direction))
203203
.collect();
204204

205-
// FIXME: For some unknown reason the litdigit rule seems to have precedence over the digit
206-
// rule. Since they both want to define digits in the same character_definitions slot we
207-
// need to make sure litdigits rules are handled before digit rules
208-
for rule in rules.iter().filter(|r| matches!(r.rule, Rule::Litdigit { .. })) {
205+
// FIXME: For some unknown reason the litdigit rule seems to have precedence over the digit
206+
// rule. Since they both want to define digits in the same character_definitions slot we
207+
// need to make sure litdigits rules are handled before digit rules
208+
for rule in rules
209+
.iter()
210+
.filter(|r| matches!(r.rule, Rule::Litdigit { .. }))
211+
{
209212
match &rule.rule {
210213
Rule::Litdigit {
211214
character, dots, ..
@@ -215,9 +218,9 @@ impl TranslationTable {
215218
character_definitions.insert(*character, translation);
216219
character_attributes.insert(Attribute::Digit, *character);
217220
}
218-
_ => (),
219-
}
220-
}
221+
_ => (),
222+
}
223+
}
221224

222225
// The compilation is done in two passes: The first pass simply collects all character
223226
// definitions and character attributes, so that they are then known in a second pass, e.g.
@@ -296,10 +299,19 @@ impl TranslationTable {
296299
// TODO: should the math opcode not also define a CharacterAttribute?
297300
}
298301
Rule::Numsign { dots } => {
299-
indicator_signs.insert(Indication::NumericStart, dots_to_unicode(dots));
302+
numeric_indicator_builder =
303+
numeric_indicator_builder.numsign(&dots_to_unicode(dots));
300304
}
301305
Rule::Nonumsign { dots } => {
302-
indicator_signs.insert(Indication::NumericEnd, dots_to_unicode(dots));
306+
numeric_indicator_builder =
307+
numeric_indicator_builder.nonumsign(&dots_to_unicode(dots));
308+
}
309+
Rule::Numericnocontchars { chars } => {
310+
numeric_indicator_builder =
311+
numeric_indicator_builder.numericnocontchars(&chars);
312+
}
313+
Rule::Numericmodechars { chars } => {
314+
numeric_indicator_builder = numeric_indicator_builder.numericmodechars(&chars);
303315
}
304316
// display rules are ignored for translation tables
305317
Rule::Display { .. } => (),
@@ -414,14 +426,19 @@ impl TranslationTable {
414426
}
415427
}
416428

429+
numeric_indicator_builder = numeric_indicator_builder.numeric_characters(
430+
character_attributes
431+
.get(Attribute::Digit)
432+
.unwrap_or(HashSet::default()),
433+
);
417434
Ok(TranslationTable {
418435
undefined,
419436
direction,
420437
character_definitions,
421438
character_attributes,
422439
translations,
423440
match_patterns,
424-
indicator_signs,
441+
numeric_indicator: numeric_indicator_builder.build(),
425442
})
426443
}
427444

@@ -437,32 +454,23 @@ impl TranslationTable {
437454
let mut delayed_translations: Vec<Translation> = Vec::new();
438455
let mut chars = input.chars();
439456
let mut prev: Option<char> = None;
440-
let mut indicator = NumericIndicator::new(
441-
self.character_attributes
442-
.get(Attribute::Digit)
443-
.unwrap_or(HashSet::default()),
444-
self.indicator_signs.get(Indication::NumericStart).cloned(),
445-
self.indicator_signs.get(Indication::NumericEnd).cloned(),
446-
);
457+
// FIXME: the following seems weird, but the indicator is a mutable state machine. Since
458+
// self (the translation table) is immutable we build a mutable copy of the indicator for
459+
// each translation
460+
let mut numeric_indicator = self.numeric_indicator.clone();
447461

448462
loop {
449463
// Check if there is a need for an indication
450-
if let Some(indication) = indicator.next(chars.as_str()) {
464+
if let Some(indication) = numeric_indicator.next(chars.as_str()) {
451465
match indication {
452466
Indication::NumericStart => translations.push(Translation::new(
453467
"".to_string(),
454-
self.indicator_signs
455-
.get(Indication::NumericStart)
456-
.cloned()
457-
.unwrap(),
468+
numeric_indicator.start_indicator().unwrap(),
458469
1,
459470
)),
460471
Indication::NumericEnd => translations.push(Translation::new(
461472
"".to_string(),
462-
self.indicator_signs
463-
.get(Indication::NumericEnd)
464-
.cloned()
465-
.unwrap(),
473+
numeric_indicator.end_indicator().unwrap(),
466474
1,
467475
)),
468476
_ => (),

src/translator/indication.rs

Lines changed: 78 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,39 +26,90 @@ pub enum Indication {
2626
}
2727

2828
/// Possible states for the [NumericIndicator] state machine
29-
#[derive(Debug)]
29+
#[derive(Debug, Clone)]
3030
enum State {
3131
Default,
3232
Numeric,
3333
}
3434

35+
#[derive(Debug)]
36+
pub struct NumericIndicatorBuilder(NumericIndicator);
37+
38+
impl NumericIndicatorBuilder {
39+
pub fn new() -> Self {
40+
NumericIndicatorBuilder(NumericIndicator {
41+
state: State::Default,
42+
numeric_chars: HashSet::default(),
43+
extra_numeric_chars: HashSet::default(),
44+
start_indicator: None,
45+
end_indicator: None,
46+
terminating_chars: HashSet::default(),
47+
})
48+
}
49+
50+
pub fn build(self) -> NumericIndicator {
51+
self.0
52+
}
53+
54+
pub fn start_indicator(mut self, s: &str) -> Self {
55+
self.0.start_indicator = Some(s.to_string());
56+
self
57+
}
58+
59+
pub fn numsign(mut self, s: &str) -> Self {
60+
self.0.start_indicator = Some(s.to_string());
61+
self
62+
}
63+
64+
pub fn nonumsign(mut self, s: &str) -> Self {
65+
self.0.end_indicator = Some(s.to_string());
66+
self
67+
}
68+
69+
pub fn numericnocontchars(mut self, s: &str) -> Self {
70+
self.0.terminating_chars = HashSet::from_iter(s.chars());
71+
self
72+
}
73+
74+
pub fn numericmodechars(mut self, s: &str) -> Self {
75+
self.0.extra_numeric_chars = HashSet::from_iter(s.chars());
76+
self
77+
}
78+
79+
pub fn numeric_characters(mut self, chars: HashSet<char>) -> Self {
80+
self.0.numeric_chars = chars;
81+
self
82+
}
83+
}
84+
3585
/// A very simple state machine to keep track of when a numeric indication is
3686
/// required
37-
#[derive(Debug)]
87+
///
88+
/// The state is changed to `State::Numeric` as soon as a character is
89+
/// encountered that is a member of the set of `numeric_chars`. And if a
90+
/// character is encountered that is neither in the set of `numeric_chars` nor
91+
/// in the set of `extra_numeric_chars` the state is changed to
92+
/// `State::Default`.
93+
///
94+
/// An indication for a start is emitted if the `start_indicator` is not None.
95+
/// Indication for the end is emitted if `end_indicator` is not None and the
96+
/// character encountered is in the set of `terminating_chars`.
97+
#[derive(Debug, Clone)]
3898
pub struct NumericIndicator {
3999
state: State,
40100
/// Characters that will trigger a state change to the [State::Numeric] mode
41101
numeric_chars: HashSet<char>,
102+
/// Characters that will prevent a state change to the [State::Default] mode
103+
extra_numeric_chars: HashSet<char>,
42104
/// The characters to indicate the start of a sequence of numerical characters
43105
start_indicator: Option<String>,
44106
/// The characters to indicate the end of a sequence of numerical characters
45107
end_indicator: Option<String>,
108+
/// Characters that will trigger an [`Indication::NumericEnd`] indication
109+
terminating_chars: HashSet<char>,
46110
}
47111

48112
impl NumericIndicator {
49-
pub fn new(
50-
numeric_chars: HashSet<char>,
51-
start_indicator: Option<String>,
52-
end_indicator: Option<String>,
53-
) -> Self {
54-
Self {
55-
state: State::Default,
56-
numeric_chars,
57-
start_indicator,
58-
end_indicator,
59-
}
60-
}
61-
62113
/// The transition method of the numeric indication state machine.
63114
///
64115
/// Takes a string slice to examine the next character(s). Typically the
@@ -93,6 +144,13 @@ impl NumericIndicator {
93144
_ => None,
94145
}
95146
}
147+
148+
pub fn start_indicator(&self) -> Option<String> {
149+
self.start_indicator.clone()
150+
}
151+
pub fn end_indicator(&self) -> Option<String> {
152+
self.end_indicator.clone()
153+
}
96154
}
97155

98156
#[cfg(test)]
@@ -102,8 +160,11 @@ mod tests {
102160
#[test]
103161
fn indicator_test() {
104162
let numeric_chars: HashSet<char> = HashSet::from(['1', '2', '3']);
105-
let mut indicator =
106-
NumericIndicator::new(numeric_chars, Some('⠼'.to_string()), Some("⠰".to_string()));
163+
let builder = NumericIndicatorBuilder::new()
164+
.numeric_characters(numeric_chars)
165+
.numsign("⠼")
166+
.nonumsign("⠰");
167+
let mut indicator = builder.build();
107168
assert_eq!(indicator.next("ab12 a".into()), None);
108169
assert_eq!(indicator.next("b12 a".into()), None);
109170
assert_eq!(

0 commit comments

Comments
 (0)