Skip to content

Commit e51654b

Browse files
committed
Integrate the uppercase indicator in the translation
1 parent 2ba5914 commit e51654b

File tree

3 files changed

+178
-45
lines changed

3 files changed

+178
-45
lines changed

src/translator.rs

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ use trie::Trie;
66
use crate::parser::{AnchoredRule, Attribute, Braille, Direction, Rule, dots_to_unicode, fallback};
77

88
use self::trie::Boundary;
9-
use indication::Indication;
10-
use indication::numeric;
9+
use indication::{Indication, numeric, uppercase};
1110

1211
mod boundaries;
1312
mod indication;
@@ -164,6 +163,7 @@ pub struct TranslationTable {
164163
translations: Trie,
165164
match_patterns: MatchPatterns,
166165
numeric_indicator: numeric::Indicator,
166+
uppercase_indicator: uppercase::Indicator,
167167
direction: Direction,
168168
}
169169

@@ -178,6 +178,7 @@ impl TranslationTable {
178178
let mut translations = Trie::new();
179179
let mut match_patterns = MatchPatterns::new();
180180
let mut numeric_indicator_builder = numeric::IndicatorBuilder::new();
181+
let mut uppercase_indicator_builder = uppercase::IndicatorBuilder::new();
181182

182183
let rules: Vec<AnchoredRule> = rules
183184
.into_iter()
@@ -254,6 +255,8 @@ impl TranslationTable {
254255
Translation::new(character.to_string(), dots_to_unicode(dots), 1);
255256
character_definitions.insert(*character, translation);
256257
character_attributes.insert(Attribute::Lowercase, *character);
258+
// a lowercase is also a letter
259+
character_attributes.insert(Attribute::Letter, *character);
257260
}
258261
Rule::Uppercase {
259262
character, dots, ..
@@ -262,6 +265,8 @@ impl TranslationTable {
262265
Translation::new(character.to_string(), dots_to_unicode(dots), 1);
263266
character_definitions.insert(*character, translation);
264267
character_attributes.insert(Attribute::Uppercase, *character);
268+
// an uppercase is also a letter
269+
character_attributes.insert(Attribute::Letter, *character);
265270
}
266271
Rule::Sign {
267272
character, dots, ..
@@ -295,6 +300,29 @@ impl TranslationTable {
295300
Rule::Numericmodechars { chars } => {
296301
numeric_indicator_builder = numeric_indicator_builder.numericmodechars(&chars);
297302
}
303+
Rule::Capsletter { dots, .. } => {
304+
uppercase_indicator_builder =
305+
uppercase_indicator_builder.capsletter(&dots_to_unicode(dots));
306+
}
307+
Rule::Begcapsword { dots, .. } => {
308+
uppercase_indicator_builder =
309+
uppercase_indicator_builder.begcapsword(&dots_to_unicode(dots));
310+
}
311+
Rule::Endcapsword { dots, .. } => {
312+
uppercase_indicator_builder =
313+
uppercase_indicator_builder.endcapsword(&dots_to_unicode(dots));
314+
}
315+
Rule::Begcaps { dots } => {
316+
uppercase_indicator_builder =
317+
uppercase_indicator_builder.begcaps(&dots_to_unicode(dots));
318+
}
319+
Rule::Endcaps { dots } => {
320+
uppercase_indicator_builder =
321+
uppercase_indicator_builder.endcaps(&dots_to_unicode(dots));
322+
}
323+
Rule::Capsmodechars { chars } => {
324+
uppercase_indicator_builder = uppercase_indicator_builder.capsmodechars(&chars);
325+
}
298326
// display rules are ignored for translation tables
299327
Rule::Display { .. } => (),
300328
_ => (),
@@ -305,7 +333,11 @@ impl TranslationTable {
305333
// arguments in rules
306334
for rule in &rules {
307335
match &rule.rule {
308-
Rule::Base { derived, base, .. } => {
336+
Rule::Base {
337+
derived,
338+
base,
339+
name,
340+
} => {
309341
if let Some(translation) = character_definitions.get(base) {
310342
character_definitions.insert(
311343
*derived,
@@ -314,6 +346,14 @@ impl TranslationTable {
314346
..translation.clone()
315347
},
316348
);
349+
// FIXME: The functionality to derive an attribute from a string should be
350+
// separated out. Also it should support more than "uppercase" :-)
351+
match &name[..] {
352+
"uppercase" => {
353+
character_attributes.insert(Attribute::Uppercase, *derived)
354+
}
355+
_ => (),
356+
}
317357
} else {
318358
// hm, there is no character definition for the base character.
319359
// If we are backwards compatible ignore the problem, otherwise
@@ -413,6 +453,17 @@ impl TranslationTable {
413453
.get(Attribute::Digit)
414454
.unwrap_or(HashSet::default()),
415455
);
456+
uppercase_indicator_builder = uppercase_indicator_builder
457+
.uppercase_characters(
458+
character_attributes
459+
.get(Attribute::Uppercase)
460+
.unwrap_or(HashSet::default()),
461+
)
462+
.letter_characters(
463+
character_attributes
464+
.get(Attribute::Letter)
465+
.unwrap_or(HashSet::default()),
466+
);
416467
Ok(TranslationTable {
417468
undefined,
418469
direction,
@@ -421,6 +472,7 @@ impl TranslationTable {
421472
translations,
422473
match_patterns,
423474
numeric_indicator: numeric_indicator_builder.build(),
475+
uppercase_indicator: uppercase_indicator_builder.build(),
424476
})
425477
}
426478

@@ -440,6 +492,7 @@ impl TranslationTable {
440492
// self (the translation table) is immutable we build a mutable copy of the indicator for
441493
// each translation
442494
let mut numeric_indicator = self.numeric_indicator.clone();
495+
let mut uppercase_indicator = self.uppercase_indicator.clone();
443496

444497
loop {
445498
// Check if there is a need for an indication
@@ -455,8 +508,25 @@ impl TranslationTable {
455508
numeric_indicator.end_indicator().unwrap(),
456509
1,
457510
)),
458-
_ => (),
511+
_ => unreachable!(),
512+
};
513+
}
514+
if let Some(indication) = uppercase_indicator.next(chars.as_str()) {
515+
let output = match indication {
516+
Indication::UppercaseStart => uppercase_indicator.start_indicator().unwrap(),
517+
Indication::UppercaseEnd => uppercase_indicator.end_indicator().unwrap(),
518+
Indication::UppercaseStartLetter => {
519+
uppercase_indicator.start_letter_indicator().unwrap()
520+
}
521+
Indication::UppercaseStartWord => {
522+
uppercase_indicator.start_word_indicator().unwrap()
523+
}
524+
Indication::UppercaseEndWord => {
525+
uppercase_indicator.end_word_indicator().unwrap()
526+
}
527+
_ => unreachable!(),
459528
};
529+
translations.push(Translation::new("".to_string(), output, 1));
460530
}
461531
// given an input query the translation table for matching translations. Then split off
462532
// the translations that are delayed, i.e. have an offset because they have a pre-pattern
@@ -855,4 +925,22 @@ mod tests {
855925
assert_eq!(table.translate("123foo"), "⠼⠁⠃⠉⠰⠄⠈⠈");
856926
assert_eq!(table.translate("foof"), "⠄⠈⠈⠄");
857927
}
928+
929+
#[test]
930+
fn uppercase_indication_text() {
931+
let rules = vec![
932+
parse_rule("lowercase a 1"),
933+
parse_rule("lowercase b 12"),
934+
parse_rule("lowercase c 14"),
935+
parse_rule("base uppercase A a"),
936+
parse_rule("base uppercase B b"),
937+
parse_rule("base uppercase C c"),
938+
parse_rule("capsletter 46"),
939+
parse_rule("begcapsword 6-6"),
940+
];
941+
let table = TranslationTable::compile(rules, Direction::Forward).unwrap();
942+
assert_eq!(table.translate("abc"), "⠁⠃⠉");
943+
assert_eq!(table.translate("Abc"), "⠨⠁⠃⠉");
944+
assert_eq!(table.translate("ABC"), "⠠⠠⠁⠃⠉");
945+
}
858946
}

src/translator/indication.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@
1616
//! * [`numeric::Indicator`]: knows whether the translation is in numeric mode
1717
//! * [`uppercase::Indicator`]: knows whether the translation is in uppercase mode
1818
19-
pub mod uppercase;
2019
pub mod numeric;
20+
pub mod uppercase;
2121

2222
/// Possible indication events that the indicator state machine(s) support
2323
#[derive(Debug, PartialEq, Eq, Hash)]
2424
pub enum Indication {
2525
NumericStart,
2626
NumericEnd,
27+
UppercaseStartLetter,
28+
UppercaseStartWord,
29+
UppercaseEndWord,
2730
UppercaseStart,
2831
UppercaseEnd,
2932
EmphasisStart,

0 commit comments

Comments
 (0)