Skip to content

Commit 5b00a42

Browse files
committed
New API which does not expose unreachable
The old API exposes `unreachable` in both `unescape_unicode` and `unescape_mixed`. These are conceptually one function, but because their return types are incompatible, they could not be unified. The new API takes this insight further and splits `unescape_unicode` into several functions, so that the byte functions can return bytes instead of chars.
1 parent a584e1a commit 5b00a42

File tree

6 files changed

+169
-71
lines changed

6 files changed

+169
-71
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# 0.0.4
2+
3+
- Add `check_raw_str`, `check_raw_byte_str`, `check_raw_c_str`
4+
- Add `unescape_str`, `unescape_byte_str`, `unescape_c_str`
5+
- Add `unescape_for_errors`
6+
- Remove `unescape_unicode` and `unescape_mixed` from the public API
7+
18
# 0.0.3
29

310
- Extend `rustc-dep-of-std` feature to include `libcore`

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rustc-literal-escaper"
3-
version = "0.0.3"
3+
version = "0.0.4"
44
edition = "2021"
55
description = "Provides code to unescape string literals"
66
license = "Apache-2.0 OR MIT"

benches/benches.rs

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@ fn bench_skip_ascii_whitespace(b: &mut test::Bencher) {
2323
// skip_ascii_whitespace(&mut input.chars(), 0, &mut |range, res| {
2424
// output.push((range, res))
2525
// });
26-
unescape_unicode(&input, Mode::Str, &mut |range, res| {
27-
output.push((range, res))
28-
});
26+
unescape_str(&input, |range, res| output.push((range, res)));
2927
assert_eq!(
3028
output,
3129
[((0..LEN + 2), Err(EscapeError::MultipleSkippedLinesWarning))]
@@ -37,117 +35,122 @@ fn bench_skip_ascii_whitespace(b: &mut test::Bencher) {
3735
// Check raw
3836
//
3937

40-
fn bench_check_raw(b: &mut test::Bencher, c: char, mode: Mode) {
41-
let input: String = test::black_box(repeat_n(c, LEN).collect());
42-
assert_eq!(input.len(), LEN * c.len_utf8());
43-
b.iter(|| {
44-
let mut output = vec![];
45-
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
46-
assert_eq!(output.len(), LEN);
47-
assert_eq!(output[0], ((0..c.len_utf8()), Ok(c)));
48-
});
38+
// Generates a benchmark helper function named `$name` around the raw-literal
// checker `$check_raw`. `$unit` is the type of the expected first value.
//
// The generated helper repeats `s` `LEN` times to build the input, runs the
// checker on it inside `b.iter`, and asserts that exactly `LEN` results were
// reported, the first being `Ok(expected)` over the range `0..s.len()`.
macro_rules! fn_bench_check_raw {
39+
($name:ident, $unit:ty, $check_raw:ident) => {
40+
fn $name(b: &mut test::Bencher, s: &str, expected: $unit) {
41+
let input: String = test::black_box(repeat_n(s, LEN).collect());
42+
assert_eq!(input.len(), LEN * s.len());
43+
b.iter(|| {
44+
let mut output = vec![];
45+
46+
$check_raw(&input, |range, res| output.push((range, res)));
47+
assert_eq!(output.len(), LEN);
48+
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
49+
});
50+
}
51+
};
4952
}
5053

54+
fn_bench_check_raw!(bench_check_raw_str, char, check_raw_str);
55+
fn_bench_check_raw!(bench_check_raw_byte_str, u8, check_raw_byte_str);
56+
fn_bench_check_raw!(bench_check_raw_c_str, char, check_raw_c_str);
57+
5158
// raw str
5259

5360
#[bench]
5461
fn bench_check_raw_str_ascii(b: &mut test::Bencher) {
55-
bench_check_raw(b, 'a', Mode::RawStr);
62+
bench_check_raw_str(b, "a", 'a');
5663
}
5764

5865
#[bench]
5966
fn bench_check_raw_str_unicode(b: &mut test::Bencher) {
60-
bench_check_raw(b, '🦀', Mode::RawStr);
67+
bench_check_raw_str(b, "🦀", '🦀');
6168
}
6269

6370
// raw byte str
6471

6572
#[bench]
66-
fn bench_check_raw_byte_str(b: &mut test::Bencher) {
67-
bench_check_raw(b, 'a', Mode::RawByteStr);
73+
fn bench_check_raw_byte_str_ascii(b: &mut test::Bencher) {
74+
bench_check_raw_byte_str(b, "a", b'a');
6875
}
6976

7077
// raw C str
7178

7279
#[bench]
7380
fn bench_check_raw_c_str_ascii(b: &mut test::Bencher) {
74-
bench_check_raw(b, 'a', Mode::RawCStr);
81+
bench_check_raw_c_str(b, "a", 'a');
7582
}
7683

7784
#[bench]
7885
fn bench_check_raw_c_str_unicode(b: &mut test::Bencher) {
79-
bench_check_raw(b, '🦀', Mode::RawCStr);
86+
bench_check_raw_c_str(b, "🦀", '🦀');
8087
}
8188

8289
//
8390
// Unescape
8491
//
8592

86-
fn bench_unescape(b: &mut test::Bencher, s: &str, mode: Mode, expected: char) {
87-
let input: String = test::black_box(repeat_n(s, LEN).collect());
88-
assert_eq!(input.len(), LEN * s.len());
89-
b.iter(|| {
90-
let mut output = vec![];
91-
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
92-
assert_eq!(output.len(), LEN);
93-
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
94-
});
93+
// Generates a benchmark helper function named `$name` around the unescape
// function `$unescape`. `$unit` is the type of the expected first value
// (`char`, `u8` or `MixedUnit` in the invocations below).
//
// The generated helper repeats `s` `LEN` times to build the input, runs the
// unescape function on it inside `b.iter`, and asserts that exactly `LEN`
// results were reported, the first being `Ok(expected)` over `0..s.len()`.
macro_rules! fn_bench_unescape {
94+
($name:ident, $unit:ty, $unescape:ident) => {
95+
fn $name(b: &mut test::Bencher, s: &str, expected: $unit) {
96+
let input: String = test::black_box(repeat_n(s, LEN).collect());
97+
assert_eq!(input.len(), LEN * s.len());
98+
b.iter(|| {
99+
let mut output = vec![];
100+
101+
$unescape(&input, |range, res| output.push((range, res)));
102+
assert_eq!(output.len(), LEN);
103+
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
104+
});
105+
}
106+
};
95107
}
96108

109+
fn_bench_unescape!(bench_unescape_str, char, unescape_str);
110+
fn_bench_unescape!(bench_unescape_byte_str, u8, unescape_byte_str);
111+
fn_bench_unescape!(bench_unescape_c_str, MixedUnit, unescape_c_str);
112+
97113
// str
98114

99115
#[bench]
100116
fn bench_unescape_str_trivial(b: &mut test::Bencher) {
101-
bench_unescape(b, r"a", Mode::Str, 'a');
117+
bench_unescape_str(b, r"a", 'a');
102118
}
103119

104120
#[bench]
105121
fn bench_unescape_str_ascii(b: &mut test::Bencher) {
106-
bench_unescape(b, r"\n", Mode::Str, '\n');
122+
bench_unescape_str(b, r"\n", '\n');
107123
}
108124

109125
#[bench]
110126
fn bench_unescape_str_hex(b: &mut test::Bencher) {
111-
bench_unescape(b, r"\x22", Mode::Str, '"');
127+
bench_unescape_str(b, r"\x22", '"');
112128
}
113129

114130
#[bench]
115131
fn bench_unescape_str_unicode(b: &mut test::Bencher) {
116-
bench_unescape(b, r"\u{1f980}", Mode::Str, '🦀');
132+
bench_unescape_str(b, r"\u{1f980}", '🦀');
117133
}
118134

119135
// byte str
120136

121137
#[bench]
122138
fn bench_unescape_byte_str_trivial(b: &mut test::Bencher) {
123-
bench_unescape(b, r"a", Mode::ByteStr, 'a');
139+
bench_unescape_byte_str(b, r"a", b'a');
124140
}
125141

126142
#[bench]
127143
fn bench_unescape_byte_str_ascii(b: &mut test::Bencher) {
128-
bench_unescape(b, r"\n", Mode::ByteStr, b'\n' as char);
144+
bench_unescape_byte_str(b, r"\n", b'\n');
129145
}
130146

131147
#[bench]
132148
fn bench_unescape_byte_str_hex(b: &mut test::Bencher) {
133-
bench_unescape(b, r"\xff", Mode::ByteStr, b'\xff' as char);
149+
bench_unescape_byte_str(b, r"\xff", b'\xff');
134150
}
135151

136152
// C str
137153

138-
fn bench_unescape_c_str(b: &mut test::Bencher, s: &str, expected: MixedUnit) {
139-
let input: String = test::black_box(repeat_n(s, LEN).collect());
140-
assert_eq!(input.len(), LEN * s.len());
141-
b.iter(|| {
142-
let mut output = vec![];
143-
unescape_mixed(&input, Mode::CStr, &mut |range, res| {
144-
output.push((range, res))
145-
});
146-
assert_eq!(output.len(), LEN);
147-
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
148-
});
149-
}
150-
151154
#[bench]
152155
fn bench_unescape_c_str_trivial(b: &mut test::Bencher) {
153156
bench_unescape_c_str(b, r"a", MixedUnit::Char('a'));

src/lib.rs

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,106 @@ impl EscapeError {
8080
}
8181
}
8282

83+
/// Takes the contents of a literal (without quotes)
84+
/// and produces a sequence of errors,
85+
/// which are returned by invoking `error_callback`.
///
/// Successfully unescaped values are dropped; only `Err` results are passed
/// on, each together with the range it was reported for. For the `Char` and
/// `Byte` modes that range runs from 0 to the number of bytes of `src`
/// consumed by the time the error was returned.
86+
pub fn unescape_for_errors(
87+
src: &str,
88+
mode: Mode,
89+
mut error_callback: impl FnMut(Range<usize>, EscapeError),
90+
) {
91+
match mode {
92+
// NOTE(review): the arms name `Mode` variants unqualified — presumably a
// `use Mode::*` exists outside this view; confirm, since otherwise `Char`
// would act as a catch-all binding.
//
// Single-unit literals: unescape once and report the error, if any.
Char => {
93+
let mut chars = src.chars();
94+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Char) {
95+
error_callback(0..(src.len() - chars.as_str().len()), e);
96+
}
97+
}
98+
Byte => {
99+
let mut chars = src.chars();
100+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Byte) {
101+
error_callback(0..(src.len() - chars.as_str().len()), e);
102+
}
103+
}
104+
// Remaining modes: run the mode-specific function and forward only the
// `Err` results, ignoring the differently-typed `Ok` values.
Str => unescape_str(src, |range, res| {
105+
if let Err(e) = res {
106+
error_callback(range, e);
107+
}
108+
}),
109+
ByteStr => unescape_byte_str(src, |range, res| {
110+
if let Err(e) = res {
111+
error_callback(range, e);
112+
}
113+
}),
114+
CStr => unescape_c_str(src, |range, res| {
115+
if let Err(e) = res {
116+
error_callback(range, e);
117+
}
118+
}),
119+
RawStr => check_raw_str(src, |range, res| {
120+
if let Err(e) = res {
121+
error_callback(range, e);
122+
}
123+
}),
124+
RawByteStr => check_raw_byte_str(src, |range, res| {
125+
if let Err(e) = res {
126+
error_callback(range, e);
127+
}
128+
}),
129+
RawCStr => check_raw_c_str(src, |range, res| {
130+
if let Err(e) = res {
131+
error_callback(range, e);
132+
}
133+
}),
134+
}
135+
}
136+
137+
/// Checks the contents of a raw string literal (without quotes) by running
/// `unescape_unicode` in `Mode::RawStr`.
///
/// Each `char` or `EscapeError` is reported through `callback` together with
/// its range.
pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
138+
unescape_unicode(src, Mode::RawStr, &mut callback)
139+
}
140+
141+
/// Checks the contents of a raw byte string literal (without quotes) by
/// running `unescape_unicode` in `Mode::RawByteStr`.
///
/// Each result is reported through `callback` together with its range.
/// Successful values are converted from `char` to `u8` with `byte_from_char`;
/// errors are passed through unchanged.
pub fn check_raw_byte_str(
142+
src: &str,
143+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
144+
) {
145+
unescape_unicode(src, Mode::RawByteStr, &mut |r, res| {
146+
callback(r, res.map(byte_from_char))
147+
})
148+
}
149+
150+
/// Checks the contents of a raw C string literal (without quotes) by running
/// `unescape_unicode` in `Mode::RawCStr`.
///
/// Each `char` or `EscapeError` is reported through `callback` together with
/// its range.
pub fn check_raw_c_str(
151+
src: &str,
152+
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
153+
) {
154+
unescape_unicode(src, Mode::RawCStr, &mut callback)
155+
}
156+
157+
/// Unescapes the contents of a string literal (without quotes) by running
/// `unescape_unicode` in `Mode::Str`.
///
/// Each unescaped `char` or `EscapeError` is reported through `callback`
/// together with its range.
pub fn unescape_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
158+
unescape_unicode(src, Mode::Str, &mut callback)
159+
}
160+
161+
/// Unescapes the contents of a byte string literal (without quotes) by
/// running `unescape_unicode` in `Mode::ByteStr`.
///
/// Each result is reported through `callback` together with its range.
/// Successful values are converted from `char` to `u8` with `byte_from_char`;
/// errors are passed through unchanged.
pub fn unescape_byte_str(
162+
src: &str,
163+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
164+
) {
165+
unescape_unicode(src, Mode::ByteStr, &mut |r, res| {
166+
callback(r, res.map(byte_from_char))
167+
})
168+
}
169+
170+
/// Unescapes the contents of a C string literal (without quotes) by running
/// `unescape_mixed` in `Mode::CStr`.
///
/// Each unescaped `MixedUnit` or `EscapeError` is reported through `callback`
/// together with its range.
pub fn unescape_c_str(
171+
src: &str,
172+
mut callback: impl FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
173+
) {
174+
unescape_mixed(src, Mode::CStr, &mut callback)
175+
}
176+
83177
/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without
84178
/// quotes) and produces a sequence of escaped characters or errors.
85179
///
86180
/// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
87181
/// the callback will be called exactly once.
88-
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
182+
fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
89183
where
90184
F: FnMut(Range<usize>, Result<char, EscapeError>),
91185
{
@@ -147,7 +241,7 @@ impl From<u8> for MixedUnit {
147241
/// a sequence of escaped characters or errors.
148242
///
149243
/// Values are returned by invoking `callback`.
150-
pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
244+
fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
151245
where
152246
F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
153247
{
@@ -444,7 +538,7 @@ where
444538
}
445539

446540
#[inline]
447-
pub fn byte_from_char(c: char) -> u8 {
541+
fn byte_from_char(c: char) -> u8 {
448542
let res = c as u32;
449543
debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
450544
res as u8

0 commit comments

Comments
 (0)