Skip to content

Commit a8565aa

Browse files
committed
New API which does not expose unreachable
The old API exposes `unreachable` in both `unescape_unicode` and `unescape_mixed`. These are conceptually one function, but because their return types are incompatible, they could not be unified. The new API takes this insight further and splits `unescape_unicode` into separate functions, so that the byte functions can return bytes instead of chars.
1 parent 1889c21 commit a8565aa

File tree

6 files changed

+155
-49
lines changed

6 files changed

+155
-49
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# 0.0.3
2+
3+
- Add `check_raw_str`, `check_raw_byte_str`, and `check_raw_c_str`
4+
- Add `unescape_str`, `unescape_byte_str`, and `unescape_c_str`
5+
- Add `unescape_for_errors`
6+
- Remove `unescape_unicode` and `unescape_mixed`
7+
18
# 0.0.2
29

310
- Add new `rustc-dep-of-std` feature to allow building `libproc-macro`

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rustc-literal-escaper"
3-
version = "0.0.2"
3+
version = "0.0.3"
44
edition = "2021"
55
description = "Provides code to unescape string literals"
66
license = "Apache-2.0 OR MIT"

benches/benches.rs

Lines changed: 36 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
extern crate test;
44

55
use rustc_literal_escaper::*;
6+
use std::fmt::Debug;
67
use std::iter::repeat_n;
8+
use std::ops::Range;
79

810
const LEN: usize = 10_000;
911

@@ -23,9 +25,7 @@ fn bench_skip_ascii_whitespace(b: &mut test::Bencher) {
2325
// skip_ascii_whitespace(&mut input.chars(), 0, &mut |range, res| {
2426
// output.push((range, res))
2527
// });
26-
unescape_unicode(&input, Mode::Str, &mut |range, res| {
27-
output.push((range, res))
28-
});
28+
unescape_str(&input, |range, res| output.push((range, res)));
2929
assert_eq!(
3030
output,
3131
[((0..LEN + 2), Err(EscapeError::MultipleSkippedLinesWarning))]
@@ -37,58 +37,71 @@ fn bench_skip_ascii_whitespace(b: &mut test::Bencher) {
3737
// Check raw
3838
//
3939

40-
fn bench_check_raw(b: &mut test::Bencher, c: char, mode: Mode) {
41-
let input: String = test::black_box(repeat_n(c, LEN).collect());
42-
assert_eq!(input.len(), LEN * c.len_utf8());
40+
#[allow(clippy::type_complexity)]
41+
fn bench_check_raw<UNIT: Into<char> + PartialEq + Debug + Copy>(
42+
b: &mut test::Bencher,
43+
c: UNIT,
44+
check_raw: fn(&str, &mut dyn FnMut(Range<usize>, Result<UNIT, EscapeError>)),
45+
) {
46+
let input: String = test::black_box(repeat_n(c.into(), LEN).collect());
47+
assert_eq!(input.len(), LEN * c.into().len_utf8());
48+
4349
b.iter(|| {
4450
let mut output = vec![];
45-
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
51+
52+
check_raw(&input, &mut |range, res| output.push((range, res)));
4653
assert_eq!(output.len(), LEN);
47-
assert_eq!(output[0], ((0..c.len_utf8()), Ok(c)));
54+
assert_eq!(output[0], (0..c.into().len_utf8(), Ok(c)));
4855
});
4956
}
5057

5158
// raw str
5259

5360
#[bench]
5461
fn bench_check_raw_str_ascii(b: &mut test::Bencher) {
55-
bench_check_raw(b, 'a', Mode::RawStr);
62+
bench_check_raw(b, 'a', |s, cb| check_raw_str(s, cb));
5663
}
5764

5865
#[bench]
5966
fn bench_check_raw_str_unicode(b: &mut test::Bencher) {
60-
bench_check_raw(b, '🦀', Mode::RawStr);
67+
bench_check_raw(b, '🦀', |s, cb| check_raw_str(s, cb));
6168
}
6269

6370
// raw byte str
6471

6572
#[bench]
6673
fn bench_check_raw_byte_str(b: &mut test::Bencher) {
67-
bench_check_raw(b, 'a', Mode::RawByteStr);
74+
bench_check_raw(b, b'a', |s, cb| check_raw_byte_str(s, cb));
6875
}
6976

7077
// raw C str
7178

7279
#[bench]
7380
fn bench_check_raw_c_str_ascii(b: &mut test::Bencher) {
74-
bench_check_raw(b, 'a', Mode::RawCStr);
81+
bench_check_raw(b, 'a', |s, cb| check_raw_c_str(s, cb));
7582
}
7683

7784
#[bench]
7885
fn bench_check_raw_c_str_unicode(b: &mut test::Bencher) {
79-
bench_check_raw(b, '🦀', Mode::RawCStr);
86+
bench_check_raw(b, '🦀', |s, cb| check_raw_c_str(s, cb));
8087
}
8188

8289
//
8390
// Unescape
8491
//
8592

86-
fn bench_unescape(b: &mut test::Bencher, s: &str, mode: Mode, expected: char) {
93+
#[allow(clippy::type_complexity)]
94+
fn bench_unescape<UNIT: Into<char> + PartialEq + Debug + Copy>(
95+
b: &mut test::Bencher,
96+
s: &str,
97+
expected: UNIT,
98+
unescape: fn(&str, &mut dyn FnMut(Range<usize>, Result<UNIT, EscapeError>)),
99+
) {
87100
let input: String = test::black_box(repeat_n(s, LEN).collect());
88101
assert_eq!(input.len(), LEN * s.len());
89102
b.iter(|| {
90103
let mut output = vec![];
91-
unescape_unicode(&input, mode, &mut |range, res| output.push((range, res)));
104+
unescape(&input, &mut |range, res| output.push((range, res)));
92105
assert_eq!(output.len(), LEN);
93106
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
94107
});
@@ -98,39 +111,39 @@ fn bench_unescape(b: &mut test::Bencher, s: &str, mode: Mode, expected: char) {
98111

99112
#[bench]
100113
fn bench_unescape_str_trivial(b: &mut test::Bencher) {
101-
bench_unescape(b, r"a", Mode::Str, 'a');
114+
bench_unescape(b, r"a", 'a', |s, cb| unescape_str(s, cb));
102115
}
103116

104117
#[bench]
105118
fn bench_unescape_str_ascii(b: &mut test::Bencher) {
106-
bench_unescape(b, r"\n", Mode::Str, '\n');
119+
bench_unescape(b, r"\n", '\n', |s, cb| unescape_str(s, cb));
107120
}
108121

109122
#[bench]
110123
fn bench_unescape_str_hex(b: &mut test::Bencher) {
111-
bench_unescape(b, r"\x22", Mode::Str, '"');
124+
bench_unescape(b, r"\x22", '"', |s, cb| unescape_str(s, cb));
112125
}
113126

114127
#[bench]
115128
fn bench_unescape_str_unicode(b: &mut test::Bencher) {
116-
bench_unescape(b, r"\u{1f980}", Mode::Str, '🦀');
129+
bench_unescape(b, r"\u{1f980}", '🦀', |s, cb| unescape_str(s, cb));
117130
}
118131

119132
// byte str
120133

121134
#[bench]
122135
fn bench_unescape_byte_str_trivial(b: &mut test::Bencher) {
123-
bench_unescape(b, r"a", Mode::ByteStr, 'a');
136+
bench_unescape(b, r"a", b'a', |s, cb| unescape_byte_str(s, cb));
124137
}
125138

126139
#[bench]
127140
fn bench_unescape_byte_str_ascii(b: &mut test::Bencher) {
128-
bench_unescape(b, r"\n", Mode::ByteStr, b'\n' as char);
141+
bench_unescape(b, r"\n", b'\n', |s, cb| unescape_byte_str(s, cb));
129142
}
130143

131144
#[bench]
132145
fn bench_unescape_byte_str_hex(b: &mut test::Bencher) {
133-
bench_unescape(b, r"\xff", Mode::ByteStr, b'\xff' as char);
146+
bench_unescape(b, r"\xff", b'\xff', |s, cb| unescape_byte_str(s, cb));
134147
}
135148

136149
// C str
@@ -140,9 +153,7 @@ fn bench_unescape_c_str(b: &mut test::Bencher, s: &str, expected: MixedUnit) {
140153
assert_eq!(input.len(), LEN * s.len());
141154
b.iter(|| {
142155
let mut output = vec![];
143-
unescape_mixed(&input, Mode::CStr, &mut |range, res| {
144-
output.push((range, res))
145-
});
156+
unescape_c_str(&input, &mut |range, res| output.push((range, res)));
146157
assert_eq!(output.len(), LEN);
147158
assert_eq!(output[0], ((0..s.len()), Ok(expected)));
148159
});

src/lib.rs

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,106 @@ impl EscapeError {
8080
}
8181
}
8282

83+
/// Takes the contents of a literal (without quotes)
84+
/// and produces a sequence of errors,
85+
/// which are returned by invoking `error_callback`.
86+
pub fn unescape_for_errors(
87+
src: &str,
88+
mode: Mode,
89+
mut error_callback: impl FnMut(Range<usize>, EscapeError),
90+
) {
91+
match mode {
92+
Char => {
93+
let mut chars = src.chars();
94+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Char) {
95+
error_callback(0..(src.len() - chars.as_str().len()), e);
96+
}
97+
}
98+
Byte => {
99+
let mut chars = src.chars();
100+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Byte) {
101+
error_callback(0..(src.len() - chars.as_str().len()), e);
102+
}
103+
}
104+
Str => unescape_str(src, |range, res| {
105+
if let Err(e) = res {
106+
error_callback(range, e);
107+
}
108+
}),
109+
ByteStr => unescape_byte_str(src, |range, res| {
110+
if let Err(e) = res {
111+
error_callback(range, e);
112+
}
113+
}),
114+
CStr => unescape_c_str(src, |range, res| {
115+
if let Err(e) = res {
116+
error_callback(range, e);
117+
}
118+
}),
119+
RawStr => check_raw_str(src, |range, res| {
120+
if let Err(e) = res {
121+
error_callback(range, e);
122+
}
123+
}),
124+
RawByteStr => check_raw_byte_str(src, |range, res| {
125+
if let Err(e) = res {
126+
error_callback(range, e);
127+
}
128+
}),
129+
RawCStr => check_raw_c_str(src, |range, res| {
130+
if let Err(e) = res {
131+
error_callback(range, e);
132+
}
133+
}),
134+
}
135+
}
136+
137+
pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
138+
unescape_unicode(src, Mode::RawStr, &mut callback)
139+
}
140+
141+
pub fn check_raw_byte_str(
142+
src: &str,
143+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
144+
) {
145+
unescape_unicode(src, Mode::RawByteStr, &mut |r, res| {
146+
callback(r, res.map(byte_from_char))
147+
})
148+
}
149+
150+
pub fn check_raw_c_str(
151+
src: &str,
152+
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
153+
) {
154+
unescape_unicode(src, Mode::RawCStr, &mut callback)
155+
}
156+
157+
pub fn unescape_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
158+
unescape_unicode(src, Mode::Str, &mut callback)
159+
}
160+
161+
pub fn unescape_byte_str(
162+
src: &str,
163+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
164+
) {
165+
unescape_unicode(src, Mode::ByteStr, &mut |r, res| {
166+
callback(r, res.map(byte_from_char))
167+
})
168+
}
169+
170+
pub fn unescape_c_str(
171+
src: &str,
172+
mut callback: impl FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
173+
) {
174+
unescape_mixed(src, Mode::CStr, &mut callback)
175+
}
176+
83177
/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without
84178
/// quotes) and produces a sequence of escaped characters or errors.
85179
///
86180
/// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
87181
/// the callback will be called exactly once.
88-
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
182+
fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
89183
where
90184
F: FnMut(Range<usize>, Result<char, EscapeError>),
91185
{
@@ -147,7 +241,7 @@ impl From<u8> for MixedUnit {
147241
/// a sequence of escaped characters or errors.
148242
///
149243
/// Values are returned by invoking `callback`.
150-
pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
244+
fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
151245
where
152246
F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
153247
{
@@ -444,7 +538,7 @@ where
444538
}
445539

446540
#[inline]
447-
pub fn byte_from_char(c: char) -> u8 {
541+
fn byte_from_char(c: char) -> u8 {
448542
let res = c as u32;
449543
debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
450544
res as u8

src/tests.rs

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,7 @@ fn test_unescape_char_good() {
100100
fn test_unescape_str_warn() {
101101
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
102102
let mut unescaped = Vec::with_capacity(literal.len());
103-
unescape_unicode(literal, Mode::Str, &mut |range, res| {
104-
unescaped.push((range, res))
105-
});
103+
unescape_str(literal, |range, res| unescaped.push((range, res)));
106104
assert_eq!(unescaped, expected);
107105
}
108106

@@ -132,7 +130,7 @@ fn test_unescape_str_warn() {
132130
fn test_unescape_str_good() {
133131
fn check(literal_text: &str, expected: &str) {
134132
let mut buf = Ok(String::with_capacity(literal_text.len()));
135-
unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
133+
unescape_str(literal_text, |range, c| {
136134
if let Ok(b) = &mut buf {
137135
match c {
138136
Ok(c) => b.push(c),
@@ -248,16 +246,16 @@ fn test_unescape_byte_good() {
248246
#[test]
249247
fn test_unescape_byte_str_good() {
250248
fn check(literal_text: &str, expected: &[u8]) {
251-
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
252-
unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
253-
if let Ok(b) = &mut buf {
254-
match c {
255-
Ok(c) => b.push(byte_from_char(c)),
256-
Err(e) => buf = Err((range, e)),
249+
let mut result = Ok(Vec::with_capacity(literal_text.len()));
250+
unescape_byte_str(literal_text, |range, res| {
251+
if let Ok(buf) = &mut result {
252+
match res {
253+
Ok(b) => buf.push(b),
254+
Err(e) => result = Err((range, e)),
257255
}
258256
}
259257
});
260-
assert_eq!(buf.as_deref(), Ok(expected))
258+
assert_eq!(result.as_deref(), Ok(expected))
261259
}
262260

263261
check("foo", b"foo");
@@ -272,9 +270,7 @@ fn test_unescape_byte_str_good() {
272270
fn test_unescape_raw_str() {
273271
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
274272
let mut unescaped = Vec::with_capacity(literal.len());
275-
unescape_unicode(literal, Mode::RawStr, &mut |range, res| {
276-
unescaped.push((range, res))
277-
});
273+
check_raw_str(literal, |range, res| unescaped.push((range, res)));
278274
assert_eq!(unescaped, expected);
279275
}
280276

@@ -293,11 +289,9 @@ fn test_unescape_raw_str() {
293289

294290
#[test]
295291
fn test_unescape_raw_byte_str() {
296-
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
292+
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
297293
let mut unescaped = Vec::with_capacity(literal.len());
298-
unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| {
299-
unescaped.push((range, res))
300-
});
294+
check_raw_byte_str(literal, |range, res| unescaped.push((range, res)));
301295
assert_eq!(unescaped, expected);
302296
}
303297

@@ -310,7 +304,7 @@ fn test_unescape_raw_byte_str() {
310304
"🦀a",
311305
&[
312306
(0..4, Err(EscapeError::NonAsciiCharInByte)),
313-
(4..5, Ok('a')),
307+
(4..5, Ok(b'a')),
314308
],
315309
);
316310
}

0 commit comments

Comments
 (0)