Skip to content

Commit 725d17a

Browse files
committed
ruff_linter: fix handling of unprintable characters
Previously, we replaced unprintable ASCII characters with printable representations of them, using fancier Unicode characters. Since `annotate-snippets` used to use codepoint offsets, this didn't make our ranges incorrect: we swapped one codepoint for another. But now, with the `annotate-snippets` upgrade, we use byte offsets (which is IMO the correct choice). However, this means our ranges can be thrown off, since in UTF-8 an ASCII codepoint is always one byte and a non-ASCII codepoint is always more than one byte. Rather than tweaking the `ShowNonprinting` trait (which, it seems, is used in places other than this diagnostic rendering) and making it more complicated, we change `replace_whitespace` to handle unprintable characters as well. This works out because `replace_whitespace` was already updating the annotation range to account for tab replacement. We copy that approach for unprintable characters.
1 parent 8a8ba10 commit 725d17a

File tree

1 file changed

+46
-19
lines changed
  • crates/ruff_linter/src/message

1 file changed

+46
-19
lines changed

crates/ruff_linter/src/message/text.rs

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ use crate::line_width::{IndentWidth, LineWidthBuilder};
1515
use crate::message::diff::Diff;
1616
use crate::message::{Emitter, EmitterContext, Message};
1717
use crate::settings::types::UnsafeFixes;
18-
use crate::text_helpers::ShowNonprinting;
1918

2019
bitflags! {
2120
#[derive(Default)]
@@ -245,13 +244,11 @@ impl Display for MessageCodeFrame<'_> {
245244
let start_offset = source_code.line_start(start_index);
246245
let end_offset = source_code.line_end(end_index);
247246

248-
let source = replace_whitespace(
247+
let source = replace_whitespace_and_unprintable(
249248
source_code.slice(TextRange::new(start_offset, end_offset)),
250249
self.message.range() - start_offset,
251250
);
252251

253-
let source_text = source.text.show_nonprinting();
254-
255252
let label = self
256253
.message
257254
.rule()
@@ -270,7 +267,7 @@ impl Display for MessageCodeFrame<'_> {
270267
let span = usize::from(source.annotation_range.start())
271268
..usize::from(source.annotation_range.end());
272269
let annotation = Level::Error.span(span).label(&label);
273-
let snippet = Snippet::source(&source_text)
270+
let snippet = Snippet::source(&source.text)
274271
.line_start(line_start)
275272
.annotation(annotation)
276273
.fold(false);
@@ -286,38 +283,68 @@ impl Display for MessageCodeFrame<'_> {
286283
}
287284
}
288285

289-
fn replace_whitespace(source: &str, annotation_range: TextRange) -> SourceCode {
286+
/// Given some source code and an annotation range, this routine replaces
287+
/// tabs with ASCII whitespace, and unprintable characters with printable
288+
/// representations of them.
289+
///
290+
/// The source code returned has an annotation that is updated to reflect
291+
/// changes made to the source code (if any).
292+
fn replace_whitespace_and_unprintable(source: &str, annotation_range: TextRange) -> SourceCode {
290293
let mut result = String::new();
291294
let mut last_end = 0;
292295
let mut range = annotation_range;
293296
let mut line_width = LineWidthBuilder::new(IndentWidth::default());
294297

298+
// Updates the range given by the caller whenever a single byte (at
299+
// `index` in `source`) is replaced with `len` bytes.
300+
//
301+
// When the index occurs before the start of the range, the whole range is
302+
// shifted by `len - 1` (the net growth). When the index occurs at or after
303+
// the start but before the end, only the end of the range grows by `len - 1`.
304+
let mut update_range = |index, len| {
305+
if index < usize::from(annotation_range.start()) {
306+
range += TextSize::new(len - 1);
307+
} else if index < usize::from(annotation_range.end()) {
308+
range = range.add_end(TextSize::new(len - 1));
309+
}
310+
};
311+
312+
// If `c` is an unprintable character, then this returns a printable
313+
// representation of it (using a fancier Unicode codepoint).
314+
let unprintable_replacement = |c: char| -> Option<char> {
315+
match c {
316+
'\x07' => Some('␇'),
317+
'\x08' => Some('␈'),
318+
'\x1b' => Some('␛'),
319+
'\x7f' => Some('␡'),
320+
_ => None,
321+
}
322+
};
323+
295324
for (index, c) in source.char_indices() {
296325
let old_width = line_width.get();
297326
line_width = line_width.add_char(c);
298327

299328
if matches!(c, '\t') {
300-
// SAFETY: The difference is a value in the range [1..TAB_SIZE] which is guaranteed to be less than `u32`.
301-
#[allow(clippy::cast_possible_truncation)]
302-
let tab_width = (line_width.get() - old_width) as u32;
303-
304-
if index < usize::from(annotation_range.start()) {
305-
range += TextSize::new(tab_width - 1);
306-
} else if index < usize::from(annotation_range.end()) {
307-
range = range.add_end(TextSize::new(tab_width - 1));
308-
}
309-
329+
let tab_width = u32::try_from(line_width.get() - old_width)
330+
.expect("small width because of tab size");
310331
result.push_str(&source[last_end..index]);
311-
312332
for _ in 0..tab_width {
313333
result.push(' ');
314334
}
315-
316335
last_end = index + 1;
336+
update_range(index, tab_width);
337+
} else if let Some(printable) = unprintable_replacement(c) {
338+
result.push_str(&source[last_end..index]);
339+
result.push(printable);
340+
last_end = index + 1;
341+
342+
let len = u32::try_from(printable.len_utf8()).expect("4 or fewer UTF-8 code units");
343+
update_range(index, len);
317344
}
318345
}
319346

320-
// No tabs
347+
// No tabs or unprintable chars
321348
if result.is_empty() {
322349
SourceCode {
323350
annotation_range,

0 commit comments

Comments
 (0)