Skip to content

Commit 25c95c7

Browse files
committed
Deal with non-UTF-8 bytes after valid UTF-8 bytes
If the program outputs a little bit of UTF-8 content and then a bunch of non-UTF-8 content, it's possible that we'd have 4 or more incomplete bytes. This means that the previous assertion was overzealous.
1 parent 44fc838 commit 25c95c7

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

compiler/base/orchestrator/src/worker.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,11 +1087,11 @@ where
10871087

10881088
// We can't parse any UTF-8
10891089
if valid_utf_8_bytes == 0 {
1090+
// This should be enough bytes to get one UTF-8 character.
1091+
ensure!(n_valid < 4, InvalidUtf8Snafu);
1092+
10901093
// We aren't going to get any more input
10911094
ensure!(n_read != 0, RanOutOfInputSnafu);
1092-
1093-
// This should be enough bytes to get one UTF-8 character.
1094-
ensure!(n_valid < 4, InvalidUtf8Snafu)
10951095
}
10961096

10971097
// Safety: We just calculated the number of valid UTF-8 bytes
@@ -1106,11 +1106,6 @@ where
11061106
self.buffer.copy_within(valid_utf_8_bytes..n_valid, 0);
11071107

11081108
self.n_incomplete = n_valid - valid_utf_8_bytes;
1109-
assert!(
1110-
self.n_incomplete < 4,
1111-
"Should never have 4 or more incomplete bytes, had {}",
1112-
self.n_incomplete,
1113-
);
11141109

11151110
if !s.is_empty() {
11161111
return Ok(Some(s));
@@ -1217,6 +1212,18 @@ mod test {
12171212
assert!(!buffer.reader.is_empty());
12181213
}
12191214

1215+
#[tokio::test]
1216+
async fn valid_followed_by_invalid_utf8() {
1217+
let bytes = [b'A', 0xc3, 0x28, 0xc3, 0x28, 0xc3, 0x28];
1218+
1219+
let reader = FixedAsyncRead::success_exact([bytes]);
1220+
let mut buffer = Utf8BufReader::new(reader);
1221+
1222+
assert_matches!(buffer.next().await, Ok(Some(s)) => s == "A");
1223+
assert_matches!(buffer.next().await, Err(Utf8BufReaderError::InvalidUtf8));
1224+
assert!(buffer.reader.is_empty());
1225+
}
1226+
12201227
#[tokio::test]
12211228
async fn split_across_responses() {
12221229
let bytes: [u8; 12] = "🙂🙂🙂".as_bytes().try_into().unwrap();

0 commit comments

Comments
 (0)