Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 7681f63

Browse files
Implement new eBNF for codeblock attributes
1 parent 4ce17fa commit 7681f63

8 files changed

+373
-196
lines changed

src/librustdoc/html/markdown.rs

Lines changed: 189 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -862,19 +862,34 @@ pub(crate) struct TagIterator<'a, 'tcx> {
862862
extra: Option<&'a ExtraInfo<'tcx>>,
863863
}
864864

865-
#[derive(Debug, PartialEq)]
866-
pub(crate) enum TokenKind<'a> {
867-
Token(&'a str),
868-
Attribute(&'a str),
865+
#[derive(Clone, Debug, Eq, PartialEq)]
866+
pub(crate) enum LangStringToken<'a> {
867+
LangToken(&'a str),
868+
ClassAttribute(&'a str),
869+
KeyValueAttribute(&'a str, &'a str),
869870
}
870871

872+
fn is_bareword_char(c: char) -> bool {
873+
c == '_' || c == '-' || c == ':' || c.is_ascii_alphabetic() || c.is_ascii_digit()
874+
}
871875
fn is_separator(c: char) -> bool {
872876
c == ' ' || c == ',' || c == '\t'
873877
}
874878

879+
struct Indices {
880+
start: usize,
881+
end: usize,
882+
}
883+
875884
impl<'a, 'tcx> TagIterator<'a, 'tcx> {
876885
pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
877-
Self { inner: data.char_indices().peekable(), data, extra, is_in_attribute_block: false }
886+
Self { inner: data.char_indices().peekable(), data, is_in_attribute_block: false, extra }
887+
}
888+
889+
fn emit_error(&self, err: &str) {
890+
if let Some(extra) = self.extra {
891+
extra.error_invalid_codeblock_attr(err);
892+
}
878893
}
879894

880895
fn skip_separators(&mut self) -> Option<usize> {
@@ -887,84 +902,183 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
887902
None
888903
}
889904

890-
fn emit_error(&self, err: &str) {
891-
if let Some(extra) = self.extra {
892-
extra.error_invalid_codeblock_attr(err);
905+
fn parse_string(&mut self, start: usize) -> Option<Indices> {
906+
while let Some((pos, c)) = self.inner.next() {
907+
if c == '"' {
908+
return Some(Indices { start: start + 1, end: pos });
909+
}
893910
}
911+
self.emit_error("unclosed quote string `\"`");
912+
None
894913
}
895914

896-
/// Returns false if the string is unfinished.
897-
fn skip_string(&mut self) -> bool {
898-
while let Some((_, c)) = self.inner.next() {
899-
if c == '"' {
900-
return true;
915+
fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
916+
while let Some((pos, c)) = self.inner.peek().copied() {
917+
if is_bareword_char(c) {
918+
self.inner.next();
919+
} else {
920+
let class = &self.data[start + 1..pos];
921+
if class.is_empty() {
922+
self.emit_error(&format!("unexpected `{c}` character after `.`"));
923+
return None;
924+
} else if self.check_after_token() {
925+
return Some(LangStringToken::ClassAttribute(class));
926+
} else {
927+
return None;
928+
}
901929
}
902930
}
903-
self.emit_error("unclosed quote string: missing `\"` at the end");
904-
false
931+
let class = &self.data[start + 1..];
932+
if class.is_empty() {
933+
self.emit_error("missing character after `.`");
934+
None
935+
} else if self.check_after_token() {
936+
Some(LangStringToken::ClassAttribute(class))
937+
} else {
938+
None
939+
}
940+
}
941+
942+
fn parse_token(&mut self, start: usize) -> Option<Indices> {
943+
while let Some((pos, c)) = self.inner.peek() {
944+
if !is_bareword_char(*c) {
945+
return Some(Indices { start, end: *pos });
946+
}
947+
self.inner.next();
948+
}
949+
self.emit_error("unexpected end");
950+
None
951+
}
952+
953+
fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
954+
let key_indices =
955+
if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
956+
if key_indices.start == key_indices.end {
957+
self.emit_error("unexpected empty string as key");
958+
return None;
959+
}
960+
961+
if let Some((_, c)) = self.inner.next() {
962+
if c != '=' {
963+
self.emit_error(&format!("expected `=`, found `{}`", c));
964+
return None;
965+
}
966+
} else {
967+
self.emit_error("unexpected end");
968+
return None;
969+
}
970+
let value_indices = match self.inner.next() {
971+
Some((pos, '"')) => self.parse_string(pos)?,
972+
Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
973+
Some((_, c)) => {
974+
self.emit_error(&format!("unexpected `{c}` character after `=`"));
975+
return None;
976+
}
977+
None => {
978+
self.emit_error("expected value after `=`");
979+
return None;
980+
}
981+
};
982+
if value_indices.start == value_indices.end {
983+
self.emit_error("unexpected empty string as value");
984+
None
985+
} else if self.check_after_token() {
986+
Some(LangStringToken::KeyValueAttribute(
987+
&self.data[key_indices.start..key_indices.end],
988+
&self.data[value_indices.start..value_indices.end],
989+
))
990+
} else {
991+
None
992+
}
905993
}
906994

907-
fn parse_in_attribute_block(&mut self, start: usize) -> Option<TokenKind<'a>> {
995+
/// Returns `false` if an error was emitted.
996+
fn check_after_token(&mut self) -> bool {
997+
if let Some((_, c)) = self.inner.peek().copied() {
998+
if c == '}' || is_separator(c) || c == '(' {
999+
true
1000+
} else {
1001+
self.emit_error(&format!("unexpected `{c}` character"));
1002+
false
1003+
}
1004+
} else {
1005+
// The error will be caught on the next iteration.
1006+
true
1007+
}
1008+
}
1009+
1010+
fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
9081011
while let Some((pos, c)) = self.inner.next() {
909-
if is_separator(c) {
910-
return Some(TokenKind::Attribute(&self.data[start..pos]));
911-
} else if c == '{' {
912-
// There shouldn't be a nested block!
913-
self.emit_error("unexpected `{` inside attribute block (`{}`)");
914-
let attr = &self.data[start..pos];
915-
if attr.is_empty() {
916-
return self.next();
917-
}
918-
self.inner.next();
919-
return Some(TokenKind::Attribute(attr));
920-
} else if c == '}' {
1012+
if c == '}' {
9211013
self.is_in_attribute_block = false;
922-
let attr = &self.data[start..pos];
923-
if attr.is_empty() {
924-
return self.next();
925-
}
926-
return Some(TokenKind::Attribute(attr));
927-
} else if c == '"' && !self.skip_string() {
1014+
return self.next();
1015+
} else if c == '.' {
1016+
return self.parse_class(pos);
1017+
} else if c == '"' || is_bareword_char(c) {
1018+
return self.parse_key_value(c, pos);
1019+
} else {
1020+
self.emit_error(&format!("unexpected character `{c}`"));
9281021
return None;
9291022
}
9301023
}
931-
// Unclosed attribute block!
9321024
self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
933-
let token = &self.data[start..];
934-
if token.is_empty() { None } else { Some(TokenKind::Attribute(token)) }
1025+
None
9351026
}
9361027

937-
fn parse_outside_attribute_block(&mut self, start: usize) -> Option<TokenKind<'a>> {
1028+
/// Returns `false` if an error was emitted.
1029+
fn skip_paren_block(&mut self) -> bool {
1030+
while let Some((_, c)) = self.inner.next() {
1031+
if c == ')' {
1032+
return true;
1033+
}
1034+
}
1035+
self.emit_error("unclosed comment: missing `)` at the end");
1036+
false
1037+
}
1038+
1039+
fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
9381040
while let Some((pos, c)) = self.inner.next() {
939-
if is_separator(c) {
940-
return Some(TokenKind::Token(&self.data[start..pos]));
1041+
if c == '"' {
1042+
if pos != start {
1043+
self.emit_error("expected ` `, `{` or `,` found `\"`");
1044+
return None;
1045+
}
1046+
let indices = self.parse_string(pos)?;
1047+
if let Some((_, c)) = self.inner.peek().copied() && c != '{' && !is_separator(c) && c != '(' {
1048+
self.emit_error(&format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
1049+
return None;
1050+
}
1051+
return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
9411052
} else if c == '{' {
9421053
self.is_in_attribute_block = true;
943-
let token = &self.data[start..pos];
944-
if token.is_empty() {
945-
return self.next();
1054+
return self.next();
1055+
} else if is_bareword_char(c) {
1056+
continue;
1057+
} else if is_separator(c) {
1058+
if pos != start {
1059+
return Some(LangStringToken::LangToken(&self.data[start..pos]));
9461060
}
947-
return Some(TokenKind::Token(token));
948-
} else if c == '}' {
949-
// We're not in a block so it shouldn't be there!
950-
self.emit_error("unexpected `}` outside attribute block (`{}`)");
951-
let token = &self.data[start..pos];
952-
if token.is_empty() {
953-
return self.next();
1061+
return self.next();
1062+
} else if c == '(' {
1063+
if !self.skip_paren_block() {
1064+
return None;
9541065
}
955-
self.inner.next();
956-
return Some(TokenKind::Attribute(token));
957-
} else if c == '"' && !self.skip_string() {
1066+
if pos != start {
1067+
return Some(LangStringToken::LangToken(&self.data[start..pos]));
1068+
}
1069+
return self.next();
1070+
} else {
1071+
self.emit_error(&format!("unexpected character `{c}`"));
9581072
return None;
9591073
}
9601074
}
9611075
let token = &self.data[start..];
962-
if token.is_empty() { None } else { Some(TokenKind::Token(token)) }
1076+
if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
9631077
}
9641078
}
9651079

9661080
impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
967-
type Item = TokenKind<'a>;
1081+
type Item = LangStringToken<'a>;
9681082

9691083
fn next(&mut self) -> Option<Self::Item> {
9701084
let Some(start) = self.skip_separators() else {
@@ -974,7 +1088,7 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
9741088
return None;
9751089
};
9761090
if self.is_in_attribute_block {
977-
self.parse_in_attribute_block(start)
1091+
self.parse_in_attribute_block()
9781092
} else {
9791093
self.parse_outside_attribute_block(start)
9801094
}
@@ -999,16 +1113,6 @@ impl Default for LangString {
9991113
}
10001114
}
10011115

1002-
fn handle_class(class: &str, after: &str, data: &mut LangString, extra: Option<&ExtraInfo<'_>>) {
1003-
if class.is_empty() {
1004-
if let Some(extra) = extra {
1005-
extra.error_invalid_codeblock_attr(&format!("missing class name after `{after}`"));
1006-
}
1007-
} else {
1008-
data.added_classes.push(class.replace('"', ""));
1009-
}
1010-
}
1011-
10121116
impl LangString {
10131117
fn parse_without_check(
10141118
string: &str,
@@ -1034,41 +1138,41 @@ impl LangString {
10341138

10351139
for token in TagIterator::new(string, extra) {
10361140
match token {
1037-
TokenKind::Token("should_panic") => {
1141+
LangStringToken::LangToken("should_panic") => {
10381142
data.should_panic = true;
10391143
seen_rust_tags = !seen_other_tags;
10401144
}
1041-
TokenKind::Token("no_run") => {
1145+
LangStringToken::LangToken("no_run") => {
10421146
data.no_run = true;
10431147
seen_rust_tags = !seen_other_tags;
10441148
}
1045-
TokenKind::Token("ignore") => {
1149+
LangStringToken::LangToken("ignore") => {
10461150
data.ignore = Ignore::All;
10471151
seen_rust_tags = !seen_other_tags;
10481152
}
1049-
TokenKind::Token(x) if x.starts_with("ignore-") => {
1153+
LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
10501154
if enable_per_target_ignores {
10511155
ignores.push(x.trim_start_matches("ignore-").to_owned());
10521156
seen_rust_tags = !seen_other_tags;
10531157
}
10541158
}
1055-
TokenKind::Token("rust") => {
1159+
LangStringToken::LangToken("rust") => {
10561160
data.rust = true;
10571161
seen_rust_tags = true;
10581162
}
1059-
TokenKind::Token("test_harness") => {
1163+
LangStringToken::LangToken("test_harness") => {
10601164
data.test_harness = true;
10611165
seen_rust_tags = !seen_other_tags || seen_rust_tags;
10621166
}
1063-
TokenKind::Token("compile_fail") => {
1167+
LangStringToken::LangToken("compile_fail") => {
10641168
data.compile_fail = true;
10651169
seen_rust_tags = !seen_other_tags || seen_rust_tags;
10661170
data.no_run = true;
10671171
}
1068-
TokenKind::Token(x) if x.starts_with("edition") => {
1172+
LangStringToken::LangToken(x) if x.starts_with("edition") => {
10691173
data.edition = x[7..].parse::<Edition>().ok();
10701174
}
1071-
TokenKind::Token(x)
1175+
LangStringToken::LangToken(x)
10721176
if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
10731177
{
10741178
if x[1..].parse::<u32>().is_ok() {
@@ -1078,7 +1182,7 @@ impl LangString {
10781182
seen_other_tags = true;
10791183
}
10801184
}
1081-
TokenKind::Token(x) if extra.is_some() => {
1185+
LangStringToken::LangToken(x) if extra.is_some() => {
10821186
let s = x.to_lowercase();
10831187
if let Some((flag, help)) = if s == "compile-fail"
10841188
|| s == "compile_fail"
@@ -1120,22 +1224,24 @@ impl LangString {
11201224
seen_other_tags = true;
11211225
data.unknown.push(x.to_owned());
11221226
}
1123-
TokenKind::Token(x) => {
1227+
LangStringToken::LangToken(x) => {
11241228
seen_other_tags = true;
11251229
data.unknown.push(x.to_owned());
11261230
}
1127-
TokenKind::Attribute(attr) => {
1231+
LangStringToken::KeyValueAttribute(key, value) => {
11281232
seen_other_tags = true;
1129-
if let Some(class) = attr.strip_prefix('.') {
1130-
handle_class(class, ".", &mut data, extra);
1131-
} else if let Some(class) = attr.strip_prefix("class=") {
1132-
handle_class(class, "class=", &mut data, extra);
1233+
if key == "class" {
1234+
data.added_classes.push(value.to_owned());
11331235
} else if let Some(extra) = extra {
11341236
extra.error_invalid_codeblock_attr(&format!(
1135-
"unsupported attribute `{attr}`"
1237+
"unsupported attribute `{key}`"
11361238
));
11371239
}
11381240
}
1241+
LangStringToken::ClassAttribute(class) => {
1242+
seen_other_tags = true;
1243+
data.added_classes.push(class.to_owned());
1244+
}
11391245
}
11401246
}
11411247

0 commit comments

Comments
 (0)