@@ -862,19 +862,34 @@ pub(crate) struct TagIterator<'a, 'tcx> {
862
862
extra : Option < & ' a ExtraInfo < ' tcx > > ,
863
863
}
864
864
865
- #[ derive( Debug , PartialEq ) ]
866
- pub ( crate ) enum TokenKind < ' a > {
867
- Token ( & ' a str ) ,
868
- Attribute ( & ' a str ) ,
865
/// A single token produced by [`TagIterator`] while walking a code block's lang string.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A token found outside of an attribute block (`{}`), e.g. `rust` or `should_panic`.
    LangToken(&'a str),
    /// A class written as `.name` inside an attribute block; holds `name` without the dot.
    ClassAttribute(&'a str),
    /// A `key=value` pair found inside an attribute block, stored as `(key, value)`.
    KeyValueAttribute(&'a str, &'a str),
}
870
871
872
/// Returns `true` if `c` may appear in an unquoted token: an ASCII letter or digit,
/// `_`, `-` or `:`.
fn is_bareword_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
871
875
/// Returns `true` if `c` separates two tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
874
878
879
/// A byte range into the lang string being parsed.
///
/// The range is half-open: it is used as `data[start..end]`, so `end` is exclusive.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Indices {
    /// Byte offset of the first character of the range.
    start: usize,
    /// Byte offset just past the last character of the range.
    end: usize,
}
883
+
875
884
impl < ' a , ' tcx > TagIterator < ' a , ' tcx > {
876
885
pub ( crate ) fn new ( data : & ' a str , extra : Option < & ' a ExtraInfo < ' tcx > > ) -> Self {
877
- Self { inner : data. char_indices ( ) . peekable ( ) , data, extra, is_in_attribute_block : false }
886
+ Self { inner : data. char_indices ( ) . peekable ( ) , data, is_in_attribute_block : false , extra }
887
+ }
888
+
889
+ fn emit_error ( & self , err : & str ) {
890
+ if let Some ( extra) = self . extra {
891
+ extra. error_invalid_codeblock_attr ( err) ;
892
+ }
878
893
}
879
894
880
895
fn skip_separators ( & mut self ) -> Option < usize > {
@@ -887,84 +902,183 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
887
902
None
888
903
}
889
904
890
- fn emit_error ( & self , err : & str ) {
891
- if let Some ( extra) = self . extra {
892
- extra. error_invalid_codeblock_attr ( err) ;
905
+ fn parse_string ( & mut self , start : usize ) -> Option < Indices > {
906
+ while let Some ( ( pos, c) ) = self . inner . next ( ) {
907
+ if c == '"' {
908
+ return Some ( Indices { start : start + 1 , end : pos } ) ;
909
+ }
893
910
}
911
+ self . emit_error ( "unclosed quote string `\" `" ) ;
912
+ None
894
913
}
895
914
896
- /// Returns false if the string is unfinished.
897
- fn skip_string ( & mut self ) -> bool {
898
- while let Some ( ( _, c) ) = self . inner . next ( ) {
899
- if c == '"' {
900
- return true ;
915
+ fn parse_class ( & mut self , start : usize ) -> Option < LangStringToken < ' a > > {
916
+ while let Some ( ( pos, c) ) = self . inner . peek ( ) . copied ( ) {
917
+ if is_bareword_char ( c) {
918
+ self . inner . next ( ) ;
919
+ } else {
920
+ let class = & self . data [ start + 1 ..pos] ;
921
+ if class. is_empty ( ) {
922
+ self . emit_error ( & format ! ( "unexpected `{c}` character after `.`" ) ) ;
923
+ return None ;
924
+ } else if self . check_after_token ( ) {
925
+ return Some ( LangStringToken :: ClassAttribute ( class) ) ;
926
+ } else {
927
+ return None ;
928
+ }
901
929
}
902
930
}
903
- self . emit_error ( "unclosed quote string: missing `\" ` at the end" ) ;
904
- false
931
+ let class = & self . data [ start + 1 ..] ;
932
+ if class. is_empty ( ) {
933
+ self . emit_error ( "missing character after `.`" ) ;
934
+ None
935
+ } else if self . check_after_token ( ) {
936
+ Some ( LangStringToken :: ClassAttribute ( class) )
937
+ } else {
938
+ None
939
+ }
940
+ }
941
+
942
+ fn parse_token ( & mut self , start : usize ) -> Option < Indices > {
943
+ while let Some ( ( pos, c) ) = self . inner . peek ( ) {
944
+ if !is_bareword_char ( * c) {
945
+ return Some ( Indices { start, end : * pos } ) ;
946
+ }
947
+ self . inner . next ( ) ;
948
+ }
949
+ self . emit_error ( "unexpected end" ) ;
950
+ None
951
+ }
952
+
953
+ fn parse_key_value ( & mut self , c : char , start : usize ) -> Option < LangStringToken < ' a > > {
954
+ let key_indices =
955
+ if c == '"' { self . parse_string ( start) ? } else { self . parse_token ( start) ? } ;
956
+ if key_indices. start == key_indices. end {
957
+ self . emit_error ( "unexpected empty string as key" ) ;
958
+ return None ;
959
+ }
960
+
961
+ if let Some ( ( _, c) ) = self . inner . next ( ) {
962
+ if c != '=' {
963
+ self . emit_error ( & format ! ( "expected `=`, found `{}`" , c) ) ;
964
+ return None ;
965
+ }
966
+ } else {
967
+ self . emit_error ( "unexpected end" ) ;
968
+ return None ;
969
+ }
970
+ let value_indices = match self . inner . next ( ) {
971
+ Some ( ( pos, '"' ) ) => self . parse_string ( pos) ?,
972
+ Some ( ( pos, c) ) if is_bareword_char ( c) => self . parse_token ( pos) ?,
973
+ Some ( ( _, c) ) => {
974
+ self . emit_error ( & format ! ( "unexpected `{c}` character after `=`" ) ) ;
975
+ return None ;
976
+ }
977
+ None => {
978
+ self . emit_error ( "expected value after `=`" ) ;
979
+ return None ;
980
+ }
981
+ } ;
982
+ if value_indices. start == value_indices. end {
983
+ self . emit_error ( "unexpected empty string as value" ) ;
984
+ None
985
+ } else if self . check_after_token ( ) {
986
+ Some ( LangStringToken :: KeyValueAttribute (
987
+ & self . data [ key_indices. start ..key_indices. end ] ,
988
+ & self . data [ value_indices. start ..value_indices. end ] ,
989
+ ) )
990
+ } else {
991
+ None
992
+ }
905
993
}
906
994
907
- fn parse_in_attribute_block ( & mut self , start : usize ) -> Option < TokenKind < ' a > > {
995
+ /// Returns `false` if an error was emitted.
996
+ fn check_after_token ( & mut self ) -> bool {
997
+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) {
998
+ if c == '}' || is_separator ( c) || c == '(' {
999
+ true
1000
+ } else {
1001
+ self . emit_error ( & format ! ( "unexpected `{c}` character" ) ) ;
1002
+ false
1003
+ }
1004
+ } else {
1005
+ // The error will be caught on the next iteration.
1006
+ true
1007
+ }
1008
+ }
1009
+
1010
+ fn parse_in_attribute_block ( & mut self ) -> Option < LangStringToken < ' a > > {
908
1011
while let Some ( ( pos, c) ) = self . inner . next ( ) {
909
- if is_separator ( c) {
910
- return Some ( TokenKind :: Attribute ( & self . data [ start..pos] ) ) ;
911
- } else if c == '{' {
912
- // There shouldn't be a nested block!
913
- self . emit_error ( "unexpected `{` inside attribute block (`{}`)" ) ;
914
- let attr = & self . data [ start..pos] ;
915
- if attr. is_empty ( ) {
916
- return self . next ( ) ;
917
- }
918
- self . inner . next ( ) ;
919
- return Some ( TokenKind :: Attribute ( attr) ) ;
920
- } else if c == '}' {
1012
+ if c == '}' {
921
1013
self . is_in_attribute_block = false ;
922
- let attr = & self . data [ start..pos] ;
923
- if attr. is_empty ( ) {
924
- return self . next ( ) ;
925
- }
926
- return Some ( TokenKind :: Attribute ( attr) ) ;
927
- } else if c == '"' && !self . skip_string ( ) {
1014
+ return self . next ( ) ;
1015
+ } else if c == '.' {
1016
+ return self . parse_class ( pos) ;
1017
+ } else if c == '"' || is_bareword_char ( c) {
1018
+ return self . parse_key_value ( c, pos) ;
1019
+ } else {
1020
+ self . emit_error ( & format ! ( "unexpected character `{c}`" ) ) ;
928
1021
return None ;
929
1022
}
930
1023
}
931
- // Unclosed attribute block!
932
1024
self . emit_error ( "unclosed attribute block (`{}`): missing `}` at the end" ) ;
933
- let token = & self . data [ start..] ;
934
- if token. is_empty ( ) { None } else { Some ( TokenKind :: Attribute ( token) ) }
1025
+ None
935
1026
}
936
1027
937
- fn parse_outside_attribute_block ( & mut self , start : usize ) -> Option < TokenKind < ' a > > {
1028
+ /// Returns `false` if an error was emitted.
1029
+ fn skip_paren_block ( & mut self ) -> bool {
1030
+ while let Some ( ( _, c) ) = self . inner . next ( ) {
1031
+ if c == ')' {
1032
+ return true ;
1033
+ }
1034
+ }
1035
+ self . emit_error ( "unclosed comment: missing `)` at the end" ) ;
1036
+ false
1037
+ }
1038
+
1039
+ fn parse_outside_attribute_block ( & mut self , start : usize ) -> Option < LangStringToken < ' a > > {
938
1040
while let Some ( ( pos, c) ) = self . inner . next ( ) {
939
- if is_separator ( c) {
940
- return Some ( TokenKind :: Token ( & self . data [ start..pos] ) ) ;
1041
+ if c == '"' {
1042
+ if pos != start {
1043
+ self . emit_error ( "expected ` `, `{` or `,` found `\" `" ) ;
1044
+ return None ;
1045
+ }
1046
+ let indices = self . parse_string ( pos) ?;
1047
+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1048
+ self . emit_error ( & format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
1049
+ return None ;
1050
+ }
1051
+ return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
941
1052
} else if c == '{' {
942
1053
self . is_in_attribute_block = true ;
943
- let token = & self . data [ start..pos] ;
944
- if token. is_empty ( ) {
945
- return self . next ( ) ;
1054
+ return self . next ( ) ;
1055
+ } else if is_bareword_char ( c) {
1056
+ continue ;
1057
+ } else if is_separator ( c) {
1058
+ if pos != start {
1059
+ return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
946
1060
}
947
- return Some ( TokenKind :: Token ( token) ) ;
948
- } else if c == '}' {
949
- // We're not in a block so it shouldn't be there!
950
- self . emit_error ( "unexpected `}` outside attribute block (`{}`)" ) ;
951
- let token = & self . data [ start..pos] ;
952
- if token. is_empty ( ) {
953
- return self . next ( ) ;
1061
+ return self . next ( ) ;
1062
+ } else if c == '(' {
1063
+ if !self . skip_paren_block ( ) {
1064
+ return None ;
954
1065
}
955
- self . inner . next ( ) ;
956
- return Some ( TokenKind :: Attribute ( token) ) ;
957
- } else if c == '"' && !self . skip_string ( ) {
1066
+ if pos != start {
1067
+ return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
1068
+ }
1069
+ return self . next ( ) ;
1070
+ } else {
1071
+ self . emit_error ( & format ! ( "unexpected character `{c}`" ) ) ;
958
1072
return None ;
959
1073
}
960
1074
}
961
1075
let token = & self . data [ start..] ;
962
- if token. is_empty ( ) { None } else { Some ( TokenKind :: Token ( token ) ) }
1076
+ if token. is_empty ( ) { None } else { Some ( LangStringToken :: LangToken ( & self . data [ start.. ] ) ) }
963
1077
}
964
1078
}
965
1079
966
1080
impl < ' a , ' tcx > Iterator for TagIterator < ' a , ' tcx > {
967
- type Item = TokenKind < ' a > ;
1081
+ type Item = LangStringToken < ' a > ;
968
1082
969
1083
fn next ( & mut self ) -> Option < Self :: Item > {
970
1084
let Some ( start) = self . skip_separators ( ) else {
@@ -974,7 +1088,7 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
974
1088
return None ;
975
1089
} ;
976
1090
if self . is_in_attribute_block {
977
- self . parse_in_attribute_block ( start )
1091
+ self . parse_in_attribute_block ( )
978
1092
} else {
979
1093
self . parse_outside_attribute_block ( start)
980
1094
}
@@ -999,16 +1113,6 @@ impl Default for LangString {
999
1113
}
1000
1114
}
1001
1115
1002
- fn handle_class ( class : & str , after : & str , data : & mut LangString , extra : Option < & ExtraInfo < ' _ > > ) {
1003
- if class. is_empty ( ) {
1004
- if let Some ( extra) = extra {
1005
- extra. error_invalid_codeblock_attr ( & format ! ( "missing class name after `{after}`" ) ) ;
1006
- }
1007
- } else {
1008
- data. added_classes . push ( class. replace ( '"' , "" ) ) ;
1009
- }
1010
- }
1011
-
1012
1116
impl LangString {
1013
1117
fn parse_without_check (
1014
1118
string : & str ,
@@ -1034,41 +1138,41 @@ impl LangString {
1034
1138
1035
1139
for token in TagIterator :: new ( string, extra) {
1036
1140
match token {
1037
- TokenKind :: Token ( "should_panic" ) => {
1141
+ LangStringToken :: LangToken ( "should_panic" ) => {
1038
1142
data. should_panic = true ;
1039
1143
seen_rust_tags = !seen_other_tags;
1040
1144
}
1041
- TokenKind :: Token ( "no_run" ) => {
1145
+ LangStringToken :: LangToken ( "no_run" ) => {
1042
1146
data. no_run = true ;
1043
1147
seen_rust_tags = !seen_other_tags;
1044
1148
}
1045
- TokenKind :: Token ( "ignore" ) => {
1149
+ LangStringToken :: LangToken ( "ignore" ) => {
1046
1150
data. ignore = Ignore :: All ;
1047
1151
seen_rust_tags = !seen_other_tags;
1048
1152
}
1049
- TokenKind :: Token ( x) if x. starts_with ( "ignore-" ) => {
1153
+ LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
1050
1154
if enable_per_target_ignores {
1051
1155
ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
1052
1156
seen_rust_tags = !seen_other_tags;
1053
1157
}
1054
1158
}
1055
- TokenKind :: Token ( "rust" ) => {
1159
+ LangStringToken :: LangToken ( "rust" ) => {
1056
1160
data. rust = true ;
1057
1161
seen_rust_tags = true ;
1058
1162
}
1059
- TokenKind :: Token ( "test_harness" ) => {
1163
+ LangStringToken :: LangToken ( "test_harness" ) => {
1060
1164
data. test_harness = true ;
1061
1165
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1062
1166
}
1063
- TokenKind :: Token ( "compile_fail" ) => {
1167
+ LangStringToken :: LangToken ( "compile_fail" ) => {
1064
1168
data. compile_fail = true ;
1065
1169
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1066
1170
data. no_run = true ;
1067
1171
}
1068
- TokenKind :: Token ( x) if x. starts_with ( "edition" ) => {
1172
+ LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
1069
1173
data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
1070
1174
}
1071
- TokenKind :: Token ( x)
1175
+ LangStringToken :: LangToken ( x)
1072
1176
if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
1073
1177
{
1074
1178
if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
@@ -1078,7 +1182,7 @@ impl LangString {
1078
1182
seen_other_tags = true ;
1079
1183
}
1080
1184
}
1081
- TokenKind :: Token ( x) if extra. is_some ( ) => {
1185
+ LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
1082
1186
let s = x. to_lowercase ( ) ;
1083
1187
if let Some ( ( flag, help) ) = if s == "compile-fail"
1084
1188
|| s == "compile_fail"
@@ -1120,22 +1224,24 @@ impl LangString {
1120
1224
seen_other_tags = true ;
1121
1225
data. unknown . push ( x. to_owned ( ) ) ;
1122
1226
}
1123
- TokenKind :: Token ( x) => {
1227
+ LangStringToken :: LangToken ( x) => {
1124
1228
seen_other_tags = true ;
1125
1229
data. unknown . push ( x. to_owned ( ) ) ;
1126
1230
}
1127
- TokenKind :: Attribute ( attr ) => {
1231
+ LangStringToken :: KeyValueAttribute ( key , value ) => {
1128
1232
seen_other_tags = true ;
1129
- if let Some ( class) = attr. strip_prefix ( '.' ) {
1130
- handle_class ( class, "." , & mut data, extra) ;
1131
- } else if let Some ( class) = attr. strip_prefix ( "class=" ) {
1132
- handle_class ( class, "class=" , & mut data, extra) ;
1233
+ if key == "class" {
1234
+ data. added_classes . push ( value. to_owned ( ) ) ;
1133
1235
} else if let Some ( extra) = extra {
1134
1236
extra. error_invalid_codeblock_attr ( & format ! (
1135
- "unsupported attribute `{attr }`"
1237
+ "unsupported attribute `{key }`"
1136
1238
) ) ;
1137
1239
}
1138
1240
}
1241
+ LangStringToken :: ClassAttribute ( class) => {
1242
+ seen_other_tags = true ;
1243
+ data. added_classes . push ( class. to_owned ( ) ) ;
1244
+ }
1139
1245
}
1140
1246
}
1141
1247
0 commit comments