Commit 4756235

[move] Added restricted identifier syntax. New 2024 keywords (#14108)
## Description

- Added syntax to make identifiers from any keyword, e.g. ``let `false` = true;``
- `mut` and `enum` are keywords in 2024.
- This also adds support for spaces between `&` and `mut`; `& mut x.f` was not previously parsed.

## Test Plan

Added new tests.

---

If your changes are not user-facing and not a breaking change, you can skip the following section. Otherwise, please indicate what changed, and then add to the Release notes section as highlighted during the release process.

### Type of Change (Check all that apply)

- [ ] protocol change
- [X] user-visible impact
- [ ] breaking change for client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitate either a data wipe or data migration

### Release notes

In the Move 2024 edition, some previously accepted identifiers will now be keywords. With this change, `mut` and `enum` will be keywords. `mut` as a keyword will allow spaces between `&` and `mut`, which will also cause less ambiguity with the introduction of `let mut` variable declarations. To help with any migrations, syntax has been added to allow for the declaration of identifiers that are keywords, e.g. ``let `struct` = S {};`` or ``let `false` = false;``. More keywords may be reserved in the 2024 edition.
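
For illustration only (not part of the commit), a minimal Move sketch of the new surface syntax; the module `a::m` and all names are hypothetical, and the source is assumed to be compiled under the 2024 edition:

```move
module a::m {
    struct S has drop {}

    fun example(x: & mut u64): u64 {   // a space between `&` and `mut` now parses
        let `struct` = S {};           // a keyword escaped as an identifier
        let `false` = true;
        if (`false`) { *x } else { 0 }
    }
}
```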
1 parent 0ae7448 commit 4756235

27 files changed: +332 −34 lines

move-analyzer/src/completion.rs

Lines changed: 8 additions & 4 deletions

@@ -6,9 +6,12 @@ use crate::{context::Context, symbols::Symbols};
 use lsp_server::Request;
 use lsp_types::{CompletionItem, CompletionItemKind, CompletionParams, Position};
 use move_command_line_common::files::FileHash;
-use move_compiler::parser::{
-    keywords::{BUILTINS, CONTEXTUAL_KEYWORDS, KEYWORDS, PRIMITIVE_TYPES},
-    lexer::{Lexer, Tok},
+use move_compiler::{
+    editions::SyntaxEdition,
+    parser::{
+        keywords::{BUILTINS, CONTEXTUAL_KEYWORDS, KEYWORDS, PRIMITIVE_TYPES},
+        lexer::{Lexer, Tok},
+    },
 };
 use move_symbol_pool::Symbol;
 use std::{collections::HashSet, path::PathBuf};

@@ -70,7 +73,8 @@ fn builtins() -> Vec<CompletionItem> {
 /// the future, the server should be modified to return semantically valid completion items, not
 /// simple textual suggestions.
 fn identifiers(buffer: &str, symbols: &Symbols, path: &PathBuf) -> Vec<CompletionItem> {
-    let mut lexer = Lexer::new(buffer, FileHash::new(buffer));
+    // TODO thread through package configs
+    let mut lexer = Lexer::new(buffer, FileHash::new(buffer), SyntaxEdition::Legacy);
     if lexer.advance().is_err() {
         return vec![];
     }

move-compiler/src/diagnostics/codes.rs

Lines changed: 2 additions & 0 deletions

@@ -180,6 +180,8 @@ codes!(
         SpecContextRestricted:
             { msg: "syntax item restricted to spec contexts", severity: BlockingError },
         InvalidSpecBlockMember: { msg: "invalid spec block member", severity: NonblockingError },
+        InvalidRestrictedIdentifier:
+            { msg: "invalid identifier escape", severity: NonblockingError },
     ],
     // errors for any rules around declaration items
     Declarations: [
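
For context, a hypothetical input that would hit the new `InvalidRestrictedIdentifier` code (the message text comes from the lexer change below):

```move
module a::m {
    fun f() {
        // The escape below is never closed, so the lexer reports:
        // "Missing closing backtick (`) for restricted identifier escaping"
        let `x = 5;
    }
}
```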

move-compiler/src/editions/mod.rs

Lines changed: 13 additions & 0 deletions

@@ -41,6 +41,12 @@ pub enum Flavor {
     Sui,
 }

+#[derive(PartialEq, Eq, Clone, Copy, Debug, PartialOrd, Ord)]
+pub enum SyntaxEdition {
+    Legacy,
+    E2024,
+}
+
 //**************************************************************************************************
 // Entry
 //**************************************************************************************************

@@ -119,6 +125,13 @@ impl Edition {
         SUPPORTED_FEATURES.get(self).unwrap().contains(&feature)
     }

+    pub fn syntax(&self) -> SyntaxEdition {
+        match *self {
+            Self::LEGACY => SyntaxEdition::Legacy,
+            Self::E2024_ALPHA => SyntaxEdition::E2024,
+            _ => self.unknown_edition_panic(),
+        }
+    }
+
     // Intended only for implementing the lazy static (supported feature map) above
     fn prev(&self) -> Option<Self> {
         match *self {
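
A sketch of what the `SyntaxEdition` split means for source code; the module below is hypothetical:

```move
// Under SyntaxEdition::Legacy, `mut` and `enum` lex as ordinary identifiers,
// so this module compiles. Under SyntaxEdition::E2024 both lex as keywords,
// and these declarations are rejected unless the names are backtick-escaped.
module a::m {
    fun mut(): u64 { 0 }
    fun enum(): u64 { 0 }
}
```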

move-compiler/src/parser/lexer.rs

Lines changed: 64 additions & 18 deletions

@@ -3,8 +3,8 @@
 // SPDX-License-Identifier: Apache-2.0

 use crate::{
-    diag, diagnostics::Diagnostic, parser::syntax::make_loc, shared::CompilationEnv,
-    FileCommentMap, MatchedFileCommentMap,
+    diag, diagnostics::Diagnostic, editions::SyntaxEdition, parser::syntax::make_loc,
+    shared::CompilationEnv, FileCommentMap, MatchedFileCommentMap,
 };
 use move_command_line_common::files::FileHash;
 use move_ir_types::location::Loc;

@@ -80,6 +80,11 @@ pub enum Tok {
     Friend,
     NumSign,
     AtSign,
+    RestrictedIdentifier,
+    Mut,
+    Enum,
+    Type,
+    Match,
 }

 impl fmt::Display for Tok {

@@ -154,6 +159,11 @@ impl fmt::Display for Tok {
             Friend => "friend",
             NumSign => "#",
             AtSign => "@",
+            RestrictedIdentifier => "r#[Identifier]",
+            Mut => "mut",
+            Enum => "enum",
+            Type => "type",
+            Match => "match",
         };
         fmt::Display::fmt(s, formatter)
     }

@@ -162,6 +172,7 @@ impl fmt::Display for Tok {
 pub struct Lexer<'input> {
     text: &'input str,
     file_hash: FileHash,
+    syntax_edition: SyntaxEdition,
     doc_comments: FileCommentMap,
     matched_doc_comments: MatchedFileCommentMap,
     prev_end: usize,

@@ -171,10 +182,15 @@
 }

 impl<'input> Lexer<'input> {
-    pub fn new(text: &'input str, file_hash: FileHash) -> Lexer<'input> {
+    pub fn new(
+        text: &'input str,
+        file_hash: FileHash,
+        syntax_edition: SyntaxEdition,
+    ) -> Lexer<'input> {
         Lexer {
             text,
             file_hash,
+            syntax_edition,
             doc_comments: FileCommentMap::new(),
             matched_doc_comments: MatchedFileCommentMap::new(),
             prev_end: 0,

@@ -316,28 +332,22 @@

     // Look ahead to the next token after the current one and return it, and its starting offset,
     // without advancing the state of the lexer.
-    pub fn lookahead_with_start_loc(&mut self) -> Result<(Tok, usize), Box<Diagnostic>> {
+    pub fn lookahead(&mut self) -> Result<Tok, Box<Diagnostic>> {
         let text = self.trim_whitespace_and_comments(self.cur_end)?;
         let next_start = self.text.len() - text.len();
-        let (tok, _) = find_token(self.file_hash, text, next_start)?;
-        Ok((tok, next_start))
-    }
-
-    // Look ahead to the next token after the current one and return it without advancing
-    // the state of the lexer.
-    pub fn lookahead(&mut self) -> Result<Tok, Box<Diagnostic>> {
-        Ok(self.lookahead_with_start_loc()?.0)
+        let (tok, _) = find_token(self.file_hash, self.syntax_edition, text, next_start)?;
+        Ok(tok)
     }

     // Look ahead to the next two tokens after the current one and return them without advancing
     // the state of the lexer.
     pub fn lookahead2(&mut self) -> Result<(Tok, Tok), Box<Diagnostic>> {
         let text = self.trim_whitespace_and_comments(self.cur_end)?;
         let offset = self.text.len() - text.len();
-        let (first, length) = find_token(self.file_hash, text, offset)?;
+        let (first, length) = find_token(self.file_hash, self.syntax_edition, text, offset)?;
         let text2 = self.trim_whitespace_and_comments(offset + length)?;
         let offset2 = self.text.len() - text2.len();
-        let (second, _) = find_token(self.file_hash, text2, offset2)?;
+        let (second, _) = find_token(self.file_hash, self.syntax_edition, text2, offset2)?;
         Ok((first, second))
     }

@@ -392,7 +402,7 @@
         self.prev_end = self.cur_end;
         let text = self.trim_whitespace_and_comments(self.cur_end)?;
         self.cur_start = self.text.len() - text.len();
-        let (token, len) = find_token(self.file_hash, text, self.cur_start)?;
+        let (token, len) = find_token(self.file_hash, self.syntax_edition, text, self.cur_start)?;
         self.cur_end = self.cur_start + len;
         self.token = token;
         Ok(())

@@ -410,6 +420,7 @@
 // Find the next token and its length without changing the state of the lexer.
 fn find_token(
     file_hash: FileHash,
+    syntax_edition: SyntaxEdition,
     text: &str,
     start_offset: usize,
 ) -> Result<(Tok, usize), Box<Diagnostic>> {

@@ -433,6 +444,31 @@
                 get_decimal_number(text)
             }
         }
+        '`' => {
+            let (is_valid, len) = if (text.len() > 1)
+                && matches!(text[1..].chars().next(), Some('A'..='Z' | 'a'..='z' | '_'))
+            {
+                let sub = &text[1..];
+                let len = get_name_len(sub);
+                if !matches!(text[1 + len..].chars().next(), Some('`')) {
+                    (false, len + 1)
+                } else {
+                    (true, len + 2)
+                }
+            } else {
+                (false, 1)
+            };
+            if !is_valid {
+                let loc = make_loc(file_hash, start_offset, start_offset + len);
+                let msg = "Missing closing backtick (`) for restricted identifier escaping";
+                return Err(Box::new(diag!(
+                    Syntax::InvalidRestrictedIdentifier,
+                    (loc, msg)
+                )));
+            } else {
+                (Tok::RestrictedIdentifier, len)
+            }
+        }
         'A'..='Z' | 'a'..='z' | '_' => {
             let is_hex = text.starts_with("x\"");
             if is_hex || text.starts_with("b\"") {

@@ -453,7 +489,7 @@
                 }
             } else {
                 let len = get_name_len(text);
-                (get_name_token(&text[..len]), len)
+                (get_name_token(syntax_edition, &text[..len]), len)
             }
         }
         '&' => {

@@ -610,7 +646,7 @@ fn get_string_len(text: &str) -> Option<usize> {
     None
 }

-fn get_name_token(name: &str) -> Tok {
+fn get_name_token(syntax_edition: SyntaxEdition, name: &str) -> Tok {
     match name {
         "abort" => Tok::Abort,
         "acquires" => Tok::Acquires,

@@ -638,7 +674,17 @@ fn get_name_token(name: &str) -> Tok {
         "true" => Tok::True,
         "use" => Tok::Use,
         "while" => Tok::While,
-        _ => Tok::Identifier,
+        _ => match syntax_edition {
+            SyntaxEdition::Legacy => Tok::Identifier,
+            // New keywords in the 2024 edition
+            SyntaxEdition::E2024 => match name {
+                "mut" => Tok::Mut,
+                "enum" => Tok::Enum,
+                "type" => Tok::Type,
+                "match" => Tok::Match,
+                _ => Tok::Identifier,
+            },
+        },
     }
 }

move-compiler/src/parser/syntax.rs

Lines changed: 30 additions & 11 deletions

@@ -287,11 +287,23 @@
 // Parse an identifier:
 //      Identifier = <IdentifierValue>
 fn parse_identifier(context: &mut Context) -> Result<Name, Box<Diagnostic>> {
-    if context.tokens.peek() != Tok::Identifier {
-        return Err(unexpected_token_error(context.tokens, "an identifier"));
-    }
+    if matches!(
+        context.tokens.peek(),
+        Tok::Identifier | Tok::RestrictedIdentifier
+    ) {}
+    let id: Symbol = match context.tokens.peek() {
+        Tok::Identifier => context.tokens.content().into(),
+        Tok::RestrictedIdentifier => {
+            // peel off backticks ``
+            let content = context.tokens.content();
+            let peeled = &content[1..content.len() - 1];
+            peeled.into()
+        }
+        _ => {
+            return Err(unexpected_token_error(context.tokens, "an identifier"));
+        }
+    };
     let start_loc = context.tokens.start_loc();
-    let id = context.tokens.content().into();
     context.tokens.advance()?;
     let end_loc = context.tokens.previous_end_loc();
     Ok(spanned(context.tokens.file_hash(), start_loc, end_loc, id))

@@ -329,7 +341,7 @@ fn parse_leading_name_access_<'a, F: FnOnce() -> &'a str>(
     item_description: F,
 ) -> Result<LeadingNameAccess, Box<Diagnostic>> {
     match context.tokens.peek() {
-        Tok::Identifier => {
+        Tok::RestrictedIdentifier | Tok::Identifier => {
             let loc = current_token_loc(context.tokens);
             let n = parse_identifier(context)?;
             Ok(sp(loc, LeadingNameAccess_::Name(n)))

@@ -659,7 +671,10 @@ fn parse_bind_field(context: &mut Context) -> Result<(Field, Bind), Box<Diagnost
 //          | <NameAccessChain> <OptionalTypeArgs> "(" Comma<Bind> ")"
 fn parse_bind(context: &mut Context) -> Result<Bind, Box<Diagnostic>> {
     let start_loc = context.tokens.start_loc();
-    if context.tokens.peek() == Tok::Identifier {
+    if matches!(
+        context.tokens.peek(),
+        Tok::Identifier | Tok::RestrictedIdentifier
+    ) {
         let next_tok = context.tokens.lookahead()?;
         if !matches!(
             next_tok,

@@ -995,7 +1010,7 @@ fn parse_term(context: &mut Context) -> Result<Exp, Box<Diagnostic>> {
             Exp_::Vector(vec_loc, tys_opt, args)
         }

-        Tok::Identifier => parse_name_exp(context)?,
+        Tok::Identifier | Tok::RestrictedIdentifier => parse_name_exp(context)?,

         Tok::NumValue => {
             // Check if this is a ModuleIdent (in a ModuleAccess).

@@ -1284,6 +1299,7 @@ fn at_start_of_exp(context: &mut Context) -> bool {
         | Tok::NumTypedValue
         | Tok::ByteStringValue
         | Tok::Identifier
+        | Tok::RestrictedIdentifier
         | Tok::AtSign
        | Tok::Copy
        | Tok::Move

@@ -1477,8 +1493,9 @@ fn parse_unary_exp(context: &mut Context) -> Result<Exp, Box<Diagnostic>> {
         }
         Tok::Amp => {
             context.tokens.advance()?;
+            let is_mut = match_token(context.tokens, Tok::Mut)?;
             let e = parse_unary_exp(context)?;
-            Exp_::Borrow(false, Box::new(e))
+            Exp_::Borrow(is_mut, Box::new(e))
         }
         Tok::Star => {
             context.tokens.advance()?;

@@ -1796,8 +1813,9 @@ fn parse_type(context: &mut Context) -> Result<Type, Box<Diagnostic>> {
         }
         Tok::Amp => {
             context.tokens.advance()?;
+            let is_mut = match_token(context.tokens, Tok::Mut)?;
             let t = parse_type(context)?;
-            Type_::Ref(false, Box::new(t))
+            Type_::Ref(is_mut, Box::new(t))
         }
         Tok::AmpMut => {
             context.tokens.advance()?;

@@ -2866,7 +2884,7 @@ fn parse_spec_block(
             let type_parameters = parse_optional_type_parameters(context)?;
             SpecBlockTarget_::Schema(name, type_parameters)
         }
-        Tok::Identifier => {
+        Tok::RestrictedIdentifier | Tok::Identifier => {
             let name = parse_identifier(context)?;
             let signature = parse_spec_target_signature_opt(&name.loc, context)?;
             SpecBlockTarget_::Member(name, signature)

@@ -3543,7 +3561,8 @@ pub fn parse_file_string(
     input: &str,
     package: Option<Symbol>,
 ) -> Result<(Vec<Definition>, MatchedFileCommentMap), Diagnostics> {
-    let mut tokens = Lexer::new(input, file_hash);
+    let edition = env.syntax_edition(package);
+    let mut tokens = Lexer::new(input, file_hash, edition);
     match tokens.advance() {
         Err(err) => Err(Diagnostics::from(vec![*err])),
         Ok(..) => Ok(()),
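
A hypothetical sketch tying the parser changes together: `Tok::Mut` after `&` in expression position (`Exp_::Borrow(is_mut, ...)`) and in type position (`Type_::Ref(is_mut, ...)`), plus a restricted identifier in a bind:

```move
module a::m {
    struct S has drop { f: u64 }

    fun bump(s: & mut S): u64 {    // `& mut` accepted in type position
        let f = & mut s.f;         // `& mut` accepted in expression position
        *f = *f + 1;
        let `enum` = *f;           // restricted identifier in a bind
        `enum`
    }
}
```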

move-compiler/src/shared/mod.rs

Lines changed: 7 additions & 1 deletion

@@ -9,7 +9,9 @@ use crate::{
         codes::{Category, Declarations, DiagnosticsID, Severity, UnusedItem, WarningFilter},
         Diagnostic, Diagnostics, WarningFilters,
     },
-    editions::{check_feature as edition_check_feature, Edition, FeatureGate, Flavor},
+    editions::{
+        check_feature as edition_check_feature, Edition, FeatureGate, Flavor, SyntaxEdition,
+    },
     expansion::ast as E,
     naming::ast as N,
     naming::ast::ModuleDefinition,

@@ -547,6 +549,10 @@ impl CompilationEnv {
         self.package_config(package).edition.supports(feature)
     }

+    pub fn syntax_edition(&self, package: Option<Symbol>) -> SyntaxEdition {
+        self.package_config(package).edition.syntax()
+    }
+
     pub fn package_config(&self, package: Option<Symbol>) -> &PackageConfig {
         package
             .and_then(|p| self.package_configs.get(&p))
move-compiler/tests/move_2024/parser/enum_keyword.exp

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+error[E01002]: unexpected token
+  ┌─ tests/move_2024/parser/enum_keyword.move:1:11
+  │
+1 │ module a::enum {}
+  │           ^^^^
+  │           │
+  │           Unexpected 'enum'
+  │           Expected an identifier
+

move-compiler/tests/move_2024/parser/enum_keyword.move

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+module a::enum {}

move-compiler/tests/move_2024/parser/match_keyword.exp

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+error[E01002]: unexpected token
+  ┌─ tests/move_2024/parser/match_keyword.move:2:9
+  │
+2 │     fun match(_: u64): bool { false }
+  │         ^^^^^
+  │         │
+  │         Unexpected 'match'
+  │         Expected an identifier
+

move-compiler/tests/move_2024/parser/match_keyword.move

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+module a::m {
+    fun match(_: u64): bool { false }
+}
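
Per the release notes, code like the failing `match_keyword` test can be migrated by escaping the new keyword; a hypothetical fixed version:

```move
module a::m {
    // Escaping keeps the pre-2024 name usable as an identifier.
    fun `match`(_: u64): bool { false }
}
```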
