Skip to content

Commit 63bee39

Browse files
authored
Tweak macro name representation (#572)
The current representation for macro names is a bit peculiar. When the parser encounters `@a`, it treats `@` as notation for the macrocall and then uses `reset_node!` (which itself may be considered a bit of a code smell) to reset the `a` to a special MacroName token kind that (when translated back to a Julia Expr) implicitly adds back the `@`. Things get even more peculiar with `@var"a"`, where only the token inside the string macro gets reset. One particular consequence of this is JuliaLang/julia#58885, because our translation back to Expr does not check the RAW_STRING_FLAG (whereas the translation for K"Identifier" does). A second issue is that we currently parse `@A.b.c` and `A.b.@c` to the same SyntaxTree (of course the green tree is different). We aren't currently being super precise about the required invariants for syntax trees, but in general it would be desirable for non-trivia notation (like `@`) to be precisely recoverable from the tree, which is not the case here. This is especially annoying because there are syntax cases that are errors for one of these forms but not the other (e.g. `@A.b.$` is an error, but `A.b.@$` is allowed). Now, I think the wisdom of some of those syntax choices can be debated, but that is the situation we face. So this PR tries to clean that all up a bit by:
- Replacing the terminal K"MacroName" by a non-terminal K"macro_name". With this form, `@A.c` parses as `(macro_name (. A c))` while `A.@c` parses as `(. A (macro_name c))`.
- (In particular, the `@` notation is now always associated with the macro_name.)
- Emitting the dots in `@..` and `@...` as direct identifier tokens rather than having to reset them back.
- Adjusting everything else accordingly.
Partially written by Claude Code, though it had some trouble with the actual code changes.
1 parent e02f29f commit 63bee39

File tree

9 files changed

+271
-253
lines changed

9 files changed

+271
-253
lines changed

src/integration/expr.jl

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,6 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
246246
val isa UInt128 ? Symbol("@uint128_str") :
247247
Symbol("@big_str")
248248
return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
249-
elseif k == K"MacroName" && val === Symbol("@.")
250-
return Symbol("@__dot__")
251249
else
252250
return val
253251
end
@@ -296,7 +294,31 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
296294
nodehead, source)
297295
end
298296

299-
# Split out from the above for codesize reasons, to avoid specialization on multiple
297+
# Convert the bare name found under a `macro_name*` node into the `@`-prefixed
# form that `Expr` uses for macro names.
#
# * A `Symbol` becomes `@name`, `@name_cmd` or `@name_str` according to `k`; the
#   lone `.` under `macro_name` becomes `@__dot__`.
# * A `.`-headed `Expr` with at least two arguments whose second argument is a
#   `QuoteNode` has that quoted value adjusted recursively, in place.
# * Any other `Expr` is returned untouched.
function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind)
    if retexpr isa Expr
        # Incomplete or erroneous input may not have the `a.b` shape; in that
        # case there is nothing to rewrite.
        if retexpr.head == :(.) && length(retexpr.args) >= 2
            quoted = retexpr.args[2]
            if quoted isa QuoteNode
                retexpr.args[2] = QuoteNode(adjust_macro_name!(quoted.value, k))
            end
        end
        return retexpr
    end
    k == K"macro_name" &&
        return retexpr === Symbol(".") ? Symbol("@__dot__") : Symbol("@$retexpr")
    k == K"macro_name_cmd" && return Symbol("@$(retexpr)_cmd")
    @assert k == K"macro_name_str"
    return Symbol("@$(retexpr)_str")
end
320+
321+
# Split out from `node_to_expr` for codesize reasons, to avoid specialization on multiple
300322
# tree types.
301323
@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode,
302324
srcrange::UnitRange{UInt32},
@@ -312,6 +334,8 @@ end
312334
# However, errors can add additional errors tokens which we represent
313335
# as e.g. `Expr(:var, ..., Expr(:error))`.
314336
return retexpr.args[1]
337+
elseif k in KSet"macro_name macro_name_cmd macro_name_str"
338+
return adjust_macro_name!(retexpr.args[1], k)
315339
elseif k == K"?"
316340
retexpr.head = :if
317341
elseif k == K"op=" && length(args) == 3
@@ -331,7 +355,7 @@ end
331355
elseif k == K"macrocall"
332356
if length(args) >= 2
333357
a2 = args[2]
334-
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
358+
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd"
335359
# Fix up for custom cmd macros like foo`x`
336360
args[2] = a2.args[3]
337361
end

src/julia/kinds.jl

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -194,15 +194,6 @@ register_kinds!(JuliaSyntax, 0, [
194194
"BEGIN_IDENTIFIERS"
195195
"Identifier"
196196
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
197-
# Macro names are modelled as special kinds of identifiers because the full
198-
# macro name may not appear as characters in the source: The `@` may be
199-
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
200-
# suffix appended.
201-
"BEGIN_MACRO_NAMES"
202-
"MacroName"
203-
"StringMacroName"
204-
"CmdMacroName"
205-
"END_MACRO_NAMES"
206197
"END_IDENTIFIERS"
207198

208199
"BEGIN_KEYWORDS"
@@ -1048,6 +1039,10 @@ register_kinds!(JuliaSyntax, 0, [
10481039
"iteration"
10491040
"comprehension"
10501041
"typed_comprehension"
1042+
# Macro names
1043+
"macro_name"
1044+
"macro_name_cmd"
1045+
"macro_name_str"
10511046
# Container for a single statement/atom plus any trivia and errors
10521047
"wrapper"
10531048
"END_SYNTAX_KINDS"
@@ -1111,10 +1106,6 @@ const _nonunique_kind_names = Set([
11111106
K"String"
11121107
K"Char"
11131108
K"CmdString"
1114-
1115-
K"MacroName"
1116-
K"StringMacroName"
1117-
K"CmdMacroName"
11181109
])
11191110

11201111
"""
@@ -1201,7 +1192,6 @@ is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS"
12011192
is_prec_pipe_lt(x) = kind(x) == K"<|"
12021193
is_prec_pipe_gt(x) = kind(x) == K"|>"
12031194
is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS"
1204-
is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES"
12051195
is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS"
12061196

12071197
function is_string_delim(x)

src/julia/literal_parsing.jl

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -430,12 +430,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
430430
Symbol(normalize_identifier(val_str))
431431
elseif k == K"error"
432432
ErrorVal()
433-
elseif k == K"MacroName"
434-
Symbol("@$(normalize_identifier(val_str))")
435-
elseif k == K"StringMacroName"
436-
Symbol("@$(normalize_identifier(val_str))_str")
437-
elseif k == K"CmdMacroName"
438-
Symbol("@$(normalize_identifier(val_str))_cmd")
439433
elseif is_syntax_kind(head)
440434
nothing
441435
elseif is_keyword(k)

0 commit comments

Comments
 (0)