Skip to content

Commit e02f29f

Browse files
Keno and claude authored
Refactor node-specific flags to overlap with numeric flags (#570)
This change moves node-specific flags (TRIPLE_STRING_FLAG, PARENS_FLAG, etc.) from bits 5-6 to overlap with numeric flags in bits 8-15. This is safe because:
1. Node types that use specific flags never use numeric flags
2. Numeric flags are only used by ncat/nrow nodes, which don't use node-specific flags
3. The parser now passes dimensions separately to avoid flag conflicts

Key changes:
- Moved most node-specific flags to bit 8, some to bit 9
- Refactored parse_cat/parse_array to return dimension as a separate value
- Updated emit_braces to accept dimension parameter
- Made untokenize function head-aware for proper flag display
- Simplified flag handling by removing conditionals (just OR flags together)
- Added tests for dimension 4 ncat to ensure higher dimensions work correctly

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Keno Fischer <Keno@users.noreply.github.com>
Co-authored-by: Claude <noreply@anthropic.com>
1 parent c029996 commit e02f29f

File tree

3 files changed

+67
-51
lines changed

3 files changed

+67
-51
lines changed

src/julia/julia_parse_stream.jl

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,54 +9,54 @@ const INFIX_FLAG = RawFlags(1<<3)
99
const PREFIX_OP_FLAG = RawFlags(2<<3)
1010
const POSTFIX_OP_FLAG = RawFlags(3<<3)
1111

12-
# The following flags are quite head-specific and may overlap
12+
# The following flags are quite head-specific and may overlap with numeric flags
1313

1414
"""
1515
Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
1616
"""
17-
const TRIPLE_STRING_FLAG = RawFlags(1<<5)
17+
const TRIPLE_STRING_FLAG = RawFlags(1<<8)
1818

1919
"""
2020
Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
2121
"""
22-
const RAW_STRING_FLAG = RawFlags(1<<6)
22+
const RAW_STRING_FLAG = RawFlags(1<<9)
2323

2424
"""
2525
Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
2626
"""
27-
const PARENS_FLAG = RawFlags(1<<5)
27+
const PARENS_FLAG = RawFlags(1<<8)
2828

2929
"""
3030
Set for various delimited constructs when they contain a trailing comma. For
3131
example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
3232
this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
3333
"""
34-
const TRAILING_COMMA_FLAG = RawFlags(1<<6)
34+
const TRAILING_COMMA_FLAG = RawFlags(1<<9)
3535

3636
"""
3737
Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
3838
"""
39-
const COLON_QUOTE = RawFlags(1<<5)
39+
const COLON_QUOTE = RawFlags(1<<8)
4040

4141
"""
4242
Set for K"toplevel" which is delimited by semicolons (e.g. `a; b; c`)
4343
"""
44-
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5)
44+
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<8)
4545

4646
"""
4747
Set for K"function" in short form definitions such as `f() = 1`
4848
"""
49-
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5)
49+
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<8)
5050

5151
"""
5252
Set for K"struct" when mutable
5353
"""
54-
const MUTABLE_FLAG = RawFlags(1<<5)
54+
const MUTABLE_FLAG = RawFlags(1<<8)
5555

5656
"""
5757
Set for K"module" when it is bare (`baremodule`, not `module`)
5858
"""
59-
const BARE_MODULE_FLAG = RawFlags(1<<5)
59+
const BARE_MODULE_FLAG = RawFlags(1<<8)
6060

6161
# Flags holding the dimension of an nrow or other UInt8 not held in the source
6262
# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
@@ -137,29 +137,34 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
137137
is_postfix_op_call(head) && (str = str*"-post")
138138

139139
k = kind(head)
140-
if k in KSet"string cmdstring Identifier"
141-
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
142-
has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
143-
elseif k in KSet"tuple block macrocall"
144-
has_flags(head, PARENS_FLAG) && (str = str*"-p")
145-
elseif k == K"quote"
146-
has_flags(head, COLON_QUOTE) && (str = str*"-:")
147-
elseif k == K"toplevel"
148-
has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
149-
elseif k == K"function"
150-
has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
151-
elseif k == K"struct"
152-
has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
153-
elseif k == K"module"
154-
has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
155-
end
156-
if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
157-
has_flags(head, TRAILING_COMMA_FLAG)
158-
str *= "-,"
140+
# Handle numeric flags for nrow/ncat nodes
141+
if k in KSet"nrow ncat typed_ncat"
142+
n = numeric_flags(head)
143+
n != 0 && (str = str*"-"*string(n))
144+
else
145+
# Handle head-specific flags that overlap with numeric flags
146+
if k in KSet"string cmdstring Identifier"
147+
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
148+
has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
149+
elseif k in KSet"tuple block macrocall"
150+
has_flags(head, PARENS_FLAG) && (str = str*"-p")
151+
elseif k == K"quote"
152+
has_flags(head, COLON_QUOTE) && (str = str*"-:")
153+
elseif k == K"toplevel"
154+
has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
155+
elseif k == K"function"
156+
has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
157+
elseif k == K"struct"
158+
has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
159+
elseif k == K"module"
160+
has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
161+
end
162+
if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
163+
has_flags(head, TRAILING_COMMA_FLAG)
164+
str *= "-,"
165+
end
159166
end
160167
is_suffixed(head) && (str = str*"-suf")
161-
n = numeric_flags(head)
162-
n != 0 && (str = str*"-"*string(n))
163168
end
164169
str
165170
end

src/julia/parser.jl

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,8 +1098,8 @@ function parse_where_chain(ps0::ParseState, mark)
10981098
# x where {y for y in ys} ==> (where x (braces (generator y (iteration (in y ys)))))
10991099
m = position(ps)
11001100
bump(ps, TRIVIA_FLAG)
1101-
ckind, cflags = parse_cat(ps, K"}", ps.end_symbol)
1102-
emit_braces(ps, m, ckind, cflags)
1101+
ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
1102+
emit_braces(ps, m, ckind, cflags, dim)
11031103
emit(ps, mark, K"where")
11041104
else
11051105
# x where T ==> (where x T)
@@ -1589,7 +1589,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15891589
# a [i] ==> (ref a (error-t) i)
15901590
bump_disallowed_space(ps)
15911591
bump(ps, TRIVIA_FLAG)
1592-
ckind, cflags = parse_cat(ParseState(ps, end_symbol=true),
1592+
ckind, cflags, dim = parse_cat(ParseState(ps, end_symbol=true),
15931593
K"]", ps.end_symbol)
15941594
if is_macrocall
15951595
# @S[a,b] ==> (macrocall @S (vect a b))
@@ -1600,7 +1600,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16001600
#v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b))
16011601
#v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b)))
16021602
fix_macro_name_kind!(ps, macro_name_position)
1603-
emit(ps, m, ckind, cflags)
1603+
emit(ps, m, ckind, cflags | set_numeric_flags(dim))
16041604
check_ncat_compat(ps, m, ckind)
16051605
emit(ps, mark, K"macrocall")
16061606
is_macrocall = false
@@ -1621,7 +1621,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16211621
ckind == K"comprehension" ? K"typed_comprehension" :
16221622
ckind == K"ncat" ? K"typed_ncat" :
16231623
internal_error("unrecognized kind in parse_cat ", string(ckind))
1624-
emit(ps, mark, outk, cflags)
1624+
emit(ps, mark, outk, cflags | set_numeric_flags(dim))
16251625
check_ncat_compat(ps, mark, ckind)
16261626
end
16271627
elseif k == K"."
@@ -2840,7 +2840,7 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
28402840
if binding_power == typemin(Int)
28412841
# [x@y ==> (hcat x (error-t ✘ y))
28422842
bump_closing_token(ps, closer)
2843-
return (K"hcat", EMPTY_FLAGS)
2843+
return (K"hcat", 0)
28442844
end
28452845
while true
28462846
(next_dim, next_bp) = parse_array_inner(ps, binding_power, array_order)
@@ -2856,9 +2856,9 @@ function parse_array(ps::ParseState, mark, closer, end_is_symbol)
28562856
binding_power = next_bp
28572857
end
28582858
bump_closing_token(ps, closer)
2859-
return binding_power == -1 ? (K"vcat", EMPTY_FLAGS) :
2860-
binding_power == 0 ? (K"hcat", EMPTY_FLAGS) :
2861-
(K"ncat", set_numeric_flags(dim))
2859+
return binding_power == -1 ? (K"vcat", 0) :
2860+
binding_power == 0 ? (K"hcat", 0) :
2861+
(K"ncat", dim)
28622862
end
28632863

28642864
# Parse equal and ascending precedence chains of array concatenation operators -
@@ -3012,7 +3012,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
30123012
mark = position(ps)
30133013
if k == closer
30143014
# [] ==> (vect)
3015-
return parse_vect(ps, closer, false)
3015+
ckind, cflags = parse_vect(ps, closer, false)
3016+
return (ckind, cflags, 0)
30163017
elseif k == K";"
30173018
#v1.8: [;] ==> (ncat-1)
30183019
#v1.8: [;;] ==> (ncat-2)
@@ -3022,7 +3023,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
30223023
dim, _ = parse_array_separator(ps, Ref(:unknown))
30233024
min_supported_version(v"1.8", ps, mark, "empty multidimensional array syntax")
30243025
bump_closing_token(ps, closer)
3025-
return (K"ncat", set_numeric_flags(dim))
3026+
return (K"ncat", EMPTY_FLAGS, dim)
30263027
end
30273028
parse_eq_star(ps)
30283029
k = peek(ps, skip_newlines=true)
@@ -3035,15 +3036,18 @@ function parse_cat(ps::ParseState, closer, end_is_symbol)
30353036
# [x] ==> (vect x)
30363037
# [x \n ] ==> (vect x)
30373038
# [x ==> (vect x (error-t))
3038-
parse_vect(ps, closer, prefix_trailing_comma)
3039+
ckind, cflags = parse_vect(ps, closer, prefix_trailing_comma)
3040+
return (ckind, cflags, 0)
30393041
elseif k == K"for"
30403042
# [x for a in as] ==> (comprehension (generator x (iteration (in a as))))
30413043
# [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as))))
3042-
parse_comprehension(ps, mark, closer)
3044+
ckind, cflags = parse_comprehension(ps, mark, closer)
3045+
return (ckind, cflags, 0)
30433046
else
30443047
# [x y] ==> (hcat x y)
30453048
# and other forms; See parse_array.
3046-
parse_array(ps, mark, closer, end_is_symbol)
3049+
ckind, dim = parse_array(ps, mark, closer, end_is_symbol)
3050+
return (ckind, EMPTY_FLAGS, dim)
30473051
end
30483052
end
30493053

@@ -3448,13 +3452,13 @@ function parse_string(ps::ParseState, raw::Bool)
34483452
emit(ps, mark, string_kind, str_flags)
34493453
end
34503454

3451-
function emit_braces(ps, mark, ckind, cflags)
3455+
function emit_braces(ps, mark, ckind, cflags, dim=0)
34523456
if ckind == K"hcat"
34533457
# {x y} ==> (bracescat (row x y))
34543458
emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG)
34553459
elseif ckind == K"ncat"
34563460
# {x ;;; y} ==> (bracescat (nrow-3 x y))
3457-
emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG)
3461+
emit(ps, mark, K"nrow", set_numeric_flags(dim))
34583462
end
34593463
check_ncat_compat(ps, mark, ckind)
34603464
outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat"
@@ -3638,13 +3642,13 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
36383642
parse_paren(ps, check_identifiers, has_unary_prefix)
36393643
elseif leading_kind == K"[" # cat expression
36403644
bump(ps, TRIVIA_FLAG)
3641-
ckind, cflags = parse_cat(ps, K"]", ps.end_symbol)
3642-
emit(ps, mark, ckind, cflags)
3645+
ckind, cflags, dim = parse_cat(ps, K"]", ps.end_symbol)
3646+
emit(ps, mark, ckind, cflags | set_numeric_flags(dim))
36433647
check_ncat_compat(ps, mark, ckind)
36443648
elseif leading_kind == K"{" # cat expression
36453649
bump(ps, TRIVIA_FLAG)
3646-
ckind, cflags = parse_cat(ps, K"}", ps.end_symbol)
3647-
emit_braces(ps, mark, ckind, cflags)
3650+
ckind, cflags, dim = parse_cat(ps, K"}", ps.end_symbol)
3651+
emit_braces(ps, mark, ckind, cflags, dim)
36483652
elseif leading_kind == K"@" # macro call
36493653
# Macro names can be keywords
36503654
# @end x ==> (macrocall @end x)

test/parser.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,8 @@ tests = [
890890
"{x,y,}" => "(braces-, x y)"
891891
"{x y}" => "(bracescat (row x y))"
892892
((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))"
893+
((v=v"1.7",), "{a ;; b}") => "(bracescat (nrow-2 a b))"
894+
((v=v"1.7",), "{a ;;;; b}") => "(bracescat (nrow-4 a b))"
893895
# Macro names can be keywords
894896
"@end x" => "(macrocall @end x)"
895897
# __dot__ macro
@@ -929,6 +931,11 @@ tests = [
929931
# Column major
930932
((v=v"1.7",), "[x ; y ;; z ; w ;;; a ; b ;; c ; d]") =>
931933
"(ncat-3 (nrow-2 (nrow-1 x y) (nrow-1 z w)) (nrow-2 (nrow-1 a b) (nrow-1 c d)))"
934+
# Dimension 4 ncat
935+
((v=v"1.7",), "[x ;;;; y]") => "(ncat-4 x y)"
936+
((v=v"1.7",), "[a ; b ;;;; c ; d]") => "(ncat-4 (nrow-1 a b) (nrow-1 c d))"
937+
((v=v"1.7",), "[a b ; c d ;;;; e f ; g h]") =>
938+
"(ncat-4 (nrow-1 (row a b) (row c d)) (nrow-1 (row e f) (row g h)))"
932939
# Array separators
933940
# Newlines before semicolons are not significant
934941
"[a \n ;]" => "(vcat a)"

0 commit comments

Comments
 (0)