Skip to content

Stop emitting K".." and K"..." in lexer #573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/integration/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ end
return adjust_macro_name!(retexpr.args[1], k)
elseif k == K"?"
retexpr.head = :if
elseif k == K"dots"
n = numeric_flags(flags(nodehead))
return n == 2 ? :(..) : :(...)
elseif k == K"op=" && length(args) == 3
lhs = args[1]
op = args[2]
Expand Down
23 changes: 14 additions & 9 deletions src/julia/julia_parse_stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
is_postfix_op_call(head) && (str = str*"-post")

k = kind(head)
# Handle numeric flags for nrow/ncat nodes
if k in KSet"nrow ncat typed_ncat"
# Handle numeric flags for nodes that take them
if k in KSet"nrow ncat typed_ncat dots"
n = numeric_flags(head)
n != 0 && (str = str*"-"*string(n))
else
Expand Down Expand Up @@ -307,7 +307,12 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
isdotted = kind(t) == K"."
if isdotted
t2 = peek_token(ps, 2)
if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2))
if (!allow_whitespace && preceding_whitespace(t2))
isdotted = false
elseif !is_operator(t2)
isdotted = false
elseif kind(t2) == K"." && peek(ps, 3) == K"."
# Treat `..` as dotted K".", unless there's another dot after
isdotted = false
else
t = t2
Expand All @@ -316,13 +321,13 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
return (isdotted, t)
end

function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
if isdot
if emit_dot_node
dotmark = position(ps)
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
else
bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG
dotmark = position(ps)
bump(ps, TRIVIA_FLAG)
if kind(t) == K"."
bump(ps, TRIVIA_FLAG)
return emit(ps, dotmark, K"dots", set_numeric_flags(2))
end
end
pos = bump(ps, flags, remap_kind=remap_kind)
Expand Down
7 changes: 4 additions & 3 deletions src/julia/kinds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,6 @@ register_kinds!(JuliaSyntax, 0, [
"ErrorInvalidOperator"
"Error**"

"..."

# Level 1
"BEGIN_ASSIGNMENTS"
"BEGIN_SYNTACTIC_ASSIGNMENTS"
Expand Down Expand Up @@ -774,7 +772,6 @@ register_kinds!(JuliaSyntax, 0, [
# Level 8
"BEGIN_COLON"
":"
".."
"…"
"⁝"
"⋮"
Expand Down Expand Up @@ -1033,6 +1030,10 @@ register_kinds!(JuliaSyntax, 0, [
"typed_ncat"
"row"
"nrow"
# splat/slurp
"..."
# ../... as an identifier
"dots"
# Comprehensions
"generator"
"filter"
Expand Down
74 changes: 45 additions & 29 deletions src/julia/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ function parse_RtoL(ps::ParseState, down, is_op, self)
down(ps)
isdot, tk = peek_dotted_op_token(ps)
if is_op(tk)
bump_dotted(ps, isdot, remap_kind=K"Identifier")
bump_dotted(ps, isdot, tk, remap_kind=K"Identifier")
self(ps)
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
end
Expand Down Expand Up @@ -598,7 +598,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
# a .~ b ==> (dotcall-i a ~ b)
# [a ~ b c] ==> (hcat (call-i a ~ b) c)
# [a~b] ==> (vect (call-i a ~ b))
bump_dotted(ps, isdot, remap_kind=K"Identifier")
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
bump_trivia(ps)
parse_assignment(ps, down)
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
Expand All @@ -617,7 +617,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
(-1, K"Identifier", EMPTY_FLAGS), # op
(1, K"=", TRIVIA_FLAG))
else
bump_dotted(ps, isdot, TRIVIA_FLAG)
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
end
bump_trivia(ps)
# Syntax Edition TODO: We'd like to call `down` here when
Expand Down Expand Up @@ -743,7 +743,7 @@ function parse_arrow(ps::ParseState)
# x <--> y ==> (call-i x <--> y)
# x .--> y ==> (dotcall-i x --> y)
# x -->₁ y ==> (call-i x -->₁ y)
bump_dotted(ps, isdot, remap_kind=K"Identifier")
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
parse_arrow(ps)
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
end
Expand Down Expand Up @@ -771,7 +771,7 @@ function parse_lazy_cond(ps::ParseState, down, is_op, self)
(isdot, t) = peek_dotted_op_token(ps)
k = kind(t)
if is_op(k)
bump_dotted(ps, isdot, TRIVIA_FLAG)
bump_dotted(ps, isdot, t, TRIVIA_FLAG)
self(ps)
emit(ps, mark, isdot ? dotted(k) : k, flags(t))
if isdot
Expand Down Expand Up @@ -819,7 +819,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false)
while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t))
n_comparisons += 1
op_dotted = isdot
op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier")
op_pos = bump_dotted(ps, isdot, t, emit_dot_node=true, remap_kind=K"Identifier")
parse_pipe_lt(ps)
end
if n_comparisons == 1
Expand Down Expand Up @@ -873,15 +873,16 @@ end
function parse_range(ps::ParseState)
mark = position(ps)
parse_invalid_ops(ps)

(initial_dot, initial_tok) = peek_dotted_op_token(ps)
initial_kind = kind(initial_tok)
if initial_kind != K":" && is_prec_colon(initial_kind)
# a..b ==> (call-i a .. b)
if initial_kind != K":" && (is_prec_colon(initial_kind) || (initial_dot && initial_kind == K"."))
# a..b ==> (call-i a (dots-2) b)
# a … b ==> (call-i a … b)
# a .… b ==> (dotcall-i a … b)
bump_dotted(ps, initial_dot, remap_kind=K"Identifier")
bump_dotted(ps, initial_dot, initial_tok, remap_kind=K"Identifier")
parse_invalid_ops(ps)
emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG)
emit(ps, mark, (initial_dot && initial_kind != K".") ? K"dotcall" : K"call", INFIX_FLAG)
elseif initial_kind == K":" && ps.range_colon_enabled
# a ? b : c:d ==> (? a b (call-i c : d))
n_colons = 0
Expand Down Expand Up @@ -948,8 +949,10 @@ function parse_range(ps::ParseState)
# x... ==> (... x)
# x:y... ==> (... (call-i x : y))
# x..y... ==> (... (call-i x (dots-2) y)) # flisp parser fails here
if peek(ps) == K"..."
if peek(ps) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
bump(ps, TRIVIA_FLAG)
bump(ps, TRIVIA_FLAG) # second dot
bump(ps, TRIVIA_FLAG) # third dot
emit(ps, mark, K"...")
end
end
Expand All @@ -965,7 +968,7 @@ function parse_invalid_ops(ps::ParseState)
parse_expr(ps)
while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**")
bump_trivia(ps)
bump_dotted(ps, isdot)
bump_dotted(ps, isdot, t)
parse_expr(ps)
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
end
Expand Down Expand Up @@ -1006,7 +1009,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
# [x+y + z] ==> (vect (call-i x + y z))
break
end
bump_dotted(ps, isdot, remap_kind=K"Identifier")
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
down(ps)
if kind(t) in chain_ops && !is_suffixed(t) && !isdot
# a + b + c ==> (call-i a + b c)
Expand Down Expand Up @@ -1258,7 +1261,7 @@ function parse_unary(ps::ParseState)
#
# (The flisp parser only considers commas before `;` and thus gets this
# last case wrong)
op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
op_pos = bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")

space_before_paren = preceding_whitespace(t2)
if space_before_paren
Expand Down Expand Up @@ -1352,12 +1355,12 @@ function parse_unary(ps::ParseState)
# -0x1 ==> (call-pre - 0x01)
# - 2 ==> (call-pre - 2)
# .-2 ==> (dotcall-pre - 2)
op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier")
op_pos = bump_dotted(ps, op_dotted, op_t, remap_kind=K"Identifier")
else
# /x ==> (call-pre (error /) x)
# +₁ x ==> (call-pre (error +₁) x)
# .<: x ==> (dotcall-pre (error (. <:)) x)
bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier")
bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier")
op_pos = emit(ps, mark, K"error", error="not a unary operator")
end
parse_unary(ps)
Expand Down Expand Up @@ -1388,7 +1391,7 @@ end
function parse_factor_with_initial_ex(ps::ParseState, mark)
parse_decl_with_initial_ex(ps, mark)
if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t)))
bump_dotted(ps, isdot, remap_kind=K"Identifier")
bump_dotted(ps, isdot, t, remap_kind=K"Identifier")
parse_factor_after(ps)
emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG)
end
Expand Down Expand Up @@ -2476,11 +2479,11 @@ function parse_import_atsym(ps::ParseState, allow_quotes=true)
end
end
b = peek_behind(ps, pos)
if warn_parens && b.orig_kind != K".."
if warn_parens && b.kind != K"dots"
emit_diagnostic(ps, mark, warning="parentheses are not required here")
end
ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) ||
(!b.is_leaf && b.kind in KSet"$ var")
(!b.is_leaf && (b.kind in KSet"$ var" || b.kind == K"dots"))
if !ok
emit(ps, mark, K"error", error="expected identifier")
end
Expand Down Expand Up @@ -2589,10 +2592,6 @@ function parse_import_path(ps::ParseState)
end
if k == K"."
bump(ps)
elseif k == K".."
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
elseif k == K"..."
bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS))
else
break
end
Expand All @@ -2611,6 +2610,17 @@ function parse_import_path(ps::ParseState)
# import A.⋆.f ==> (import (importpath A ⋆ f))
next_tok = peek_token(ps, 2)
if is_operator(kind(next_tok))
if kind(next_tok) == K"." && peek(ps, 3) == K"."
# Import the .. operator
# import A... ==> (import (importpath A (dots-2)))
bump_disallowed_space(ps)
bump(ps, TRIVIA_FLAG)
dotmark = position(ps)
bump(ps, TRIVIA_FLAG)
bump(ps, TRIVIA_FLAG)
emit(ps, dotmark, K"dots", set_numeric_flags(2))
continue
end
if preceding_whitespace(t)
# Whitespace in import path allowed but discouraged
# import A .== ==> (import (importpath A ==))
Expand All @@ -2623,10 +2633,6 @@ function parse_import_path(ps::ParseState)
end
bump(ps, TRIVIA_FLAG)
parse_import_atsym(ps)
elseif k == K"..."
# Import the .. operator
# import A... ==> (import (importpath A ..))
bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS))
elseif k in KSet"NewlineWs ; , : EndMarker"
# import A; B ==> (import (importpath A))
break
Expand Down Expand Up @@ -3496,6 +3502,16 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
# . ==> (error .)
emit(ps, mark, K"error", error="invalid identifier")
end
elseif kind(leading_tok) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"."
# ...
bump(ps, TRIVIA_FLAG)
bump(ps, TRIVIA_FLAG)
bump(ps, TRIVIA_FLAG)
emit(ps, mark, K"dots", set_numeric_flags(3))
if check_identifiers
# ... ==> (error ...)
emit(ps, mark, K"error", error="invalid identifier")
end
elseif is_error(leading_kind)
# Errors for bad tokens are emitted in validate_tokens() rather than
# here.
Expand Down Expand Up @@ -3583,9 +3599,9 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal
@label is_operator
# + ==> +
# .+ ==> (. +)
bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind=
bump_dotted(ps, leading_dot, leading_tok, emit_dot_node=true, remap_kind=
is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier")
if check_identifiers && !is_valid_identifier(leading_kind)
if check_identifiers && !(is_valid_identifier(leading_kind) || (leading_dot && leading_kind == K"."))
# += ==> (error (op= +))
# ? ==> (error ?)
# .+= ==> (error (. (op= +)))
Expand Down
20 changes: 7 additions & 13 deletions src/julia/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ end
function optakessuffix(k)
(K"BEGIN_OPS" <= k <= K"END_OPS") &&
!(
k == K"..." ||
K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" ||
k == K"?" ||
k == K"<:" ||
Expand All @@ -165,7 +164,6 @@ function optakessuffix(k)
k == K"≔" ||
k == K"⩴" ||
k == K":" ||
k == K".." ||
k == K"$" ||
k == K"::" ||
k == K"where" ||
Expand Down Expand Up @@ -987,7 +985,7 @@ function lex_digit(l::Lexer, kind)
pc,ppc = dpeekchar(l)
if pc == '.'
if ppc == '.'
# Number followed by K".." or K"..."
# Number followed by K"."
return emit(l, kind)
elseif kind === K"Float"
# If we enter the function with kind == K"Float" then a '.' has been parsed.
Expand Down Expand Up @@ -1166,23 +1164,19 @@ function lex_backslash(l::Lexer)
end

function lex_dot(l::Lexer)
if accept(l, '.')
if l.last_token == K"@"
if accept(l, '.')
l.last_token == K"@" && return emit(l, K"Identifier")
return emit(l, K"...")
else
if is_dottable_operator_start_char(peekchar(l))
if !accept(l, '.') && is_dottable_operator_start_char(peekchar(l))
readchar(l)
return emit(l, K"ErrorInvalidOperator")
else
l.last_token == K"@" && return emit(l, K"Identifier")
return emit(l, K"..")
end
end
elseif Base.isdigit(peekchar(l))
# Emit `.`, `..` and `...` as identifiers after `@`
emit(l, K"Identifier")
elseif l.last_token != K"." && Base.isdigit(peekchar(l))
# Only start a numeric constant if the previous token wasn't a dot
return lex_digit(l, K"Float")
else
l.last_token == K"@" && return emit(l, K"Identifier")
return emit(l, K".")
end
end
Expand Down
2 changes: 2 additions & 0 deletions test/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
@test parseatom(":(a)") == QuoteNode(:a)
@test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a))
@test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2))
@test parseatom(":...") == QuoteNode(Symbol("..."))
@test parseatom(":(...)") == QuoteNode(Symbol("..."))
# Compatibility hack for VERSION >= v"1.4"
# https://github.com/JuliaLang/julia/pull/34077
@test parseatom(":true") == Expr(:quote, true)
Expand Down
Loading
Loading