
Remove separate syntax heads for each operator #575

Open · wants to merge 1 commit into kf/dots
1 change: 0 additions & 1 deletion docs/src/api.md
@@ -101,7 +101,6 @@ JuliaSyntax.is_infix_op_call
 JuliaSyntax.is_prefix_op_call
 JuliaSyntax.is_postfix_op_call
 JuliaSyntax.is_dotted
-JuliaSyntax.is_suffixed
 JuliaSyntax.is_decorated
 JuliaSyntax.numeric_flags
 ```
7 changes: 5 additions & 2 deletions src/JuliaSyntax.jl
@@ -41,7 +41,11 @@ export SourceFile
 @_public source_line_range
 
 # Expression predicates, kinds and flags
-export @K_str, kind
+export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT,
+PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND,
+PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS,
+PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL,
+PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN, generic_operators_by_level
 @_public Kind
 
 @_public flags,
@@ -53,7 +57,6 @@ export @K_str, kind
 is_prefix_op_call,
 is_postfix_op_call,
 is_dotted,
-is_suffixed,
 is_decorated,
 numeric_flags,
 has_flags,
7 changes: 5 additions & 2 deletions src/core/parse_stream.jl
@@ -45,7 +45,7 @@ kind(head::SyntaxHead) = head.kind
 
 Return the flag bits of a syntactic construct. Prefer to query these with the
 predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`,
-`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`,
+`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`,
 `is_decorated`.
 
 Or extract numeric portion of the flags with `numeric_flags`.
@@ -376,7 +376,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
 was_whitespace = is_whitespace(k)
 had_whitespace |= was_whitespace
 f = EMPTY_FLAGS
-raw.suffix && (f |= SUFFIXED_FLAG)
+if k == K"Operator" && raw.op_precedence != Tokenize.PREC_NONE
+# Store operator precedence in numeric flags
+f |= set_numeric_flags(Int(raw.op_precedence))
+end
 push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
 had_whitespace, raw.endbyte + 2))
 token_count += 1
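Note: where the old code set a single SUFFIXED_FLAG bit, the new code stores the token's operator precedence level in the numeric portion of the flags word. A minimal standalone sketch of that pack/unpack idea follows; the field width, shift, and helper names are assumptions for illustration, not JuliaSyntax's actual layout.

```julia
# Illustrative sketch only: pack a small integer (here an operator precedence
# level) into the upper bits of a flags word and recover it later, mirroring
# the set_numeric_flags / numeric_flags round trip used in the hunk above.
const NUMERIC_SHIFT = 8                        # assumed field position

pack_numeric(n::Integer)::UInt16 = UInt16(n) << NUMERIC_SHIFT
unpack_numeric(f::UInt16) = Int(f >> NUMERIC_SHIFT)

f = UInt16(0)                                  # EMPTY_FLAGS analogue
f |= pack_numeric(5)                           # e.g. store precedence level 5
@assert unpack_numeric(f) == 5                 # recovered later from the head
```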
36 changes: 22 additions & 14 deletions src/integration/expr.jl
@@ -341,20 +341,28 @@ end
 elseif k == K"dots"
 n = numeric_flags(flags(nodehead))
 return n == 2 ? :(..) : :(...)
-elseif k == K"op=" && length(args) == 3
-lhs = args[1]
-op = args[2]
-rhs = args[3]
-headstr = string(args[2], '=')
-retexpr.head = Symbol(headstr)
-retexpr.args = Any[lhs, rhs]
-elseif k == K".op=" && length(args) == 3
-lhs = args[1]
-op = args[2]
-rhs = args[3]
-headstr = '.' * string(args[2], '=')
-retexpr.head = Symbol(headstr)
-retexpr.args = Any[lhs, rhs]
+elseif k == K"op="
+if length(args) == 3
+lhs = args[1]
+op = args[2]
+rhs = args[3]
+headstr = string(args[2], '=')
+retexpr.head = Symbol(headstr)
+retexpr.args = Any[lhs, rhs]
+elseif length(args) == 1
+return Symbol(string(args[1], '='))
+end
+elseif k == K".op="
+if length(args) == 3
+lhs = args[1]
+op = args[2]
+rhs = args[3]
+headstr = '.' * string(args[2], '=')
+retexpr.head = Symbol(headstr)
+retexpr.args = Any[lhs, rhs]
+else
+return Symbol(string('.', args[1], '='))
+end
 elseif k == K"macrocall"
 if length(args) >= 2
 a2 = args[2]
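For context, the reworked `K"op="` branch produces the usual updating-assignment `Expr` heads and now also handles a lone operator child. A hedged sketch of the resulting mapping, written with only Base Julia rather than JuliaSyntax internals:

```julia
# A three-child (lhs, op, rhs) node becomes Expr(Symbol(op, '='), lhs, rhs);
# a lone operator child becomes the bare Symbol, e.g. Symbol("+=").
lhs, op, rhs = :x, :+, 1

ex = Expr(Symbol(string(op, '=')), lhs, rhs)
@assert ex == :(x += 1)                    # infix updating-assignment form

standalone = Symbol(string(op, '='))       # operator with no operands
@assert standalone == Symbol("+=")
```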
60 changes: 6 additions & 54 deletions src/julia/julia_parse_stream.jl
@@ -1,7 +1,3 @@
-# Token flags - may be set for operator kinded tokens
-# Operator has a suffix
-const SUFFIXED_FLAG = RawFlags(1<<2)
-
 # Set for K"call", K"dotcall" or any syntactic operator heads
 # Distinguish various syntaxes which are mapped to K"call"
 const PREFIX_CALL_FLAG = RawFlags(0<<3)
@@ -110,15 +106,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from
 """
 is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
 
-
-"""
-is_suffixed(x)
-
-Return true for operators which have suffixes, such as `+₁`
-"""
-is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
-
-
 """
 numeric_flags(x)
 
@@ -164,7 +151,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
 str *= "-,"
 end
 end
-is_suffixed(head) && (str = str*"-suf")
 end
 str
 end
@@ -262,45 +248,6 @@ function validate_tokens(stream::ParseStream)
 sort!(stream.diagnostics, by=first_byte)
 end
 
-"""
-bump_split(stream, token_spec1, [token_spec2 ...])
-
-Bump the next token, splitting it into several pieces
-
-Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
-If all `nbyte` are positive, the sum must equal the token length. If one
-`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
-all `nbyte` must equal zero.
-
-This is a hack which helps resolves the occasional lexing ambiguity. For
-example
-* Whether .+ should be a single token or the composite (. +) which is used for
-standalone operators.
-* Whether ... is splatting (most of the time) or three . tokens in import paths
-
-TODO: Are these the only cases? Can we replace this general utility with a
-simpler one which only splits preceding dots?
-"""
-function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
-tok = stream.lookahead[stream.lookahead_index]
-stream.lookahead_index += 1
-start_b = _next_byte(stream)
-toklen = tok.next_byte - start_b
-prev_b = start_b
-for (i, (nbyte, k, f)) in enumerate(split_spec)
-h = SyntaxHead(k, f)
-actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
-orig_k = k == K"." ? K"." : kind(tok)
-node = RawGreenNode(h, actual_nbyte, orig_k)
-push!(stream.output, node)
-prev_b += actual_nbyte
-stream.next_byte += actual_nbyte
-end
-@assert tok.next_byte == prev_b
-stream.peek_count = 0
-return position(stream)
-end
-
 function peek_dotted_op_token(ps, allow_whitespace=false)
 # Peek the next token, but if it is a dot, peek the next one as well
 t = peek_token(ps)
@@ -318,7 +265,12 @@ function peek_dotted_op_token(ps, allow_whitespace=false)
 t = t2
 end
 end
-return (isdotted, t)
+isassign = false
+if !allow_whitespace && is_operator(t)
+t3 = peek_token(ps, 2+isdotted)
+isassign = kind(t3) == K"=" && !preceding_whitespace(t3)
+end
+return (isdotted, isassign, t)
 end
 
 function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None")
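The added `isassign` lookahead marks an operator that is immediately followed by `=` with no intervening whitespace, so the parser can recognise the start of a compound assignment without needing a dedicated token kind per operator. A standalone toy sketch of that decision rule; every name below is illustrative and not part of the JuliaSyntax API:

```julia
# Toy model: a token is (kind, whitespace-before flag). An operator starts a
# compound assignment only when the very next token is `=` with no whitespace
# in between, which is the rule the new isassign check above encodes.
struct Tok
    kind::Symbol
    ws_before::Bool
end

starts_op_assign(toks, i) =
    toks[i].kind == :op && i < length(toks) &&
    toks[i+1].kind == :eq && !toks[i+1].ws_before

toks = [Tok(:op, false), Tok(:eq, false), Tok(:number, true)]    # `+= 1`
@assert starts_op_assign(toks, 1)

toks_ws = [Tok(:op, false), Tok(:eq, true), Tok(:number, true)]  # `+ = 1`
@assert !starts_op_assign(toks_ws, 1)
```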