From 1cf7c1e1fd61806b37a2f3aefcdf75f4a2430143 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 21:55:42 +0900 Subject: [PATCH 01/27] Split mark and span from `src/tokens.jl` --- src/YAML.jl | 2 ++ src/mark.jl | 10 ++++++++++ src/span.jl | 5 +++++ src/tokens.jl | 21 --------------------- 4 files changed, 17 insertions(+), 21 deletions(-) create mode 100644 src/mark.jl create mode 100644 src/span.jl diff --git a/src/YAML.jl b/src/YAML.jl index 7e38f0e..7df8868 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -29,6 +29,8 @@ using StringEncodings include("queue.jl") include("buffered_input.jl") +include("mark.jl") +include("span.jl") include("tokens.jl") include("scanner.jl") include("events.jl") diff --git a/src/mark.jl b/src/mark.jl new file mode 100644 index 0000000..d715a74 --- /dev/null +++ b/src/mark.jl @@ -0,0 +1,10 @@ +# Position within the document being parsed +struct Mark + index::UInt64 + line::UInt64 + column::UInt64 +end + +function show(io::IO, mark::Mark) + @printf(io, "line %d, column %d", mark.line, mark.column) +end diff --git a/src/span.jl b/src/span.jl new file mode 100644 index 0000000..0be0017 --- /dev/null +++ b/src/span.jl @@ -0,0 +1,5 @@ +# Where in the stream a particular token lies. +struct Span + start_mark::Mark + end_mark::Mark +end diff --git a/src/tokens.jl b/src/tokens.jl index 0b279de..a5e3d85 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -1,24 +1,3 @@ - -# Position within the document being parsed -struct Mark - index::UInt64 - line::UInt64 - column::UInt64 -end - - -function show(io::IO, mark::Mark) - @printf(io, "line %d, column %d", mark.line, mark.column) -end - - -# Where in the stream a particular token lies. -struct Span - start_mark::Mark - end_mark::Mark -end - - # YAML Tokens. # Each token must include at minimum member "span::Span". 
abstract type Token end From 60a1ece0ec2911a0b3fa427bc3c93e541b50d59c Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 22:02:52 +0900 Subject: [PATCH 02/27] Add iteration and indexing to `Span`. --- src/span.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/span.jl b/src/span.jl index 0be0017..7b566b5 100644 --- a/src/span.jl +++ b/src/span.jl @@ -1,5 +1,19 @@ +import Base: + iterate, eltype, length, getindex, + firstindex, lastindex, first, last + # Where in the stream a particular token lies. struct Span start_mark::Mark end_mark::Mark end + +iterate(span::Span, i::Real=1) = i > 2 ? nothing : (getfield(span, i), i + 1) +eltype(span::Span) = Mark +length(span::Span) = 2 +getindex(span::Span, i::Int) = getfield(span, i) +getindex(span::Span, i::Real) = getfield(span, convert(Int, i)) +firstindex(span::Span) = 1 +lastindex(span::Span) = 2 +first(span::Span) = span.start_mark +last(span::Span) = span.end_mark From f93dbb219504f5f922c5d8c8b7cea7993284ca27 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 22:08:11 +0900 Subject: [PATCH 03/27] Add methods to get start mark and end mark from tokens. --- src/tokens.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tokens.jl b/src/tokens.jl index a5e3d85..2f65f6e 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -1,7 +1,10 @@ # YAML Tokens. -# Each token must include at minimum member "span::Span". -abstract type Token end +abstract type Token + # span::Span +end +firstmark(token::Token) = first(token.span) +lastmark(token::Token) = last(token.span) # The '%YAML' directive. struct DirectiveToken <: Token From 70698b345419d63e1bb77645aea25bd72c717f56 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 22:15:24 +0900 Subject: [PATCH 04/27] Replace `token.span.start_mark` and `token.span.end_mark` to `firstmark(token)` and `lastmark(token)`. 
--- src/parser.jl | 100 +++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index ece2432..8f1f2e8 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -126,7 +126,7 @@ end function parse_stream_start(stream::EventStream) token = forward!(stream.input) :: StreamStartToken - event = StreamStartEvent(token.span.start_mark, token.span.end_mark, + event = StreamStartEvent(firstmark(token), lastmark(token), token.encoding) stream.state = parse_implicit_document_start event @@ -142,7 +142,7 @@ function parse_implicit_document_start(stream::EventStream) end if !(token isa Union{DirectiveToken, DocumentStartToken, StreamEndToken}) stream.tag_handles = DEFAULT_TAGS - event = DocumentStartEvent(token.span.start_mark, token.span.start_mark, + event = DocumentStartEvent(firstmark(token), firstmark(token), false) push!(stream.states, parse_document_end) @@ -170,14 +170,14 @@ function parse_document_start(stream::EventStream) # Parse explicit document. 
if !(token isa StreamEndToken) - start_mark = token.span.start_mark + start_mark = firstmark(token) version, tags = process_directives(stream) if !(peek(stream.input) isa DocumentStartToken) throw(ParserError(nothing, nothing, "expected '' but found $(typeof(token))")) end token = forward!(stream.input) - event = DocumentStartEvent(start_mark, token.span.end_mark, + event = DocumentStartEvent(start_mark, lastmark(token), true, version, tags) push!(stream.states, parse_document_end) stream.state = parse_document_content @@ -185,7 +185,7 @@ function parse_document_start(stream::EventStream) else # Parse the end of the stream token = forward!(stream.input) - event = StreamEndEvent(token.span.start_mark, token.span.end_mark) + event = StreamEndEvent(firstmark(token), lastmark(token)) @assert isempty(stream.states) @assert isempty(stream.marks) stream.state = nothing @@ -196,14 +196,14 @@ end function parse_document_end(stream::EventStream) token = peek(stream.input) - start_mark = end_mark = token.span.start_mark + start_mark = end_mark = firstmark(token) explicit = false if token isa DocumentEndToken forward!(stream.input) - end_mark = token.span.end_mark + end_mark = lastmark(token) explicit = true - stream.end_of_stream = StreamEndEvent(token.span.start_mark, - token.span.end_mark) + stream.end_of_stream = StreamEndEvent(firstmark(token), + lastmark(token)) end event = DocumentEndEvent(start_mark, end_mark, explicit) stream.state = parse_document_start @@ -240,12 +240,12 @@ end function _parse_node(token::AliasToken, stream::EventStream, block, indentless_sequence) forward!(stream.input) stream.state = pop!(stream.states) - return AliasEvent(token.span.start_mark, token.span.end_mark, token.value) + return AliasEvent(firstmark(token), lastmark(token), token.value) end function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) forward!(stream.input) - end_mark = token.span.end_mark + end_mark = lastmark(token) 
if (token.plain && tag === nothing) || tag == "!" implicit = true, false elseif tag === nothing @@ -259,14 +259,14 @@ function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark end function __parse_node(token::FlowSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - end_mark = token.span.end_mark + end_mark = lastmark(token) stream.state = parse_flow_sequence_first_entry SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, true) end function __parse_node(token::FlowMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - end_mark = token.span.end_mark + end_mark = lastmark(token) stream.state = parse_flow_mapping_first_key MappingStartEvent(start_mark, end_mark, anchor, tag, implicit, true) @@ -274,7 +274,7 @@ end function __parse_node(token::BlockSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing - end_mark = token.span.start_mark + end_mark = firstmark(token) stream.state = parse_block_sequence_first_entry SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, false) @@ -282,7 +282,7 @@ end function __parse_node(token::BlockMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing - end_mark = token.span.start_mark + end_mark = firstmark(token) stream.state = parse_block_mapping_first_key MappingStartEvent(start_mark, end_mark, anchor, tag, implicit, false) @@ -297,7 +297,7 @@ function __parse_node(token, stream::EventStream, block, start_mark, end_mark, a node = block ? 
"block" : "flow" throw(ParserError("while parsing a $(node) node", start_mark, "expected the node content, but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end @@ -307,20 +307,20 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) start_mark = end_mark = tag_mark = nothing if token isa AnchorToken forward!(stream.input) - start_mark = token.span.start_mark - end_mark = token.span.end_mark + start_mark = firstmark(token) + end_mark = lastmark(token) anchor = token.value token = peek(stream.input) if token isa TagToken forward!(stream.input) - tag_mark = token.span.start_mark - end_mark = token.span.end_mark + tag_mark = firstmark(token) + end_mark = lastmark(token) tag = token.value end elseif token isa TagToken forward!(stream.input) - start_mark = token.span.start_mark - end_mark = token.span.end_mark + start_mark = firstmark(token) + end_mark = lastmark(token) tag = token.value token = peek(stream.input) if token isa AnchorToken @@ -346,13 +346,13 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) token = peek(stream.input) if start_mark === nothing - start_mark = end_mark = token.span.start_mark + start_mark = end_mark = firstmark(token) end event = nothing implicit = tag === nothing || tag == "!" 
if indentless_sequence && token isa BlockEntryToken - end_mark = token.span.end_mark + end_mark = lastmark(token) stream.state = parse_indentless_sequence_entry event = SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, false) @@ -370,7 +370,7 @@ end function parse_block_sequence_first_entry(stream::EventStream) token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) + push!(stream.marks, firstmark(token)) parse_block_sequence_entry(stream) end @@ -384,20 +384,20 @@ function parse_block_sequence_entry(stream::EventStream) return parse_block_node(stream) else stream.state = parse_block_sequence_entry - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end if !(token isa BlockEndToken) throw(ParserError("while parsing a block collection", stream.marks[end], "expected , but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end forward!(stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end @@ -410,18 +410,18 @@ function parse_indentless_sequence_entry(stream::EventStream) return parse_block_node(stream) else stream.state = parse_indentless_sequence_entry - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end function parse_block_mapping_first_key(stream::EventStream) token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) + push!(stream.marks, firstmark(token)) parse_block_mapping_key(stream) end @@ -435,20 +435,20 @@ function parse_block_mapping_key(stream::EventStream) return parse_block_node_or_indentless_sequence(stream) else stream.state = parse_block_mapping_value - return 
process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end if !(token isa BlockEndToken) throw(ParserError("while parsing a block mapping", stream.marks[end], "expected , but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end forward!(stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + MappingEndEvent(firstmark(token), lastmark(token)) end @@ -461,18 +461,18 @@ function parse_block_mapping_value(stream::EventStream) parse_block_node_or_indentless_sequence(stream) else stream.state = parse_block_mapping_key - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_block_mapping_key - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end function parse_flow_sequence_first_entry(stream::EventStream) token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) + push!(stream.marks, firstmark(token)) parse_flow_sequence_entry(stream, true) end @@ -480,7 +480,7 @@ function _parse_flow_sequence_entry(token::FlowSequenceEndToken, stream::EventSt forward!(stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry=false) @@ -491,14 +491,14 @@ function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry throw(ParserError("while parsing a flow sequence", stream.marks[end], "expected ',' or ']', but got $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end token = peek(stream.input) if isa(token, KeyToken) stream.state = parse_flow_sequence_entry_mapping_key - MappingStartEvent(token.span.start_mark, token.span.end_mark, + 
MappingStartEvent(firstmark(token), lastmark(token), nothing, nothing, true, true) elseif isa(token, FlowSequenceEndToken) nothing @@ -520,7 +520,7 @@ function parse_flow_sequence_entry_mapping_key(stream::EventStream) parse_flow_node(stream) else stream.state = parse_flow_sequence_entry_mapping_value - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end end @@ -534,11 +534,11 @@ function parse_flow_sequence_entry_mapping_value(stream::EventStream) parse_flow_node(stream) else stream.state = parse_flow_sequence_entry_mapping_end - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_flow_sequence_entry_mapping_end - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end @@ -546,13 +546,13 @@ end function parse_flow_sequence_entry_mapping_end(stream::EventStream) stream.state = parse_flow_sequence_entry token = peek(stream.input) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + MappingEndEvent(firstmark(token), lastmark(token)) end function parse_flow_mapping_first_key(stream::EventStream) token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) + push!(stream.marks, firstmark(token)) parse_flow_mapping_key(stream, true) end @@ -567,7 +567,7 @@ function parse_flow_mapping_key(stream::EventStream, first_entry=false) throw(ParserError("while parsing a flow mapping", stream.marks[end], "expected ',' or '}', but got $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end @@ -579,7 +579,7 @@ function parse_flow_mapping_key(stream::EventStream, first_entry=false) return parse_flow_node(stream) else stream.state = parse_flow_mapping_value - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end elseif !(token isa FlowMappingEndToken) push!(stream.states, parse_flow_mapping_empty_value) 
@@ -590,7 +590,7 @@ function parse_flow_mapping_key(stream::EventStream, first_entry=false) forward!(stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + MappingEndEvent(firstmark(token), lastmark(token)) end @@ -603,11 +603,11 @@ function parse_flow_mapping_value(stream::EventStream) parse_flow_node(stream) else stream.state = parse_flow_mapping_key - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_flow_mapping_key - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end From b4cd9f1f950ba7b577528882857c1fa4b5e62a27 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 22:27:05 +0900 Subject: [PATCH 05/27] Fix existing bugs about getting marks from tokens. --- src/parser.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index 8f1f2e8..8044ad6 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -87,20 +87,20 @@ function process_directives(stream::EventStream) if stream.yaml_version !== nothing throw(ParserError(nothing, nothing, "found duplicate YAML directive", - token.start_mark)) + firstmark(token))) end major, minor = token.value if major != 1 throw(ParserError(nothing, nothing, "found incompatible YAML document (version 1.* is required)", - token.start_mark)) + firstmark(token))) end stream.yaml_version = token.value elseif token.name == "TAG" handle, prefix = token.value if haskey(stream.tag_handles, handle) throw(ParserError(nothing, nothing, - "duplicate tag handle $(handle)", token.start_mark)) + "duplicate tag handle $(handle)", firstmark(token))) end stream.tag_handles[handle] = prefix end @@ -325,7 +325,7 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) token = peek(stream.input) if token isa AnchorToken forward!(stream.input) - end_mark = 
token.end_mark + end_mark = lastmark(token) anchor = token.value end end From 6efb3d52aee5bdc963fa674856c68891aaf94dca Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 14 Jun 2024 22:30:35 +0900 Subject: [PATCH 06/27] Replace remained expressions. --- src/parser.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index 8044ad6..7ff4086 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -213,7 +213,7 @@ end function parse_document_content(stream::EventStream) if peek(stream.input) isa Union{DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken} - event = process_empty_scalar(stream, peek(stream.input).span.start_mark) + event = process_empty_scalar(stream, firstmark(peek(stream.input))) stream.state = pop!(stream.states) event else @@ -614,7 +614,7 @@ end function parse_flow_mapping_empty_value(stream::EventStream) stream.state = parse_flow_mapping_key - process_empty_scalar(stream, peek(stream.input).span.start_mark) + process_empty_scalar(stream, firstmark(peek(stream.input))) end From 1c9a97daf054c43cca3823c9ae243ba9a9496c40 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Sat, 15 Jun 2024 19:40:34 +0900 Subject: [PATCH 07/27] Refactoring of `scan_line_break`. * Performance improvement of `scan_line_break`. * Rename `scan_line_break` to `yaml_1_1_scan_line_break`. * Add `yaml_1_2_scan_line_break`. * Add better comments. * Add a TODO comment about possible bugs. --- src/scanner.jl | 108 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 28 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 9f59888..60b032b 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -757,31 +757,83 @@ end # If the stream is at a line break, advance past it. 
# -# Returns: -# '\r\n' : '\n' -# '\r' : '\n' -# '\n' : '\n' -# '\x85' : '\n' -# '\u2028' : '\u2028' -# '\u2029 : '\u2029' -# default : '' +# YAML 1.1 # -function scan_line_break(stream::TokenStream) - if in(peek(stream.input), "\r\n\u0085") - if prefix(stream.input, 2) == "\r\n" +# [22] b-line-feed ::= #xA /*LF*/ +# [23] b-carriage-return ::= #xD /*CR*/ +# [24] b-next-line ::= #x85 /*NEL*/ +# [25] b-line-separator ::= #x2028 /*LS*/ +# [26] b-paragraph-separator ::= #x2029 /*PS*/ +# [28] b-specific ::= b-line-separator | b-paragraph-separator +# [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line +# [30] b-as-line-feed ::= b-generic +# [31] b-normalized ::= b-as-line-feed | b-specific +# +# U+000D U+000A → U+000A +# U+000D → U+000A +# U+000A → U+000A +# U+0085 → U+000A +# U+2028 → U+2028 +# U+2029 → U+2029 +# otherwise → (empty) +# +function yaml_1_1_scan_line_break(stream::TokenStream)::String + c = peek(stream.input) + if c == '\u000d' + # TODO: + # This seems better for performance but gives errors and I don't know why. + # Perhaps, `prefx(stream.input, 2)` modifies `stream` and eventually escapes from an error. 
+ # if peek(stream.input, 1) == '\u000a' + # forwardchars!(stream, 2) + # else + # forwardchars!(stream) + # end + if prefix(stream.input, 2) == "\u000d\u000a" forwardchars!(stream, 2) else forwardchars!(stream) end - return "\n" - elseif in(peek(stream.input), "\u2028\u2029") - ch = peek(stream.input) + "\u000a" + elseif c == '\u000a' || c == '\u0085' forwardchars!(stream) - return ch + "\u000a" + elseif c == '\u2028' || c == '\u2029' + forwardchars!(stream) + string(c) + else + "" + end +end +# +# YAML 1.2 +# +# [24] b-line-feed ::= x0A +# [25] b-carriage-return ::= x0D +# [26] b-char ::= b-line-feed | b-carriage-return +# [27] nb-char ::= c-printable - b-char - c-byte-order-mark +# [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed +# +# U+000D U+000A → U+000A +# U+000D → U+000A +# U+000A → U+000A +# otherwise → (empty) +# +function yaml_1_2_scan_line_break(stream::TokenStream)::String + c = peek(stream.input) + if c == '\u000d' + if peek(stream.input, 1) == '\u000a' + forwardchars!(stream, 2) + else + forwardchars!(stream) + end + "\u000a" + elseif c == '\u000a' + forwardchars!(stream) + "\u000a" + else + "" end - return "" end - # Scan past whitespace to the next token. 
function scan_to_next_token(stream::TokenStream) @@ -798,7 +850,7 @@ function scan_to_next_token(stream::TokenStream) end end - if scan_line_break(stream) != "" + if yaml_1_1_scan_line_break(stream) != "" if stream.flow_level == 0 stream.allow_simple_key = true end @@ -948,7 +1000,7 @@ function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) "expected a comment or a line break, but found '$(peek(stream.input))'", get_mark(stream))) end - scan_line_break(stream) + yaml_1_1_scan_line_break(stream) end @@ -1067,7 +1119,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) end push!(chunks, prefix(stream.input, length)) forwardchars!(stream, length) - line_break = scan_line_break(stream) + line_break = yaml_1_1_scan_line_break(stream) breaks, end_mark = scan_block_scalar_breaks(stream, indent) if stream.column == indent && peek(stream.input) != '\0' if folded && line_break == "\n" && @@ -1113,7 +1165,7 @@ function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) get_mark(stream))) end - scan_line_break(stream) + yaml_1_1_scan_line_break(stream) end @@ -1166,7 +1218,7 @@ function scan_block_scalar_indentation(stream::TokenStream) end_mark = get_mark(stream) while in(peek(stream.input), " \r\n\u0085\u2028\u2029") if peek(stream.input) != ' ' - push!(chunks, scan_line_break(stream)) + push!(chunks, yaml_1_1_scan_line_break(stream)) end_mark = get_mark(stream) else forwardchars!(stream) @@ -1188,7 +1240,7 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) end while in(peek(stream.input), "\r\n\u0085\u2028\u2029") - push!(chunks, scan_line_break(stream)) + push!(chunks, yaml_1_1_scan_line_break(stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' forwardchars!(stream) @@ -1288,7 +1340,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) forwardchars!(stream, length) 
elseif in(c, "\r\n\u0085\u2028\u2029") - scan_line_break(stream) + yaml_1_1_scan_line_break(stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) else throw(ScannerError("while scanning a double-quoted scalar", @@ -1318,7 +1370,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected end of stream", get_mark(stream))) elseif in(c, "\r\n\u0085\u2028\u2029") - line_break = scan_line_break(stream) + line_break = yaml_1_1_scan_line_break(stream) breaks = scan_flow_scalar_breaks(stream, double, start_mark) if line_break != '\n' push!(chunks, line_break) @@ -1351,7 +1403,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, end if in(peek(stream.input), "\r\n\u0085\u2028\u2029") - push!(chunks, scan_line_break(stream)) + push!(chunks, yaml_1_1_scan_line_break(stream)) else return chunks end @@ -1435,7 +1487,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, forwardchars!(stream, length) c = peek(stream.input) if in(c, "\r\n\u0085\u2028\u2029") - line_break = scan_line_break(stream) + line_break = yaml_1_1_scan_line_break(stream) stream.allow_simple_key = true if peek(stream.input) == '\uFEFF' return Any[] @@ -1451,7 +1503,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, if peek(stream.input) == ' ' forwardchars!(stream) else - push!(breaks, scan_line_break(stream)) + push!(breaks, yaml_1_1_scan_line_break(stream)) if peek(stream.input) == '\uFEFF' return Any[] end From e9c582734ca7900c4e7d6156fa7012bbe12017f8 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 09:34:16 +0900 Subject: [PATCH 08/27] Rewrite `forwardchars!(::TokenStream, ::Integer)`. * Bug fix. * Change to use `yaml_1_1_` prefix for YAML 1.1's `forwardchars!`. * Add `yaml_1_2_forwardchars!` for YAML 1.2's `forwardchars!`. 
* Add some helper functions for `forwardchars!`: * `forwardchar_skip!` * `forwardchar_nobreak!` * `forwardchar_breakline!` --- src/scanner.jl | 207 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 140 insertions(+), 67 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 084a57e..640ef08 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -137,25 +137,98 @@ function get_mark(stream::TokenStream) Mark(stream.index, stream.line, stream.column) end +# ------------- +# forwardchars! +# ------------- -# Advance the stream by k characters. -function forwardchars!(stream::TokenStream, k::Integer=1) - for _ in 1:k +# Advance the stream by a chacater and the index. +function forwardchar_skip!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + nothing +end + +# Advance the stream by a chacater and the index and a column. +function forwardchar_nobreak!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column += 1 + nothing +end + +# Advance the stream by a chacater and the index and break a line. +function forwardchar_breakline!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column = 0 + stream.line += 1 + nothing +end + +# YAML 1.1 [22] b-line-feed ::= #xA /*LF*/ +# YAML 1.2 [24] b-line-feed ::= x0A +const b_line_feed = '\n' + +# YAML 1.1 [23] b-carriage-return ::= #xD /*CR*/ +# YAML 1.2 [25] b-carriage-return ::= x0D +const b_carriage_return = '\r' + +# YAML 1.1 [24] b-next-line ::= #x85 /*NEL*/ +const yaml_1_1_b_next_line = '\u85' + +# YAML 1.1 [25] b-line-separator ::= #x2028 /*LS*/ +const yaml_1_1_b_line_separator = '\u2028' + +# YAML 1.1 [26] b-paragraph-separator ::= #x2029 /*PS*/ +const yaml_1_1_b_paragraph_separator = '\u2029' + +# Advance the stream by `n` characters. 
+# YAML 1.1 [28] b-specific ::= b-line-separator | b-paragraph-separator +yaml_1_1_is_b_specific(c::Char) = c == yaml_1_1_b_line_separator || c == yaml_1_1_b_paragraph_separator +# YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line +# YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific +function yaml_1_1_forwardchars!(stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n c = peek(stream.input) - forward!(stream.input) - stream.index += 1 - if in(c, "\n\u0085\u2028\u2029") || - (c == '\r' && peek(stream.input) == '\n') - stream.column = 0 - stream.line += 1 + if c == b_carriage_return + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + forwardchar_skip!(stream) + i += 1 + end + elseif c == b_line_feed || c == yaml_1_1_b_next_line || yaml_1_1_is_b_specific(c) + forwardchar_breakline!(stream) + i += 1 else - stream.column += 1 + forwardchar_nobreak!(stream) + i += 1 end end - stream.index += k - nothing end +# Advance the stream by `n` characters. +# YAML 1.2 [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed +function yaml_1_2_forwardchars!(stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n + c = peek(stream.input) + if c == b_carriage_return + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + i += 1 + end + elseif c == b_line_feed + forwardchar_breakline!(stream) + i += 1 + else + forwardchar_nobreak!(stream) + i += 1 + end + end +end function need_more_tokens(stream::TokenStream) if stream.done @@ -450,7 +523,7 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke # Add DOCUMENT-START or DOCUMENT-END. 
start_mark = get_mark(stream) - forwardchars!(stream, 3) + yaml_1_1_forwardchars!(stream, 3) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -491,7 +564,7 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -519,7 +592,7 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -534,7 +607,7 @@ function fetch_flow_entry(stream::TokenStream) # Add FLOW-ENTRY. start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end @@ -570,7 +643,7 @@ function fetch_block_entry(stream::TokenStream) # Add BLOCK-ENTRY. start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) @@ -602,7 +675,7 @@ function fetch_key(stream::TokenStream) # Add KEY. start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end @@ -661,7 +734,7 @@ function fetch_value(stream::TokenStream) # Add VALUE. 
start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end @@ -771,14 +844,14 @@ end function scan_line_break(stream::TokenStream) if in(peek(stream.input), "\r\n\u0085") if prefix(stream.input, 2) == "\r\n" - forwardchars!(stream, 2) + yaml_1_1_forwardchars!(stream, 2) else - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end return "\n" elseif in(peek(stream.input), "\u2028\u2029") ch = peek(stream.input) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) return ch end return "" @@ -790,13 +863,13 @@ function scan_to_next_token(stream::TokenStream) while true # whitespace while peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end # comment if peek(stream.input) == '#' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end # line break @@ -814,7 +887,7 @@ end function scan_directive(stream::TokenStream) start_mark = get_mark(stream) - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) name = scan_directive_name(stream, start_mark) value = nothing @@ -831,7 +904,7 @@ function scan_directive(stream::TokenStream) end_mark = get_mark(stream) @warn """unknown directive name: "$name" at $end_mark. 
We ignore this.""" while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end @@ -855,7 +928,7 @@ function scan_directive_name(stream::TokenStream, start_mark::Mark) end value = prefix(stream.input, length) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) c = peek(stream.input) if !in(c, ":\0 \r\n\u0085\u2028\u2029") @@ -870,7 +943,7 @@ end function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' || peek(stream.input) == ':' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end major = scan_yaml_directive_number(stream, start_mark) @@ -879,7 +952,7 @@ function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) "expected '.' but found '$(peek(stream.input))'", get_mark(stream))) end - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) minor = scan_yaml_directive_number(stream, start_mark) if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, @@ -922,7 +995,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # --------------------------------------------------- # advance the stream by the length that has been read # --------------------------------------------------- - forwardchars!(stream, pos) + yaml_1_1_forwardchars!(stream, pos) # ----------------- # return the number # ----------------- @@ -932,7 +1005,7 @@ end function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end value = scan_tag_handle(stream, "directive", start_mark) @@ -947,7 +1020,7 @@ end function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end value = scan_tag_uri(stream, "directive", start_mark) @@ -962,12 +1035,12 @@ end function 
scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end if peek(stream.input) == '#' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") @@ -987,7 +1060,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} else name = "anchor" end - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) length = 0 c = peek(stream.input) while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' @@ -1001,7 +1074,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} get_mark(stream))) end value = prefix(stream.input, length) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$(peek(stream.input))'", @@ -1017,18 +1090,18 @@ function scan_tag(stream::TokenStream) c = peek(stream.input, 1) if c == '<' handle = nothing - forwardchars!(stream, 2) + yaml_1_1_forwardchars!(stream, 2) suffix = scan_tag_uri(stream, "tag", start_mark) if peek(stream.input) != '>' throw(ScannerError("while parsing a tag", start_mark, "expected '>', but found '$(peek(stream.input))'", get_mark(stream))) end - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") handle = nothing suffix = '!' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) else length = 1 use_handle = false @@ -1044,7 +1117,7 @@ function scan_tag(stream::TokenStream) handle = scan_tag_handle(stream, "tag", start_mark) else handle = "!" 
- forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end suffix = scan_tag_uri(stream, "tag", start_mark) end @@ -1069,7 +1142,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) start_mark = get_mark(stream) # Scan the header. - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) chomping, increment = scan_block_scalar_indicators(stream, start_mark) scan_block_scalar_ignored_line(stream, start_mark) @@ -1093,7 +1166,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) length += 1 end push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) line_break = scan_line_break(stream) breaks, end_mark = scan_block_scalar_breaks(stream, indent) if stream.column == indent && peek(stream.input) != '\0' @@ -1125,12 +1198,12 @@ end function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end if peek(stream.input) == '#' while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end @@ -1150,7 +1223,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) c = peek(stream.input) if in(c, "0123456789") increment = parse(Int, string(c)) @@ -1167,12 +1240,12 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) "expected indentation indicator in the range 1-9, but found 0", get_mark(stream))) end - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) c = peek(stream.input) if c == '+' || c == '-' comping = c == '+' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end @@ -1196,7 +1269,7 @@ function scan_block_scalar_indentation(stream::TokenStream) push!(chunks, scan_line_break(stream)) end_mark = get_mark(stream) else - 
forwardchars!(stream) + yaml_1_1_forwardchars!(stream) if stream.column > max_indent max_indent = stream.column end @@ -1211,14 +1284,14 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) chunks = Any[] end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end while in(peek(stream.input), "\r\n\u0085\u2028\u2029") push!(chunks, scan_line_break(stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end end @@ -1231,14 +1304,14 @@ function scan_flow_scalar(stream::TokenStream, style::Char) chunks = Any[] start_mark = get_mark(stream) q = peek(stream.input) # quote - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) while peek(stream.input) != q || peek(stream.input, 1) == q append!(chunks, scan_flow_scalar_spaces(stream, double, start_mark)) append!(chunks, scan_flow_scalar_non_spaces(stream, double, start_mark)) end - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) end_mark = get_mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end @@ -1282,13 +1355,13 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) end c = peek(stream.input) if !double && c == '\'' && peek(stream.input, 1) == '\'' push!(chunks, '\'') - forwardchars!(stream, 2) + yaml_1_1_forwardchars!(stream, 2) elseif (double && c == '\'') || (!double && in(c, "\"\\")) push!(chunks, c) forward!(stream.input) @@ -1313,7 +1386,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) elseif in(c, "\r\n\u0085\u2028\u2029") 
scan_line_break(stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) @@ -1338,7 +1411,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, length += 1 end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) c = peek(stream.input) if c == '\0' @@ -1425,7 +1498,7 @@ function scan_plain(stream::TokenStream) c = peek(stream.input) if stream.flow_level != 0 && c == ':' && !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", get_mark(stream))) end @@ -1437,7 +1510,7 @@ function scan_plain(stream::TokenStream) stream.allow_simple_key = true append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) end_mark = get_mark(stream) spaces = scan_plain_spaces(stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || @@ -1459,7 +1532,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) c = peek(stream.input) if in(c, "\r\n\u0085\u2028\u2029") line_break = scan_line_break(stream) @@ -1476,7 +1549,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, breaks = Any[] while in(peek(stream.input), " \r\n\u0085\u2028\u2029") if peek(stream.input) == ' ' - forwardchars!(stream) + yaml_1_1_forwardchars!(stream) else push!(breaks, scan_line_break(stream)) if peek(stream.input) == '\uFEFF' @@ -1518,7 +1591,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end if c != '!' 
- forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) throw(ScannerError("while scanning a $(name)", start_mark, "expected '!', but found '$(c)'", get_mark(stream))) @@ -1527,7 +1600,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end value = prefix(stream.input, length) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) value end @@ -1539,7 +1612,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) while is_ns_ascii_letter(c) || isdigit(c) || in(c, "-;/?:@&=+\$,_.!~*\'()[]%") if c == '%' push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) length = 0 push!(chunks, scan_uri_escapes(stream, name, start_mark)) else @@ -1550,7 +1623,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + yaml_1_1_forwardchars!(stream, length) length = 0 end @@ -1579,7 +1652,7 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) end end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) - forwardchars!(stream, 2) + yaml_1_1_forwardchars!(stream, 2) end string(bytes...) From a9d3d1a611fe635d9d0fdaf08dae07595791283b Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 10:47:23 +0900 Subject: [PATCH 09/27] Change to use performant and explicit function for whitespace checking. 
--- src/scanner.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 084a57e..c73222f 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -352,7 +352,7 @@ end # Checkers # -------- -const whitespace = "\0 \t\r\n\u0085\u2028\u2029" +yaml_1_1_is_whitespace(c::Char) = c == '\0' || c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\u85' || c == '\u2028' || c == '\u2029' function check_directive(stream::TokenStream) @@ -362,31 +362,31 @@ end function check_document_start(stream::TokenStream) stream.column == 0 && prefix(stream.input, 3) == "---" && - in(peek(stream.input, 3), whitespace) + yaml_1_1_is_whitespace(peek(stream.input, 3)) end function check_document_end(stream::TokenStream) stream.column == 0 && prefix(stream.input, 3) == "..." && - (in(peek(stream.input, 3), whitespace) || peek(stream.input, 3) === nothing) + (yaml_1_1_is_whitespace(peek(stream.input, 3)) || peek(stream.input, 3) === nothing) end function check_block_entry(stream::TokenStream) - in(peek(stream.input, 1), whitespace) + yaml_1_1_is_whitespace(peek(stream.input, 1)) end function check_key(stream::TokenStream) - stream.flow_level > 0 || in(peek(stream.input, 1), whitespace) + stream.flow_level > 0 || yaml_1_1_is_whitespace(peek(stream.input, 1)) end function check_value(stream::TokenStream) cnext = peek(stream.input, 1) - stream.flow_level > 0 || in(cnext, whitespace) || cnext === nothing + stream.flow_level > 0 || yaml_1_1_is_whitespace(cnext) || cnext === nothing end function check_plain(stream::TokenStream) !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029-?:,[]{}#&*!|>\'\"%@`\uFEFF") || - (!in(peek(stream.input, 1), whitespace) && + (!yaml_1_1_is_whitespace(peek(stream.input, 1)) && (peek(stream.input) == '-' || (stream.flow_level == 0 && in(peek(stream.input), "?:")))) end @@ -1411,10 +1411,10 @@ function scan_plain(stream::TokenStream) while true c = peek(stream.input, length) cnext = peek(stream.input, 
length + 1) - if in(c, whitespace) || + if yaml_1_1_is_whitespace(c) || c === nothing || (stream.flow_level == 0 && c == ':' && - (cnext === nothing || in(cnext, whitespace))) || + (cnext === nothing || yaml_1_1_is_whitespace(cnext))) || (stream.flow_level != 0 && in(c, ",:?[]{}")) break end From a56bdce45793a599e77b98eae2de52dc7e01cdb4 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 11:38:34 +0900 Subject: [PATCH 10/27] Use `yaml_1_1_is_whitespace` where not used. --- src/scanner.jl | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index c73222f..41134a2 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1002,9 +1002,10 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} end value = prefix(stream.input, length) forwardchars!(stream, length) - if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") + c = peek(stream.input) + if !(yaml_1_1_is_whitespace(c) || in(c, "?:,]}%@`")) throw(ScannerError("while scanning an $(name)", start_mark, - "expected an alphanumeric character, but found '$(peek(stream.input))'", + "expected an alphanumeric character, but found '$c'", get_mark(stream))) end end_mark = get_mark(stream) @@ -1025,7 +1026,7 @@ function scan_tag(stream::TokenStream) get_mark(stream))) end forwardchars!(stream) - elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") + elseif yaml_1_1_is_whitespace(c) handle = nothing suffix = '!' 
forwardchars!(stream) @@ -1277,8 +1278,10 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, chunks = Any[] while true length = 0 - while !in(peek(stream.input, length), "\'\"\\\0 \t\r\n\u0085\u2028\u2029") + c = peek(stream.input, length) + while !(in(c, "\'\"\\") || yaml_1_1_is_whitespace(c)) length += 1 + c = peek(stream.input, length) end if length > 0 push!(chunks, prefix(stream.input, length)) @@ -1366,8 +1369,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, chunks = Any[] while true pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") + if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected document seperator", get_mark(stream))) @@ -1423,8 +1425,10 @@ function scan_plain(stream::TokenStream) # It's not clear what we should do with ':' in the flow context. c = peek(stream.input) - if stream.flow_level != 0 && c == ':' && - !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") + if stream.flow_level != 0 && c == ':' && begin + cnext = peek(stream.input, length + 1) + !(yaml_1_1_is_whitespace(cnext) || in(cnext, ",[]{}")) + end forwardchars!(stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", get_mark(stream))) @@ -1468,8 +1472,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") + if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) return Any[] end @@ -1483,8 +1486,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." 
&& - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") + if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) return Any[] end end From 76a2aa57298ca835944d881a2ed8317a40c702d1 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 15:21:07 +0900 Subject: [PATCH 11/27] Change `forward!(::BufferedInput, ::Integer)` to explicitly return `nothing` --- src/buffered_input.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/buffered_input.jl b/src/buffered_input.jl index dfa7b5a..39ca0d7 100644 --- a/src/buffered_input.jl +++ b/src/buffered_input.jl @@ -70,6 +70,7 @@ function forward!(bi::BufferedInput, n::Integer=1) n -= 1 end end + nothing end # Ugly hack to allow peeking of `StringDecoder`s From aa04daecaa261e51ca78b993ee6e9363cd51183f Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 17:12:53 +0900 Subject: [PATCH 12/27] Change `_fill` and `__fill` to better implementation and rename to `buffer!`. --- src/buffered_input.jl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/buffered_input.jl b/src/buffered_input.jl index dfa7b5a..af1ec0d 100644 --- a/src/buffered_input.jl +++ b/src/buffered_input.jl @@ -15,29 +15,26 @@ mutable struct BufferedInput end end - -# Read and buffer n more characters -function __fill(bi::BufferedInput, bi_input::IO, n::Integer) - for _ in 1:n - c = eof(bi_input) ? '\0' : read(bi_input, Char) - i = bi.offset + bi.avail + 1 +# Read and buffer `n` more characters +function buffer!(bi::BufferedInput, n::Integer)::Nothing + for i in bi.offset + bi.avail .+ (1:n) + c = eof(bi.input) ? '\0' : read(bi.input, Char) if i ≤ length(bi.buffer) bi.buffer[i] = c else push!(bi.buffer, c) end - bi.avail += 1 end + bi.avail += n + nothing end -_fill(bi::BufferedInput, n::Integer) = __fill(bi, bi.input, n) - # Peek the character in the i-th position relative to the current position. 
# (0-based) function peek(bi::BufferedInput, i::Integer=0) i1 = i + 1 if bi.avail < i1 - _fill(bi, i1 - bi.avail) + buffer!(bi, i1 - bi.avail) end bi.buffer[bi.offset + i1] end @@ -48,7 +45,7 @@ end function prefix(bi::BufferedInput, n::Integer=1) n1 = n + 1 if bi.avail < n1 - _fill(bi, n1 - bi.avail) + buffer!(bi, n1 - bi.avail) end String(bi.buffer[bi.offset .+ (1:n)]) end From c4ef02d181ffa3cd68d1662b7e8ea6bd60cc7531 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Mon, 17 Jun 2024 17:24:43 +0900 Subject: [PATCH 13/27] Bug fix of `prefix(::BufferedInput, ::Integer)`. Change to not overbuffer. This bug fix breaks the test `windows_newlines` but I think the test is incorrect. --- src/buffered_input.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/buffered_input.jl b/src/buffered_input.jl index af1ec0d..d48c2cb 100644 --- a/src/buffered_input.jl +++ b/src/buffered_input.jl @@ -42,11 +42,8 @@ end # Return the string formed from the first n characters from the current position # of the stream. -function prefix(bi::BufferedInput, n::Integer=1) - n1 = n + 1 - if bi.avail < n1 - buffer!(bi, n1 - bi.avail) - end +function prefix(bi::BufferedInput, n::Integer=1)::String + bi.avail < n && buffer!(bi, n - bi.avail) String(bi.buffer[bi.offset .+ (1:n)]) end From 5d6632c71e35d1ae75f81aaeae1667b74032624a Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Tue, 18 Jun 2024 13:31:45 +0900 Subject: [PATCH 14/27] Make it explicit that scalar addition then vector addition. --- src/buffered_input.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/buffered_input.jl b/src/buffered_input.jl index af1ec0d..41d6ca4 100644 --- a/src/buffered_input.jl +++ b/src/buffered_input.jl @@ -17,7 +17,7 @@ end # Read and buffer `n` more characters function buffer!(bi::BufferedInput, n::Integer)::Nothing - for i in bi.offset + bi.avail .+ (1:n) + for i in (bi.offset + bi.avail) .+ (1:n) c = eof(bi.input) ? 
'\0' : read(bi.input, Char) if i ≤ length(bi.buffer) bi.buffer[i] = c From 6d7dd03dc83151118cb682d8eadf7b751b1adb27 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Tue, 18 Jun 2024 18:49:03 +0900 Subject: [PATCH 15/27] Remove `equivalent` and change to `isequal` and remove chomping. --- test/runtests.jl | 83 ++++++++---------------------------------------- 1 file changed, 14 insertions(+), 69 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 335c70e..1d9babe 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -61,61 +61,6 @@ const test_write_ignored = [ "multi-constructor" ] - -function equivalent(xs::AbstractDict, ys::AbstractDict) - if Set(collect(keys(xs))) != Set(collect(keys(ys))) - @info "Not equivalent" Set(collect(keys(xs))) Set(collect(keys(ys))) - return false - end - - for k in keys(xs) - if !equivalent(xs[k], ys[k]) - @info "Not equivalent" xs[k] ys[k] - return false - end - end - - true -end - - -function equivalent(xs::AbstractArray, ys::AbstractArray) - if length(xs) != length(ys) - @info "Not equivalent" length(xs) length(ys) - return false - end - - for (x, y) in zip(xs, ys) - if !equivalent(x, y) - @info "Not equivalent" x y - return false - end - end - - true -end - - -function equivalent(x::Float64, y::Float64) - isnan(x) && isnan(y) ? 
true : x == y -end - - -function equivalent(x::AbstractString, y::AbstractString) - while endswith(x, "\n") - x = x[1:end-1] # trailing newline characters are ambiguous - end - while endswith(y, "\n") - y = y[1:end-1] - end - x == y -end - -function equivalent(x, y) - x == y -end - - # test custom tags function construct_type_map(t::Symbol, constructor::YAML.Constructor, node::YAML.Node) @@ -166,7 +111,7 @@ end const testdir = dirname(@__FILE__) @testset for test in tests yamlString = open(joinpath(testdir, string(test, ".data"))) do f - chomp(read(f, String)) + read(f, String) end expected = evalfile(joinpath(testdir, string(test, ".expected"))) @@ -176,14 +121,14 @@ const testdir = dirname(@__FILE__) joinpath(testdir, string(test, ".data")), TestConstructor() ) - equivalent(data, expected) + isequal(data, expected) end @test begin dictData = YAML.load_file( joinpath(testdir, string(test, ".data")), more_constructors, multi_constructors ) - equivalent(dictData, expected) + isequal(dictData, expected) end end @@ -193,7 +138,7 @@ const testdir = dirname(@__FILE__) yamlString, TestConstructor() ) - equivalent(data, expected) + isequal(data, expected) end @test begin @@ -201,7 +146,7 @@ const testdir = dirname(@__FILE__) yamlString, more_constructors, multi_constructors ) - equivalent(dictData, expected) + isequal(dictData, expected) end end @@ -211,7 +156,7 @@ const testdir = dirname(@__FILE__) joinpath(testdir, string(test, ".data")), TestConstructor() ) - equivalent(first(data), expected) + isequal(first(data), expected) end @test begin @@ -219,7 +164,7 @@ const testdir = dirname(@__FILE__) joinpath(testdir, string(test, ".data")), more_constructors, multi_constructors ) - equivalent(first(dictData), expected) + isequal(first(dictData), expected) end end @@ -229,7 +174,7 @@ const testdir = dirname(@__FILE__) yamlString, TestConstructor() ) - equivalent(first(data), expected) + isequal(first(data), expected) end @test begin @@ -237,7 +182,7 @@ const testdir = 
dirname(@__FILE__) yamlString, more_constructors, multi_constructors ) - equivalent(first(dictData), expected) + isequal(first(dictData), expected) end end @@ -249,7 +194,7 @@ const testdir = dirname(@__FILE__) joinpath(testdir, string(test, ".data")), more_constructors ) - equivalent(write_and_load(data), expected) + isequal(write_and_load(data), expected) end end else @@ -282,11 +227,11 @@ test: 2 test: 3 """) (val, state) = iterate(iterable) - @test equivalent(val, Dict("test" => 1)) + @test isequal(val, Dict("test" => 1)) (val, state) = iterate(iterable, state) - @test equivalent(val, Dict("test" => 2)) + @test isequal(val, Dict("test" => 2)) (val, state) = iterate(iterable, state) - @test equivalent(val, Dict("test" => 3)) + @test isequal(val, Dict("test" => 3)) @test iterate(iterable, state) === nothing end @@ -370,7 +315,7 @@ end expected = Dict{Any,Any}("Test" => Dict{Any,Any}("test2"=>["test1", "test2"],"test1"=>"data")) - @test equivalent(YAML.load(yamlString, MySafeConstructor()), expected) + @test isequal(YAML.load(yamlString, MySafeConstructor()), expected) @test_throws YAML.ConstructorError YAML.load( yamlString, MyReallySafeConstructor() From acfb2b3fbee8747d7dd4a9719a62af85a6422a08 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Thu, 20 Jun 2024 09:58:24 +0900 Subject: [PATCH 16/27] Remove methods for `Span`. --- src/span.jl | 14 -------------- src/tokens.jl | 4 ++-- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/span.jl b/src/span.jl index 7b566b5..0be0017 100644 --- a/src/span.jl +++ b/src/span.jl @@ -1,19 +1,5 @@ -import Base: - iterate, eltype, length, getindex, - firstindex, lastindex, first, last - # Where in the stream a particular token lies. struct Span start_mark::Mark end_mark::Mark end - -iterate(span::Span, i::Real=1) = i > 2 ? 
nothing : (getfield(span, i), i + 1) -eltype(span::Span) = Mark -length(span::Span) = 2 -getindex(span::Span, i::Int) = getfield(span, i) -getindex(span::Span, i::Real) = getfield(span, convert(Int, i)) -firstindex(span::Span) = 1 -lastindex(span::Span) = 2 -first(span::Span) = span.start_mark -last(span::Span) = span.end_mark diff --git a/src/tokens.jl b/src/tokens.jl index c415ad9..3f0acc0 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -3,8 +3,8 @@ abstract type Token # span::Span end -firstmark(token::Token) = first(token.span) -lastmark(token::Token) = last(token.span) +firstmark(token::Token) = token.span.start_mark +lastmark(token::Token) = token.span.end_mark # The '%YAML' directive. struct DirectiveToken <: Token From 73a52ff68748966f4e4dc318979b72dfe6415299 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Thu, 20 Jun 2024 12:35:04 +0900 Subject: [PATCH 17/27] Add objects to represent YAML versions. Here we use abstract type & subtyping because it's a common traits pattern in Julia. We do not need to export these objects because we can use strings for versions in user-facing functions like: ```julia function load(str::AbstractString; version::YAMLVersion) # ... end function load(str::AbstractString; version::AbstractString) version == "1.1" ? load(str, version=YAMLV1_1()) : version == "1.2" ? 
load(str, version=YAMLV1_2()) : throw(ErrorException()) end load(str, version="1.1") ``` --- src/YAML.jl | 1 + src/versions.jl | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 src/versions.jl diff --git a/src/YAML.jl b/src/YAML.jl index 477239f..31a0274 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -27,6 +27,7 @@ using Dates using Printf using StringEncodings +include("versions.jl") include("queue.jl") include("buffered_input.jl") include("tokens.jl") diff --git a/src/versions.jl b/src/versions.jl new file mode 100644 index 0000000..30b51c8 --- /dev/null +++ b/src/versions.jl @@ -0,0 +1,25 @@ +""" + YAMLVersion + +A type used for controlling the YAML version. + +Planned to be supported versions are: + +- [`YAMLV1_1`](@ref): YAML version 1.1 +- [`YAMLV1_2`](@ref): YAML version 1.2 +""" +abstract type YAMLVersion end + +""" + YAMLV1_1 + +A singleton type for YAML version 1.1. +""" +struct YAMLV1_1 <: YAMLVersion end + +""" + YAMLV1_2 + +A singleton type for YAML version 1.2. +""" +struct YAMLV1_2 <: YAMLVersion end From 3dbeadfe0e759414f460d0cf6b9da7f16dd55fb6 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 15:22:08 +0900 Subject: [PATCH 18/27] Use YAML version traits for `b-char`. 
--- src/scanner.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index d1d327f..eb784c6 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1,5 +1,5 @@ # YAML 1.1 [27] b-char ::= b-line-feed | b-carriage-return | b-next-line | b-line-separator | b-paragraph-separator -yaml_1_1_is_b_char(c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == '\u2028' || c == '\u2029' +is_b_char(::YAMLV1_1, c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == '\u2028' || c == '\u2029' # YAML 1.1 [41] ns-ascii-letter ::= [#x41-#x5A] /*A-Z*/ | [#61-#x7A] /*a-z*/ # YAML 1.2 [37] ns-ascii-letter ::= [x41-x5A] | [x61-x7A] # A-Z a-z @@ -1217,7 +1217,7 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) forwardchars!(stream) end - while yaml_1_1_is_b_char(peek(stream.input)) + while is_b_char(YAMLV1_1(), peek(stream.input)) push!(chunks, scan_line_break(stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' @@ -1317,7 +1317,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) forwardchars!(stream, length) - elseif yaml_1_1_is_b_char(c) + elseif is_b_char(YAMLV1_1(), c) scan_line_break(stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) else @@ -1347,7 +1347,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, if c == '\0' throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected end of stream", get_mark(stream))) - elseif yaml_1_1_is_b_char(c) + elseif is_b_char(YAMLV1_1(), c) line_break = scan_line_break(stream) breaks = scan_flow_scalar_breaks(stream, double, start_mark) if line_break != '\n' @@ -1380,7 +1380,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, forward!(stream.input) end - if yaml_1_1_is_b_char(peek(stream.input)) + if is_b_char(YAMLV1_1(), peek(stream.input)) 
push!(chunks, scan_line_break(stream)) else return chunks @@ -1464,7 +1464,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, whitespaces = prefix(stream.input, length) forwardchars!(stream, length) c = peek(stream.input) - if yaml_1_1_is_b_char(c) + if is_b_char(YAMLV1_1(), c) line_break = scan_line_break(stream) stream.allow_simple_key = true if peek(stream.input) == '\uFEFF' From 6738103d9ab2ac4b6673acd6cb22945207ee491a Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 15:30:44 +0900 Subject: [PATCH 19/27] Use better implementation because the document iterator bug has been fixed. --- src/scanner.jl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 840dac2..49aa0ae 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -785,15 +785,7 @@ end function yaml_1_1_scan_line_break(stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' - # TODO: - # This seems better for performance but gives errors and I don't know why. - # Perhaps, `prefx(stream.input, 2)` modifies `stream` and eventually escapes from an error. - # if peek(stream.input, 1) == '\u000a' - # forwardchars!(stream, 2) - # else - # forwardchars!(stream) - # end - if prefix(stream.input, 2) == "\u000d\u000a" + if peek(stream.input, 1) == '\u000a' forwardchars!(stream, 2) else forwardchars!(stream) From 854203915f260e6c7c7a5d7e62a17c9262de907f Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 15:33:03 +0900 Subject: [PATCH 20/27] Use YAML version traits for `scan_line_break`. 
--- src/scanner.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 49aa0ae..e3ef689 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -782,7 +782,7 @@ end # U+2029 → U+2029 # otherwise → (empty) # -function yaml_1_1_scan_line_break(stream::TokenStream)::String +function scan_line_break(::YAMLV1_1, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' @@ -815,7 +815,7 @@ end # U+000A → U+000A # otherwise → (empty) # -function yaml_1_2_scan_line_break(stream::TokenStream)::String +function scan_line_break(::YAMLV1_2, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' @@ -847,7 +847,7 @@ function scan_to_next_token(stream::TokenStream) end end # line break - if yaml_1_1_scan_line_break(stream) != "" + if scan_line_break(YAMLV1_1(), stream) != "" if stream.flow_level == 0 stream.allow_simple_key = true end @@ -1022,7 +1022,7 @@ function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) "expected a comment or a line break, but found '$(peek(stream.input))'", get_mark(stream))) end - yaml_1_1_scan_line_break(stream) + scan_line_break(YAMLV1_1(), stream) end @@ -1141,7 +1141,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) end push!(chunks, prefix(stream.input, length)) forwardchars!(stream, length) - line_break = yaml_1_1_scan_line_break(stream) + line_break = scan_line_break(YAMLV1_1(), stream) breaks, end_mark = scan_block_scalar_breaks(stream, indent) if stream.column == indent && peek(stream.input) != '\0' if folded && line_break == "\n" && @@ -1187,7 +1187,7 @@ function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) get_mark(stream))) end - yaml_1_1_scan_line_break(stream) + scan_line_break(YAMLV1_1(), stream) end @@ -1240,7 +1240,7 @@ function scan_block_scalar_indentation(stream::TokenStream) end_mark = 
get_mark(stream) while in(peek(stream.input), " \r\n\u0085\u2028\u2029") if peek(stream.input) != ' ' - push!(chunks, yaml_1_1_scan_line_break(stream)) + push!(chunks, scan_line_break(YAMLV1_1(), stream)) end_mark = get_mark(stream) else forwardchars!(stream) @@ -1262,7 +1262,7 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) end while is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, yaml_1_1_scan_line_break(stream)) + push!(chunks, scan_line_break(YAMLV1_1(), stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' forwardchars!(stream) @@ -1362,7 +1362,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) forwardchars!(stream, length) elseif is_b_char(YAMLV1_1(), c) - yaml_1_1_scan_line_break(stream) + scan_line_break(YAMLV1_1(), stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) else throw(ScannerError("while scanning a double-quoted scalar", @@ -1392,7 +1392,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected end of stream", get_mark(stream))) elseif is_b_char(YAMLV1_1(), c) - line_break = yaml_1_1_scan_line_break(stream) + line_break = scan_line_break(YAMLV1_1(), stream) breaks = scan_flow_scalar_breaks(stream, double, start_mark) if line_break != '\n' push!(chunks, line_break) @@ -1425,7 +1425,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, end if is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, yaml_1_1_scan_line_break(stream)) + push!(chunks, scan_line_break(YAMLV1_1(), stream)) else return chunks end @@ -1509,7 +1509,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, forwardchars!(stream, length) c = peek(stream.input) if is_b_char(YAMLV1_1(), c) - line_break = yaml_1_1_scan_line_break(stream) + line_break = 
scan_line_break(YAMLV1_1(), stream) stream.allow_simple_key = true if peek(stream.input) == '\uFEFF' return Any[] @@ -1525,7 +1525,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, if peek(stream.input) == ' ' forwardchars!(stream) else - push!(breaks, yaml_1_1_scan_line_break(stream)) + push!(breaks, scan_line_break(YAMLV1_1(), stream)) if peek(stream.input) == '\uFEFF' return Any[] end From 1f329f2289dd14d8ade7162201a8ffd83c7c5855 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 15:44:35 +0900 Subject: [PATCH 21/27] Use YAML version traits for `is_whitespace` and move its definition to be sorted. --- src/scanner.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 73b5be8..bc12fd2 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -5,6 +5,8 @@ is_b_char(::YAMLV1_1, c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == ' # YAML 1.2 [37] ns-ascii-letter ::= [x41-x5A] | [x61-x7A] # A-Z a-z is_ns_ascii_letter(c::Char) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' +is_whitespace(::YAMLV1_1, c::Char) = c == '\0' || c == ' ' || c == '\t' || is_b_char(YAMLV1_1(), c) + struct SimpleKey token_number::UInt64 required::Bool @@ -355,8 +357,6 @@ end # Checkers # -------- -yaml_1_1_is_whitespace(c::Char) = c == '\0' || c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\u85' || c == '\u2028' || c == '\u2029' - function check_directive(stream::TokenStream) stream.column == 0 @@ -365,31 +365,31 @@ end function check_document_start(stream::TokenStream) stream.column == 0 && prefix(stream.input, 3) == "---" && - yaml_1_1_is_whitespace(peek(stream.input, 3)) + is_whitespace(YAMLV1_1(), peek(stream.input, 3)) end function check_document_end(stream::TokenStream) stream.column == 0 && prefix(stream.input, 3) == "..." 
&& - (yaml_1_1_is_whitespace(peek(stream.input, 3)) || peek(stream.input, 3) === nothing) + (is_whitespace(YAMLV1_1(), peek(stream.input, 3)) || peek(stream.input, 3) === nothing) end function check_block_entry(stream::TokenStream) - yaml_1_1_is_whitespace(peek(stream.input, 1)) + is_whitespace(YAMLV1_1(), peek(stream.input, 1)) end function check_key(stream::TokenStream) - stream.flow_level > 0 || yaml_1_1_is_whitespace(peek(stream.input, 1)) + stream.flow_level > 0 || is_whitespace(YAMLV1_1(), peek(stream.input, 1)) end function check_value(stream::TokenStream) cnext = peek(stream.input, 1) - stream.flow_level > 0 || yaml_1_1_is_whitespace(cnext) || cnext === nothing + stream.flow_level > 0 || is_whitespace(YAMLV1_1(), cnext) || cnext === nothing end function check_plain(stream::TokenStream) !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029-?:,[]{}#&*!|>\'\"%@`\uFEFF") || - (!yaml_1_1_is_whitespace(peek(stream.input, 1)) && + (!is_whitespace(YAMLV1_1(), peek(stream.input, 1)) && (peek(stream.input) == '-' || (stream.flow_level == 0 && in(peek(stream.input), "?:")))) end @@ -1414,10 +1414,10 @@ function scan_plain(stream::TokenStream) while true c = peek(stream.input, length) cnext = peek(stream.input, length + 1) - if yaml_1_1_is_whitespace(c) || + if is_whitespace(YAMLV1_1(), c) || c === nothing || (stream.flow_level == 0 && c == ':' && - (cnext === nothing || yaml_1_1_is_whitespace(cnext))) || + (cnext === nothing || is_whitespace(YAMLV1_1(), cnext))) || (stream.flow_level != 0 && in(c, ",:?[]{}")) break end From 78c04b36c0d26b52dd95d4084855f90b3e2cd98b Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 15:53:14 +0900 Subject: [PATCH 22/27] Change remaining `yaml_1_1_is_whitespace` calls to use YAML version traits.
--- src/scanner.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 97d476b..089ae55 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1006,7 +1006,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} value = prefix(stream.input, length) forwardchars!(stream, length) c = peek(stream.input) - if !(yaml_1_1_is_whitespace(c) || in(c, "?:,]}%@`")) + if !(is_whitespace(YAMLV1_1(), c) || in(c, "?:,]}%@`")) throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$c'", get_mark(stream))) @@ -1029,7 +1029,7 @@ function scan_tag(stream::TokenStream) get_mark(stream))) end forwardchars!(stream) - elseif yaml_1_1_is_whitespace(c) + elseif is_whitespace(YAMLV1_1(), c) handle = nothing suffix = '!' forwardchars!(stream) @@ -1282,7 +1282,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, while true length = 0 c = peek(stream.input, length) - while !(in(c, "\'\"\\") || yaml_1_1_is_whitespace(c)) + while !(in(c, "\'\"\\") || is_whitespace(YAMLV1_1(), c)) length += 1 c = peek(stream.input, length) end @@ -1372,7 +1372,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, chunks = Any[] while true pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) + if pref == "---" || pref == "..." 
&& is_whitespace(YAMLV1_1(), peek(stream.input, 3)) throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected document seperator", get_mark(stream))) @@ -1430,7 +1430,7 @@ function scan_plain(stream::TokenStream) c = peek(stream.input) if stream.flow_level != 0 && c == ':' && begin cnext = peek(stream.input, length + 1) - !(yaml_1_1_is_whitespace(cnext) || in(cnext, ",[]{}")) + !(is_whitespace(YAMLV1_1(), cnext) || in(cnext, ",[]{}")) end forwardchars!(stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, @@ -1475,7 +1475,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) + if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3)) return Any[] end @@ -1489,7 +1489,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && yaml_1_1_is_whitespace(peek(stream.input, 3)) + if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3)) return Any[] end end From be9d213bd28b883ac69f06a2278a242e709d1a67 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 16:28:11 +0900 Subject: [PATCH 23/27] Use YAML version traits for `forwardchars!`. 
--- src/scanner.jl | 114 ++++++++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index b665f26..40d9fc2 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -190,7 +190,7 @@ const yaml_1_1_b_paragraph_separator = '\u2029' yaml_1_1_is_b_specific(c::Char) = c == yaml_1_1_b_line_separator || c == yaml_1_1_b_paragraph_separator # YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line # YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific -function yaml_1_1_forwardchars!(stream::TokenStream, n::Integer=1) +function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) i = 1 while i ≤ n c = peek(stream.input) @@ -213,7 +213,7 @@ end # Advance the stream by `n` characters. # YAML 1.2 [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed -function yaml_1_2_forwardchars!(stream::TokenStream, n::Integer=1) +function forwardchars!(::YAMLV1_2, stream::TokenStream, n::Integer=1) i = 1 while i ≤ n c = peek(stream.input) @@ -526,7 +526,7 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke # Add DOCUMENT-START or DOCUMENT-END. start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream, 3) + forwardchars!(YAMLV1_1(), stream, 3) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -567,7 +567,7 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -595,7 +595,7 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. 
start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -610,7 +610,7 @@ function fetch_flow_entry(stream::TokenStream) # Add FLOW-ENTRY. start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end @@ -646,7 +646,7 @@ function fetch_block_entry(stream::TokenStream) # Add BLOCK-ENTRY. start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) @@ -678,7 +678,7 @@ function fetch_key(stream::TokenStream) # Add KEY. start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end @@ -737,7 +737,7 @@ function fetch_value(stream::TokenStream) # Add VALUE. 
start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end @@ -847,14 +847,14 @@ end function scan_line_break(stream::TokenStream) if in(peek(stream.input), "\r\n\u0085") if prefix(stream.input, 2) == "\r\n" - yaml_1_1_forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) else - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end return "\n" elseif in(peek(stream.input), "\u2028\u2029") ch = peek(stream.input) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) return ch end return "" @@ -866,13 +866,13 @@ function scan_to_next_token(stream::TokenStream) while true # whitespace while peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end # comment if peek(stream.input) == '#' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end # line break @@ -890,7 +890,7 @@ end function scan_directive(stream::TokenStream) start_mark = get_mark(stream) - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) name = scan_directive_name(stream, start_mark) value = nothing @@ -907,7 +907,7 @@ function scan_directive(stream::TokenStream) end_mark = get_mark(stream) @warn """unknown directive name: "$name" at $end_mark. 
We ignore this.""" while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -931,7 +931,7 @@ function scan_directive_name(stream::TokenStream, start_mark::Mark) end value = prefix(stream.input, length) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if !in(c, ":\0 \r\n\u0085\u2028\u2029") @@ -946,7 +946,7 @@ end function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' || peek(stream.input) == ':' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end major = scan_yaml_directive_number(stream, start_mark) @@ -955,7 +955,7 @@ function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) "expected '.' but found '$(peek(stream.input))'", get_mark(stream))) end - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) minor = scan_yaml_directive_number(stream, start_mark) if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, @@ -998,7 +998,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # --------------------------------------------------- # advance the stream by the length that has been read # --------------------------------------------------- - yaml_1_1_forwardchars!(stream, pos) + forwardchars!(YAMLV1_1(), stream, pos) # ----------------- # return the number # ----------------- @@ -1008,7 +1008,7 @@ end function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end value = scan_tag_handle(stream, "directive", start_mark) @@ -1023,7 +1023,7 @@ end function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end value 
= scan_tag_uri(stream, "directive", start_mark) @@ -1038,12 +1038,12 @@ end function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end if peek(stream.input) == '#' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") @@ -1063,7 +1063,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} else name = "anchor" end - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) length = 0 c = peek(stream.input) while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' @@ -1077,7 +1077,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} get_mark(stream))) end value = prefix(stream.input, length) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$(peek(stream.input))'", @@ -1093,18 +1093,18 @@ function scan_tag(stream::TokenStream) c = peek(stream.input, 1) if c == '<' handle = nothing - yaml_1_1_forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) suffix = scan_tag_uri(stream, "tag", start_mark) if peek(stream.input) != '>' throw(ScannerError("while parsing a tag", start_mark, "expected '>', but found '$(peek(stream.input))'", get_mark(stream))) end - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") handle = nothing suffix = '!' 
- yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) else length = 1 use_handle = false @@ -1120,7 +1120,7 @@ function scan_tag(stream::TokenStream) handle = scan_tag_handle(stream, "tag", start_mark) else handle = "!" - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end suffix = scan_tag_uri(stream, "tag", start_mark) end @@ -1145,7 +1145,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) start_mark = get_mark(stream) # Scan the header. - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) chomping, increment = scan_block_scalar_indicators(stream, start_mark) scan_block_scalar_ignored_line(stream, start_mark) @@ -1169,7 +1169,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) length += 1 end push!(chunks, prefix(stream.input, length)) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) line_break = scan_line_break(stream) breaks, end_mark = scan_block_scalar_breaks(stream, indent) if stream.column == indent && peek(stream.input) != '\0' @@ -1201,12 +1201,12 @@ end function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end if peek(stream.input) == '#' while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -1226,7 +1226,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) c = peek(stream.input) if in(c, "0123456789") increment = parse(Int, string(c)) @@ -1243,12 +1243,12 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) "expected indentation indicator in the range 1-9, but found 0", get_mark(stream))) end - yaml_1_1_forwardchars!(stream) + 
forwardchars!(YAMLV1_1(), stream) c = peek(stream.input) if c == '+' || c == '-' comping = c == '+' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -1272,7 +1272,7 @@ function scan_block_scalar_indentation(stream::TokenStream) push!(chunks, scan_line_break(stream)) end_mark = get_mark(stream) else - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) if stream.column > max_indent max_indent = stream.column end @@ -1287,14 +1287,14 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) chunks = Any[] end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end while is_b_char(YAMLV1_1(), peek(stream.input)) push!(chunks, scan_line_break(stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -1307,14 +1307,14 @@ function scan_flow_scalar(stream::TokenStream, style::Char) chunks = Any[] start_mark = get_mark(stream) q = peek(stream.input) # quote - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while peek(stream.input) != q || peek(stream.input, 1) == q append!(chunks, scan_flow_scalar_spaces(stream, double, start_mark)) append!(chunks, scan_flow_scalar_non_spaces(stream, double, start_mark)) end - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end @@ -1358,13 +1358,13 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end if length > 0 push!(chunks, prefix(stream.input, length)) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) end c = peek(stream.input) if !double && c == '\'' && peek(stream.input, 1) == '\'' push!(chunks, '\'') - yaml_1_1_forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 
2) elseif (double && c == '\'') || (!double && in(c, "\"\\")) push!(chunks, c) forward!(stream.input) @@ -1389,7 +1389,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) elseif is_b_char(YAMLV1_1(), c) scan_line_break(stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) @@ -1414,7 +1414,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, length += 1 end whitespaces = prefix(stream.input, length) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if c == '\0' @@ -1501,7 +1501,7 @@ function scan_plain(stream::TokenStream) c = peek(stream.input) if stream.flow_level != 0 && c == ':' && !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", get_mark(stream))) end @@ -1513,7 +1513,7 @@ function scan_plain(stream::TokenStream) stream.allow_simple_key = true append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) end_mark = get_mark(stream) spaces = scan_plain_spaces(stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || @@ -1535,7 +1535,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end whitespaces = prefix(stream.input, length) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if is_b_char(YAMLV1_1(), c) line_break = scan_line_break(stream) @@ -1552,7 +1552,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, breaks = Any[] while in(peek(stream.input), " \r\n\u0085\u2028\u2029") 
if peek(stream.input) == ' ' - yaml_1_1_forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) else push!(breaks, scan_line_break(stream)) if peek(stream.input) == '\uFEFF' @@ -1594,7 +1594,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end if c != '!' - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) throw(ScannerError("while scanning a $(name)", start_mark, "expected '!', but found '$(c)'", get_mark(stream))) @@ -1603,7 +1603,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end value = prefix(stream.input, length) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) value end @@ -1615,7 +1615,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) while is_ns_ascii_letter(c) || isdigit(c) || in(c, "-;/?:@&=+\$,_.!~*\'()[]%") if c == '%' push!(chunks, prefix(stream.input, length)) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) length = 0 push!(chunks, scan_uri_escapes(stream, name, start_mark)) else @@ -1626,7 +1626,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if length > 0 push!(chunks, prefix(stream.input, length)) - yaml_1_1_forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) length = 0 end @@ -1655,7 +1655,7 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) end end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) - yaml_1_1_forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) end string(bytes...) From 427158f95fc48b76b8b5d016bfee59988c4964a9 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 16:48:45 +0900 Subject: [PATCH 24/27] Refactor and sort out functions. * Sort out functions. * Add `b-char` for YAML 1.2. * Add comments to `forwardchars!`.
--- src/scanner.jl | 71 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 40d9fc2..bfdec93 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1,5 +1,41 @@ +# YAML 1.1 [22] b-line-feed ::= #xA /*LF*/ +# YAML 1.2 [24] b-line-feed ::= x0A +const b_line_feed = '\n' + +# YAML 1.1 [23] b-carriage-return ::= #xD /*CR*/ +# YAML 1.2 [25] b-carriage-return ::= x0D +const b_carriage_return = '\r' + +# YAML 1.1 [24] b-next-line ::= #x85 /*NEL*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_next_line = '\u85' + +# YAML 1.1 [25] b-line-separator ::= #x2028 /*LS*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_line_separator = '\u2028' + +# YAML 1.1 [26] b-paragraph-separator ::= #x2029 /*PS*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_paragraph_separator = '\u2029' + # YAML 1.1 [27] b-char ::= b-line-feed | b-carriage-return | b-next-line | b-line-separator | b-paragraph-separator -is_b_char(::YAMLV1_1, c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == '\u2028' || c == '\u2029' +is_b_char(::YAMLV1_1, c::Char) = + c == b_line_feed || + c == b_carriage_return || + c == yaml_1_1_b_next_line || + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator + +# YAML 1.2 [26] b-char ::= b-line-feed | b-carriage-return # x0A x0D +is_b_char(::YAMLV1_2, c::Char) = + c == b_line_feed || + c == b_carriage_return + +# YAML 1.1 [28] b-specific ::= b-line-separator | b-paragraph-separator +# YAML 1.2 don't have this. 
+is_b_specific(::YAMLV1_1, c::Char) = + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator # YAML 1.1 [41] ns-ascii-letter ::= [#x41-#x5A] /*A-Z*/ | [#61-#x7A] /*a-z*/ # YAML 1.2 [37] ns-ascii-letter ::= [x41-x5A] | [x61-x7A] # A-Z a-z @@ -168,42 +204,31 @@ function forwardchar_breakline!(stream::TokenStream) nothing end -# YAML 1.1 [22] b-line-feed ::= #xA /*LF*/ -# YAML 1.2 [24] b-line-feed ::= x0A -const b_line_feed = '\n' - -# YAML 1.1 [23] b-carriage-return ::= #xD /*CR*/ -# YAML 1.2 [25] b-carriage-return ::= x0D -const b_carriage_return = '\r' - -# YAML 1.1 [24] b-next-line ::= #x85 /*NEL*/ -const yaml_1_1_b_next_line = '\u85' - -# YAML 1.1 [25] b-line-separator ::= #x2028 /*LS*/ -const yaml_1_1_b_line_separator = '\u2028' - -# YAML 1.1 [26] b-paragraph-separator ::= #x2029 /*PS*/ -const yaml_1_1_b_paragraph_separator = '\u2029' - +# forwardchars!(::YAMLVersion, ::TokenStream, ::Integer=1) # Advance the stream by `n` characters. -# YAML 1.1 [28] b-specific ::= b-line-separator | b-paragraph-separator -yaml_1_1_is_b_specific(c::Char) = c == yaml_1_1_b_line_separator || c == yaml_1_1_b_paragraph_separator -# YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line + +# forwardchars!(::YAMLV1_1, ::TokenStream, ::Integer=1) +# YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed | b-next-line # YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) i = 1 while i ≤ n + # check whether the stream head is `b-ignored-any` c = peek(stream.input) + # check whether the stream head is `b-carriage-return` if c == b_carriage_return + # `b-carriage-return` or `b-carriage-return b-line-feed` forwardchar_breakline!(stream) i += 1 if peek(stream.input) == b_line_feed forwardchar_skip!(stream) i += 1 end - elseif c == b_line_feed || c == yaml_1_1_b_next_line || yaml_1_1_is_b_specific(c) + # 
check whether the stream head is `b-ignored-any - b-carriage-return - ( b-carriage-return b-line-feed )` + elseif c == b_line_feed || c == yaml_1_1_b_next_line || is_b_specific(YAMLV1_1(), c) forwardchar_breakline!(stream) i += 1 + # the stream head is not `b-ignored-any` else forwardchar_nobreak!(stream) i += 1 @@ -211,7 +236,7 @@ function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) end end -# Advance the stream by `n` characters. +# forwardchars!(::YAMLV1_2, ::TokenStream, ::Integer=1) # YAML 1.2 [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed function forwardchars!(::YAMLV1_2, stream::TokenStream, n::Integer=1) i = 1 From d9b45afe3359825a9d41ec85bb9f321bdb24de66 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Fri, 21 Jun 2024 20:39:14 +0900 Subject: [PATCH 25/27] Rename `get_mark` to `Mark`. --- src/scanner.jl | 146 ++++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 74 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index d1d327f..a7d46cd 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -136,9 +136,7 @@ function reset!(stream::TokenStream) end -function get_mark(stream::TokenStream) - Mark(stream.index, stream.line, stream.column) -end +Mark(stream::TokenStream) = Mark(stream.index, stream.line, stream.column) # Advance the stream by k characters. 
@@ -289,7 +287,7 @@ function stale_possible_simple_keys(stream::TokenStream) if key.mark.line != stream.line || stream.index - key.mark.index > 1024 if key.required throw(ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", get_mark(stream))) + "could not find expected ':'", Mark(stream))) end delete!(stream.possible_simple_keys, level) end @@ -305,7 +303,7 @@ function save_possible_simple_key(stream::TokenStream) if stream.allow_simple_key remove_possible_simple_key(stream) token_number = stream.tokens_taken + length(stream.token_queue) - key = SimpleKey(token_number, required, get_mark(stream)) + key = SimpleKey(token_number, required, Mark(stream)) stream.possible_simple_keys[stream.flow_level] = key end end @@ -317,7 +315,7 @@ function remove_possible_simple_key(stream::TokenStream) key = stream.possible_simple_keys[stream.flow_level] if key.required throw(ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", get_mark(stream))) + "could not find expected ':'", Mark(stream))) end delete!(stream.possible_simple_keys, stream.flow_level) end @@ -333,7 +331,7 @@ function unwind_indent(stream::TokenStream, column) # In block context, we may need to issue the BLOCK-END tokens. 
while stream.indent > column - mark = get_mark(stream) + mark = Mark(stream) stream.indent = pop!(stream.indents) enqueue!(stream.token_queue, BlockEndToken(Span(mark, mark))) end @@ -399,7 +397,7 @@ end # -------- function fetch_stream_start(stream::TokenStream) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, StreamStartToken(Span(mark, mark), string(stream.encoding))) end @@ -414,7 +412,7 @@ function fetch_stream_end(stream::TokenStream) stream.allow_simple_key = false empty!(stream.possible_simple_keys) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, StreamEndToken(Span(mark, mark))) stream.done = true end @@ -452,9 +450,9 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke stream.allow_simple_key = false # Add DOCUMENT-START or DOCUMENT-END. - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream, 3) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -463,10 +461,10 @@ function fetch_byte_order_mark(stream::TokenStream) # Set the current intendation to -1. unwind_indent(stream, -1) - start_mark = get_mark(stream) + start_mark = Mark(stream) forward!(stream.input) stream.index += 1 - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, ByteOrderMarkToken(Span(start_mark, end_mark))) end @@ -493,9 +491,9 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -521,9 +519,9 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok stream.allow_simple_key = false # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. 
- start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -536,9 +534,9 @@ function fetch_flow_entry(stream::TokenStream) remove_possible_simple_key(stream) # Add FLOW-ENTRY. - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end @@ -550,11 +548,11 @@ function fetch_block_entry(stream::TokenStream) if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "sequence entries not allowed here", - get_mark(stream))) + Mark(stream))) end if add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockSequenceStartToken(Span(mark, mark))) end @@ -572,9 +570,9 @@ function fetch_block_entry(stream::TokenStream) remove_possible_simple_key(stream) # Add BLOCK-ENTRY. - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) end @@ -586,12 +584,12 @@ function fetch_key(stream::TokenStream) if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "mapping keys are not allowed here", - get_mark(stream))) + Mark(stream))) end # We may need to add BLOCK-MAPPING-START. if add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockMappingStartToken(Span(mark, mark))) end @@ -604,9 +602,9 @@ function fetch_key(stream::TokenStream) remove_possible_simple_key(stream) # Add KEY. 
- start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end @@ -642,7 +640,7 @@ function fetch_value(stream::TokenStream) if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "mapping values are not allowed here", - get_mark(stream))) + Mark(stream))) end end @@ -650,7 +648,7 @@ function fetch_value(stream::TokenStream) # BLOCK-MAPPING-START. It will be detected as an error later by # the parser. if stream.flow_level == 0 && add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockMappingStartToken(Span(mark, mark))) end @@ -663,9 +661,9 @@ function fetch_value(stream::TokenStream) end # Add VALUE. - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end @@ -816,22 +814,22 @@ end function scan_directive(stream::TokenStream) - start_mark = get_mark(stream) + start_mark = Mark(stream) forwardchars!(stream) name = scan_directive_name(stream, start_mark) value = nothing if name == "YAML" value = scan_yaml_directive_value(stream, start_mark) - end_mark = get_mark(stream) + end_mark = Mark(stream) elseif name == "TAG" tag_handle = scan_tag_directive_handle(stream, start_mark) tag_prefix = scan_tag_directive_prefix(stream, start_mark) value = (tag_handle, tag_prefix) - end_mark = get_mark(stream) + end_mark = Mark(stream) else # Otherwise we warn and ignore the directive. - end_mark = get_mark(stream) + end_mark = Mark(stream) @warn """unknown directive name: "$name" at $end_mark. 
We ignore this.""" while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") forwardchars!(stream) @@ -854,7 +852,7 @@ function scan_directive_name(stream::TokenStream, start_mark::Mark) if length == 0 throw(ScannerError("while scanning a directive", start_mark, "expected alphanumeric character, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end value = prefix(stream.input, length) @@ -864,7 +862,7 @@ function scan_directive_name(stream::TokenStream, start_mark::Mark) if !in(c, ":\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, "expected alphanumeric character, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end value @@ -880,14 +878,14 @@ function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) if peek(stream.input) != '.' throw(ScannerError("while scanning a directive", start_mark, "expected '.' but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end forwardchars!(stream) minor = scan_yaml_directive_number(stream, start_mark) if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, "expected ' ' or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end return (major, minor) end @@ -905,7 +903,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # throw an error if the input is not decimal digits isdigit(c) || throw(ScannerError( "while scanning a directive", start_mark, - "expected a digit, but found '$c'", get_mark(stream), + "expected a digit, but found '$c'", Mark(stream), )) # ----------------------------------------------------------- # until the end of the decimal digits, increment the position @@ -942,7 +940,7 @@ function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) if peek(stream.input) != ' ' throw(ScannerError("while scanning a directive", start_mark, "expected ' ', but found '$(peek(stream.input))'", - 
get_mark(stream))) + Mark(stream))) end value end @@ -957,7 +955,7 @@ function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, "expected ' ', but found $(peek(stream.input))", - get_mark(stream))) + Mark(stream))) end value end @@ -976,14 +974,14 @@ function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, "expected a comment or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end scan_line_break(stream) end function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} - start_mark = get_mark(stream) + start_mark = Mark(stream) indicator = peek(stream.input) if indicator == '*' name = "alias" @@ -1001,22 +999,22 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} if length == 0 throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end value = prefix(stream.input, length) forwardchars!(stream, length) if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end - end_mark = get_mark(stream) + end_mark = Mark(stream) T(Span(start_mark, end_mark), value) end function scan_tag(stream::TokenStream) - start_mark = get_mark(stream) + start_mark = Mark(stream) c = peek(stream.input, 1) if c == '<' handle = nothing @@ -1025,7 +1023,7 @@ function scan_tag(stream::TokenStream) if peek(stream.input) != '>' throw(ScannerError("while parsing a tag", start_mark, "expected '>', but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end 
forwardchars!(stream) elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") @@ -1056,11 +1054,11 @@ function scan_tag(stream::TokenStream) if !in(c, "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a tag", start_mark, "expected ' ' or a line break, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end value = (handle, suffix) - end_mark = get_mark(stream) + end_mark = Mark(stream) TagToken(Span(start_mark, end_mark), value) end @@ -1069,7 +1067,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) folded = style == '>' chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) # Scan the header. forwardchars!(stream) @@ -1140,7 +1138,7 @@ function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a block scalal", start_mark, "expected a comment or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) + Mark(stream))) end scan_line_break(stream) @@ -1160,7 +1158,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) if increment == 0 throw(ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - get_mark(stream))) + Mark(stream))) end end elseif in(c, "0123456789") @@ -1168,7 +1166,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) if increment == 0 throw(ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - get_mark(stream))) + Mark(stream))) end forwardchars!(stream) @@ -1183,7 +1181,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) if !in(c, "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end chomping, increment @@ -1193,11 
+1191,11 @@ end function scan_block_scalar_indentation(stream::TokenStream) chunks = Any[] max_indent = 0 - end_mark = get_mark(stream) + end_mark = Mark(stream) while in(peek(stream.input), " \r\n\u0085\u2028\u2029") if peek(stream.input) != ' ' push!(chunks, scan_line_break(stream)) - end_mark = get_mark(stream) + end_mark = Mark(stream) else forwardchars!(stream) if stream.column > max_indent @@ -1212,14 +1210,14 @@ end function scan_block_scalar_breaks(stream::TokenStream, indent) chunks = Any[] - end_mark = get_mark(stream) + end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' forwardchars!(stream) end while yaml_1_1_is_b_char(peek(stream.input)) push!(chunks, scan_line_break(stream)) - end_mark = get_mark(stream) + end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' forwardchars!(stream) end @@ -1232,7 +1230,7 @@ end function scan_flow_scalar(stream::TokenStream, style::Char) double = style == '"' chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) q = peek(stream.input) # quote forwardchars!(stream) @@ -1242,7 +1240,7 @@ function scan_flow_scalar(stream::TokenStream, style::Char) end forwardchars!(stream) - end_mark = get_mark(stream) + end_mark = Mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end @@ -1312,7 +1310,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, string("expected escape sequence of", " $(length) hexadecimal", "digits, but found '$(c)'"), - get_mark(stream))) + Mark(stream))) end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) @@ -1324,7 +1322,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, throw(ScannerError("while scanning a double-quoted scalar", start_mark, "found unknown escape character '$(c)'", - get_mark(stream))) + Mark(stream))) end else return chunks @@ -1346,7 +1344,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, 
double::Bool, c = peek(stream.input) if c == '\0' throw(ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", get_mark(stream))) + "found unexpected end of stream", Mark(stream))) elseif yaml_1_1_is_b_char(c) line_break = scan_line_break(stream) breaks = scan_flow_scalar_breaks(stream, double, start_mark) @@ -1373,7 +1371,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a quoted scalar", start_mark, "found unexpected document seperator", - get_mark(stream))) + Mark(stream))) end while in(peek(stream.input), " \t") @@ -1396,7 +1394,7 @@ function scan_plain(stream::TokenStream) # We also keep track of the `allow_simple_key` flag here. # Indentation rules are loosed for the flow context. chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) end_mark = start_mark indent = stream.indent + 1 @@ -1430,7 +1428,7 @@ function scan_plain(stream::TokenStream) !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") forwardchars!(stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, - "found unexpected ':'", get_mark(stream))) + "found unexpected ':'", Mark(stream))) end if length == 0 @@ -1441,7 +1439,7 @@ function scan_plain(stream::TokenStream) append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) forwardchars!(stream, length) - end_mark = get_mark(stream) + end_mark = Mark(stream) spaces = scan_plain_spaces(stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || (stream.flow_level == 0 && stream.column < indent) @@ -1510,7 +1508,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) c = peek(stream.input) if c != '!' 
throw(ScannerError("while scanning a $(name)", start_mark, - "expected '!', but found '$(c)'", get_mark(stream))) + "expected '!', but found '$(c)'", Mark(stream))) end length = 1 c = peek(stream.input, length) @@ -1524,7 +1522,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) forwardchars!(stream, length) throw(ScannerError("while scanning a $(name)", start_mark, "expected '!', but found '$(c)'", - get_mark(stream))) + Mark(stream))) end length += 1 end @@ -1560,7 +1558,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if isempty(chunks) throw(ScannerError("while parsing a $(name)", start_mark, "expected URI, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end string(chunks...) @@ -1569,7 +1567,7 @@ end function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) bytes = Any[] - mark = get_mark(stream) + mark = Mark(stream) while peek(stream.input) == '%' forward!(stream.input) for k in 0:1 @@ -1578,7 +1576,7 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) string("expected URI escape sequence of", " 2 hexadecimal digits, but found", " '$(peek(stream.input, k))'"), - get_mark(stream))) + Mark(stream))) end end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) From 1db2624d4fbeac3b6f7a05078e907ab60ac847be Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Sat, 22 Jun 2024 12:59:50 +0900 Subject: [PATCH 26/27] Use YAML version traits for most functions in `src/scanner.jl`. 
--- src/scanner.jl | 732 +++++++++++++++++++++++++------------------------ 1 file changed, 378 insertions(+), 354 deletions(-) diff --git a/src/scanner.jl b/src/scanner.jl index 244b752..f52e9e1 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -218,7 +218,7 @@ end # forwardchars!(::YAMLV1_1, ::TokenStream, ::Integer=1) # YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed | b-next-line # YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific -function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) +function forwardchars!(version::YAMLV1_1, stream::TokenStream, n::Integer=1) i = 1 while i ≤ n # check whether the stream head is `b-ignored-any` @@ -233,7 +233,7 @@ function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) i += 1 end # check whether the stream head is `b-ignored-any - b-carriage-return - ( b-carriage-return b-line-feed )` - elseif c == b_line_feed || c == yaml_1_1_b_next_line || is_b_specific(YAMLV1_1(), c) + elseif c == b_line_feed || c == yaml_1_1_b_next_line || is_b_specific(version, c) forwardchar_breakline!(stream) i += 1 # the stream head is not `b-ignored-any` @@ -267,20 +267,17 @@ function forwardchars!(::YAMLV1_2, stream::TokenStream, n::Integer=1) end function need_more_tokens(stream::TokenStream) - if stream.done - return false - elseif isempty(stream.token_queue) - return true - end - + stream.done && return false + isempty(stream.token_queue) && return true stale_possible_simple_keys(stream) next_possible_simple_key(stream) == stream.tokens_taken end function peek(stream::TokenStream) + version = YAMLV1_1() while need_more_tokens(stream) - fetch_more_tokens(stream) + fetch_more_tokens(version, stream) end if !isempty(stream.token_queue) @@ -292,8 +289,9 @@ end function forward!(stream::TokenStream) + version = YAMLV1_1() while need_more_tokens(stream) - fetch_more_tokens(stream) + fetch_more_tokens(version, stream) end if !isempty(stream.token_queue) @@ -306,9 +304,9 
@@ end # Read one or more tokens from the input stream. -function fetch_more_tokens(stream::TokenStream) +function fetch_more_tokens(version::YAMLVersion, stream::TokenStream) # Eat whitespace. - scan_to_next_token(stream::TokenStream) + scan_to_next_token(version, stream) # Remove obsolete possible simple keys. stale_possible_simple_keys(stream) @@ -321,46 +319,46 @@ function fetch_more_tokens(stream::TokenStream) if c == '\0' || c === nothing fetch_stream_end(stream) elseif c == '%' && check_directive(stream) - fetch_directive(stream) - elseif c == '-' && check_document_start(stream) - fetch_document_start(stream) - elseif c == '.' && check_document_end(stream) - fetch_document_end(stream) + fetch_directive(version, stream) + elseif c == '-' && check_document_start(version, stream) + fetch_document_start(version, stream) + elseif c == '.' && check_document_end(version, stream) + fetch_document_end(version, stream) stream.done = true elseif c == '[' - fetch_flow_sequence_start(stream) + fetch_flow_sequence_start(version, stream) elseif c == '{' - fetch_flow_mapping_start(stream) + fetch_flow_mapping_start(version, stream) elseif c == ']' - fetch_flow_sequence_end(stream) + fetch_flow_sequence_end(version, stream) elseif c == '}' - fetch_flow_mapping_end(stream) + fetch_flow_mapping_end(version, stream) elseif c == ',' - fetch_flow_entry(stream) - elseif c == '-' && check_block_entry(stream) - fetch_block_entry(stream) - elseif c == '?' && check_key(stream) - fetch_key(stream) - elseif c == ':' && check_value(stream) - fetch_value(stream) + fetch_flow_entry(version, stream) + elseif c == '-' && check_block_entry(version, stream) + fetch_block_entry(version, stream) + elseif c == '?' 
&& check_key(version, stream) + fetch_key(version, stream) + elseif c == ':' && check_value(version, stream) + fetch_value(version, stream) elseif c == '*' - fetch_alias(stream) + fetch_alias(version, stream) elseif c == '&' - fetch_anchor(stream) + fetch_anchor(version, stream) elseif c == '!' - fetch_tag(stream) + fetch_tag(version, stream) elseif c == '|' && stream.flow_level == 0 - fetch_literal(stream) + fetch_literal(version, stream) elseif c == '>' && stream.flow_level == 0 - fetch_folded(stream) + fetch_folded(version, stream) elseif c == '\'' - fetch_single(stream) + fetch_single(version, stream) elseif c == '\"' - fetch_double(stream) + fetch_double(version, stream) elseif c == '\uFEFF' fetch_byte_order_mark(stream) - elseif check_plain(stream) - fetch_plain(stream) + elseif check_plain(version, stream) + fetch_plain(version, stream) else # TODO: Throw a meaningful exception. throw(c) @@ -466,36 +464,35 @@ function check_directive(stream::TokenStream) stream.column == 0 end -function check_document_start(stream::TokenStream) +check_document_start(version::YAMLVersion, stream::TokenStream) = stream.column == 0 && - prefix(stream.input, 3) == "---" && - is_whitespace(YAMLV1_1(), peek(stream.input, 3)) -end + prefix(stream.input, 3) == "---" && + is_whitespace(version, peek(stream.input, 3)) - function check_document_end(stream::TokenStream) - stream.column == 0 && - prefix(stream.input, 3) == "..." && - (is_whitespace(YAMLV1_1(), peek(stream.input, 3)) || peek(stream.input, 3) === nothing) - end +check_document_end(version::YAMLVersion, stream::TokenStream) = + stream.column == 0 && + prefix(stream.input, 3) == "..." 
&& begin + c = peek(stream.input, 3) + is_whitespace(version, c) || c === nothing + end -function check_block_entry(stream::TokenStream) - is_whitespace(YAMLV1_1(), peek(stream.input, 1)) +function check_block_entry(version::YAMLVersion, stream::TokenStream) + is_whitespace(version, peek(stream.input, 1)) end -function check_key(stream::TokenStream) - stream.flow_level > 0 || is_whitespace(YAMLV1_1(), peek(stream.input, 1)) +function check_key(version::YAMLVersion, stream::TokenStream) + stream.flow_level > 0 || is_whitespace(version, peek(stream.input, 1)) end -function check_value(stream::TokenStream) +function check_value(version::YAMLVersion, stream::TokenStream) cnext = peek(stream.input, 1) - stream.flow_level > 0 || is_whitespace(YAMLV1_1(), cnext) || cnext === nothing + stream.flow_level > 0 || is_whitespace(version, cnext) || cnext === nothing end -function check_plain(stream::TokenStream) - !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029-?:,[]{}#&*!|>\'\"%@`\uFEFF") || - (!is_whitespace(YAMLV1_1(), peek(stream.input, 1)) && - (peek(stream.input) == '-' || (stream.flow_level == 0 && - in(peek(stream.input), "?:")))) +function check_plain(version::YAMLVersion, stream::TokenStream) + c = peek(stream.input) + !(c == '\0' || c == ' ' || c == '\t' || is_b_char(version, c) || in(c, "-?:,[]{}#&*!|>\'\"%@`\uFEFF")) || + (!is_whitespace(version, peek(stream.input, 1)) && (c == '-' || (stream.flow_level == 0 && in(c, "?:")))) end @@ -524,7 +521,7 @@ function fetch_stream_end(stream::TokenStream) end -function fetch_directive(stream::TokenStream) +function fetch_directive(version::YAMLVersion, stream::TokenStream) # Set the current intendation to -1. 
unwind_indent(stream, -1) @@ -532,21 +529,21 @@ function fetch_directive(stream::TokenStream) remove_possible_simple_key(stream) stream.allow_simple_key = false - enqueue!(stream.token_queue, scan_directive(stream)) + enqueue!(stream.token_queue, scan_directive(version, stream)) end -function fetch_document_start(stream::TokenStream) - fetch_document_indicator(stream, DocumentStartToken) +function fetch_document_start(version::YAMLVersion, stream::TokenStream) + fetch_document_indicator(version, stream, DocumentStartToken) end -function fetch_document_end(stream::TokenStream) - fetch_document_indicator(stream, DocumentEndToken) +function fetch_document_end(version::YAMLVersion, stream::TokenStream) + fetch_document_indicator(version, stream, DocumentEndToken) end -function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_document_indicator(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # Set the current intendation to -1. unwind_indent(stream, -1) @@ -557,7 +554,7 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke # Add DOCUMENT-START or DOCUMENT-END. 
start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream, 3) + forwardchars!(version, stream, 3) end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -575,17 +572,17 @@ function fetch_byte_order_mark(stream::TokenStream) end -function fetch_flow_sequence_start(stream::TokenStream) - fetch_flow_collection_start(stream, FlowSequenceStartToken) +function fetch_flow_sequence_start(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_start(version, stream, FlowSequenceStartToken) end -function fetch_flow_mapping_start(stream::TokenStream) - fetch_flow_collection_start(stream, FlowMappingStartToken) +function fetch_flow_mapping_start(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_start(version, stream, FlowMappingStartToken) end -function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_flow_collection_start(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # '[' and '{' may start a simple key. save_possible_simple_key(stream) @@ -598,23 +595,23 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. 
start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end -function fetch_flow_sequence_end(stream::TokenStream) - fetch_flow_collection_end(stream, FlowSequenceEndToken) +function fetch_flow_sequence_end(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_end(version, stream, FlowSequenceEndToken) end -function fetch_flow_mapping_end(stream::TokenStream) - fetch_flow_collection_end(stream, FlowMappingEndToken) +function fetch_flow_mapping_end(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_end(version, stream, FlowMappingEndToken) end -function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_flow_collection_end(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # Reset possible simple key on the current level. remove_possible_simple_key(stream) @@ -626,13 +623,13 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end -function fetch_flow_entry(stream::TokenStream) +function fetch_flow_entry(version::YAMLVersion, stream::TokenStream) # Simple keys are allowed after ','. stream.allow_simple_key = true @@ -641,13 +638,13 @@ function fetch_flow_entry(stream::TokenStream) # Add FLOW-ENTRY. start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end -function fetch_block_entry(stream::TokenStream) +function fetch_block_entry(version::YAMLVersion, stream::TokenStream) # Block context needs additional checks. 
if stream.flow_level == 0 # Are we allowed to start a new entry? @@ -677,14 +674,14 @@ function fetch_block_entry(stream::TokenStream) # Add BLOCK-ENTRY. start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) end -function fetch_key(stream::TokenStream) +function fetch_key(version::YAMLVersion, stream::TokenStream) if stream.flow_level == 0 # Are we allowed to start a key (not nessesary a simple)? if !stream.allow_simple_key @@ -709,13 +706,13 @@ function fetch_key(stream::TokenStream) # Add KEY. start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end -function fetch_value(stream::TokenStream) +function fetch_value(version::YAMLVersion, stream::TokenStream) # Simple key if haskey(stream.possible_simple_keys, stream.flow_level) # Add KEY. @@ -768,13 +765,13 @@ function fetch_value(stream::TokenStream) # Add VALUE. start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end -function fetch_alias(stream::TokenStream) +function fetch_alias(version::YAMLVersion, stream::TokenStream) # ALIAS could be a simple key. save_possible_simple_key(stream) @@ -782,11 +779,11 @@ function fetch_alias(stream::TokenStream) stream.allow_simple_key = false # Scan and add ALIAS. - enqueue!(stream.token_queue, scan_anchor(stream, AliasToken)) + enqueue!(stream.token_queue, scan_anchor(version, stream, AliasToken)) end -function fetch_anchor(stream::TokenStream) +function fetch_anchor(version::YAMLVersion, stream::TokenStream) # ANCHOR could start a simple key. 
save_possible_simple_key(stream) @@ -794,11 +791,11 @@ function fetch_anchor(stream::TokenStream) stream.allow_simple_key = false # Scan and add ANCHOR. - enqueue!(stream.token_queue, scan_anchor(stream, AnchorToken)) + enqueue!(stream.token_queue, scan_anchor(version, stream, AnchorToken)) end -function fetch_tag(stream::TokenStream) +function fetch_tag(version::YAMLVersion, stream::TokenStream) # TAG could start a simple key. save_possible_simple_key(stream) @@ -806,21 +803,21 @@ function fetch_tag(stream::TokenStream) stream.allow_simple_key = false # Scan and add TAG. - enqueue!(stream.token_queue, scan_tag(stream)) + enqueue!(stream.token_queue, scan_tag(version, stream)) end -function fetch_literal(stream::TokenStream) - fetch_block_scalar(stream, '|') +function fetch_literal(version::YAMLVersion, stream::TokenStream) + fetch_block_scalar(version, stream, '|') end -function fetch_folded(stream::TokenStream) - fetch_block_scalar(stream, '>') +function fetch_folded(version::YAMLVersion, stream::TokenStream) + fetch_block_scalar(version, stream, '>') end -function fetch_block_scalar(stream::TokenStream, style::Char) +function fetch_block_scalar(version::YAMLVersion, stream::TokenStream, style::Char) # A simple key may follow a block scalar. stream.allow_simple_key = true @@ -828,21 +825,21 @@ function fetch_block_scalar(stream::TokenStream, style::Char) remove_possible_simple_key(stream) # Scan and add SCALAR. 
- enqueue!(stream.token_queue, scan_block_scalar(stream, style)) + enqueue!(stream.token_queue, scan_block_scalar(version, stream, style)) end -function fetch_single(stream::TokenStream) - fetch_flow_scalar(stream, '\'') +function fetch_single(version::YAMLVersion, stream::TokenStream) + fetch_flow_scalar(version, stream, '\'') end -function fetch_double(stream::TokenStream) - fetch_flow_scalar(stream, '"') +function fetch_double(version::YAMLVersion, stream::TokenStream) + fetch_flow_scalar(version, stream, '"') end -function fetch_flow_scalar(stream::TokenStream, style::Char) +function fetch_flow_scalar(version::YAMLVersion, stream::TokenStream, style::Char) # A flow scalar could be a simple key. save_possible_simple_key(stream) @@ -850,14 +847,14 @@ function fetch_flow_scalar(stream::TokenStream, style::Char) stream.allow_simple_key = false # Scan and add SCALAR. - enqueue!(stream.token_queue, scan_flow_scalar(stream, style)) + enqueue!(stream.token_queue, scan_flow_scalar(version, stream, style)) end -function fetch_plain(stream::TokenStream) +function fetch_plain(version::YAMLVersion, stream::TokenStream) save_possible_simple_key(stream) stream.allow_simple_key = false - enqueue!(stream.token_queue, scan_plain(stream)) + enqueue!(stream.token_queue, scan_plain(version, stream)) end @@ -886,20 +883,20 @@ end # U+2029 → U+2029 # otherwise → (empty) # -function scan_line_break(::YAMLV1_1, stream::TokenStream)::String +function scan_line_break(version::YAMLV1_1, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' - forwardchars!(YAMLV1_1(), stream, 2) + forwardchars!(version, stream, 2) else - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end "\u000a" elseif c == '\u000a' || c == '\u0085' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) "\u000a" - elseif c == '\u2028' || c == '\u2029' - forwardchars!(YAMLV1_1(), stream) + elseif is_b_specific(version, c) + 
forwardchars!(version, stream) string(c) else "" @@ -919,17 +916,17 @@ end # U+000A → U+000A # otherwise → (empty) # -function scan_line_break(::YAMLV1_2, stream::TokenStream)::String +function scan_line_break(version::YAMLV1_2, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' - forwardchars!(YAMLV1_2(), stream, 2) + forwardchars!(version, stream, 2) else - forwardchars!(YAMLV1_2(), stream) + forwardchars!(version, stream) end "\u000a" elseif c == '\u000a' - forwardchars!(YAMLV1_2(), stream) + forwardchars!(version, stream) "\u000a" else "" @@ -937,21 +934,22 @@ function scan_line_break(::YAMLV1_2, stream::TokenStream)::String end # Scan past whitespace to the next token. -function scan_to_next_token(stream::TokenStream) +function scan_to_next_token(version::YAMLVersion, stream::TokenStream) while true # whitespace while peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end # comment if peek(stream.input) == '#' - forwardchars!(YAMLV1_1(), stream) - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(YAMLV1_1(), stream) + while true + forwardchars!(version, stream) + c = peek(stream.input) + (c == '\0' || is_b_char(version, c)) && break end end # line break - if scan_line_break(YAMLV1_1(), stream) != "" + if scan_line_break(version, stream) != "" if stream.flow_level == 0 stream.allow_simple_key = true end @@ -963,86 +961,94 @@ function scan_to_next_token(stream::TokenStream) end -function scan_directive(stream::TokenStream) +function scan_directive(version::YAMLVersion, stream::TokenStream) start_mark = Mark(stream) - forwardchars!(YAMLV1_1(), stream) - name = scan_directive_name(stream, start_mark) + forwardchars!(version, stream) + name = scan_directive_name(version, stream, start_mark) value = nothing if name == "YAML" - value = scan_yaml_directive_value(stream, start_mark) + value = scan_yaml_directive_value(version, stream, start_mark) 
end_mark = Mark(stream) elseif name == "TAG" - tag_handle = scan_tag_directive_handle(stream, start_mark) - tag_prefix = scan_tag_directive_prefix(stream, start_mark) + tag_handle = scan_tag_directive_handle(version, stream, start_mark) + tag_prefix = scan_tag_directive_prefix(version, stream, start_mark) value = (tag_handle, tag_prefix) end_mark = Mark(stream) else # Otherwise we warn and ignore the directive. end_mark = Mark(stream) @warn """unknown directive name: "$name" at $end_mark. We ignore this.""" - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(YAMLV1_1(), stream) + while + begin + c = peek(stream.input) + !(c == '\0' || is_b_char(version, c)) + end + forwardchars!(version, stream) end end - scan_directive_ignored_line(stream, start_mark) + scan_directive_ignored_line(version, stream, start_mark) DirectiveToken(Span(start_mark, end_mark), name, value) end -function scan_directive_name(stream::TokenStream, start_mark::Mark) +function scan_directive_name(version::YAMLVersion, stream::TokenStream, start_mark::Mark) length = 0 - c = peek(stream.input) - while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' - length += 1 + while begin c = peek(stream.input, length) + is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' end - - if length == 0 - throw(ScannerError("while scanning a directive", start_mark, - "expected alphanumeric character, but found '$(c)'", - Mark(stream))) + length += 1 end + length == 0 && throw(ScannerError( + "while scanning a directive", start_mark, + "expected alphanumeric character, but found '$c'", Mark(stream), + )) + value = prefix(stream.input, length) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) - if !in(c, ":\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected alphanumeric character, but found '$(c)'", - Mark(stream))) - end + c == ':' || c == '\0' || c == ' ' || 
is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected alphanumeric character, but found '$c'", Mark(stream), + )) value end -function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) - while peek(stream.input) == ' ' || peek(stream.input) == ':' - forwardchars!(YAMLV1_1(), stream) - end - - major = scan_yaml_directive_number(stream, start_mark) - if peek(stream.input) != '.' - throw(ScannerError("while scanning a directive", start_mark, - "expected '.' but found '$(peek(stream.input))'", - Mark(stream))) +function scan_yaml_directive_value(version::YAMLVersion, stream::TokenStream, start_mark::Mark) + while begin + c = peek(stream.input) + c == ' ' || c == ':' end - forwardchars!(YAMLV1_1(), stream) - minor = scan_yaml_directive_number(stream, start_mark) - if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ' or a line break, but found '$(peek(stream.input))'", - Mark(stream))) + forwardchars!(version, stream) end - return (major, minor) + + major = scan_yaml_directive_number(version, stream, start_mark) + c = peek(stream.input) + c == '.' || throw(ScannerError( + "while scanning a directive", start_mark, + "expected '.' 
but found '$c'", Mark(stream), + )) + forwardchars!(version, stream) + + minor = scan_yaml_directive_number(version, stream, start_mark) + c = peek(stream.input) + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ' or a line break, but found '$c'", Mark(stream), + )) + + major, minor end # scan the YAML directive's number from a stream -function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int +function scan_yaml_directive_number(version::YAMLVersion, stream::TokenStream, start_mark::Mark)::Int # ------------------------------------------------- # check that the first character is a decimal digit # ------------------------------------------------- @@ -1073,7 +1079,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # --------------------------------------------------- # advance the stream by the length that has been read # --------------------------------------------------- - forwardchars!(YAMLV1_1(), stream, pos) + forwardchars!(version, stream, pos) # ----------------- # return the number # ----------------- @@ -1081,110 +1087,109 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int end -function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) +function scan_tag_directive_handle(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end - value = scan_tag_handle(stream, "directive", start_mark) - if peek(stream.input) != ' ' - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ', but found '$(peek(stream.input))'", - Mark(stream))) - end + value = scan_tag_handle(version, stream, "directive", start_mark) + + c = peek(stream.input) + c == ' ' || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ', but found '$c'", Mark(stream), + 
)) value end -function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) +function scan_tag_directive_prefix(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end + + value = scan_tag_uri(version, stream, "directive", start_mark) - value = scan_tag_uri(stream, "directive", start_mark) - if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ', but found $(peek(stream.input))", - Mark(stream))) - end + c = peek(stream.input) + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ', but found $c", Mark(stream), + )) value end -function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) +function scan_directive_ignored_line(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end if peek(stream.input) == '#' - forwardchars!(YAMLV1_1(), stream) - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) + while begin + c = peek(stream.input) + !(c == '\0' || is_b_char(version, c)) + end + forwardchars!(version, stream) end end - if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found '$(peek(stream.input))'", - Mark(stream))) - end - scan_line_break(YAMLV1_1(), stream) + c = peek(stream.input) + c == '\0' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected a comment or a line break, but found '$c'", Mark(stream), + )) + scan_line_break(version, stream) end -function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} +function 
scan_anchor(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} start_mark = Mark(stream) indicator = peek(stream.input) - if indicator == '*' - name = "alias" - else - name = "anchor" - end - forwardchars!(YAMLV1_1(), stream) + name = indicator == '*' ? "alias" : "anchor" + forwardchars!(version, stream) length = 0 - c = peek(stream.input) - while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' - length += 1 + while begin c = peek(stream.input, length) + is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' end - - if length == 0 - throw(ScannerError("while scanning an $(name)", start_mark, - "expected an alphanumeric character, but found '$(peek(stream.input))'", - Mark(stream))) + length += 1 end + + length == 0 && throw(ScannerError( + "while scanning an $name", start_mark, + "expected an alphanumeric character, but found '$(peek(stream.input))'", Mark(stream), + )) value = prefix(stream.input, length) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) - if !(is_whitespace(YAMLV1_1(), c) || in(c, "?:,]}%@`")) - throw(ScannerError("while scanning an $(name)", start_mark, - "expected an alphanumeric character, but found '$c'", - Mark(stream))) - end + is_whitespace(version, c) || in(c, "?:,]}%@`") || throw(ScannerError( + "while scanning an $name", start_mark, + "expected an alphanumeric character, but found '$c'", Mark(stream), + )) end_mark = Mark(stream) T(Span(start_mark, end_mark), value) end -function scan_tag(stream::TokenStream) +function scan_tag(version::YAMLVersion, stream::TokenStream) start_mark = Mark(stream) c = peek(stream.input, 1) if c == '<' handle = nothing - forwardchars!(YAMLV1_1(), stream, 2) - suffix = scan_tag_uri(stream, "tag", start_mark) - if peek(stream.input) != '>' - throw(ScannerError("while parsing a tag", start_mark, - "expected '>', but found '$(peek(stream.input))'", - Mark(stream))) - end - forwardchars!(YAMLV1_1(), stream) - elseif 
is_whitespace(YAMLV1_1(), c) + forwardchars!(version, stream, 2) + suffix = scan_tag_uri(version, stream, "tag", start_mark) + peek(stream.input) == '>' || throw(ScannerError( + "while parsing a tag", start_mark, + "expected '>', but found '$(peek(stream.input))'", Mark(stream), + )) + forwardchars!(version, stream) + elseif is_whitespace(version, c) handle = nothing suffix = '!' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) else length = 1 use_handle = false - while !in(c, "\0 \r\n\u0085\u2028\u2029") + while !(c == '\0' || is_b_char(version, c)) if c == '!' use_handle = true break @@ -1193,20 +1198,19 @@ function scan_tag(stream::TokenStream) c = peek(stream.input, length) end if use_handle - handle = scan_tag_handle(stream, "tag", start_mark) + handle = scan_tag_handle(version, stream, "tag", start_mark) else handle = "!" - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end - suffix = scan_tag_uri(stream, "tag", start_mark) + suffix = scan_tag_uri(version, stream, "tag", start_mark) end c = peek(stream.input) - if !in(c, "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a tag", start_mark, - "expected ' ' or a line break, but found '$(c)'", - Mark(stream))) - end + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a tag", start_mark, + "expected ' ' or a line break, but found '$c'", Mark(stream), + )) value = (handle, suffix) end_mark = Mark(stream) @@ -1214,25 +1218,25 @@ function scan_tag(stream::TokenStream) end -function scan_block_scalar(stream::TokenStream, style::Char) +function scan_block_scalar(version::YAMLVersion, stream::TokenStream, style::Char) folded = style == '>' chunks = Any[] start_mark = Mark(stream) # Scan the header. 
- forwardchars!(YAMLV1_1(), stream) - chomping, increment = scan_block_scalar_indicators(stream, start_mark) - scan_block_scalar_ignored_line(stream, start_mark) + forwardchars!(version, stream) + chomping, increment = scan_block_scalar_indicators(version, stream, start_mark) + scan_block_scalar_ignored_line(version, stream, start_mark) # Determine the indentation level and go to the first non-empty line. min_indent = max(1, stream.indent + 1) if increment === nothing - breaks, max_indent, end_mark = scan_block_scalar_indentation(stream) + breaks, max_indent, end_mark = scan_block_scalar_indentation(version, stream) indent = max(min_indent, max_indent) else indent = min_indent + increment - 1 - breaks, end_mark = scan_block_scalar_breaks(stream, indent) + breaks, end_mark = scan_block_scalar_breaks(version, stream, indent) end line_break = "" @@ -1241,13 +1245,16 @@ function scan_block_scalar(stream::TokenStream, style::Char) append!(chunks, breaks) leading_non_space = !is_s_white(peek(stream.input)) length = 0 - while !in(peek(stream.input, length), "\0\r\n\u0085\u2028\u2029") + while begin + c = peek(stream.input, length) + !(c == '\0' || is_b_char(version, c)) + end length += 1 end push!(chunks, prefix(stream.input, length)) - forwardchars!(YAMLV1_1(), stream, length) - line_break = scan_line_break(YAMLV1_1(), stream) - breaks, end_mark = scan_block_scalar_breaks(stream, indent) + forwardchars!(version, stream, length) + line_break = scan_line_break(version, stream) + breaks, end_mark = scan_block_scalar_breaks(version, stream, indent) if stream.column == indent && peek(stream.input) != '\0' if folded && line_break == "\n" && leading_non_space && !is_s_white(peek(stream.input)) @@ -1275,83 +1282,84 @@ function scan_block_scalar(stream::TokenStream, style::Char) end -function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) +function scan_block_scalar_ignored_line(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while 
peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end - if peek(stream.input) == '#' - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(YAMLV1_1(), stream) + c = peek(stream.input) + if c == '#' + while !(c == '\0' || is_b_char(version, c)) + forwardchars!(version, stream) + c = peek(stream.input) end end - if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a block scalal", start_mark, - "expected a comment or a line break, but found '$(peek(stream.input))'", - Mark(stream))) - end + c == '\0' || is_b_char(version, c) || throw(ScannerError( + "while scanning a block scalal", start_mark, + "expected a comment or a line break, but found '$c'", Mark(stream), + )) - scan_line_break(YAMLV1_1(), stream) + scan_line_break(version, stream) end -function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) +function scan_block_scalar_indicators(version::YAMLVersion, stream::TokenStream, start_mark::Mark) chomping = nothing increment = nothing c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) c = peek(stream.input) - if in(c, "0123456789") + if isdigit(c) + c == '0' && throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", Mark(stream), + )) increment = parse(Int, string(c)) - if increment == 0 - throw(ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - Mark(stream))) - end end - elseif in(c, "0123456789") + elseif isdigit(c) + c == '0' && throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", Mark(stream), + )) increment = parse(Int, string(c)) - if increment == 0 - throw(ScannerError("while scanning a block scalar", start_mark, - "expected 
indentation indicator in the range 1-9, but found 0", - Mark(stream))) - end - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) c = peek(stream.input) if c == '+' || c == '-' - comping = c == '+' - forwardchars!(YAMLV1_1(), stream) + chomping = c == '+' + forwardchars!(version, stream) end end c = peek(stream.input) - if !in(c, "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found '$(c)'", - Mark(stream))) - end + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found '$c'", Mark(stream), + )) chomping, increment end -function scan_block_scalar_indentation(stream::TokenStream) - chunks = Any[] +function scan_block_scalar_indentation(version::YAMLVersion, stream::TokenStream)::Tuple{Vector{String}, Integer, Mark} + chunks = String[] max_indent = 0 end_mark = Mark(stream) - while in(peek(stream.input), " \r\n\u0085\u2028\u2029") - if peek(stream.input) != ' ' - push!(chunks, scan_line_break(YAMLV1_1(), stream)) + while true + c = peek(stream.input) + if is_b_char(version, c) + push!(chunks, scan_line_break(version, stream)) end_mark = Mark(stream) - else - forwardchars!(YAMLV1_1(), stream) + elseif c == ' ' + forwardchars!(version, stream) if stream.column > max_indent max_indent = stream.column end + else + break end end @@ -1359,18 +1367,18 @@ function scan_block_scalar_indentation(stream::TokenStream) end -function scan_block_scalar_breaks(stream::TokenStream, indent) +function scan_block_scalar_breaks(version::YAMLVersion, stream::TokenStream, indent) chunks = Any[] end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end - while is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, scan_line_break(YAMLV1_1(), stream)) + while 
is_b_char(version, peek(stream.input)) + push!(chunks, scan_line_break(version, stream)) end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end end @@ -1378,25 +1386,25 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) end -function scan_flow_scalar(stream::TokenStream, style::Char) +function scan_flow_scalar(version::YAMLVersion, stream::TokenStream, style::Char) double = style == '"' chunks = Any[] start_mark = Mark(stream) q = peek(stream.input) # quote - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) while peek(stream.input) != q || peek(stream.input, 1) == q - append!(chunks, scan_flow_scalar_spaces(stream, double, start_mark)) - append!(chunks, scan_flow_scalar_non_spaces(stream, double, start_mark)) + append!(chunks, scan_flow_scalar_spaces(version, stream, double, start_mark)) + append!(chunks, scan_flow_scalar_non_spaces(version, stream, double, start_mark)) end - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) end_mark = Mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end -const ESCAPE_REPLACEMENTS = Dict{Char,Char}( +const ESCAPE_REPLACEMENTS = Dict{Char, Char}( '0' => '\0', 'a' => '\u0007', 'b' => '\u0008', @@ -1424,25 +1432,27 @@ const ESCAPE_CODES = Dict{Char, Int}( ) -function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, - start_mark::Mark) +function scan_flow_scalar_non_spaces( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +) chunks = Any[] while true length = 0 c = peek(stream.input, length) - while !(in(c, "\'\"\\") || is_whitespace(YAMLV1_1(), c)) + while !(in(c, "\'\"\\") || is_whitespace(version, c)) length += 1 c = peek(stream.input, length) end if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) end c = 
peek(stream.input) if !double && c == '\'' && peek(stream.input, 1) == '\'' push!(chunks, '\'') - forwardchars!(YAMLV1_1(), stream, 2) + forwardchars!(version, stream, 2) elseif (double && c == '\'') || (!double && in(c, "\"\\")) push!(chunks, c) forward!(stream.input) @@ -1467,15 +1477,15 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) - forwardchars!(YAMLV1_1(), stream, length) - elseif is_b_char(YAMLV1_1(), c) - scan_line_break(YAMLV1_1(), stream) - append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) + forwardchars!(version, stream, length) + elseif is_b_char(version, c) + scan_line_break(version, stream) + append!(chunks, scan_flow_scalar_breaks(version, stream, double, start_mark)) else - throw(ScannerError("while scanning a double-quoted scalar", - start_mark, - "found unknown escape character '$(c)'", - Mark(stream))) + throw(ScannerError( + "while scanning a double-quoted scalar", start_mark, + "found unknown escape character '$c'", Mark(stream)), + ) end else return chunks @@ -1484,27 +1494,31 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end -function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, - start_mark::Mark) +function scan_flow_scalar_spaces( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +) chunks = Any[] length = 0 while is_s_white(peek(stream.input, length)) length += 1 end whitespaces = prefix(stream.input, length) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) if c == '\0' - throw(ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", Mark(stream))) - elseif is_b_char(YAMLV1_1(), c) - line_break = scan_line_break(YAMLV1_1(), stream) - breaks = scan_flow_scalar_breaks(stream, double, start_mark) + throw(ScannerError( + "while scanning a quoted 
scalar", start_mark, + "found unexpected end of stream", Mark(stream), + )) + elseif is_b_char(version, c) + line_break = scan_line_break(version, stream) + breaks = scan_flow_scalar_breaks(version, stream, double, start_mark) if line_break != '\n' push!(chunks, line_break) else isempty(breaks) - push!(chunks, ' ') + push!(chunks, " ") end append!(chunks, breaks) else @@ -1515,31 +1529,35 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, end -function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, - start_mark::Mark) - chunks = Any[] +function scan_flow_scalar_breaks( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +)::Vector{String} + chunks = String[] while true pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3)) - throw(ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document seperator", - Mark(stream))) + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) + throw(ScannerError( + "while scanning a quoted scalar", start_mark, + "found unexpected document seperator", Mark(stream)), + ) end while is_s_white(peek(stream.input)) forward!(stream.input) end - if is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, scan_line_break(YAMLV1_1(), stream)) + if is_b_char(version, peek(stream.input)) + push!(chunks, scan_line_break(version, stream)) else - return chunks + break end end + chunks end -function scan_plain(stream::TokenStream) +function scan_plain(version::YAMLVersion, stream::TokenStream) # See the specification for details. # We add an additional restriction for the flow context: # plain scalars in the flow context cannot contain ',', ':' and '?'. 
@@ -1564,10 +1582,10 @@ function scan_plain(stream::TokenStream) while true c = peek(stream.input, length) cnext = peek(stream.input, length + 1) - if is_whitespace(YAMLV1_1(), c) || + if is_whitespace(version, c) || c === nothing || (stream.flow_level == 0 && c == ':' && - (cnext === nothing || is_whitespace(YAMLV1_1(), cnext))) || + (cnext === nothing || is_whitespace(version, cnext))) || (stream.flow_level != 0 && in(c, ",:?[]{}")) break end @@ -1578,9 +1596,9 @@ function scan_plain(stream::TokenStream) c = peek(stream.input) if stream.flow_level != 0 && c == ':' && begin cnext = peek(stream.input, length + 1) - !(is_whitespace(YAMLV1_1(), cnext) || in(cnext, ",[]{}")) + !(is_whitespace(version, cnext) || in(cnext, ",[]{}")) end - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", Mark(stream))) end @@ -1592,9 +1610,9 @@ function scan_plain(stream::TokenStream) stream.allow_simple_key = true append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) end_mark = Mark(stream) - spaces = scan_plain_spaces(stream, indent, start_mark) + spaces = scan_plain_spaces(version, stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || (stream.flow_level == 0 && stream.column < indent) break @@ -1605,8 +1623,10 @@ function scan_plain(stream::TokenStream) end -function scan_plain_spaces(stream::TokenStream, indent::Integer, - start_mark::Mark) +function scan_plain_spaces( + version::YAMLVersion, stream::TokenStream, + indent::Integer, start_mark::Mark, +) chunks = Any[] length = 0 while peek(stream.input, length) == ' ' @@ -1614,30 +1634,33 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end whitespaces = prefix(stream.input, length) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) c = 
peek(stream.input) - if is_b_char(YAMLV1_1(), c) - line_break = scan_line_break(YAMLV1_1(), stream) + if is_b_char(version, c) + line_break = scan_line_break(version, stream) stream.allow_simple_key = true if peek(stream.input) == '\uFEFF' return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3)) + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) return Any[] end breaks = Any[] - while in(peek(stream.input), " \r\n\u0085\u2028\u2029") + while begin + c = peek(stream.input) + c == ' ' || is_b_char(version, c) + end if peek(stream.input) == ' ' - forwardchars!(YAMLV1_1(), stream) + forwardchars!(version, stream) else - push!(breaks, scan_line_break(YAMLV1_1(), stream)) + push!(breaks, scan_line_break(version, stream)) if peek(stream.input) == '\uFEFF' return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3)) + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) return Any[] end end @@ -1656,7 +1679,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end -function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) +function scan_tag_handle(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) c = peek(stream.input) if c != '!' throw(ScannerError("while scanning a $(name)", start_mark, @@ -1671,30 +1694,31 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end if c != '!' 
- forwardchars!(YAMLV1_1(), stream, length) - throw(ScannerError("while scanning a $(name)", start_mark, - "expected '!', but found '$(c)'", - Mark(stream))) + forwardchars!(version, stream, length) + throw(ScannerError( + "while scanning a $name", start_mark, + "expected '!', but found '$c'", Mark(stream), + )) end length += 1 end value = prefix(stream.input, length) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) value end -function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) +function scan_tag_uri(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) chunks = Any[] length = 0 c = peek(stream.input, length) while is_ns_ascii_letter(c) || isdigit(c) || in(c, "-;/?:@&=+\$,_.!~*\'()[]%") if c == '%' push!(chunks, prefix(stream.input, length)) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) length = 0 - push!(chunks, scan_uri_escapes(stream, name, start_mark)) + push!(chunks, scan_uri_escapes(version, stream, name, start_mark)) else length += 1 end @@ -1703,7 +1727,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(YAMLV1_1(), stream, length) + forwardchars!(version, stream, length) length = 0 end @@ -1717,7 +1741,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) end -function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) +function scan_uri_escapes(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) bytes = Any[] mark = Mark(stream) while peek(stream.input) == '%' @@ -1732,7 +1756,7 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) end end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) - forwardchars!(YAMLV1_1(), stream, 2) + forwardchars!(version, stream, 2) end string(bytes...) 
From bf84c857bfc817265b2a828aab04c4fc6279bf53 Mon Sep 17 00:00:00 2001 From: Koki Fushimi Date: Sat, 22 Jun 2024 16:51:56 +0900 Subject: [PATCH 27/27] Use version traits for the parser. --- src/parser.jl | 266 ++++++++++++++++++++++++++----------------------- src/scanner.jl | 6 +- 2 files changed, 142 insertions(+), 130 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index d34d0c3..2ebf45f 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -42,6 +42,7 @@ end function peek(stream::EventStream) + version = YAMLV1_1() if stream.next_event === nothing if stream.state === nothing return nothing @@ -49,10 +50,10 @@ function peek(stream::EventStream) stream.state = nothing return stream.end_of_stream else - x = stream.state(stream) + x = stream.state(version, stream) #@show x stream.next_event = x - #stream.next_event = stream.state(stream) + #stream.next_event = stream.state(version, stream) end end @@ -61,6 +62,7 @@ end function forward!(stream::EventStream) + version = YAMLV1_1() if stream.next_event === nothing if stream.state === nothing nothing @@ -68,7 +70,7 @@ function forward!(stream::EventStream) stream.state = nothing return stream.end_of_stream else - stream.next_event = stream.state(stream) + stream.next_event = stream.state(version, stream) end end @@ -78,11 +80,11 @@ function forward!(stream::EventStream) end -function process_directives(stream::EventStream) +function process_directives(version::YAMLVersion, stream::EventStream) stream.yaml_version = nothing stream.tag_handles = Dict{String, String}() - while peek(stream.input) isa DirectiveToken - token = forward!(stream.input) + while peek(version, stream.input) isa DirectiveToken + token = forward!(version, stream.input) if token.name == "YAML" if stream.yaml_version !== nothing throw(ParserError(nothing, nothing, @@ -95,6 +97,18 @@ function process_directives(stream::EventStream) "found incompatible YAML document (version 1.* is required)", firstmark(token))) end + # version = + if minor == 0 + 
@warn "directive YAML 1.0 found but currently, YAML version 1.1 and 1.2 are supported. Fall back to 1.2." + YAMLV1_2() + elseif minor == 1 + YAMLV1_1() + elseif minor == 2 + YAMLV1_2() + else + @warn "directive YAML 1.$minor found but currently, YAML version 1.1 and 1.2 are supported. Fall back to 1.2." + YAMLV1_2() + end stream.yaml_version = token.value elseif token.name == "TAG" handle, prefix = token.value @@ -124,8 +138,8 @@ end # Parser state functions -function parse_stream_start(stream::EventStream) - token = forward!(stream.input) :: StreamStartToken +function parse_stream_start(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) :: StreamStartToken event = StreamStartEvent(firstmark(token), lastmark(token), token.encoding) stream.state = parse_implicit_document_start @@ -133,12 +147,12 @@ function parse_stream_start(stream::EventStream) end -function parse_implicit_document_start(stream::EventStream) - token = peek(stream.input) +function parse_implicit_document_start(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) # Parse a byte order mark if token isa ByteOrderMarkToken - forward!(stream.input) - token = peek(stream.input) + forward!(version, stream.input) + token = peek(version, stream.input) end if !(token isa Union{DirectiveToken, DocumentStartToken, StreamEndToken}) stream.tag_handles = DEFAULT_TAGS @@ -150,41 +164,41 @@ function parse_implicit_document_start(stream::EventStream) event else - parse_document_start(stream) + parse_document_start(version, stream) end end -function parse_document_start(stream::EventStream) +function parse_document_start(version::YAMLVersion, stream::EventStream) # Parse any extra document end indicators. 
- while peek(stream.input) isa DocumentEndToken + while peek(version, stream.input) isa DocumentEndToken stream.input = Iterators.rest(stream.input) end - token = peek(stream.input) + token = peek(version, stream.input) # Parse a byte order mark if it exists if token isa ByteOrderMarkToken - forward!(stream.input) - token = peek(stream.input) + forward!(version, stream.input) + token = peek(version, stream.input) end # Parse explicit document. if !(token isa StreamEndToken) start_mark = firstmark(token) - version, tags = process_directives(stream) - if !(peek(stream.input) isa DocumentStartToken) + directive_version, tags = process_directives(version, stream) + if !(peek(version, stream.input) isa DocumentStartToken) throw(ParserError(nothing, nothing, "expected '<document start>' but found $(typeof(token))")) end - token = forward!(stream.input) + token = forward!(version, stream.input) event = DocumentStartEvent(start_mark, lastmark(token), - true, version, tags) + true, directive_version, tags) push!(stream.states, parse_document_end) stream.state = parse_document_content event else # Parse the end of the stream - token = forward!(stream.input) + token = forward!(version, stream.input) event = StreamEndEvent(firstmark(token), lastmark(token)) @assert isempty(stream.states) @assert isempty(stream.marks) @@ -194,12 +208,12 @@ function parse_document_start(stream::EventStream) end -function parse_document_end(stream::EventStream) - token = peek(stream.input) +function parse_document_end(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) start_mark = end_mark = firstmark(token) explicit = false if token isa DocumentEndToken - forward!(stream.input) + forward!(version, stream.input) end_mark = lastmark(token) explicit = true stream.end_of_stream = StreamEndEvent(firstmark(token), @@ -211,40 +225,40 @@ function parse_document_end(stream::EventStream) end -function parse_document_content(stream::EventStream) - if peek(stream.input) isa 
Union{DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken} - event = process_empty_scalar(stream, firstmark(peek(stream.input))) +function parse_document_content(version::YAMLVersion, stream::EventStream) + if peek(version, stream.input) isa Union{DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken} + event = process_empty_scalar(stream, firstmark(peek(version, stream.input))) stream.state = pop!(stream.states) event else - parse_block_node(stream) + parse_block_node(version, stream) end end -function parse_block_node(stream::EventStream) - parse_node(stream, true) +function parse_block_node(version::YAMLVersion, stream::EventStream) + parse_node(version, stream, true) end -function parse_flow_node(stream::EventStream) - parse_node(stream) +function parse_flow_node(version::YAMLVersion, stream::EventStream) + parse_node(version, stream) end -function parse_block_node_or_indentless_sequence(stream::EventStream) - parse_node(stream, true, true) +function parse_block_node_or_indentless_sequence(version::YAMLVersion, stream::EventStream) + parse_node(version, stream, true, true) end -function _parse_node(token::AliasToken, stream::EventStream, block, indentless_sequence) - forward!(stream.input) +function _parse_node(version::YAMLVersion, token::AliasToken, stream::EventStream, block, indentless_sequence) + forward!(version, stream.input) stream.state = pop!(stream.states) return AliasEvent(firstmark(token), lastmark(token), token.value) end -function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - forward!(stream.input) +function __parse_node(version::YAMLVersion, token::ScalarToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) + forward!(version, stream.input) end_mark = lastmark(token) if (token.plain && tag === nothing) || tag == "!" 
implicit = true, false @@ -258,21 +272,21 @@ function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark token.value, token.style) end -function __parse_node(token::FlowSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::FlowSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) end_mark = lastmark(token) stream.state = parse_flow_sequence_first_entry SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, true) end -function __parse_node(token::FlowMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::FlowMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) end_mark = lastmark(token) stream.state = parse_flow_mapping_first_key MappingStartEvent(start_mark, end_mark, anchor, tag, implicit, true) end -function __parse_node(token::BlockSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::BlockSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing end_mark = firstmark(token) stream.state = parse_block_sequence_first_entry @@ -280,7 +294,7 @@ function __parse_node(token::BlockSequenceStartToken, stream::EventStream, block implicit, false) end -function __parse_node(token::BlockMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::BlockMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing end_mark = firstmark(token) stream.state = parse_block_mapping_first_key @@ -288,7 +302,7 @@ function __parse_node(token::BlockMappingStartToken, stream::EventStream, block, implicit, false) end 
-function __parse_node(token, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) if anchor !== nothing || tag !== nothing stream.state = pop!(stream.states) return ScalarEvent(start_mark, end_mark, anchor, tag, @@ -301,30 +315,30 @@ function __parse_node(token, stream::EventStream, block, start_mark, end_mark, a end end -function _parse_node(token, stream::EventStream, block, indentless_sequence) +function _parse_node(version::YAMLVersion, token, stream::EventStream, block, indentless_sequence) anchor = nothing tag = nothing start_mark = end_mark = tag_mark = nothing if token isa AnchorToken - forward!(stream.input) + forward!(version, stream.input) start_mark = firstmark(token) end_mark = lastmark(token) anchor = token.value - token = peek(stream.input) + token = peek(version, stream.input) if token isa TagToken - forward!(stream.input) + forward!(version, stream.input) tag_mark = firstmark(token) end_mark = lastmark(token) tag = token.value end elseif token isa TagToken - forward!(stream.input) + forward!(version, stream.input) start_mark = firstmark(token) end_mark = lastmark(token) tag = token.value - token = peek(stream.input) + token = peek(version, stream.input) if token isa AnchorToken - forward!(stream.input) + forward!(version, stream.input) end_mark = lastmark(token) anchor = token.value end @@ -344,7 +358,7 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) end end - token = peek(stream.input) + token = peek(version, stream.input) if start_mark === nothing start_mark = end_mark = firstmark(token) end @@ -357,31 +371,31 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) event = SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, false) else - event = __parse_node(token, stream, block, start_mark, end_mark, anchor, tag, implicit) + event = 
__parse_node(version, token, stream, block, start_mark, end_mark, anchor, tag, implicit) end event end -function parse_node(stream::EventStream, block=false, indentless_sequence=false) - token = peek(stream.input) - _parse_node(token, stream, block, indentless_sequence) +function parse_node(version::YAMLVersion, stream::EventStream, block=false, indentless_sequence=false) + token = peek(version, stream.input) + _parse_node(version, token, stream, block, indentless_sequence) end -function parse_block_sequence_first_entry(stream::EventStream) - token = forward!(stream.input) +function parse_block_sequence_first_entry(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) push!(stream.marks, firstmark(token)) - parse_block_sequence_entry(stream) + parse_block_sequence_entry(version, stream) end -function parse_block_sequence_entry(stream::EventStream) - token = peek(stream.input) +function parse_block_sequence_entry(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa BlockEntryToken - forward!(stream.input) - if !(peek(stream.input) isa Union{BlockEntryToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{BlockEntryToken, BlockEndToken}) push!(stream.states, parse_block_sequence_entry) - return parse_block_node(stream) + return parse_block_node(version, stream) else stream.state = parse_block_sequence_entry return process_empty_scalar(stream, lastmark(token)) @@ -394,20 +408,20 @@ function parse_block_sequence_entry(stream::EventStream) firstmark(token))) end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) SequenceEndEvent(firstmark(token), lastmark(token)) end -function parse_indentless_sequence_entry(stream::EventStream) - token = peek(stream.input) +function parse_indentless_sequence_entry(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa 
BlockEntryToken - forward!(stream.input) - if !(peek(stream.input) isa Union{BlockEntryToken, KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{BlockEntryToken, KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_indentless_sequence_entry) - return parse_block_node(stream) + return parse_block_node(version, stream) else stream.state = parse_indentless_sequence_entry return process_empty_scalar(stream, lastmark(token)) @@ -419,20 +433,20 @@ function parse_indentless_sequence_entry(stream::EventStream) end -function parse_block_mapping_first_key(stream::EventStream) - token = forward!(stream.input) +function parse_block_mapping_first_key(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) push!(stream.marks, firstmark(token)) - parse_block_mapping_key(stream) + parse_block_mapping_key(version, stream) end -function parse_block_mapping_key(stream::EventStream) - token = peek(stream.input) +function parse_block_mapping_key(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa KeyToken - forward!(stream.input) - if !(peek(stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_block_mapping_value) - return parse_block_node_or_indentless_sequence(stream) + return parse_block_node_or_indentless_sequence(version, stream) else stream.state = parse_block_mapping_value return process_empty_scalar(stream, lastmark(token)) @@ -445,20 +459,20 @@ function parse_block_mapping_key(stream::EventStream) firstmark(token))) end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_block_mapping_value(stream::EventStream) - token = peek(stream.input) +function 
parse_block_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_block_mapping_key) - parse_block_node_or_indentless_sequence(stream) + parse_block_node_or_indentless_sequence(version, stream) else stream.state = parse_block_mapping_key process_empty_scalar(stream, lastmark(token)) @@ -470,23 +484,23 @@ function parse_block_mapping_value(stream::EventStream) end -function parse_flow_sequence_first_entry(stream::EventStream) - token = forward!(stream.input) +function parse_flow_sequence_first_entry(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) push!(stream.marks, firstmark(token)) - parse_flow_sequence_entry(stream, true) + parse_flow_sequence_entry(version, stream, true) end -function _parse_flow_sequence_entry(token::FlowSequenceEndToken, stream::EventStream, first_entry=false) - forward!(stream.input) +function _parse_flow_sequence_entry(version::YAMLVersion, token::FlowSequenceEndToken, stream::EventStream, first_entry=false) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) SequenceEndEvent(firstmark(token), lastmark(token)) end -function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry=false) +function _parse_flow_sequence_entry(version::YAMLVersion, token::Any, stream::EventStream, first_entry=false) if !first_entry if token isa FlowEntryToken - forward!(stream.input) + forward!(version, stream.input) else throw(ParserError("while parsing a flow sequence", stream.marks[end], @@ -495,7 +509,7 @@ function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry end end - token = peek(stream.input) + token = peek(version, stream.input) if 
isa(token, KeyToken) stream.state = parse_flow_sequence_entry_mapping_key MappingStartEvent(firstmark(token), lastmark(token), @@ -504,20 +518,20 @@ function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry nothing else push!(stream.states, parse_flow_sequence_entry) - parse_flow_node(stream) + parse_flow_node(version, stream) end end -function parse_flow_sequence_entry(stream::EventStream, first_entry=false) - token = peek(stream.input) - _parse_flow_sequence_entry(token::Token, stream::EventStream, first_entry) +function parse_flow_sequence_entry(version::YAMLVersion, stream::EventStream, first_entry=false) + token = peek(version, stream.input) + _parse_flow_sequence_entry(version, token::Token, stream::EventStream, first_entry) end -function parse_flow_sequence_entry_mapping_key(stream::EventStream) - token = forward!(stream.input) +function parse_flow_sequence_entry_mapping_key(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) if !(token isa Union{ValueToken, FlowEntryToken, FlowSequenceEndToken}) push!(stream.states, parse_flow_sequence_entry_mapping_value) - parse_flow_node(stream) + parse_flow_node(version, stream) else stream.state = parse_flow_sequence_entry_mapping_value process_empty_scalar(stream, lastmark(token)) @@ -525,13 +539,13 @@ function parse_flow_sequence_entry_mapping_key(stream::EventStream) end -function parse_flow_sequence_entry_mapping_value(stream::EventStream) - token = peek(stream.input) +function parse_flow_sequence_entry_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{FlowEntryToken, FlowSequenceEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{FlowEntryToken, FlowSequenceEndToken}) push!(stream.states, parse_flow_sequence_entry_mapping_end) - parse_flow_node(stream) + parse_flow_node(version, stream) else 
stream.state = parse_flow_sequence_entry_mapping_end process_empty_scalar(stream, lastmark(token)) @@ -543,26 +557,26 @@ function parse_flow_sequence_entry_mapping_value(stream::EventStream) end -function parse_flow_sequence_entry_mapping_end(stream::EventStream) +function parse_flow_sequence_entry_mapping_end(version::YAMLVersion, stream::EventStream) stream.state = parse_flow_sequence_entry - token = peek(stream.input) + token = peek(version, stream.input) MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_flow_mapping_first_key(stream::EventStream) - token = forward!(stream.input) +function parse_flow_mapping_first_key(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) push!(stream.marks, firstmark(token)) - parse_flow_mapping_key(stream, true) + parse_flow_mapping_key(version, stream, true) end -function parse_flow_mapping_key(stream::EventStream, first_entry=false) - token = peek(stream.input) +function parse_flow_mapping_key(version::YAMLVersion, stream::EventStream, first_entry=false) + token = peek(version, stream.input) if !(token isa FlowMappingEndToken) if !first_entry if token isa FlowEntryToken - forward!(stream.input) + forward!(version, stream.input) else throw(ParserError("while parsing a flow mapping", stream.marks[end], @@ -571,36 +585,36 @@ function parse_flow_mapping_key(stream::EventStream, first_entry=false) end end - token = peek(stream.input) + token = peek(version, stream.input) if token isa KeyToken - forward!(stream.input) - if !(peek(stream.input) isa Union{ValueToken, FlowEntryToken, FlowMappingEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{ValueToken, FlowEntryToken, FlowMappingEndToken}) push!(stream.states, parse_flow_mapping_value) - return parse_flow_node(stream) + return parse_flow_node(version, stream) else stream.state = parse_flow_mapping_value return process_empty_scalar(stream, lastmark(token)) end elseif !(token isa 
FlowMappingEndToken) push!(stream.states, parse_flow_mapping_empty_value) - return parse_flow_node(stream) + return parse_flow_node(version, stream) end end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_flow_mapping_value(stream::EventStream) - token = peek(stream.input) +function parse_flow_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{FlowEntryToken, FlowMappingEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{FlowEntryToken, FlowMappingEndToken}) push!(stream.states, parse_flow_mapping_key) - parse_flow_node(stream) + parse_flow_node(version, stream) else stream.state = parse_flow_mapping_key process_empty_scalar(stream, lastmark(token)) @@ -612,9 +626,9 @@ function parse_flow_mapping_value(stream::EventStream) end -function parse_flow_mapping_empty_value(stream::EventStream) +function parse_flow_mapping_empty_value(version::YAMLVersion, stream::EventStream) stream.state = parse_flow_mapping_key - process_empty_scalar(stream, firstmark(peek(stream.input))) + process_empty_scalar(stream, firstmark(peek(version, stream.input))) end diff --git a/src/scanner.jl b/src/scanner.jl index f52e9e1..ea57a95 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -274,8 +274,7 @@ function need_more_tokens(stream::TokenStream) end -function peek(stream::TokenStream) - version = YAMLV1_1() +function peek(version::YAMLVersion, stream::TokenStream) while need_more_tokens(stream) fetch_more_tokens(version, stream) end @@ -288,8 +287,7 @@ function peek(stream::TokenStream) end -function forward!(stream::TokenStream) - version = YAMLV1_1() +function forward!(version::YAMLVersion, stream::TokenStream) while need_more_tokens(stream) fetch_more_tokens(version, stream) end