diff --git a/src/YAML.jl b/src/YAML.jl index 31a0274..8b12368 100644 --- a/src/YAML.jl +++ b/src/YAML.jl @@ -30,6 +30,8 @@ using StringEncodings include("versions.jl") include("queue.jl") include("buffered_input.jl") +include("mark.jl") +include("span.jl") include("tokens.jl") include("scanner.jl") include("events.jl") diff --git a/src/buffered_input.jl b/src/buffered_input.jl index dfa7b5a..ef01fd2 100644 --- a/src/buffered_input.jl +++ b/src/buffered_input.jl @@ -15,29 +15,26 @@ mutable struct BufferedInput end end - -# Read and buffer n more characters -function __fill(bi::BufferedInput, bi_input::IO, n::Integer) - for _ in 1:n - c = eof(bi_input) ? '\0' : read(bi_input, Char) - i = bi.offset + bi.avail + 1 +# Read and buffer `n` more characters +function buffer!(bi::BufferedInput, n::Integer)::Nothing + for i in (bi.offset + bi.avail) .+ (1:n) + c = eof(bi.input) ? '\0' : read(bi.input, Char) if i ≤ length(bi.buffer) bi.buffer[i] = c else push!(bi.buffer, c) end - bi.avail += 1 end + bi.avail += n + nothing end -_fill(bi::BufferedInput, n::Integer) = __fill(bi, bi.input, n) - # Peek the character in the i-th position relative to the current position. # (0-based) function peek(bi::BufferedInput, i::Integer=0) i1 = i + 1 if bi.avail < i1 - _fill(bi, i1 - bi.avail) + buffer!(bi, i1 - bi.avail) end bi.buffer[bi.offset + i1] end @@ -45,11 +42,8 @@ end # Return the string formed from the first n characters from the current position # of the stream. 
-function prefix(bi::BufferedInput, n::Integer=1) - n1 = n + 1 - if bi.avail < n1 - _fill(bi, n1 - bi.avail) - end +function prefix(bi::BufferedInput, n::Integer=1)::String + bi.avail < n && buffer!(bi, n - bi.avail) String(bi.buffer[bi.offset .+ (1:n)]) end @@ -70,6 +64,7 @@ function forward!(bi::BufferedInput, n::Integer=1) n -= 1 end end + nothing end # Ugly hack to allow peeking of `StringDecoder`s diff --git a/src/mark.jl b/src/mark.jl new file mode 100644 index 0000000..d715a74 --- /dev/null +++ b/src/mark.jl @@ -0,0 +1,10 @@ +# Position within the document being parsed +struct Mark + index::UInt64 + line::UInt64 + column::UInt64 +end + +function show(io::IO, mark::Mark) + @printf(io, "line %d, column %d", mark.line, mark.column) +end diff --git a/src/parser.jl b/src/parser.jl index 8274098..2ebf45f 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -42,6 +42,7 @@ end function peek(stream::EventStream) + version = YAMLV1_1() if stream.next_event === nothing if stream.state === nothing return nothing @@ -49,10 +50,10 @@ function peek(stream::EventStream) stream.state = nothing return stream.end_of_stream else - x = stream.state(stream) + x = stream.state(version, stream) #@show x stream.next_event = x - #stream.next_event = stream.state(stream) + #stream.next_event = stream.state(version, stream) end end @@ -61,6 +62,7 @@ end function forward!(stream::EventStream) + version = YAMLV1_1() if stream.next_event === nothing if stream.state === nothing nothing @@ -68,7 +70,7 @@ function forward!(stream::EventStream) stream.state = nothing return stream.end_of_stream else - stream.next_event = stream.state(stream) + stream.next_event = stream.state(version, stream) end end @@ -78,29 +80,41 @@ function forward!(stream::EventStream) end -function process_directives(stream::EventStream) +function process_directives(version::YAMLVersion, stream::EventStream) stream.yaml_version = nothing stream.tag_handles = Dict{String, String}() - while peek(stream.input) isa 
DirectiveToken - token = forward!(stream.input) + while peek(version, stream.input) isa DirectiveToken + token = forward!(version, stream.input) if token.name == "YAML" if stream.yaml_version !== nothing throw(ParserError(nothing, nothing, "found duplicate YAML directive", - token.start_mark)) + firstmark(token))) end major, minor = token.value if major != 1 throw(ParserError(nothing, nothing, "found incompatible YAML document (version 1.* is required)", - token.start_mark)) + firstmark(token))) + end + # version = + if minor == 0 + @warn "directive YAML 1.0 found but currently, YAML version 1.1 and 1.2 are supported. Fall back to 1.2." + YAMLV1_2() + elseif minor == 1 + YAMLV1_1() + elseif minor == 2 + YAMLV1_2() + else + @warn "directive YAML 1.$minor found but currently, YAML version 1.1 and 1.2 are supported. Fall back to 1.2." + YAMLV1_2() end stream.yaml_version = token.value elseif token.name == "TAG" handle, prefix = token.value if haskey(stream.tag_handles, handle) throw(ParserError(nothing, nothing, - "duplicate tag handle $(handle)", token.start_mark)) + "duplicate tag handle $(handle)", firstmark(token))) end stream.tag_handles[handle] = prefix end @@ -124,25 +138,25 @@ end # Parser state functions -function parse_stream_start(stream::EventStream) - token = forward!(stream.input) :: StreamStartToken - event = StreamStartEvent(token.span.start_mark, token.span.end_mark, +function parse_stream_start(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) :: StreamStartToken + event = StreamStartEvent(firstmark(token), lastmark(token), token.encoding) stream.state = parse_implicit_document_start event end -function parse_implicit_document_start(stream::EventStream) - token = peek(stream.input) +function parse_implicit_document_start(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) # Parse a byte order mark if token isa ByteOrderMarkToken - forward!(stream.input) - token = peek(stream.input) + 
forward!(version, stream.input) + token = peek(version, stream.input) end if !(token isa Union{DirectiveToken, DocumentStartToken, StreamEndToken}) stream.tag_handles = DEFAULT_TAGS - event = DocumentStartEvent(token.span.start_mark, token.span.start_mark, + event = DocumentStartEvent(firstmark(token), firstmark(token), false) push!(stream.states, parse_document_end) @@ -150,42 +164,42 @@ function parse_implicit_document_start(stream::EventStream) event else - parse_document_start(stream) + parse_document_start(version, stream) end end -function parse_document_start(stream::EventStream) +function parse_document_start(version::YAMLVersion, stream::EventStream) # Parse any extra document end indicators. - while peek(stream.input) isa DocumentEndToken + while peek(version, stream.input) isa DocumentEndToken stream.input = Iterators.rest(stream.input) end - token = peek(stream.input) + token = peek(version, stream.input) # Parse a byte order mark if it exists if token isa ByteOrderMarkToken - forward!(stream.input) - token = peek(stream.input) + forward!(version, stream.input) + token = peek(version, stream.input) end # Parse explicit document. 
if !(token isa StreamEndToken) - start_mark = token.span.start_mark - version, tags = process_directives(stream) - if !(peek(stream.input) isa DocumentStartToken) + start_mark = firstmark(token) + directive_version, tags = process_directives(version, stream) + if !(peek(version, stream.input) isa DocumentStartToken) throw(ParserError(nothing, nothing, "expected '' but found $(typeof(token))")) end - token = forward!(stream.input) - event = DocumentStartEvent(start_mark, token.span.end_mark, - true, version, tags) + token = forward!(version, stream.input) + event = DocumentStartEvent(start_mark, lastmark(token), + true, directive_version, tags) push!(stream.states, parse_document_end) stream.state = parse_document_content event else # Parse the end of the stream - token = forward!(stream.input) - event = StreamEndEvent(token.span.start_mark, token.span.end_mark) + token = forward!(version, stream.input) + event = StreamEndEvent(firstmark(token), lastmark(token)) @assert isempty(stream.states) @assert isempty(stream.marks) stream.state = nothing @@ -194,16 +208,16 @@ function parse_document_start(stream::EventStream) end -function parse_document_end(stream::EventStream) - token = peek(stream.input) - start_mark = end_mark = token.span.start_mark +function parse_document_end(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) + start_mark = end_mark = firstmark(token) explicit = false if token isa DocumentEndToken - forward!(stream.input) - end_mark = token.span.end_mark + forward!(version, stream.input) + end_mark = lastmark(token) explicit = true - stream.end_of_stream = StreamEndEvent(token.span.start_mark, - token.span.end_mark) + stream.end_of_stream = StreamEndEvent(firstmark(token), + lastmark(token)) end event = DocumentEndEvent(start_mark, end_mark, explicit) stream.state = parse_document_start @@ -211,41 +225,41 @@ function parse_document_end(stream::EventStream) end -function parse_document_content(stream::EventStream) - if 
peek(stream.input) isa Union{DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken} - event = process_empty_scalar(stream, peek(stream.input).span.start_mark) +function parse_document_content(version::YAMLVersion, stream::EventStream) + if peek(version, stream.input) isa Union{DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken} + event = process_empty_scalar(stream, firstmark(peek(version, stream.input))) stream.state = pop!(stream.states) event else - parse_block_node(stream) + parse_block_node(version, stream) end end -function parse_block_node(stream::EventStream) - parse_node(stream, true) +function parse_block_node(version::YAMLVersion, stream::EventStream) + parse_node(version, stream, true) end -function parse_flow_node(stream::EventStream) - parse_node(stream) +function parse_flow_node(version::YAMLVersion, stream::EventStream) + parse_node(version, stream) end -function parse_block_node_or_indentless_sequence(stream::EventStream) - parse_node(stream, true, true) +function parse_block_node_or_indentless_sequence(version::YAMLVersion, stream::EventStream) + parse_node(version, stream, true, true) end -function _parse_node(token::AliasToken, stream::EventStream, block, indentless_sequence) - forward!(stream.input) +function _parse_node(version::YAMLVersion, token::AliasToken, stream::EventStream, block, indentless_sequence) + forward!(version, stream.input) stream.state = pop!(stream.states) - return AliasEvent(token.span.start_mark, token.span.end_mark, token.value) + return AliasEvent(firstmark(token), lastmark(token), token.value) end -function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - forward!(stream.input) - end_mark = token.span.end_mark +function __parse_node(version::YAMLVersion, token::ScalarToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) + forward!(version, stream.input) + end_mark = lastmark(token) if (token.plain && 
tag === nothing) || tag == "!" implicit = true, false elseif tag === nothing @@ -258,37 +272,37 @@ function __parse_node(token::ScalarToken, stream::EventStream, block, start_mark token.value, token.style) end -function __parse_node(token::FlowSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - end_mark = token.span.end_mark +function __parse_node(version::YAMLVersion, token::FlowSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) + end_mark = lastmark(token) stream.state = parse_flow_sequence_first_entry SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, true) end -function __parse_node(token::FlowMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) - end_mark = token.span.end_mark +function __parse_node(version::YAMLVersion, token::FlowMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) + end_mark = lastmark(token) stream.state = parse_flow_mapping_first_key MappingStartEvent(start_mark, end_mark, anchor, tag, implicit, true) end -function __parse_node(token::BlockSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::BlockSequenceStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing - end_mark = token.span.start_mark + end_mark = firstmark(token) stream.state = parse_block_sequence_first_entry SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, false) end -function __parse_node(token::BlockMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token::BlockMappingStartToken, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) block || return nothing - end_mark = token.span.start_mark + end_mark = firstmark(token) stream.state = 
parse_block_mapping_first_key MappingStartEvent(start_mark, end_mark, anchor, tag, implicit, false) end -function __parse_node(token, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) +function __parse_node(version::YAMLVersion, token, stream::EventStream, block, start_mark, end_mark, anchor, tag, implicit) if anchor !== nothing || tag !== nothing stream.state = pop!(stream.states) return ScalarEvent(start_mark, end_mark, anchor, tag, @@ -297,35 +311,35 @@ function __parse_node(token, stream::EventStream, block, start_mark, end_mark, a node = block ? "block" : "flow" throw(ParserError("while parsing a $(node) node", start_mark, "expected the node content, but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end -function _parse_node(token, stream::EventStream, block, indentless_sequence) +function _parse_node(version::YAMLVersion, token, stream::EventStream, block, indentless_sequence) anchor = nothing tag = nothing start_mark = end_mark = tag_mark = nothing if token isa AnchorToken - forward!(stream.input) - start_mark = token.span.start_mark - end_mark = token.span.end_mark + forward!(version, stream.input) + start_mark = firstmark(token) + end_mark = lastmark(token) anchor = token.value - token = peek(stream.input) + token = peek(version, stream.input) if token isa TagToken - forward!(stream.input) - tag_mark = token.span.start_mark - end_mark = token.span.end_mark + forward!(version, stream.input) + tag_mark = firstmark(token) + end_mark = lastmark(token) tag = token.value end elseif token isa TagToken - forward!(stream.input) - start_mark = token.span.start_mark - end_mark = token.span.end_mark + forward!(version, stream.input) + start_mark = firstmark(token) + end_mark = lastmark(token) tag = token.value - token = peek(stream.input) + token = peek(version, stream.input) if token isa AnchorToken - forward!(stream.input) - end_mark = token.end_mark + forward!(version, stream.input) + end_mark = lastmark(token) 
anchor = token.value end end @@ -344,277 +358,277 @@ function _parse_node(token, stream::EventStream, block, indentless_sequence) end end - token = peek(stream.input) + token = peek(version, stream.input) if start_mark === nothing - start_mark = end_mark = token.span.start_mark + start_mark = end_mark = firstmark(token) end event = nothing implicit = tag === nothing || tag == "!" if indentless_sequence && token isa BlockEntryToken - end_mark = token.span.end_mark + end_mark = lastmark(token) stream.state = parse_indentless_sequence_entry event = SequenceStartEvent(start_mark, end_mark, anchor, tag, implicit, false) else - event = __parse_node(token, stream, block, start_mark, end_mark, anchor, tag, implicit) + event = __parse_node(version, token, stream, block, start_mark, end_mark, anchor, tag, implicit) end event end -function parse_node(stream::EventStream, block=false, indentless_sequence=false) - token = peek(stream.input) - _parse_node(token, stream, block, indentless_sequence) +function parse_node(version::YAMLVersion, stream::EventStream, block=false, indentless_sequence=false) + token = peek(version, stream.input) + _parse_node(version, token, stream, block, indentless_sequence) end -function parse_block_sequence_first_entry(stream::EventStream) - token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) - parse_block_sequence_entry(stream) +function parse_block_sequence_first_entry(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) + push!(stream.marks, firstmark(token)) + parse_block_sequence_entry(version, stream) end -function parse_block_sequence_entry(stream::EventStream) - token = peek(stream.input) +function parse_block_sequence_entry(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa BlockEntryToken - forward!(stream.input) - if !(peek(stream.input) isa Union{BlockEntryToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, 
stream.input) isa Union{BlockEntryToken, BlockEndToken}) push!(stream.states, parse_block_sequence_entry) - return parse_block_node(stream) + return parse_block_node(version, stream) else stream.state = parse_block_sequence_entry - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end if !(token isa BlockEndToken) throw(ParserError("while parsing a block collection", stream.marks[end], "expected , but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end -function parse_indentless_sequence_entry(stream::EventStream) - token = peek(stream.input) +function parse_indentless_sequence_entry(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa BlockEntryToken - forward!(stream.input) - if !(peek(stream.input) isa Union{BlockEntryToken, KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{BlockEntryToken, KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_indentless_sequence_entry) - return parse_block_node(stream) + return parse_block_node(version, stream) else stream.state = parse_indentless_sequence_entry - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end -function parse_block_mapping_first_key(stream::EventStream) - token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) - parse_block_mapping_key(stream) +function parse_block_mapping_first_key(version::YAMLVersion, stream::EventStream) + token 
= forward!(version, stream.input) + push!(stream.marks, firstmark(token)) + parse_block_mapping_key(version, stream) end -function parse_block_mapping_key(stream::EventStream) - token = peek(stream.input) +function parse_block_mapping_key(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa KeyToken - forward!(stream.input) - if !(peek(stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_block_mapping_value) - return parse_block_node_or_indentless_sequence(stream) + return parse_block_node_or_indentless_sequence(version, stream) else stream.state = parse_block_mapping_value - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end end if !(token isa BlockEndToken) throw(ParserError("while parsing a block mapping", stream.marks[end], "expected , but found $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_block_mapping_value(stream::EventStream) - token = peek(stream.input) +function parse_block_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{KeyToken, ValueToken, BlockEndToken}) push!(stream.states, parse_block_mapping_key) - parse_block_node_or_indentless_sequence(stream) + parse_block_node_or_indentless_sequence(version, stream) else stream.state = parse_block_mapping_key - process_empty_scalar(stream, 
token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_block_mapping_key - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end -function parse_flow_sequence_first_entry(stream::EventStream) - token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) - parse_flow_sequence_entry(stream, true) +function parse_flow_sequence_first_entry(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) + push!(stream.marks, firstmark(token)) + parse_flow_sequence_entry(version, stream, true) end -function _parse_flow_sequence_entry(token::FlowSequenceEndToken, stream::EventStream, first_entry=false) - forward!(stream.input) +function _parse_flow_sequence_entry(version::YAMLVersion, token::FlowSequenceEndToken, stream::EventStream, first_entry=false) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - SequenceEndEvent(token.span.start_mark, token.span.end_mark) + SequenceEndEvent(firstmark(token), lastmark(token)) end -function _parse_flow_sequence_entry(token::Any, stream::EventStream, first_entry=false) +function _parse_flow_sequence_entry(version::YAMLVersion, token::Any, stream::EventStream, first_entry=false) if !first_entry if token isa FlowEntryToken - forward!(stream.input) + forward!(version, stream.input) else throw(ParserError("while parsing a flow sequence", stream.marks[end], "expected ',' or ']', but got $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end - token = peek(stream.input) + token = peek(version, stream.input) if isa(token, KeyToken) stream.state = parse_flow_sequence_entry_mapping_key - MappingStartEvent(token.span.start_mark, token.span.end_mark, + MappingStartEvent(firstmark(token), lastmark(token), nothing, nothing, true, true) elseif isa(token, FlowSequenceEndToken) nothing else push!(stream.states, parse_flow_sequence_entry) - 
parse_flow_node(stream) + parse_flow_node(version, stream) end end -function parse_flow_sequence_entry(stream::EventStream, first_entry=false) - token = peek(stream.input) - _parse_flow_sequence_entry(token::Token, stream::EventStream, first_entry) +function parse_flow_sequence_entry(version::YAMLVersion, stream::EventStream, first_entry=false) + token = peek(version, stream.input) + _parse_flow_sequence_entry(version, token::Token, stream::EventStream, first_entry) end -function parse_flow_sequence_entry_mapping_key(stream::EventStream) - token = forward!(stream.input) +function parse_flow_sequence_entry_mapping_key(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) if !(token isa Union{ValueToken, FlowEntryToken, FlowSequenceEndToken}) push!(stream.states, parse_flow_sequence_entry_mapping_value) - parse_flow_node(stream) + parse_flow_node(version, stream) else stream.state = parse_flow_sequence_entry_mapping_value - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end end -function parse_flow_sequence_entry_mapping_value(stream::EventStream) - token = peek(stream.input) +function parse_flow_sequence_entry_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{FlowEntryToken, FlowSequenceEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{FlowEntryToken, FlowSequenceEndToken}) push!(stream.states, parse_flow_sequence_entry_mapping_end) - parse_flow_node(stream) + parse_flow_node(version, stream) else stream.state = parse_flow_sequence_entry_mapping_end - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_flow_sequence_entry_mapping_end - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end 
-function parse_flow_sequence_entry_mapping_end(stream::EventStream) +function parse_flow_sequence_entry_mapping_end(version::YAMLVersion, stream::EventStream) stream.state = parse_flow_sequence_entry - token = peek(stream.input) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + token = peek(version, stream.input) + MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_flow_mapping_first_key(stream::EventStream) - token = forward!(stream.input) - push!(stream.marks, token.span.start_mark) - parse_flow_mapping_key(stream, true) +function parse_flow_mapping_first_key(version::YAMLVersion, stream::EventStream) + token = forward!(version, stream.input) + push!(stream.marks, firstmark(token)) + parse_flow_mapping_key(version, stream, true) end -function parse_flow_mapping_key(stream::EventStream, first_entry=false) - token = peek(stream.input) +function parse_flow_mapping_key(version::YAMLVersion, stream::EventStream, first_entry=false) + token = peek(version, stream.input) if !(token isa FlowMappingEndToken) if !first_entry if token isa FlowEntryToken - forward!(stream.input) + forward!(version, stream.input) else throw(ParserError("while parsing a flow mapping", stream.marks[end], "expected ',' or '}', but got $(typeof(token))", - token.span.start_mark)) + firstmark(token))) end end - token = peek(stream.input) + token = peek(version, stream.input) if token isa KeyToken - forward!(stream.input) - if !(peek(stream.input) isa Union{ValueToken, FlowEntryToken, FlowMappingEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{ValueToken, FlowEntryToken, FlowMappingEndToken}) push!(stream.states, parse_flow_mapping_value) - return parse_flow_node(stream) + return parse_flow_node(version, stream) else stream.state = parse_flow_mapping_value - return process_empty_scalar(stream, token.span.end_mark) + return process_empty_scalar(stream, lastmark(token)) end elseif !(token isa FlowMappingEndToken) 
push!(stream.states, parse_flow_mapping_empty_value) - return parse_flow_node(stream) + return parse_flow_node(version, stream) end end - forward!(stream.input) + forward!(version, stream.input) pop!(stream.marks) stream.state = pop!(stream.states) - MappingEndEvent(token.span.start_mark, token.span.end_mark) + MappingEndEvent(firstmark(token), lastmark(token)) end -function parse_flow_mapping_value(stream::EventStream) - token = peek(stream.input) +function parse_flow_mapping_value(version::YAMLVersion, stream::EventStream) + token = peek(version, stream.input) if token isa ValueToken - forward!(stream.input) - if !(peek(stream.input) isa Union{FlowEntryToken, FlowMappingEndToken}) + forward!(version, stream.input) + if !(peek(version, stream.input) isa Union{FlowEntryToken, FlowMappingEndToken}) push!(stream.states, parse_flow_mapping_key) - parse_flow_node(stream) + parse_flow_node(version, stream) else stream.state = parse_flow_mapping_key - process_empty_scalar(stream, token.span.end_mark) + process_empty_scalar(stream, lastmark(token)) end else stream.state = parse_flow_mapping_key - process_empty_scalar(stream, token.span.start_mark) + process_empty_scalar(stream, firstmark(token)) end end -function parse_flow_mapping_empty_value(stream::EventStream) +function parse_flow_mapping_empty_value(version::YAMLVersion, stream::EventStream) stream.state = parse_flow_mapping_key - process_empty_scalar(stream, peek(stream.input).span.start_mark) + process_empty_scalar(stream, firstmark(peek(version, stream.input))) end diff --git a/src/scanner.jl b/src/scanner.jl index d999689..ea57a95 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1,5 +1,41 @@ +# YAML 1.1 [22] b-line-feed ::= #xA /*LF*/ +# YAML 1.2 [24] b-line-feed ::= x0A +const b_line_feed = '\n' + +# YAML 1.1 [23] b-carriage-return ::= #xD /*CR*/ +# YAML 1.2 [25] b-carriage-return ::= x0D +const b_carriage_return = '\r' + +# YAML 1.1 [24] b-next-line ::= #x85 /*NEL*/ +# YAML 1.2 doesn't have this.
+const yaml_1_1_b_next_line = '\u85' + +# YAML 1.1 [25] b-line-separator ::= #x2028 /*LS*/ +# YAML 1.2 doesn't have this. +const yaml_1_1_b_line_separator = '\u2028' + +# YAML 1.1 [26] b-paragraph-separator ::= #x2029 /*PS*/ +# YAML 1.2 doesn't have this. +const yaml_1_1_b_paragraph_separator = '\u2029' + # YAML 1.1 [27] b-char ::= b-line-feed | b-carriage-return | b-next-line | b-line-separator | b-paragraph-separator -is_b_char(::YAMLV1_1, c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == '\u2028' || c == '\u2029' +is_b_char(::YAMLV1_1, c::Char) = + c == b_line_feed || + c == b_carriage_return || + c == yaml_1_1_b_next_line || + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator + +# YAML 1.2 [26] b-char ::= b-line-feed | b-carriage-return # x0A x0D +is_b_char(::YAMLV1_2, c::Char) = + c == b_line_feed || + c == b_carriage_return + +# YAML 1.1 [28] b-specific ::= b-line-separator | b-paragraph-separator +# YAML 1.2 doesn't have this. +is_b_specific(::YAMLV1_1, c::Char) = + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator # YAML 1.2 [31] s-space ::= x20 const yaml_1_2_s_space = ' ' @@ -13,6 +49,8 @@ is_s_white(c::Char) = c == yaml_1_2_s_space || c == yaml_1_2_s_tab # YAML 1.2 [37] ns-ascii-letter ::= [x41-x5A] | [x61-x7A] # A-Z a-z is_ns_ascii_letter(c::Char) = 'A' ≤ c ≤ 'Z' || 'a' ≤ c ≤ 'z' +is_whitespace(::YAMLV1_1, c::Char) = c == '\0' || c == ' ' || c == '\t' || is_b_char(YAMLV1_1(), c) + struct SimpleKey token_number::UInt64 required::Bool @@ -144,45 +182,101 @@ function reset!(stream::TokenStream) end -function get_mark(stream::TokenStream) - Mark(stream.index, stream.line, stream.column) +Mark(stream::TokenStream) = Mark(stream.index, stream.line, stream.column) + +# ------------- +# forwardchars! +# ------------- + +# Advance the stream and the index by one character.
+function forwardchar_skip!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + nothing +end + +# Advance the stream, the index, and the column by one character. +function forwardchar_nobreak!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column += 1 + nothing +end + +# Advance the stream and the index by one character and start a new line. +function forwardchar_breakline!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column = 0 + stream.line += 1 + nothing end +# forwardchars!(::YAMLVersion, ::TokenStream, ::Integer=1) +# Advance the stream by `n` characters. -# Advance the stream by k characters. -function forwardchars!(stream::TokenStream, k::Integer=1) - for _ in 1:k +# forwardchars!(::YAMLV1_1, ::TokenStream, ::Integer=1) +# YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed | b-next-line +# YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific +function forwardchars!(version::YAMLV1_1, stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n + # check whether the stream head is `b-ignored-any` c = peek(stream.input) - forward!(stream.input) - stream.index += 1 - if in(c, "\n\u0085\u2028\u2029") || - (c == '\r' && peek(stream.input) == '\n') - stream.column = 0 - stream.line += 1 + # check whether the stream head is `b-carriage-return` + if c == b_carriage_return + # `b-carriage-return` or `b-carriage-return b-line-feed` + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + forwardchar_skip!(stream) + i += 1 + end + # check whether the stream head is `b-ignored-any - b-carriage-return - ( b-carriage-return b-line-feed )` + elseif c == b_line_feed || c == yaml_1_1_b_next_line || is_b_specific(version, c) + forwardchar_breakline!(stream) + i += 1 + # the stream head is not `b-ignored-any` else - stream.column += 1 + forwardchar_nobreak!(stream) + i += 1 end end - stream.index += k - nothing end - -function
need_more_tokens(stream::TokenStream) - if stream.done - return false - elseif isempty(stream.token_queue) - return true +# forwardchars!(::YAMLV1_2, ::TokenStream, ::Integer=1) +# YAML 1.2 [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed +function forwardchars!(::YAMLV1_2, stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n + c = peek(stream.input) + if c == b_carriage_return + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + forwardchar_skip!(stream); i += 1 # consume the LF of a CRLF pair; the CR already counted the line break + end + elseif c == b_line_feed + forwardchar_breakline!(stream) + i += 1 + else + forwardchar_nobreak!(stream) + i += 1 + end end +end +function need_more_tokens(stream::TokenStream) + stream.done && return false + isempty(stream.token_queue) && return true stale_possible_simple_keys(stream) next_possible_simple_key(stream) == stream.tokens_taken end -function peek(stream::TokenStream) +function peek(version::YAMLVersion, stream::TokenStream) while need_more_tokens(stream) - fetch_more_tokens(stream) + fetch_more_tokens(version, stream) end if !isempty(stream.token_queue) @@ -193,9 +287,9 @@ function peek(stream::TokenStream) end -function forward!(stream::TokenStream) +function forward!(version::YAMLVersion, stream::TokenStream) while need_more_tokens(stream) - fetch_more_tokens(stream) + fetch_more_tokens(version, stream) end if !isempty(stream.token_queue) @@ -208,9 +302,9 @@ end # Read one or more tokens from the input stream. -function fetch_more_tokens(stream::TokenStream) +function fetch_more_tokens(version::YAMLVersion, stream::TokenStream) # Eat whitespace. - scan_to_next_token(stream::TokenStream) + scan_to_next_token(version, stream) # Remove obsolete possible simple keys.
stale_possible_simple_keys(stream) @@ -223,46 +317,46 @@ function fetch_more_tokens(stream::TokenStream) if c == '\0' || c === nothing fetch_stream_end(stream) elseif c == '%' && check_directive(stream) - fetch_directive(stream) - elseif c == '-' && check_document_start(stream) - fetch_document_start(stream) - elseif c == '.' && check_document_end(stream) - fetch_document_end(stream) + fetch_directive(version, stream) + elseif c == '-' && check_document_start(version, stream) + fetch_document_start(version, stream) + elseif c == '.' && check_document_end(version, stream) + fetch_document_end(version, stream) stream.done = true elseif c == '[' - fetch_flow_sequence_start(stream) + fetch_flow_sequence_start(version, stream) elseif c == '{' - fetch_flow_mapping_start(stream) + fetch_flow_mapping_start(version, stream) elseif c == ']' - fetch_flow_sequence_end(stream) + fetch_flow_sequence_end(version, stream) elseif c == '}' - fetch_flow_mapping_end(stream) + fetch_flow_mapping_end(version, stream) elseif c == ',' - fetch_flow_entry(stream) - elseif c == '-' && check_block_entry(stream) - fetch_block_entry(stream) - elseif c == '?' && check_key(stream) - fetch_key(stream) - elseif c == ':' && check_value(stream) - fetch_value(stream) + fetch_flow_entry(version, stream) + elseif c == '-' && check_block_entry(version, stream) + fetch_block_entry(version, stream) + elseif c == '?' && check_key(version, stream) + fetch_key(version, stream) + elseif c == ':' && check_value(version, stream) + fetch_value(version, stream) elseif c == '*' - fetch_alias(stream) + fetch_alias(version, stream) elseif c == '&' - fetch_anchor(stream) + fetch_anchor(version, stream) elseif c == '!' 
- fetch_tag(stream) + fetch_tag(version, stream) elseif c == '|' && stream.flow_level == 0 - fetch_literal(stream) + fetch_literal(version, stream) elseif c == '>' && stream.flow_level == 0 - fetch_folded(stream) + fetch_folded(version, stream) elseif c == '\'' - fetch_single(stream) + fetch_single(version, stream) elseif c == '\"' - fetch_double(stream) + fetch_double(version, stream) elseif c == '\uFEFF' fetch_byte_order_mark(stream) - elseif check_plain(stream) - fetch_plain(stream) + elseif check_plain(version, stream) + fetch_plain(version, stream) else # TODO: Throw a meaningful exception. throw(c) @@ -297,7 +391,7 @@ function stale_possible_simple_keys(stream::TokenStream) if key.mark.line != stream.line || stream.index - key.mark.index > 1024 if key.required throw(ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", get_mark(stream))) + "could not find expected ':'", Mark(stream))) end delete!(stream.possible_simple_keys, level) end @@ -313,7 +407,7 @@ function save_possible_simple_key(stream::TokenStream) if stream.allow_simple_key remove_possible_simple_key(stream) token_number = stream.tokens_taken + length(stream.token_queue) - key = SimpleKey(token_number, required, get_mark(stream)) + key = SimpleKey(token_number, required, Mark(stream)) stream.possible_simple_keys[stream.flow_level] = key end end @@ -325,7 +419,7 @@ function remove_possible_simple_key(stream::TokenStream) key = stream.possible_simple_keys[stream.flow_level] if key.required throw(ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", get_mark(stream))) + "could not find expected ':'", Mark(stream))) end delete!(stream.possible_simple_keys, stream.flow_level) end @@ -341,7 +435,7 @@ function unwind_indent(stream::TokenStream, column) # In block context, we may need to issue the BLOCK-END tokens. 
while stream.indent > column - mark = get_mark(stream) + mark = Mark(stream) stream.indent = pop!(stream.indents) enqueue!(stream.token_queue, BlockEndToken(Span(mark, mark))) end @@ -363,43 +457,40 @@ end # Checkers # -------- -const whitespace = "\0 \t\r\n\u0085\u2028\u2029" - function check_directive(stream::TokenStream) stream.column == 0 end -function check_document_start(stream::TokenStream) +check_document_start(version::YAMLVersion, stream::TokenStream) = stream.column == 0 && - prefix(stream.input, 3) == "---" && - in(peek(stream.input, 3), whitespace) -end + prefix(stream.input, 3) == "---" && + is_whitespace(version, peek(stream.input, 3)) - function check_document_end(stream::TokenStream) - stream.column == 0 && - prefix(stream.input, 3) == "..." && - (in(peek(stream.input, 3), whitespace) || peek(stream.input, 3) === nothing) - end +check_document_end(version::YAMLVersion, stream::TokenStream) = + stream.column == 0 && + prefix(stream.input, 3) == "..." && begin + c = peek(stream.input, 3) + is_whitespace(version, c) || c === nothing + end -function check_block_entry(stream::TokenStream) - in(peek(stream.input, 1), whitespace) +function check_block_entry(version::YAMLVersion, stream::TokenStream) + is_whitespace(version, peek(stream.input, 1)) end -function check_key(stream::TokenStream) - stream.flow_level > 0 || in(peek(stream.input, 1), whitespace) +function check_key(version::YAMLVersion, stream::TokenStream) + stream.flow_level > 0 || is_whitespace(version, peek(stream.input, 1)) end -function check_value(stream::TokenStream) +function check_value(version::YAMLVersion, stream::TokenStream) cnext = peek(stream.input, 1) - stream.flow_level > 0 || in(cnext, whitespace) || cnext === nothing + stream.flow_level > 0 || is_whitespace(version, cnext) || cnext === nothing end -function check_plain(stream::TokenStream) - !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029-?:,[]{}#&*!|>\'\"%@`\uFEFF") || - (!in(peek(stream.input, 1), whitespace) && - 
(peek(stream.input) == '-' || (stream.flow_level == 0 && - in(peek(stream.input), "?:")))) +function check_plain(version::YAMLVersion, stream::TokenStream) + c = peek(stream.input) + !(c == '\0' || c == ' ' || c == '\t' || is_b_char(version, c) || in(c, "-?:,[]{}#&*!|>\'\"%@`\uFEFF")) || + (!is_whitespace(version, peek(stream.input, 1)) && (c == '-' || (stream.flow_level == 0 && in(c, "?:")))) end @@ -407,7 +498,7 @@ end # -------- function fetch_stream_start(stream::TokenStream) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, StreamStartToken(Span(mark, mark), string(stream.encoding))) end @@ -422,13 +513,13 @@ function fetch_stream_end(stream::TokenStream) stream.allow_simple_key = false empty!(stream.possible_simple_keys) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, StreamEndToken(Span(mark, mark))) stream.done = true end -function fetch_directive(stream::TokenStream) +function fetch_directive(version::YAMLVersion, stream::TokenStream) # Set the current intendation to -1. 
unwind_indent(stream, -1) @@ -436,21 +527,21 @@ function fetch_directive(stream::TokenStream) remove_possible_simple_key(stream) stream.allow_simple_key = false - enqueue!(stream.token_queue, scan_directive(stream)) + enqueue!(stream.token_queue, scan_directive(version, stream)) end -function fetch_document_start(stream::TokenStream) - fetch_document_indicator(stream, DocumentStartToken) +function fetch_document_start(version::YAMLVersion, stream::TokenStream) + fetch_document_indicator(version, stream, DocumentStartToken) end -function fetch_document_end(stream::TokenStream) - fetch_document_indicator(stream, DocumentEndToken) +function fetch_document_end(version::YAMLVersion, stream::TokenStream) + fetch_document_indicator(version, stream, DocumentEndToken) end -function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_document_indicator(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # Set the current intendation to -1. unwind_indent(stream, -1) @@ -460,9 +551,9 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke stream.allow_simple_key = false # Add DOCUMENT-START or DOCUMENT-END. - start_mark = get_mark(stream) - forwardchars!(stream, 3) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream, 3) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -471,25 +562,25 @@ function fetch_byte_order_mark(stream::TokenStream) # Set the current intendation to -1. 
unwind_indent(stream, -1) - start_mark = get_mark(stream) + start_mark = Mark(stream) forward!(stream.input) stream.index += 1 - end_mark = get_mark(stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, ByteOrderMarkToken(Span(start_mark, end_mark))) end -function fetch_flow_sequence_start(stream::TokenStream) - fetch_flow_collection_start(stream, FlowSequenceStartToken) +function fetch_flow_sequence_start(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_start(version, stream, FlowSequenceStartToken) end -function fetch_flow_mapping_start(stream::TokenStream) - fetch_flow_collection_start(stream, FlowMappingStartToken) +function fetch_flow_mapping_start(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_start(version, stream, FlowMappingStartToken) end -function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_flow_collection_start(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # '[' and '{' may start a simple key. save_possible_simple_key(stream) @@ -501,24 +592,24 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. 
- start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end -function fetch_flow_sequence_end(stream::TokenStream) - fetch_flow_collection_end(stream, FlowSequenceEndToken) +function fetch_flow_sequence_end(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_end(version, stream, FlowSequenceEndToken) end -function fetch_flow_mapping_end(stream::TokenStream) - fetch_flow_collection_end(stream, FlowMappingEndToken) +function fetch_flow_mapping_end(version::YAMLVersion, stream::TokenStream) + fetch_flow_collection_end(version, stream, FlowMappingEndToken) end -function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Token} +function fetch_flow_collection_end(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} # Reset possible simple key on the current level. remove_possible_simple_key(stream) @@ -529,14 +620,14 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok stream.allow_simple_key = false # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end -function fetch_flow_entry(stream::TokenStream) +function fetch_flow_entry(version::YAMLVersion, stream::TokenStream) # Simple keys are allowed after ','. stream.allow_simple_key = true @@ -544,25 +635,25 @@ function fetch_flow_entry(stream::TokenStream) remove_possible_simple_key(stream) # Add FLOW-ENTRY. 
- start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end -function fetch_block_entry(stream::TokenStream) +function fetch_block_entry(version::YAMLVersion, stream::TokenStream) # Block context needs additional checks. if stream.flow_level == 0 # Are we allowed to start a new entry? if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "sequence entries not allowed here", - get_mark(stream))) + Mark(stream))) end if add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockSequenceStartToken(Span(mark, mark))) end @@ -580,26 +671,26 @@ function fetch_block_entry(stream::TokenStream) remove_possible_simple_key(stream) # Add BLOCK-ENTRY. - start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) end -function fetch_key(stream::TokenStream) +function fetch_key(version::YAMLVersion, stream::TokenStream) if stream.flow_level == 0 # Are we allowed to start a key (not nessesary a simple)? if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "mapping keys are not allowed here", - get_mark(stream))) + Mark(stream))) end # We may need to add BLOCK-MAPPING-START. if add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockMappingStartToken(Span(mark, mark))) end @@ -612,14 +703,14 @@ function fetch_key(stream::TokenStream) remove_possible_simple_key(stream) # Add KEY. 
- start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end -function fetch_value(stream::TokenStream) +function fetch_value(version::YAMLVersion, stream::TokenStream) # Simple key if haskey(stream.possible_simple_keys, stream.flow_level) # Add KEY. @@ -650,7 +741,7 @@ function fetch_value(stream::TokenStream) if !stream.allow_simple_key throw(ScannerError(nothing, nothing, "mapping values are not allowed here", - get_mark(stream))) + Mark(stream))) end end @@ -658,7 +749,7 @@ function fetch_value(stream::TokenStream) # BLOCK-MAPPING-START. It will be detected as an error later by # the parser. if stream.flow_level == 0 && add_indent(stream, stream.column) - mark = get_mark(stream) + mark = Mark(stream) enqueue!(stream.token_queue, BlockMappingStartToken(Span(mark, mark))) end @@ -671,14 +762,14 @@ function fetch_value(stream::TokenStream) end # Add VALUE. - start_mark = get_mark(stream) - forwardchars!(stream) - end_mark = get_mark(stream) + start_mark = Mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end -function fetch_alias(stream::TokenStream) +function fetch_alias(version::YAMLVersion, stream::TokenStream) # ALIAS could be a simple key. save_possible_simple_key(stream) @@ -686,11 +777,11 @@ function fetch_alias(stream::TokenStream) stream.allow_simple_key = false # Scan and add ALIAS. - enqueue!(stream.token_queue, scan_anchor(stream, AliasToken)) + enqueue!(stream.token_queue, scan_anchor(version, stream, AliasToken)) end -function fetch_anchor(stream::TokenStream) +function fetch_anchor(version::YAMLVersion, stream::TokenStream) # ANCHOR could start a simple key. 
save_possible_simple_key(stream) @@ -698,11 +789,11 @@ function fetch_anchor(stream::TokenStream) stream.allow_simple_key = false # Scan and add ANCHOR. - enqueue!(stream.token_queue, scan_anchor(stream, AnchorToken)) + enqueue!(stream.token_queue, scan_anchor(version, stream, AnchorToken)) end -function fetch_tag(stream::TokenStream) +function fetch_tag(version::YAMLVersion, stream::TokenStream) # TAG could start a simple key. save_possible_simple_key(stream) @@ -710,21 +801,21 @@ function fetch_tag(stream::TokenStream) stream.allow_simple_key = false # Scan and add TAG. - enqueue!(stream.token_queue, scan_tag(stream)) + enqueue!(stream.token_queue, scan_tag(version, stream)) end -function fetch_literal(stream::TokenStream) - fetch_block_scalar(stream, '|') +function fetch_literal(version::YAMLVersion, stream::TokenStream) + fetch_block_scalar(version, stream, '|') end -function fetch_folded(stream::TokenStream) - fetch_block_scalar(stream, '>') +function fetch_folded(version::YAMLVersion, stream::TokenStream) + fetch_block_scalar(version, stream, '>') end -function fetch_block_scalar(stream::TokenStream, style::Char) +function fetch_block_scalar(version::YAMLVersion, stream::TokenStream, style::Char) # A simple key may follow a block scalar. stream.allow_simple_key = true @@ -732,21 +823,21 @@ function fetch_block_scalar(stream::TokenStream, style::Char) remove_possible_simple_key(stream) # Scan and add SCALAR. 
- enqueue!(stream.token_queue, scan_block_scalar(stream, style)) + enqueue!(stream.token_queue, scan_block_scalar(version, stream, style)) end -function fetch_single(stream::TokenStream) - fetch_flow_scalar(stream, '\'') +function fetch_single(version::YAMLVersion, stream::TokenStream) + fetch_flow_scalar(version, stream, '\'') end -function fetch_double(stream::TokenStream) - fetch_flow_scalar(stream, '"') +function fetch_double(version::YAMLVersion, stream::TokenStream) + fetch_flow_scalar(version, stream, '"') end -function fetch_flow_scalar(stream::TokenStream, style::Char) +function fetch_flow_scalar(version::YAMLVersion, stream::TokenStream, style::Char) # A flow scalar could be a simple key. save_possible_simple_key(stream) @@ -754,14 +845,14 @@ function fetch_flow_scalar(stream::TokenStream, style::Char) stream.allow_simple_key = false # Scan and add SCALAR. - enqueue!(stream.token_queue, scan_flow_scalar(stream, style)) + enqueue!(stream.token_queue, scan_flow_scalar(version, stream, style)) end -function fetch_plain(stream::TokenStream) +function fetch_plain(version::YAMLVersion, stream::TokenStream) save_possible_simple_key(stream) stream.allow_simple_key = false - enqueue!(stream.token_queue, scan_plain(stream)) + enqueue!(stream.token_queue, scan_plain(version, stream)) end @@ -770,48 +861,93 @@ end # If the stream is at a line break, advance past it. 
# -# Returns: -# '\r\n' : '\n' -# '\r' : '\n' -# '\n' : '\n' -# '\x85' : '\n' -# '\u2028' : '\u2028' -# '\u2029 : '\u2029' -# default : '' +# YAML 1.1 +# +# [22] b-line-feed ::= #xA /*LF*/ +# [23] b-carriage-return ::= #xD /*CR*/ +# [24] b-next-line ::= #x85 /*NEL*/ +# [25] b-line-separator ::= #x2028 /*LS*/ +# [26] b-paragraph-separator ::= #x2029 /*PS*/ +# [28] b-specific ::= b-line-separator | b-paragraph-separator +# [29] b-generic ::= ( b-carriage-return b-line-feed) | b-carriage-return | b-line-feed | b-next-line +# [30] b-as-line-feed ::= b-generic +# [31] b-normalized ::= b-as-line-feed | b-specific +# +# U+000D U+000A → U+000A +# U+000D → U+000A +# U+000A → U+000A +# U+0085 → U+000A +# U+2028 → U+2028 +# U+2029 → U+2029 +# otherwise → (empty) # -function scan_line_break(stream::TokenStream) - if in(peek(stream.input), "\r\n\u0085") - if prefix(stream.input, 2) == "\r\n" - forwardchars!(stream, 2) +function scan_line_break(version::YAMLV1_1, stream::TokenStream)::String + c = peek(stream.input) + if c == '\u000d' + if peek(stream.input, 1) == '\u000a' + forwardchars!(version, stream, 2) else - forwardchars!(stream) + forwardchars!(version, stream) end - return "\n" - elseif in(peek(stream.input), "\u2028\u2029") - ch = peek(stream.input) - forwardchars!(stream) - return ch + "\u000a" + elseif c == '\u000a' || c == '\u0085' + forwardchars!(version, stream) + "\u000a" + elseif is_b_specific(version, c) + forwardchars!(version, stream) + string(c) + else + "" + end +end +# +# YAML 1.2 +# +# [24] b-line-feed ::= x0A +# [25] b-carriage-return ::= x0D +# [26] b-char ::= b-line-feed | b-carriage-return +# [27] nb-char ::= c-printable - b-char - c-byte-order-mark +# [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed +# +# U+000D U+000A → U+000A +# U+000D → U+000A +# U+000A → U+000A +# otherwise → (empty) +# +function scan_line_break(version::YAMLV1_2, stream::TokenStream)::String + c = peek(stream.input) + if c == '\u000d' + if 
peek(stream.input, 1) == '\u000a' + forwardchars!(version, stream, 2) + else + forwardchars!(version, stream) + end + "\u000a" + elseif c == '\u000a' + forwardchars!(version, stream) + "\u000a" + else + "" end - return "" end - # Scan past whitespace to the next token. -function scan_to_next_token(stream::TokenStream) +function scan_to_next_token(version::YAMLVersion, stream::TokenStream) while true # whitespace while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end # comment if peek(stream.input) == '#' - forwardchars!(stream) - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + while true + forwardchars!(version, stream) + c = peek(stream.input) + (c == '\0' || is_b_char(version, c)) && break end end # line break - if scan_line_break(stream) != "" + if scan_line_break(version, stream) != "" if stream.flow_level == 0 stream.allow_simple_key = true end @@ -823,86 +959,94 @@ function scan_to_next_token(stream::TokenStream) end -function scan_directive(stream::TokenStream) - start_mark = get_mark(stream) - forwardchars!(stream) - name = scan_directive_name(stream, start_mark) +function scan_directive(version::YAMLVersion, stream::TokenStream) + start_mark = Mark(stream) + forwardchars!(version, stream) + name = scan_directive_name(version, stream, start_mark) value = nothing if name == "YAML" - value = scan_yaml_directive_value(stream, start_mark) - end_mark = get_mark(stream) + value = scan_yaml_directive_value(version, stream, start_mark) + end_mark = Mark(stream) elseif name == "TAG" - tag_handle = scan_tag_directive_handle(stream, start_mark) - tag_prefix = scan_tag_directive_prefix(stream, start_mark) + tag_handle = scan_tag_directive_handle(version, stream, start_mark) + tag_prefix = scan_tag_directive_prefix(version, stream, start_mark) value = (tag_handle, tag_prefix) - end_mark = get_mark(stream) + end_mark = Mark(stream) else # Otherwise we warn and ignore the directive. 
- end_mark = get_mark(stream) + end_mark = Mark(stream) @warn """unknown directive name: "$name" at $end_mark. We ignore this.""" - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + while + begin + c = peek(stream.input) + !(c == '\0' || is_b_char(version, c)) + end + forwardchars!(version, stream) end end - scan_directive_ignored_line(stream, start_mark) + scan_directive_ignored_line(version, stream, start_mark) DirectiveToken(Span(start_mark, end_mark), name, value) end -function scan_directive_name(stream::TokenStream, start_mark::Mark) +function scan_directive_name(version::YAMLVersion, stream::TokenStream, start_mark::Mark) length = 0 - c = peek(stream.input) - while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' - length += 1 + while begin c = peek(stream.input, length) + is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' end - - if length == 0 - throw(ScannerError("while scanning a directive", start_mark, - "expected alphanumeric character, but found '$(c)'", - get_mark(stream))) + length += 1 end + length == 0 && throw(ScannerError( + "while scanning a directive", start_mark, + "expected alphanumeric character, but found '$c'", Mark(stream), + )) + value = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) - if !in(c, ":\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected alphanumeric character, but found '$(c)'", - get_mark(stream))) - end + c == ':' || c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected alphanumeric character, but found '$c'", Mark(stream), + )) value end -function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) - while peek(stream.input) == ' ' || peek(stream.input) == ':' - forwardchars!(stream) - end - - major = scan_yaml_directive_number(stream, start_mark) - if 
peek(stream.input) != '.' - throw(ScannerError("while scanning a directive", start_mark, - "expected '.' but found '$(peek(stream.input))'", - get_mark(stream))) +function scan_yaml_directive_value(version::YAMLVersion, stream::TokenStream, start_mark::Mark) + while begin + c = peek(stream.input) + c == ' ' || c == ':' end - forwardchars!(stream) - minor = scan_yaml_directive_number(stream, start_mark) - if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ' or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) + forwardchars!(version, stream) end - return (major, minor) + + major = scan_yaml_directive_number(version, stream, start_mark) + c = peek(stream.input) + c == '.' || throw(ScannerError( + "while scanning a directive", start_mark, + "expected '.' but found '$c'", Mark(stream), + )) + forwardchars!(version, stream) + + minor = scan_yaml_directive_number(version, stream, start_mark) + c = peek(stream.input) + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ' or a line break, but found '$c'", Mark(stream), + )) + + major, minor end # scan the YAML directive's number from a stream -function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int +function scan_yaml_directive_number(version::YAMLVersion, stream::TokenStream, start_mark::Mark)::Int # ------------------------------------------------- # check that the first character is a decimal digit # ------------------------------------------------- @@ -913,7 +1057,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # throw an error if the input is not decimal digits isdigit(c) || throw(ScannerError( "while scanning a directive", start_mark, - "expected a digit, but found '$c'", get_mark(stream), + "expected a digit, but found '$c'", Mark(stream), )) # 
----------------------------------------------------------- # until the end of the decimal digits, increment the position @@ -933,7 +1077,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # --------------------------------------------------- # advance the stream by the length that has been read # --------------------------------------------------- - forwardchars!(stream, pos) + forwardchars!(version, stream, pos) # ----------------- # return the number # ----------------- @@ -941,109 +1085,109 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int end -function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) +function scan_tag_directive_handle(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end - value = scan_tag_handle(stream, "directive", start_mark) - if peek(stream.input) != ' ' - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ', but found '$(peek(stream.input))'", - get_mark(stream))) - end + value = scan_tag_handle(version, stream, "directive", start_mark) + + c = peek(stream.input) + c == ' ' || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ', but found '$c'", Mark(stream), + )) value end -function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) +function scan_tag_directive_prefix(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end + + value = scan_tag_uri(version, stream, "directive", start_mark) - value = scan_tag_uri(stream, "directive", start_mark) - if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected ' ', but found $(peek(stream.input))", - get_mark(stream))) - end + c = peek(stream.input) + c == '\0' || c == ' ' || 
is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected ' ', but found $c", Mark(stream), + )) value end -function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) +function scan_directive_ignored_line(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end if peek(stream.input) == '#' - forwardchars!(stream) - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + forwardchars!(version, stream) + while begin + c = peek(stream.input) + !(c == '\0' || is_b_char(version, c)) + end + forwardchars!(version, stream) end end - if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) - end - scan_line_break(stream) + c = peek(stream.input) + c == '\0' || is_b_char(version, c) || throw(ScannerError( + "while scanning a directive", start_mark, + "expected a comment or a line break, but found '$c'", Mark(stream), + )) + scan_line_break(version, stream) end -function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} - start_mark = get_mark(stream) +function scan_anchor(version::YAMLVersion, stream::TokenStream, ::Type{T}) where {T<:Token} + start_mark = Mark(stream) indicator = peek(stream.input) - if indicator == '*' - name = "alias" - else - name = "anchor" - end - forwardchars!(stream) + name = indicator == '*' ? 
"alias" : "anchor" + forwardchars!(version, stream) length = 0 - c = peek(stream.input) - while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' - length += 1 + while begin c = peek(stream.input, length) + is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' end - - if length == 0 - throw(ScannerError("while scanning an $(name)", start_mark, - "expected an alphanumeric character, but found '$(peek(stream.input))'", - get_mark(stream))) + length += 1 end + + length == 0 && throw(ScannerError( + "while scanning an $name", start_mark, + "expected an alphanumeric character, but found '$(peek(stream.input))'", Mark(stream), + )) value = prefix(stream.input, length) - forwardchars!(stream, length) - if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") - throw(ScannerError("while scanning an $(name)", start_mark, - "expected an alphanumeric character, but found '$(peek(stream.input))'", - get_mark(stream))) - end - end_mark = get_mark(stream) + forwardchars!(version, stream, length) + c = peek(stream.input) + is_whitespace(version, c) || in(c, "?:,]}%@`") || throw(ScannerError( + "while scanning an $name", start_mark, + "expected an alphanumeric character, but found '$c'", Mark(stream), + )) + end_mark = Mark(stream) T(Span(start_mark, end_mark), value) end -function scan_tag(stream::TokenStream) - start_mark = get_mark(stream) +function scan_tag(version::YAMLVersion, stream::TokenStream) + start_mark = Mark(stream) c = peek(stream.input, 1) if c == '<' handle = nothing - forwardchars!(stream, 2) - suffix = scan_tag_uri(stream, "tag", start_mark) - if peek(stream.input) != '>' - throw(ScannerError("while parsing a tag", start_mark, - "expected '>', but found '$(peek(stream.input))'", - get_mark(stream))) - end - forwardchars!(stream) - elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") + forwardchars!(version, stream, 2) + suffix = scan_tag_uri(version, stream, "tag", start_mark) + peek(stream.input) == '>' || throw(ScannerError( + "while parsing a 
tag", start_mark, + "expected '>', but found '$(peek(stream.input))'", Mark(stream), + )) + forwardchars!(version, stream) + elseif is_whitespace(version, c) handle = nothing suffix = '!' - forwardchars!(stream) + forwardchars!(version, stream) else length = 1 use_handle = false - while !in(c, "\0 \r\n\u0085\u2028\u2029") + while !(c == '\0' || is_b_char(version, c)) if c == '!' use_handle = true break @@ -1052,46 +1196,45 @@ function scan_tag(stream::TokenStream) c = peek(stream.input, length) end if use_handle - handle = scan_tag_handle(stream, "tag", start_mark) + handle = scan_tag_handle(version, stream, "tag", start_mark) else handle = "!" - forwardchars!(stream) + forwardchars!(version, stream) end - suffix = scan_tag_uri(stream, "tag", start_mark) + suffix = scan_tag_uri(version, stream, "tag", start_mark) end c = peek(stream.input) - if !in(c, "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a tag", start_mark, - "expected ' ' or a line break, but found '$(c)'", - get_mark(stream))) - end + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a tag", start_mark, + "expected ' ' or a line break, but found '$c'", Mark(stream), + )) value = (handle, suffix) - end_mark = get_mark(stream) + end_mark = Mark(stream) TagToken(Span(start_mark, end_mark), value) end -function scan_block_scalar(stream::TokenStream, style::Char) +function scan_block_scalar(version::YAMLVersion, stream::TokenStream, style::Char) folded = style == '>' chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) # Scan the header. - forwardchars!(stream) - chomping, increment = scan_block_scalar_indicators(stream, start_mark) - scan_block_scalar_ignored_line(stream, start_mark) + forwardchars!(version, stream) + chomping, increment = scan_block_scalar_indicators(version, stream, start_mark) + scan_block_scalar_ignored_line(version, stream, start_mark) # Determine the indentation level and go to the first non-empty line. 
min_indent = max(1, stream.indent + 1) if increment === nothing - breaks, max_indent, end_mark = scan_block_scalar_indentation(stream) + breaks, max_indent, end_mark = scan_block_scalar_indentation(version, stream) indent = max(min_indent, max_indent) else indent = min_indent + increment - 1 - breaks, end_mark = scan_block_scalar_breaks(stream, indent) + breaks, end_mark = scan_block_scalar_breaks(version, stream, indent) end line_break = "" @@ -1100,13 +1243,16 @@ function scan_block_scalar(stream::TokenStream, style::Char) append!(chunks, breaks) leading_non_space = !is_s_white(peek(stream.input)) length = 0 - while !in(peek(stream.input, length), "\0\r\n\u0085\u2028\u2029") + while begin + c = peek(stream.input, length) + !(c == '\0' || is_b_char(version, c)) + end length += 1 end push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) - line_break = scan_line_break(stream) - breaks, end_mark = scan_block_scalar_breaks(stream, indent) + forwardchars!(version, stream, length) + line_break = scan_line_break(version, stream) + breaks, end_mark = scan_block_scalar_breaks(version, stream, indent) if stream.column == indent && peek(stream.input) != '\0' if folded && line_break == "\n" && leading_non_space && !is_s_white(peek(stream.input)) @@ -1134,83 +1280,84 @@ function scan_block_scalar(stream::TokenStream, style::Char) end -function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) +function scan_block_scalar_ignored_line(version::YAMLVersion, stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end - if peek(stream.input) == '#' - while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + c = peek(stream.input) + if c == '#' + while !(c == '\0' || is_b_char(version, c)) + forwardchars!(version, stream) + c = peek(stream.input) end end - if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - throw(ScannerError("while 
scanning a block scalal", start_mark, - "expected a comment or a line break, but found '$(peek(stream.input))'", - get_mark(stream))) - end + c == '\0' || is_b_char(version, c) || throw(ScannerError( + "while scanning a block scalal", start_mark, + "expected a comment or a line break, but found '$c'", Mark(stream), + )) - scan_line_break(stream) + scan_line_break(version, stream) end -function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) +function scan_block_scalar_indicators(version::YAMLVersion, stream::TokenStream, start_mark::Mark) chomping = nothing increment = nothing c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - forwardchars!(stream) + forwardchars!(version, stream) c = peek(stream.input) - if in(c, "0123456789") + if isdigit(c) + c == '0' && throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", Mark(stream), + )) increment = parse(Int, string(c)) - if increment == 0 - throw(ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - get_mark(stream))) - end end - elseif in(c, "0123456789") + elseif isdigit(c) + c == '0' && throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", Mark(stream), + )) increment = parse(Int, string(c)) - if increment == 0 - throw(ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - get_mark(stream))) - end - forwardchars!(stream) + forwardchars!(version, stream) c = peek(stream.input) if c == '+' || c == '-' - comping = c == '+' - forwardchars!(stream) + chomping = c == '+' + forwardchars!(version, stream) end end c = peek(stream.input) - if !in(c, "\0 \r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but 
found '$(c)'", - get_mark(stream))) - end + c == '\0' || c == ' ' || is_b_char(version, c) || throw(ScannerError( + "while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found '$c'", Mark(stream), + )) chomping, increment end -function scan_block_scalar_indentation(stream::TokenStream) - chunks = Any[] +function scan_block_scalar_indentation(version::YAMLVersion, stream::TokenStream)::Tuple{Vector{String}, Integer, Mark} + chunks = String[] max_indent = 0 - end_mark = get_mark(stream) - while in(peek(stream.input), " \r\n\u0085\u2028\u2029") - if peek(stream.input) != ' ' - push!(chunks, scan_line_break(stream)) - end_mark = get_mark(stream) - else - forwardchars!(stream) + end_mark = Mark(stream) + while true + c = peek(stream.input) + if is_b_char(version, c) + push!(chunks, scan_line_break(version, stream)) + end_mark = Mark(stream) + elseif c == ' ' + forwardchars!(version, stream) if stream.column > max_indent max_indent = stream.column end + else + break end end @@ -1218,18 +1365,18 @@ function scan_block_scalar_indentation(stream::TokenStream) end -function scan_block_scalar_breaks(stream::TokenStream, indent) +function scan_block_scalar_breaks(version::YAMLVersion, stream::TokenStream, indent) chunks = Any[] - end_mark = get_mark(stream) + end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end - while is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, scan_line_break(stream)) - end_mark = get_mark(stream) + while is_b_char(version, peek(stream.input)) + push!(chunks, scan_line_break(version, stream)) + end_mark = Mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) end end @@ -1237,25 +1384,25 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) end -function scan_flow_scalar(stream::TokenStream, style::Char) +function 
scan_flow_scalar(version::YAMLVersion, stream::TokenStream, style::Char) double = style == '"' chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) q = peek(stream.input) # quote - forwardchars!(stream) + forwardchars!(version, stream) while peek(stream.input) != q || peek(stream.input, 1) == q - append!(chunks, scan_flow_scalar_spaces(stream, double, start_mark)) - append!(chunks, scan_flow_scalar_non_spaces(stream, double, start_mark)) + append!(chunks, scan_flow_scalar_spaces(version, stream, double, start_mark)) + append!(chunks, scan_flow_scalar_non_spaces(version, stream, double, start_mark)) end - forwardchars!(stream) - end_mark = get_mark(stream) + forwardchars!(version, stream) + end_mark = Mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end -const ESCAPE_REPLACEMENTS = Dict{Char,Char}( +const ESCAPE_REPLACEMENTS = Dict{Char, Char}( '0' => '\0', 'a' => '\u0007', 'b' => '\u0008', @@ -1283,23 +1430,27 @@ const ESCAPE_CODES = Dict{Char, Int}( ) -function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, - start_mark::Mark) +function scan_flow_scalar_non_spaces( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +) chunks = Any[] while true length = 0 - while !in(peek(stream.input, length), "\'\"\\\0 \t\r\n\u0085\u2028\u2029") + c = peek(stream.input, length) + while !(in(c, "\'\"\\") || is_whitespace(version, c)) length += 1 + c = peek(stream.input, length) end if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(version, stream, length) end c = peek(stream.input) if !double && c == '\'' && peek(stream.input, 1) == '\'' push!(chunks, '\'') - forwardchars!(stream, 2) + forwardchars!(version, stream, 2) elseif (double && c == '\'') || (!double && in(c, "\"\\")) push!(chunks, c) forward!(stream.input) @@ -1320,19 +1471,19 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, 
string("expected escape sequence of", " $(length) hexadecimal", "digits, but found '$(c)'"), - get_mark(stream))) + Mark(stream))) end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) - forwardchars!(stream, length) - elseif is_b_char(YAMLV1_1(), c) - scan_line_break(stream) - append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) + forwardchars!(version, stream, length) + elseif is_b_char(version, c) + scan_line_break(version, stream) + append!(chunks, scan_flow_scalar_breaks(version, stream, double, start_mark)) else - throw(ScannerError("while scanning a double-quoted scalar", - start_mark, - "found unknown escape character '$(c)'", - get_mark(stream))) + throw(ScannerError( + "while scanning a double-quoted scalar", start_mark, + "found unknown escape character '$c'", Mark(stream)), + ) end else return chunks @@ -1341,27 +1492,31 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end -function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, - start_mark::Mark) +function scan_flow_scalar_spaces( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +) chunks = Any[] length = 0 while is_s_white(peek(stream.input, length)) length += 1 end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) if c == '\0' - throw(ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", get_mark(stream))) - elseif is_b_char(YAMLV1_1(), c) - line_break = scan_line_break(stream) - breaks = scan_flow_scalar_breaks(stream, double, start_mark) + throw(ScannerError( + "while scanning a quoted scalar", start_mark, + "found unexpected end of stream", Mark(stream), + )) + elseif is_b_char(version, c) + line_break = scan_line_break(version, stream) + breaks = scan_flow_scalar_breaks(version, stream, double, start_mark) if line_break != '\n' push!(chunks, line_break) else 
isempty(breaks) - push!(chunks, ' ') + push!(chunks, " ") end append!(chunks, breaks) else @@ -1372,39 +1527,42 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, end -function scan_flow_scalar_breaks(stream::TokenStream, double::Bool, - start_mark::Mark) - chunks = Any[] +function scan_flow_scalar_breaks( + version::YAMLVersion, stream::TokenStream, + double::Bool, start_mark::Mark, +)::Vector{String} + chunks = String[] while true pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") - throw(ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document seperator", - get_mark(stream))) + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) + throw(ScannerError( + "while scanning a quoted scalar", start_mark, + "found unexpected document seperator", Mark(stream)), + ) end while is_s_white(peek(stream.input)) forward!(stream.input) end - if is_b_char(YAMLV1_1(), peek(stream.input)) - push!(chunks, scan_line_break(stream)) + if is_b_char(version, peek(stream.input)) + push!(chunks, scan_line_break(version, stream)) else - return chunks + break end end + chunks end -function scan_plain(stream::TokenStream) +function scan_plain(version::YAMLVersion, stream::TokenStream) # See the specification for details. # We add an additional restriction for the flow context: # plain scalars in the flow context cannot contain ',', ':' and '?'. # We also keep track of the `allow_simple_key` flag here. # Indentation rules are loosed for the flow context. 
chunks = Any[] - start_mark = get_mark(stream) + start_mark = Mark(stream) end_mark = start_mark indent = stream.indent + 1 @@ -1422,10 +1580,10 @@ function scan_plain(stream::TokenStream) while true c = peek(stream.input, length) cnext = peek(stream.input, length + 1) - if in(c, whitespace) || + if is_whitespace(version, c) || c === nothing || (stream.flow_level == 0 && c == ':' && - (cnext === nothing || in(cnext, whitespace))) || + (cnext === nothing || is_whitespace(version, cnext))) || (stream.flow_level != 0 && in(c, ",:?[]{}")) break end @@ -1434,11 +1592,13 @@ function scan_plain(stream::TokenStream) # It's not clear what we should do with ':' in the flow context. c = peek(stream.input) - if stream.flow_level != 0 && c == ':' && - !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") - forwardchars!(stream, length) + if stream.flow_level != 0 && c == ':' && begin + cnext = peek(stream.input, length + 1) + !(is_whitespace(version, cnext) || in(cnext, ",[]{}")) + end + forwardchars!(version, stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, - "found unexpected ':'", get_mark(stream))) + "found unexpected ':'", Mark(stream))) end if length == 0 @@ -1448,9 +1608,9 @@ function scan_plain(stream::TokenStream) stream.allow_simple_key = true append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) - end_mark = get_mark(stream) - spaces = scan_plain_spaces(stream, indent, start_mark) + forwardchars!(version, stream, length) + end_mark = Mark(stream) + spaces = scan_plain_spaces(version, stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || (stream.flow_level == 0 && stream.column < indent) break @@ -1461,8 +1621,10 @@ function scan_plain(stream::TokenStream) end -function scan_plain_spaces(stream::TokenStream, indent::Integer, - start_mark::Mark) +function scan_plain_spaces( + version::YAMLVersion, stream::TokenStream, + indent::Integer, 
start_mark::Mark, +) chunks = Any[] length = 0 while peek(stream.input, length) == ' ' @@ -1470,32 +1632,33 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(version, stream, length) c = peek(stream.input) - if is_b_char(YAMLV1_1(), c) - line_break = scan_line_break(stream) + if is_b_char(version, c) + line_break = scan_line_break(version, stream) stream.allow_simple_key = true if peek(stream.input) == '\uFEFF' return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) return Any[] end breaks = Any[] - while in(peek(stream.input), " \r\n\u0085\u2028\u2029") + while begin + c = peek(stream.input) + c == ' ' || is_b_char(version, c) + end if peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(version, stream) else - push!(breaks, scan_line_break(stream)) + push!(breaks, scan_line_break(version, stream)) if peek(stream.input) == '\uFEFF' return Any[] end pref = prefix(stream.input, 3) - if pref == "---" || pref == "..." && - in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029") + if pref == "---" || pref == "..." && is_whitespace(version, peek(stream.input, 3)) return Any[] end end @@ -1514,11 +1677,11 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end -function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) +function scan_tag_handle(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) c = peek(stream.input) if c != '!' 
throw(ScannerError("while scanning a $(name)", start_mark, - "expected '!', but found '$(c)'", get_mark(stream))) + "expected '!', but found '$(c)'", Mark(stream))) end length = 1 c = peek(stream.input, length) @@ -1529,30 +1692,31 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end if c != '!' - forwardchars!(stream, length) - throw(ScannerError("while scanning a $(name)", start_mark, - "expected '!', but found '$(c)'", - get_mark(stream))) + forwardchars!(version, stream, length) + throw(ScannerError( + "while scanning a $name", start_mark, + "expected '!', but found '$c'", Mark(stream), + )) end length += 1 end value = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(version, stream, length) value end -function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) +function scan_tag_uri(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) chunks = Any[] length = 0 c = peek(stream.input, length) while is_ns_ascii_letter(c) || isdigit(c) || in(c, "-;/?:@&=+\$,_.!~*\'()[]%") if c == '%' push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(version, stream, length) length = 0 - push!(chunks, scan_uri_escapes(stream, name, start_mark)) + push!(chunks, scan_uri_escapes(version, stream, name, start_mark)) else length += 1 end @@ -1561,23 +1725,23 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(version, stream, length) length = 0 end if isempty(chunks) throw(ScannerError("while parsing a $(name)", start_mark, "expected URI, but found '$(c)'", - get_mark(stream))) + Mark(stream))) end string(chunks...) 
end -function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) +function scan_uri_escapes(version::YAMLVersion, stream::TokenStream, name::String, start_mark::Mark) bytes = Any[] - mark = get_mark(stream) + mark = Mark(stream) while peek(stream.input) == '%' forward!(stream.input) for k in 0:1 @@ -1586,11 +1750,11 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark) string("expected URI escape sequence of", " 2 hexadecimal digits, but found", " '$(peek(stream.input, k))'"), - get_mark(stream))) + Mark(stream))) end end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) - forwardchars!(stream, 2) + forwardchars!(version, stream, 2) end string(bytes...) diff --git a/src/span.jl b/src/span.jl new file mode 100644 index 0000000..0be0017 --- /dev/null +++ b/src/span.jl @@ -0,0 +1,5 @@ +# Where in the stream a particular token lies. +struct Span + start_mark::Mark + end_mark::Mark +end diff --git a/src/tokens.jl b/src/tokens.jl index 00c31e9..3f0acc0 100644 --- a/src/tokens.jl +++ b/src/tokens.jl @@ -1,28 +1,10 @@ - -# Position within the document being parsed -struct Mark - index::UInt64 - line::UInt64 - column::UInt64 -end - - -function show(io::IO, mark::Mark) - @printf(io, "line %d, column %d", mark.line, mark.column) -end - - -# Where in the stream a particular token lies. -struct Span - start_mark::Mark - end_mark::Mark -end - - # YAML Tokens. -# Each token must include at minimum member "span::Span". -abstract type Token end +abstract type Token + # span::Span +end +firstmark(token::Token) = token.span.start_mark +lastmark(token::Token) = token.span.end_mark # The '%YAML' directive. 
struct DirectiveToken <: Token diff --git a/test/runtests.jl b/test/runtests.jl index 989fb3e..306df82 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -61,61 +61,6 @@ const test_write_ignored = [ "multi-constructor" ] - -function equivalent(xs::AbstractDict, ys::AbstractDict) - if Set(collect(keys(xs))) != Set(collect(keys(ys))) - @info "Not equivalent" Set(collect(keys(xs))) Set(collect(keys(ys))) - return false - end - - for k in keys(xs) - if !equivalent(xs[k], ys[k]) - @info "Not equivalent" xs[k] ys[k] - return false - end - end - - true -end - - -function equivalent(xs::AbstractArray, ys::AbstractArray) - if length(xs) != length(ys) - @info "Not equivalent" length(xs) length(ys) - return false - end - - for (x, y) in zip(xs, ys) - if !equivalent(x, y) - @info "Not equivalent" x y - return false - end - end - - true -end - - -function equivalent(x::Float64, y::Float64) - isnan(x) && isnan(y) ? true : x == y -end - - -function equivalent(x::AbstractString, y::AbstractString) - while endswith(x, "\n") - x = x[1:end-1] # trailing newline characters are ambiguous - end - while endswith(y, "\n") - y = y[1:end-1] - end - x == y -end - -function equivalent(x, y) - x == y -end - - # test custom tags function construct_type_map(t::Symbol, constructor::YAML.Constructor, node::YAML.Node) @@ -177,14 +122,14 @@ const testdir = dirname(@__FILE__) yaml_file_name, TestConstructor() ) - equivalent(data, expected) + isequal(data, expected) end @test begin dictData = YAML.load_file( yaml_file_name, more_constructors, multi_constructors ) - equivalent(dictData, expected) + isequal(dictData, expected) end end @@ -194,7 +139,7 @@ const testdir = dirname(@__FILE__) yaml_string, TestConstructor() ) - equivalent(data, expected) + isequal(data, expected) end @test begin @@ -202,7 +147,7 @@ const testdir = dirname(@__FILE__) yaml_string, more_constructors, multi_constructors ) - equivalent(dictData, expected) + isequal(dictData, expected) end end @@ -212,7 +157,7 @@ const 
testdir = dirname(@__FILE__) yaml_file_name, TestConstructor() ) - equivalent(first(data), expected) + isequal(first(data), expected) end @test begin @@ -220,7 +165,7 @@ const testdir = dirname(@__FILE__) yaml_file_name, more_constructors, multi_constructors ) - equivalent(first(dictData), expected) + isequal(first(dictData), expected) end end @@ -230,7 +175,7 @@ const testdir = dirname(@__FILE__) yaml_string, TestConstructor() ) - equivalent(first(data), expected) + isequal(first(data), expected) end @test begin @@ -238,7 +183,7 @@ const testdir = dirname(@__FILE__) yaml_string, more_constructors, multi_constructors ) - equivalent(first(dictData), expected) + isequal(first(dictData), expected) end end @@ -250,7 +195,7 @@ const testdir = dirname(@__FILE__) yaml_file_name, more_constructors ) - equivalent(write_and_load(data), expected) + isequal(write_and_load(data), expected) end end else @@ -283,11 +228,11 @@ test: 2 test: 3 """) (val, state) = iterate(iterable) - @test equivalent(val, Dict("test" => 1)) + @test isequal(val, Dict("test" => 1)) (val, state) = iterate(iterable, state) - @test equivalent(val, Dict("test" => 2)) + @test isequal(val, Dict("test" => 2)) (val, state) = iterate(iterable, state) - @test equivalent(val, Dict("test" => 3)) + @test isequal(val, Dict("test" => 3)) @test iterate(iterable, state) === nothing end @@ -371,7 +316,7 @@ end expected = Dict{Any,Any}("Test" => Dict{Any,Any}("test2"=>["test1", "test2"],"test1"=>"data")) - @test equivalent(YAML.load(yamlString, MySafeConstructor()), expected) + @test isequal(YAML.load(yamlString, MySafeConstructor()), expected) @test_throws YAML.ConstructorError YAML.load( yamlString, MyReallySafeConstructor()