From 07788d1a9e37322f60da5ea7253f321fb6462a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Sun, 30 Mar 2025 14:50:09 -0700 Subject: [PATCH 01/10] number suffix type annotations Fixes: https://github.com/kdl-org/kdl/issues/510 --- draft-marchan-kdl2.md | 79 ++++++++++++++++++- .../expected_kdl/suffix_type_bare.kdl | 1 + .../expected_kdl/suffix_type_explicit.kdl | 1 + .../suffix_type_explicit_exponent.kdl | 1 + .../suffix_type_explicit_non_decimal.kdl | 1 + tests/test_cases/input/suffix_type_bare.kdl | 1 + .../input/suffix_type_bare_comma_fail.kdl | 1 + ...uffix_type_bare_exponent_exponent_fail.kdl | 1 + .../suffix_type_bare_exponentish_fail.kdl | 1 + .../input/suffix_type_bare_hexish_fail.kdl | 1 + .../input/suffix_type_bare_keyword_fail.kdl | 1 + .../suffix_type_bare_letter_number_fail.kdl | 1 + .../suffix_type_bare_non_decimal_fail.kdl | 1 + .../suffix_type_bare_non_identifier_fail.kdl | 1 + .../input/suffix_type_bare_period_fail.kdl | 1 + .../input/suffix_type_bare_redundant_fail.kdl | 1 + .../input/suffix_type_bare_string_fail.kdl | 1 + .../suffix_type_bare_underscore_fail.kdl | 1 + .../test_cases/input/suffix_type_explicit.kdl | 1 + .../input/suffix_type_explicit_exponent.kdl | 1 + .../suffix_type_explicit_keyword_fail.kdl | 1 + .../suffix_type_explicit_non_decimal.kdl | 1 + ...ffix_type_explicit_non_identifier_fail.kdl | 1 + .../suffix_type_explicit_number_fail.kdl | 1 + .../suffix_type_explicit_redundant_fail.kdl | 1 + 25 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 tests/test_cases/expected_kdl/suffix_type_bare.kdl create mode 100644 tests/test_cases/expected_kdl/suffix_type_explicit.kdl create mode 100644 tests/test_cases/expected_kdl/suffix_type_explicit_exponent.kdl create mode 100644 tests/test_cases/expected_kdl/suffix_type_explicit_non_decimal.kdl create mode 100644 tests/test_cases/input/suffix_type_bare.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_comma_fail.kdl create mode 100644 
tests/test_cases/input/suffix_type_bare_exponent_exponent_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_exponentish_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_hexish_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_keyword_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_period_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_redundant_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_string_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_underscore_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_exponent.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_keyword_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_non_decimal.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_number_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_redundant_fail.kdl diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 865d13a9..924c96e1 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -265,10 +265,66 @@ KDL does not specify any restrictions on what implementations might do with these annotations. They are free to ignore them, or use them to make decisions about how to interpret a value. 
-Additionally, the following type annotations MAY be recognized by KDL parsers -and, if used, SHOULD interpret these types as follows: +### Suffix Type Annotation + +When a ({{value}}) is a ({{number}}), it's possible to attach the type +annotation as a "suffix", instead of prepending it between `(` and `)`. This +makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., which +are equivalent to `(px)10`, `(%)5`, and `(GiB)512`, respectively. + +An implementation that finds BOTH a parenthesized and a suffix +({{type-annotation}}) on the same ({{number}}) MUST yield a syntax error. + +Suffixes MUST BE plain ({{identifier-string}})s. No other ({{string}}) is +acceptable. + +There are two kinds of ({{suffix-type-annotation}}) available: +({{bare-suffix-type-annotation}})s and ({{explicit-suffix-type-annotation}}). + +#### Bare Suffix Type Annotation + +When a ({{value}}) is a decimal ({{number}}) WITHOUT exponential syntax (`1e+5` +etc) (and ONLY a decimal), it's possible to attach the type annotation as a +suffix directly to the number, without any additional syntax. + +They also come with some additional rules (like only being available for +decimals), in order to prevent potential ambiguity or footguns with the syntax. +This is generally acceptable, as type annotations in particular tend to be +application-defined and limited in scope, rather than arbitrary user data. In +designing this feature, it was determined that the value for various real-world +DSLs outweighed the complexity of the following rules. 
+ +As such, to remove ambiguity, the suffix ({{identifier-string}}) MUST NOT start +with any of the following patterns, all of which MUST yield syntax errors: -### Reserved Type Annotations for Numbers Without Decimals: +* `.`, `,`, or `_` +* `[a-zA-Z][0-9_]` (to disambiguate all non-decimals, with breathing room) +* `[eE][+-]?[0-9]` (to disambiguate exponentials) +* `[xX][a-fA-F]` (to disambiguate hexadecimals) + +All other ({{identifier-string}})s can be safely appended to decimal numbers, so +long as the decimal does not include an exponential component. + +If the desired suffix would violate any of the above rules, either regular +parenthetical ({{type-annotation}})s, or ({{explicit-suffix-type-annotation}})s +may be used. + +#### Explicit Suffix Type Annotation + +Any ({{number}}) may have a `#` attached to it, followed by any valid +({{identifier-string}}). This is an explicit ({{suffix-type-annotation}}) syntax +without any of the relatively complex requirements of +({{bare-suffix-type-annotation}}), which can be a useful escape hatch. For +example: `10.0#u8` is invalid syntax without the `#` prefix. + +Note again that, unlike ({{bare-suffix-type-annotation}})s, Explicit Suffixes +may be used with ALL ({{number}}) formats (hexadecimal, decimal, octal, and +binary). For example, `0x1234#u16` is valid. + +### Reserved Type Annotations for Numbers Without Decimals + +Additionally, the following type annotations MAY be recognized by KDL parsers +and, if used, SHOULD interpret these types as follows. Signed integers of various sizes (the number is the bit size): @@ -335,6 +391,7 @@ IEEE 754-2008 decimal floating point numbers ~~~kdl node (u8)123 +node 123#i64 node prop=(regex).* (published)date "1970-01-01" (contributor)person name="Foo McBar" @@ -1013,12 +1070,26 @@ multi-line-raw-string-body := // Numbers number := keyword-number | hex | octal | binary | decimal -decimal := sign? integer ('.' integer)? exponent? +decimal := sign? integer ('.' integer)? 
( + // NOTE: This grammar does not explicitly guard against having both + // parenthesized and type suffixes. + bare-type-suffix | + explicit-type-suffix | + (exponent explicit-type-suffix?) + )? exponent := ('e' | 'E') sign? integer integer := digit (digit | '_')* digit := [0-9] sign := '+' | '-' +bare-type-suffix := bare-type-suffix-initial identifier-char* +bare-type-suffix-initial := identifier-char + - '.' - ',' - '_' + - ([a-zA-Z] [0-9_]) + - (('e' | 'E') sign? digit) + - (('x' | 'X') [a-fA-F]) +explicit-type-suffix := '#' identifier-string + hex := sign? '0x' hex-digit (hex-digit | '_')* octal := sign? '0o' [0-7] [0-7_]* binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')* diff --git a/tests/test_cases/expected_kdl/suffix_type_bare.kdl b/tests/test_cases/expected_kdl/suffix_type_bare.kdl new file mode 100644 index 00000000..885fe200 --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_bare.kdl @@ -0,0 +1 @@ +node (abc)123 diff --git a/tests/test_cases/expected_kdl/suffix_type_explicit.kdl b/tests/test_cases/expected_kdl/suffix_type_explicit.kdl new file mode 100644 index 00000000..885fe200 --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_explicit.kdl @@ -0,0 +1 @@ +node (abc)123 diff --git a/tests/test_cases/expected_kdl/suffix_type_explicit_exponent.kdl b/tests/test_cases/expected_kdl/suffix_type_explicit_exponent.kdl new file mode 100644 index 00000000..c3f9ce0e --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_explicit_exponent.kdl @@ -0,0 +1 @@ +node (abc)12300000 diff --git a/tests/test_cases/expected_kdl/suffix_type_explicit_non_decimal.kdl b/tests/test_cases/expected_kdl/suffix_type_explicit_non_decimal.kdl new file mode 100644 index 00000000..81b59ff5 --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_explicit_non_decimal.kdl @@ -0,0 +1 @@ +node (abc)5 (def)83 (zzz)291 diff --git a/tests/test_cases/input/suffix_type_bare.kdl b/tests/test_cases/input/suffix_type_bare.kdl new file mode 100644 index 
00000000..a032d6a2 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare.kdl @@ -0,0 +1 @@ +node 123abc diff --git a/tests/test_cases/input/suffix_type_bare_comma_fail.kdl b/tests/test_cases/input/suffix_type_bare_comma_fail.kdl new file mode 100644 index 00000000..1ff24807 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_comma_fail.kdl @@ -0,0 +1 @@ +node 123,abc diff --git a/tests/test_cases/input/suffix_type_bare_exponent_exponent_fail.kdl b/tests/test_cases/input/suffix_type_bare_exponent_exponent_fail.kdl new file mode 100644 index 00000000..235474e2 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_exponent_exponent_fail.kdl @@ -0,0 +1 @@ +node 123e5e5 diff --git a/tests/test_cases/input/suffix_type_bare_exponentish_fail.kdl b/tests/test_cases/input/suffix_type_bare_exponentish_fail.kdl new file mode 100644 index 00000000..678175a8 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_exponentish_fail.kdl @@ -0,0 +1 @@ +node 123e5abc diff --git a/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl b/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl new file mode 100644 index 00000000..fc4ae1ef --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl @@ -0,0 +1 @@ +node 123xabc123 \ No newline at end of file diff --git a/tests/test_cases/input/suffix_type_bare_keyword_fail.kdl b/tests/test_cases/input/suffix_type_bare_keyword_fail.kdl new file mode 100644 index 00000000..ffdaff40 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_keyword_fail.kdl @@ -0,0 +1 @@ +node #truefoo #false#bar #nullish #nan#no #inf123 #-inf#123 diff --git a/tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl b/tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl new file mode 100644 index 00000000..3310ecad --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl @@ -0,0 +1 @@ +node 123u8 diff --git a/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl 
b/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl new file mode 100644 index 00000000..bf09c804 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl @@ -0,0 +1 @@ +node 0b0101abc 0o123def 0x123zzz diff --git a/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl b/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl new file mode 100644 index 00000000..0fee4ada --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl @@ -0,0 +1 @@ +node 123"astring" 456#"rawstring"# diff --git a/tests/test_cases/input/suffix_type_bare_period_fail.kdl b/tests/test_cases/input/suffix_type_bare_period_fail.kdl new file mode 100644 index 00000000..8f0aad52 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_period_fail.kdl @@ -0,0 +1 @@ +node 123.abc diff --git a/tests/test_cases/input/suffix_type_bare_redundant_fail.kdl b/tests/test_cases/input/suffix_type_bare_redundant_fail.kdl new file mode 100644 index 00000000..8fc91279 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_redundant_fail.kdl @@ -0,0 +1 @@ +node (abc)123def diff --git a/tests/test_cases/input/suffix_type_bare_string_fail.kdl b/tests/test_cases/input/suffix_type_bare_string_fail.kdl new file mode 100644 index 00000000..f8ace5fc --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_string_fail.kdl @@ -0,0 +1 @@ +node "foo"bar diff --git a/tests/test_cases/input/suffix_type_bare_underscore_fail.kdl b/tests/test_cases/input/suffix_type_bare_underscore_fail.kdl new file mode 100644 index 00000000..e7ee020a --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_underscore_fail.kdl @@ -0,0 +1 @@ +node 123_abc diff --git a/tests/test_cases/input/suffix_type_explicit.kdl b/tests/test_cases/input/suffix_type_explicit.kdl new file mode 100644 index 00000000..47e027e5 --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit.kdl @@ -0,0 +1 @@ +node 123#abc diff --git 
a/tests/test_cases/input/suffix_type_explicit_exponent.kdl b/tests/test_cases/input/suffix_type_explicit_exponent.kdl new file mode 100644 index 00000000..87deb179 --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_exponent.kdl @@ -0,0 +1 @@ +node 123e5#abc diff --git a/tests/test_cases/input/suffix_type_explicit_keyword_fail.kdl b/tests/test_cases/input/suffix_type_explicit_keyword_fail.kdl new file mode 100644 index 00000000..cd233adf --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_keyword_fail.kdl @@ -0,0 +1 @@ +node 123#true 456#false 789#null 012#nan 345#inf 678#-inf diff --git a/tests/test_cases/input/suffix_type_explicit_non_decimal.kdl b/tests/test_cases/input/suffix_type_explicit_non_decimal.kdl new file mode 100644 index 00000000..2eb552a7 --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_non_decimal.kdl @@ -0,0 +1 @@ +node 0b0101#abc 0o123#def 0x123#zzz diff --git a/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl b/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl new file mode 100644 index 00000000..0b0a11ad --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl @@ -0,0 +1 @@ +node 123#"string" 456##"rawstring"# diff --git a/tests/test_cases/input/suffix_type_explicit_number_fail.kdl b/tests/test_cases/input/suffix_type_explicit_number_fail.kdl new file mode 100644 index 00000000..ef43d25f --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_number_fail.kdl @@ -0,0 +1 @@ +node 123#123 diff --git a/tests/test_cases/input/suffix_type_explicit_redundant_fail.kdl b/tests/test_cases/input/suffix_type_explicit_redundant_fail.kdl new file mode 100644 index 00000000..7bd2fe9e --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_redundant_fail.kdl @@ -0,0 +1 @@ +node (abc)123#def From 3b9436361a7d3716e53dbab7db9e90e8b8bb9aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 11:51:00 -0700 
Subject: [PATCH 02/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 924c96e1..26ca2b8a 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -295,7 +295,8 @@ designing this feature, it was determined that the value for various real-world DSLs outweighed the complexity of the following rules. As such, to remove ambiguity, the suffix ({{identifier-string}}) MUST NOT start -with any of the following patterns, all of which MUST yield syntax errors: +with any of the following patterns, all of which MUST yield syntax errors +(if they can be distinguished from other syntaxes at all): * `.`, `,`, or `_` * `[a-zA-Z][0-9_]` (to disambiguate all non-decimals, with breathing room) From a4b1053d157c35959de4703d894b532b21853205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 11:51:17 -0700 Subject: [PATCH 03/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 26ca2b8a..00653736 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -303,6 +303,9 @@ with any of the following patterns, all of which MUST yield syntax errors * `[eE][+-]?[0-9]` (to disambiguate exponentials) * `[xX][a-fA-F]` (to disambiguate hexadecimals) +For example, `10,000` is illegal, as is `10u16`. `10e0n` is illegal, but `10e0` is a legal +*decimal number using exponential syntax*, __not__ equivalent to `(e0)10`. + All other ({{identifier-string}})s can be safely appended to decimal numbers, so long as the decimal does not include an exponential component. 
From 2f21bd3fcf5ffbeb754ec76435b37c7da19d69ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 11:51:31 -0700 Subject: [PATCH 04/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 00653736..b05fa73a 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -310,7 +310,7 @@ All other ({{identifier-string}})s can be safely appended to decimal numbers, so long as the decimal does not include an exponential component. If the desired suffix would violate any of the above rules, either regular -parenthetical ({{type-annotation}})s, or ({{explicit-suffix-type-annotation}})s +parenthetical ({{type-annotation}})s or ({{explicit-suffix-type-annotation}})s may be used. #### Explicit Suffix Type Annotation From c4613f66caf263b473eff001887b9ebbfb6660a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 11:51:41 -0700 Subject: [PATCH 05/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index b05fa73a..9c19b6ef 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -315,7 +315,7 @@ may be used. #### Explicit Suffix Type Annotation -Any ({{number}}) may have a `#` attached to it, followed by any valid +Any ({{number}}) may have a `#` appended to it, followed by any valid ({{identifier-string}}). This is an explicit ({{suffix-type-annotation}}) syntax without any of the relatively complex requirements of ({{bare-suffix-type-annotation}}), which can be a useful escape hatch. 
For From 662917ce0be963665aaf6f17a33941f1f8674fcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 11:52:14 -0700 Subject: [PATCH 06/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. --- draft-marchan-kdl2.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 9c19b6ef..0bfea57d 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -272,6 +272,16 @@ annotation as a "suffix", instead of prepending it between `(` and `)`. This makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., which are equivalent to `(px)10`, `(%)5`, and `(GiB)512`, respectively. +Most suffixes can be appended directly to the number +(a ({{bare-suffix-type-annotation}})), +as shown in the previous paragraph. +To avoid parsing ambiguity, there are some restrictions on this; +an ({{explicit-suffix-type-annotation}}) avoids all these restrictions +by using an additional `#` to explicitly indicate it. +For example, `10.0u8` is invalid, but `10.0#u8` is valid +and equivalent to `(u8)10.0`. +See the "Bare Suffix Type Annotation" section for the full list of restrictions. + An implementation that finds BOTH a parenthesized and a suffix ({{type-annotation}}) on the same ({{number}}) MUST yield a syntax error. From 1362646825cb8fa6aecc55e42f4a59908711a4bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 13:20:56 -0700 Subject: [PATCH 07/10] new approach to these --- draft-marchan-kdl2.md | 58 ++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 0bfea57d..dc331a99 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -272,15 +272,13 @@ annotation as a "suffix", instead of prepending it between `(` and `)`. 
This makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., which are equivalent to `(px)10`, `(%)5`, and `(GiB)512`, respectively. -Most suffixes can be appended directly to the number -(a ({{bare-suffix-type-annotation}})), -as shown in the previous paragraph. -To avoid parsing ambiguity, there are some restrictions on this; -an ({{explicit-suffix-type-annotation}}) avoids all these restrictions -by using an additional `#` to explicitly indicate it. -For example, `10.0u8` is invalid, but `10.0#u8` is valid -and equivalent to `(u8)10.0`. -See the "Bare Suffix Type Annotation" section for the full list of restrictions. +Most suffixes can be appended directly to the number (a +({{bare-suffix-type-annotation}})), as shown in the previous paragraph. To avoid +parsing ambiguity, there are some restrictions on this; an +({{explicit-suffix-type-annotation}}) avoids all these restrictions by using an +additional `#` to explicitly indicate it. For example, `10.0u8` is invalid, but +`10.0#u8` is valid and equivalent to `(u8)10.0`. See +({{bare-suffix-type-annotation}}) for the full list of restrictions. An implementation that finds BOTH a parenthesized and a suffix ({{type-annotation}}) on the same ({{number}}) MUST yield a syntax error. @@ -294,8 +292,9 @@ There are two kinds of ({{suffix-type-annotation}}) available: #### Bare Suffix Type Annotation When a ({{value}}) is a decimal ({{number}}) WITHOUT exponential syntax (`1e+5` -etc) (and ONLY a decimal), it's possible to attach the type annotation as a -suffix directly to the number, without any additional syntax. +etc) (and ONLY a decimal: that is, numbers which do NOT have a `0b`/`0o`/`0x` +prefix), it's possible to attach the type annotation as a suffix directly to the +number, without any additional syntax. They also come with some additional rules (like only being available for decimals), in order to prevent potential ambiguity or footguns with the syntax. 
@@ -305,16 +304,17 @@ designing this feature, it was determined that the value for various real-world DSLs outweighed the complexity of the following rules. As such, to remove ambiguity, the suffix ({{identifier-string}}) MUST NOT start -with any of the following patterns, all of which MUST yield syntax errors -(if they can be distinguished from other syntaxes at all): +with any of the following patterns, all of which MUST yield syntax errors (if +they can be distinguished from other syntaxes at all): * `.`, `,`, or `_` -* `[a-zA-Z][0-9_]` (to disambiguate all non-decimals, with breathing room) * `[eE][+-]?[0-9]` (to disambiguate exponentials) -* `[xX][a-fA-F]` (to disambiguate hexadecimals) -For example, `10,000` is illegal, as is `10u16`. `10e0n` is illegal, but `10e0` is a legal +For example, `10,000` is illegal. `10e0n` is illegal, but `10e0` is a legal *decimal number using exponential syntax*, __not__ equivalent to `(e0)10`. +Additionally, note that since bare suffixes are only legal on _decimals_, `0u8` +is legal, but `0xs` is _not_, since hexadecimals are determined by their +prefixes. Similarly, `1xs` _is_ legal, and equivalent to `(xs)1`. All other ({{identifier-string}})s can be safely appended to decimal numbers, so long as the decimal does not include an exponential component. @@ -329,11 +329,11 @@ Any ({{number}}) may have a `#` appended to it, followed by any valid ({{identifier-string}}). This is an explicit ({{suffix-type-annotation}}) syntax without any of the relatively complex requirements of ({{bare-suffix-type-annotation}}), which can be a useful escape hatch. For -example: `10.0#u8` is invalid syntax without the `#` prefix. +example: `0#b1` is invalid syntax without the `#` prefix. Note again that, unlike ({{bare-suffix-type-annotation}})s, Explicit Suffixes may be used with ALL ({{number}}) formats (hexadecimal, decimal, octal, and -binary). For example, `0x1234#u16` is valid. +binary). For example, `0x1234#u32` is valid. 
### Reserved Type Annotations for Numbers Without Decimals @@ -1022,8 +1022,9 @@ node-children := '{' nodes final-node? '}' node-terminator := single-line-comment | newline | ';' | eof prop := string node-space* '=' node-space* value -value := type? node-space* (string | number | keyword) +value := normal-value | suffixed-decimal type := '(' node-space* string node-space* ')' +normal-value := type? node-space* (string | number | keyword) // Strings string := identifier-string | quoted-string | raw-string ¶ @@ -1084,24 +1085,25 @@ multi-line-raw-string-body := // Numbers number := keyword-number | hex | octal | binary | decimal -decimal := sign? integer ('.' integer)? ( - // NOTE: This grammar does not explicitly guard against having both - // parenthesized and type suffixes. - bare-type-suffix | - explicit-type-suffix | - (exponent explicit-type-suffix?) - )? +decimal := significand exponent? +suffixed-decimal := significand ( + bare-type-suffix + | (exponent? explicit-type-suffix) +) +significand := sign? significand-initial integer? ('.' integer)? exponent := ('e' | 'E') sign? integer integer := digit (digit | '_')* +significand-initial := digit + - '0b' + - '0o' + - '0x' digit := [0-9] sign := '+' | '-' bare-type-suffix := bare-type-suffix-initial identifier-char* bare-type-suffix-initial := identifier-char - '.' - ',' - '_' - - ([a-zA-Z] [0-9_]) - (('e' | 'E') sign? digit) - - (('x' | 'X') [a-fA-F]) explicit-type-suffix := '#' identifier-string hex := sign?
'0x' hex-digit (hex-digit | '_')* From d5d4f4671a60b9d5e28e618d3c7968bb2e9c41bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 21:06:37 -0700 Subject: [PATCH 08/10] more cleanup and more tests --- draft-marchan-kdl2.md | 119 +++++++++--------- .../suffix_type_bare_letter_number.kdl | 1 + .../input/suffix_type_bare_binary_fail.kdl | 1 + .../input/suffix_type_bare_binaryish_fail.kdl | 1 + ...ix_type_bare_exponentish_no_digit_fail.kdl | 1 + .../input/suffix_type_bare_hex_fail.kdl | 1 + .../input/suffix_type_bare_hexish_fail.kdl | 2 +- ...kdl => suffix_type_bare_letter_number.kdl} | 0 .../suffix_type_bare_non_decimal_fail.kdl | 1 - .../suffix_type_bare_non_identifier_fail.kdl | 1 - .../input/suffix_type_bare_octal_fail.kdl | 1 + ...uffix_type_bare_on_quoted_string_fail.kdl} | 0 .../suffix_type_bare_on_raw_string_fail.kdl | 1 + .../suffix_type_bare_quoted_string_fail.kdl | 1 + .../suffix_type_bare_raw_string_fail.kdl | 1 + ...ffix_type_explicit_non_identifier_fail.kdl | 1 - ...uffix_type_explicit_quoted_string_fail.kdl | 1 + .../suffix_type_explicit_raw_string_fail.kdl | 1 + 18 files changed, 73 insertions(+), 62 deletions(-) create mode 100644 tests/test_cases/expected_kdl/suffix_type_bare_letter_number.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_binary_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_binaryish_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_exponentish_no_digit_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_hex_fail.kdl rename tests/test_cases/input/{suffix_type_bare_letter_number_fail.kdl => suffix_type_bare_letter_number.kdl} (100%) delete mode 100644 tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl delete mode 100644 tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_octal_fail.kdl rename tests/test_cases/input/{suffix_type_bare_string_fail.kdl => 
suffix_type_bare_on_quoted_string_fail.kdl} (100%) create mode 100644 tests/test_cases/input/suffix_type_bare_on_raw_string_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_quoted_string_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_bare_raw_string_fail.kdl delete mode 100644 tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_quoted_string_fail.kdl create mode 100644 tests/test_cases/input/suffix_type_explicit_raw_string_fail.kdl diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index dc331a99..0ad63ca6 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -267,75 +267,78 @@ about how to interpret a value. ### Suffix Type Annotation -When a ({{value}}) is a ({{number}}), it's possible to attach the type -annotation as a "suffix", instead of prepending it between `(` and `)`. This -makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., which -are equivalent to `(px)10`, `(%)5`, and `(GiB)512`, respectively. - -Most suffixes can be appended directly to the number (a -({{bare-suffix-type-annotation}})), as shown in the previous paragraph. To avoid -parsing ambiguity, there are some restrictions on this; an -({{explicit-suffix-type-annotation}}) avoids all these restrictions by using an -additional `#` to explicitly indicate it. For example, `10.0u8` is invalid, but -`10.0#u8` is valid and equivalent to `(u8)10.0`. See -({{bare-suffix-type-annotation}}) for the full list of restrictions. - -An implementation that finds BOTH a parenthesized and a suffix -({{type-annotation}}) on the same ({{number}}) MUST yield a syntax error. - -Suffixes MUST BE plain ({{identifier-string}})s. No other ({{string}}) is -acceptable. - -There are two kinds of ({{suffix-type-annotation}}) available: -({{bare-suffix-type-annotation}})s and ({{explicit-suffix-type-annotation}}). 
+When a Value ({{value}}) is a Number ({{number}}), it's possible to attach the +type annotation as a "suffix", instead of prepending it between `(` and `)`. +This makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., +which are equivalent to `(px)10`, `(%)10.5`, and `(GiB)512`, respectively. + +There are two kinds of Suffix Type Annotations ({{suffix-type-annotation}}) +available: Bare Suffix Type Annotations ({{bare-suffix-type-annotation}}) and +Explicit Suffix Type Annotations ({{explicit-suffix-type-annotation}}). + +Most suffixes can be appended directly to the number (a Bare Suffix Type +Annotation ({{bare-suffix-type-annotation}})), as shown in the examples +above. To avoid parsing ambiguity, there are some restrictions on this; an +Explicit Suffix Type Annotation ({{explicit-suffix-type-annotation}}) avoids all +these restrictions by using an additional `#` to explicitly indicate it. For +example, `0b` is invalid, but `0#b` is valid and equivalent to +`(b)0`. See Bare Suffix Type Annotation ({{bare-suffix-type-annotation}}) +for the full list of restrictions. + +An implementation that finds BOTH a parenthesized Type Annotation ({{type-annotation}}) and a +Suffix Type Annotation ({{suffix-type-annotation}}) on the same Number +({{number}}) MUST yield a syntax error. + +Suffixes MUST be plain Identifier Strings ({{identifier-string}}). No other +String ({{string}}) syntax is acceptable. #### Bare Suffix Type Annotation -When a ({{value}}) is a decimal ({{number}}) WITHOUT exponential syntax (`1e+5` -etc) (and ONLY a decimal: that is, numbers which do NOT have a `0b`/`0o`/`0x` -prefix), it's possible to attach the type annotation as a suffix directly to the -number, without any additional syntax. +When a Value ({{value}}) is a decimal Number ({{number}}) WITHOUT exponential +syntax (`1e+5` etc) (and ONLY a decimal.
That is, numbers which do NOT have a +`0b`/`0o`/`0x` prefix with an optional sign), it's possible to append the type +annotation as a suffix directly to the number, without any additional syntax. -They also come with some additional rules (like only being available for -decimals), in order to prevent potential ambiguity or footguns with the syntax. -This is generally acceptable, as type annotations in particular tend to be -application-defined and limited in scope, rather than arbitrary user data. In -designing this feature, it was determined that the value for various real-world -DSLs outweighed the complexity of the following rules. +To remove further ambiguity, on top of not being available for non-decimal +prefixes, and for decimals with exponent parts, the suffix Identifier String +({{identifier-string}}) itself MUST NOT start with any of `.`, `,`, or `_`, as +well as `[eE]([0-9]|[-+][0-9]?)` as part of the exponential restriction above. Note the +optional digit after the sign, which is added to prevent typo ambiguity. -As such, to remove ambiguity, the suffix ({{identifier-string}}) MUST NOT start -with any of the following patterns, all of which MUST yield syntax errors (if -they can be distinguished from other syntaxes at all): +For example, the following are all illegal: -* `.`, `,`, or `_` -* `[eE][+-]?[0-9]` (to disambiguate exponentials) +* `10,000` (suffix would start with `,`) +* `10e0n` (suffix on an exponential) +* `0xyz` (starts with reserved hexadecimal prefix) +* `0b` (starts with reserved binary prefix) +* `5e+oops` (looks too close to an exponential) -For example, `10,000` is illegal. `10e0n` is illegal, but `10e0` is a legal -*decimal number using exponential syntax*, __not__ equivalent to `(e0)10`. -Additionally, note that since bare suffixes are only legal on _decimals_, `0u8` -is legal, but `0xs` is _not_, since hexadecimals are determined by their -prefixes. Similarly, `1xs` _is_ legal, and equivalent to `(xs)1`.
+Whereas the following are all legal: -All other ({{identifier-string}})s can be safely appended to decimal numbers, so -long as the decimal does not include an exponential component. +* `0u8` (aka `(u8)0`) +* `5em` (aka `(em)5`. The `e` is not followed by a digit.) +* `1xyz` (aka `(xyz)1`. No longer starts with `0` as above.) +* `20b` (aka `(b)20`, "20 bytes". No longer starts with just `0` as above.) If the desired suffix would violate any of the above rules, either regular -parenthetical ({{type-annotation}})s or ({{explicit-suffix-type-annotation}})s -may be used. +parenthetical Type Annotations ({{type-annotation}}) or Explicit Suffix Type +Annotations ({{explicit-suffix-type-annotation}}) may be used. #### Explicit Suffix Type Annotation -Any ({{number}}) may have a `#` appended to it, followed by any valid -({{identifier-string}}). This is an explicit ({{suffix-type-annotation}}) syntax -without any of the relatively complex requirements of -({{bare-suffix-type-annotation}}), which can be a useful escape hatch. For -example: `0#b1` is invalid syntax without the `#` prefix. +Any Number ({{number}}) may have a `#` appended to it, followed by any valid +Identifier String ({{identifier-string}}). This is a Suffix Type Annotation +({{suffix-type-annotation}}) syntax without any of the added +restrictions of Bare Suffix Type Annotations ({{bare-suffix-type-annotation}}), +which can be a useful escape hatch. For example, `0b` is invalid syntax, but +`0#b` is valid and equivalent to `(b)0`. -Note again that, unlike ({{bare-suffix-type-annotation}})s, Explicit Suffixes -may be used with ALL ({{number}}) formats (hexadecimal, decimal, octal, and -binary). For example, `0x1234#u32` is valid. +Note that, unlike Bare Suffix Type Annotations +({{bare-suffix-type-annotation}}), Explicit Suffixes may be used with ALL Number +({{number}}) formats (hexadecimal, decimal, octal, and binary). For example, +`0x1234#u32` is valid.
-### Reserved Type Annotations for Numbers Without Decimals +### Reserved Type Annotations for Numbers Without Decimal Parts Additionally, the following type annotations MAY be recognized by KDL parsers and, if used, SHOULD interpret these types as follows. @@ -361,7 +364,7 @@ Platform-dependent integer types, both signed and unsigned: - `isize` - `usize` -### Reserved Type Annotations for Numbers With Decimals: +### Reserved Type Annotations for Numbers With Decimal Parts IEEE 754 floating point numbers, both single (32) and double (64) precision: @@ -373,7 +376,7 @@ IEEE 754-2008 decimal floating point numbers - `decimal64` - `decimal128` -### Reserved Type Annotations for Strings: +### Reserved Type Annotations for Strings - `date-time`: ISO8601 date/time format. - `time`: "Time" section of ISO8601. @@ -404,8 +407,8 @@ IEEE 754-2008 decimal floating point numbers ### Examples ~~~kdl -node (u8)123 -node 123#i64 +node 123u8 +node 0#b 20b 50GiB node prop=(regex).* (published)date "1970-01-01" (contributor)person name="Foo McBar" diff --git a/tests/test_cases/expected_kdl/suffix_type_bare_letter_number.kdl b/tests/test_cases/expected_kdl/suffix_type_bare_letter_number.kdl new file mode 100644 index 00000000..f8433a74 --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_bare_letter_number.kdl @@ -0,0 +1 @@ +node (u8)123 diff --git a/tests/test_cases/input/suffix_type_bare_binary_fail.kdl b/tests/test_cases/input/suffix_type_bare_binary_fail.kdl new file mode 100644 index 00000000..e14dd7f5 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_binary_fail.kdl @@ -0,0 +1 @@ +node 0b0101hi diff --git a/tests/test_cases/input/suffix_type_bare_binaryish_fail.kdl b/tests/test_cases/input/suffix_type_bare_binaryish_fail.kdl new file mode 100644 index 00000000..bfc36dc4 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_binaryish_fail.kdl @@ -0,0 +1 @@ +node 0b diff --git a/tests/test_cases/input/suffix_type_bare_exponentish_no_digit_fail.kdl 
b/tests/test_cases/input/suffix_type_bare_exponentish_no_digit_fail.kdl new file mode 100644 index 00000000..d61a1ae3 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_exponentish_no_digit_fail.kdl @@ -0,0 +1 @@ +node 1234e+foo diff --git a/tests/test_cases/input/suffix_type_bare_hex_fail.kdl b/tests/test_cases/input/suffix_type_bare_hex_fail.kdl new file mode 100644 index 00000000..dc0efe44 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_hex_fail.kdl @@ -0,0 +1 @@ +node 0x123nope diff --git a/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl b/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl index fc4ae1ef..37977c93 100644 --- a/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl +++ b/tests/test_cases/input/suffix_type_bare_hexish_fail.kdl @@ -1 +1 @@ -node 123xabc123 \ No newline at end of file +node 0xohno diff --git a/tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl b/tests/test_cases/input/suffix_type_bare_letter_number.kdl similarity index 100% rename from tests/test_cases/input/suffix_type_bare_letter_number_fail.kdl rename to tests/test_cases/input/suffix_type_bare_letter_number.kdl diff --git a/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl b/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl deleted file mode 100644 index bf09c804..00000000 --- a/tests/test_cases/input/suffix_type_bare_non_decimal_fail.kdl +++ /dev/null @@ -1 +0,0 @@ -node 0b0101abc 0o123def 0x123zzz diff --git a/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl b/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl deleted file mode 100644 index 0fee4ada..00000000 --- a/tests/test_cases/input/suffix_type_bare_non_identifier_fail.kdl +++ /dev/null @@ -1 +0,0 @@ -node 123"astring" 456#"rawstring"# diff --git a/tests/test_cases/input/suffix_type_bare_octal_fail.kdl b/tests/test_cases/input/suffix_type_bare_octal_fail.kdl new file mode 100644 index 00000000..8adf6a21 --- /dev/null +++ 
b/tests/test_cases/input/suffix_type_bare_octal_fail.kdl @@ -0,0 +1 @@ +node 0o123nope diff --git a/tests/test_cases/input/suffix_type_bare_string_fail.kdl b/tests/test_cases/input/suffix_type_bare_on_quoted_string_fail.kdl similarity index 100% rename from tests/test_cases/input/suffix_type_bare_string_fail.kdl rename to tests/test_cases/input/suffix_type_bare_on_quoted_string_fail.kdl diff --git a/tests/test_cases/input/suffix_type_bare_on_raw_string_fail.kdl b/tests/test_cases/input/suffix_type_bare_on_raw_string_fail.kdl new file mode 100644 index 00000000..9e72af0c --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_on_raw_string_fail.kdl @@ -0,0 +1 @@ +node #"foo"#bar diff --git a/tests/test_cases/input/suffix_type_bare_quoted_string_fail.kdl b/tests/test_cases/input/suffix_type_bare_quoted_string_fail.kdl new file mode 100644 index 00000000..335a1a25 --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_quoted_string_fail.kdl @@ -0,0 +1 @@ +node 123"astring" diff --git a/tests/test_cases/input/suffix_type_bare_raw_string_fail.kdl b/tests/test_cases/input/suffix_type_bare_raw_string_fail.kdl new file mode 100644 index 00000000..3453eebb --- /dev/null +++ b/tests/test_cases/input/suffix_type_bare_raw_string_fail.kdl @@ -0,0 +1 @@ +node 456#"rawstring"# diff --git a/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl b/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl deleted file mode 100644 index 0b0a11ad..00000000 --- a/tests/test_cases/input/suffix_type_explicit_non_identifier_fail.kdl +++ /dev/null @@ -1 +0,0 @@ -node 123#"string" 456##"rawstring"# diff --git a/tests/test_cases/input/suffix_type_explicit_quoted_string_fail.kdl b/tests/test_cases/input/suffix_type_explicit_quoted_string_fail.kdl new file mode 100644 index 00000000..5ffa2ccf --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_quoted_string_fail.kdl @@ -0,0 +1 @@ +node 123#"string" diff --git 
a/tests/test_cases/input/suffix_type_explicit_raw_string_fail.kdl b/tests/test_cases/input/suffix_type_explicit_raw_string_fail.kdl new file mode 100644 index 00000000..7203cd63 --- /dev/null +++ b/tests/test_cases/input/suffix_type_explicit_raw_string_fail.kdl @@ -0,0 +1 @@ +node 456##"rawstring"# From 06a6423ec67143b197a97f2ec0dbaa781bc52995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 17 Apr 2025 21:58:47 -0700 Subject: [PATCH 09/10] more refinement --- draft-marchan-kdl2.md | 124 ++++++++++-------- .../suffix_type_bare_underscore.kdl | 1 + .../input/bare_ident_numeric_fail.kdl | 1 - .../input/bare_ident_numeric_sign_fail.kdl | 1 - .../input/exponent_no_digit_fail.kdl | 1 + ...il.kdl => suffix_type_bare_underscore.kdl} | 0 6 files changed, 71 insertions(+), 57 deletions(-) create mode 100644 tests/test_cases/expected_kdl/suffix_type_bare_underscore.kdl delete mode 100644 tests/test_cases/input/bare_ident_numeric_fail.kdl delete mode 100644 tests/test_cases/input/bare_ident_numeric_sign_fail.kdl create mode 100644 tests/test_cases/input/exponent_no_digit_fail.kdl rename tests/test_cases/input/{suffix_type_bare_underscore_fail.kdl => suffix_type_bare_underscore.kdl} (100%) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index 0ad63ca6..b27b3f52 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -117,10 +117,11 @@ Being a node-oriented language means that the real core component of any KDL document is the "node". Every node must have a name, which must be a String ({{string}}). -The name may be preceded by a Type Annotation ({{type-annotation}}) to further -clarify its type, particularly in relation to its parent node. (For example, -clarifying that a particular `date` child node is for the _publication_ date, -rather than the last-modified date, with `(published)date`.) 
+The name may be preceded by a Prefix Type Annotation +({{prefix-type-annotation}}) to further clarify its type, particularly in +relation to its parent node. (For example, clarifying that a particular `date` +child node is for the _publication_ date, rather than the last-modified date, +with `(published)date`.) Following the name are zero or more Arguments ({{argument}}) or Properties ({{property}}), separated by either whitespace ({{whitespace}}) or a @@ -247,23 +248,42 @@ Values _MUST_ be either Arguments ({{argument}}) or values of Properties ({{property}}). Only String ({{string}}) values may be used as Node ({{node}}) names or Property ({{property}}) keys. -Values (both as arguments and in properties) _MAY_ be prefixed by a single -Type Annotation ({{type-annotation}}). +Values (both as arguments and in properties) _MAY_ include a single Type +Annotation ({{type-annotation}}). ## Type Annotation -A type annotation is a prefix to any Node Name ({{node}}) or Value ({{value}}) that -includes a _suggestion_ of what type the value is _intended_ to be treated as, -or as a _context-specific elaboration_ of the more generic type the node name -indicates. - -Type annotations are written as a set of `(` and `)` with a single -String ({{string}}) in it. It may contain Whitespace after the `(` and before -the `)`, and may be separated from its target by Whitespace. +A type annotation is a String ({{string}}) value attached to any Node Name +({{node}}) or Value ({{value}}) that includes a _suggestion_ of what type the +value is _intended_ to be treated as, or as a _context-specific elaboration_ of +the more generic type the node name indicates. KDL does not specify any restrictions on what implementations might do with these annotations. They are free to ignore them, or use them to make decisions -about how to interpret a value. +about how to interpret a value. 
That said, KDL does reserve certain well-known +strings for what would be their intended purpose, for the sake of +interoperability ({{reserved-type-annotations}}). + +There are two kinds of Type Annotation syntaxes in KDL: Prefix Type Annotations +({{prefix-type-annotation}}) and Suffix Type Annotations +({{suffix-type-annotation}}). + +### Examples + +~~~kdl +node 123u8 +node 0#b 20b 50GiB +node prop=(regex).* +(published)date "1970-01-01" +(contributor)person name="Foo McBar" +~~~ + +### Prefix Type Annotation + +A Prefix Type Annotation is written as a pair of `(` and `)` with a single String +({{string}}) between them. It may contain Whitespace after the `(` and before the `)`, +and may be separated from its target by Whitespace. Unlike Suffix Type Annotations, +any kind of String may be used. ### Suffix Type Annotation @@ -272,38 +292,38 @@ type annotation as a "suffix", instead of prepending it between `(` and `)`. This makes it possible to, for example, write `10px`, `10.5%`, `512GiB`, etc., which are equivalent to `(px)10`, `(%)10.5`, and `(GiB)512`, respectively. -There are two kinds of Suffix Type Annotations ({{suffix-type-annotation}}) -available: Bare Suffix Type Annotations ({{bare-suffix-type-annotation}})s and -Explicit Suffix Type Annotations ({{explicit-suffix-type-annotation}}). - Most suffixes can be appended directly to the number (a Bare Suffix Type Annotation ({{bare-suffix-type-annotation}})), as shown in the previous paragraph. To avoid parsing ambiguity, there are some restrictions on this; an Explicit Suffix Type Annotation ({{explicit-suffix-type-annotation}}) avoids all these restrictions by using an additional `#` to explicitly indicate it. For -example, `10.0u8` is invalid, but `10.0#u8` is valid and equivalent to -`(u8)10.0`. See Bare Suffix Type Annotation ({{bare-suffix-type-annotation}}) +example, `0bytes` is invalid, but `0#bytes` is valid and equivalent to +`(bytes)0`.
See Bare Suffix Type Annotation ({{bare-suffix-type-annotation}}) for the full list of restrictions. -An implementation that finds BOTH a parenthesized ({{type-annotation}}) and a -Suffix Type Annotation ({{suffix-type-annotation}}) on the same Number -({{number}}) MUST yield a syntax error. +An implementation that finds BOTH a parenthesized Prefix Type Annotation +({{prefix-type-annotation}}) and a Suffix Type Annotation +({{suffix-type-annotation}}) on the same Number ({{number}}) MUST yield a syntax +error. Suffixes MUST BE plain Identifier Strings ({{identifier-string}}). No other String ({{string}}) syntax is acceptable. #### Bare Suffix Type Annotation -When a Value ({{value}}) is a decimal Number ({{number}}) WITHOUT exponential -syntax (`1e+5` etc) (and ONLY a decimal. That is, numbers which do NOT have a -`0b`/`0o`/`0x` prefix with an optional sign), it's possible to append the type -annotation as a suffix directly to the number, without any additional syntax. +When a Value ({{value}}) is a Number ({{number}}) that meets certain criteria, +it's possible to append an Identifier String ({{identifier-string}}), and ONLY +an Identifier String, as a suffix directly to the Number, as its Type Annotation +({{type-annotation}}). The criteria are as follows: -To remove further ambiguity, on top of not being available for non-decimal -prefixes, and for decimals with exponent parts, the suffix Identifier String -({{identifier-string}}) itself MUST NOT start with any of `.`, `,`, or `_`, as -well as `[eE][-+]?[0-9]?` as part of the exponential restriction above. Note the -optional digit, which is added to prevent typo ambiguity. +* The Number MUST be a Decimal (that is, it MUST NOT start with `0b`, `0o`, or + `0x`). Additionally, the tokens `0b`, `0o`, and `0x` MUST be treated as syntax + errors (incomplete non-decimal numbers). +* It MUST NOT have an exponent part (e.g. `5.2e+3`). 
+* The Identifier String used for the type itself MUST NOT start with either `.` or `,`. +* As part of the exponential restriction, the suffix MUST NOT start with text matching + `[eE]([-+]|[0-9])` (e.g. `5.2e+` SHOULD be considered a "bad exponential", and + MUST NOT parse as `(e+)5.2`). For example, the following are all illegal: @@ -312,17 +332,19 @@ For example, the following are all illegal: * `10e0n` (suffix on an exponential) * `0xyz` (starts with reserved hexadecimal prefix) * `0b` (starts with reserved binary prefix) * `5e+oops` (looks too close to an exponential) +* `1.2.3-abc` (suffix would start with `.` AND Identifier Strings can't start + with `.` followed by a digit) Whereas the following are all legal: -* `0u8` (aka `(u8)0`) -* `5em` (aka `(em)5`. The `e` is not followed by a digit.) -* `1xyz` (aka `(xyz)1`. No longer starts with `0` as above.) -* `20b` (aka `(b)20`, "20 bytes". No longer starts with just `0` as above.) +* `0u8` = `(u8)0` +* `5em` = `(em)5`, the `e` is not followed by a sign or digit. +* `1xyz` = `(xyz)1`, no longer starts with `0` as above. +* `20b` = `(b)20`, "20 bytes", no longer starts with just `0` as above. -If the desired suffix would violate any of the above rules, either regular -parenthetical Type Annotations ({{type-annotation}}) or Explicit Suffix Type -Annotations ({{explicit-suffix-type-annotation}}) may be used. +If the desired suffix would violate any of the above rules, either Prefix Type +Annotations ({{prefix-type-annotation}}) or Explicit Suffix Type Annotations +({{explicit-suffix-type-annotation}}) may be used. #### Explicit Suffix Type Annotation @@ -338,7 +360,9 @@ Note that, unlike Bare Suffix Type Annotations ({{number}}) formats (hexadecimal, decimal, octal, and binary). For example, `0x1234#u32` is valid. -### Reserved Type Annotations for Numbers Without Decimal Parts +### Reserved Type Annotations + +#### For Numbers Without Decimal Parts Additionally, the following type annotations MAY be recognized by KDL parsers and, if used, SHOULD interpret these types as follows.
@@ -364,7 +388,7 @@ Platform-dependent integer types, both signed and unsigned: - `isize` - `usize` -### Reserved Type Annotations for Numbers With Decimal Parts +#### For Numbers With Decimal Parts IEEE 754 floating point numbers, both single (32) and double (64) precision: @@ -376,7 +400,7 @@ IEEE 754-2008 decimal floating point numbers - `decimal64` - `decimal128` -### Reserved Type Annotations for Strings +#### For Strings - `date-time`: ISO8601 date/time format. - `time`: "Time" section of ISO8601. @@ -404,16 +428,6 @@ IEEE 754-2008 decimal floating point numbers - `base64`: A Base64-encoded string, denoting arbitrary binary data. - `base85`: An [Ascii85](https://en.wikipedia.org/wiki/Ascii85)-encoded string, denoting arbitrary binary data. -### Examples - -~~~kdl -node 123u8 -node 0#b 20b 50GiB -node prop=(regex).* -(published)date "1970-01-01" -(contributor)person name="Foo McBar" -~~~ - ## String Strings in KDL represent textual UTF-8 Values ({{value}}). A String is either an @@ -1105,8 +1119,8 @@ sign := '+' | '-' bare-type-suffix := bare-type-suffix-initial identifier-char* bare-type-suffix-initial := identifier-char - - '.' - ',' - '_' - - (('e' | 'E') sign? digit) + - '.' - ',' + - (('e' | 'E') (sign | digit)) explicit-type-suffix := '#' identifier-string hex := sign? 
'0x' hex-digit (hex-digit | '_')* diff --git a/tests/test_cases/expected_kdl/suffix_type_bare_underscore.kdl b/tests/test_cases/expected_kdl/suffix_type_bare_underscore.kdl new file mode 100644 index 00000000..885fe200 --- /dev/null +++ b/tests/test_cases/expected_kdl/suffix_type_bare_underscore.kdl @@ -0,0 +1 @@ +node (abc)123 diff --git a/tests/test_cases/input/bare_ident_numeric_fail.kdl b/tests/test_cases/input/bare_ident_numeric_fail.kdl deleted file mode 100644 index 053af211..00000000 --- a/tests/test_cases/input/bare_ident_numeric_fail.kdl +++ /dev/null @@ -1 +0,0 @@ -node 0n \ No newline at end of file diff --git a/tests/test_cases/input/bare_ident_numeric_sign_fail.kdl b/tests/test_cases/input/bare_ident_numeric_sign_fail.kdl deleted file mode 100644 index 6cadc35e..00000000 --- a/tests/test_cases/input/bare_ident_numeric_sign_fail.kdl +++ /dev/null @@ -1 +0,0 @@ -node +0n \ No newline at end of file diff --git a/tests/test_cases/input/exponent_no_digit_fail.kdl b/tests/test_cases/input/exponent_no_digit_fail.kdl new file mode 100644 index 00000000..cca7adb1 --- /dev/null +++ b/tests/test_cases/input/exponent_no_digit_fail.kdl @@ -0,0 +1 @@ +node 1.0e- diff --git a/tests/test_cases/input/suffix_type_bare_underscore_fail.kdl b/tests/test_cases/input/suffix_type_bare_underscore.kdl similarity index 100% rename from tests/test_cases/input/suffix_type_bare_underscore_fail.kdl rename to tests/test_cases/input/suffix_type_bare_underscore.kdl From 507b43b3739b897786a01624354adcdf0c2536bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Mon, 28 Apr 2025 14:05:16 -0700 Subject: [PATCH 10/10] Update draft-marchan-kdl2.md Co-authored-by: Tab Atkins Jr. 
--- draft-marchan-kdl2.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/draft-marchan-kdl2.md b/draft-marchan-kdl2.md index b27b3f52..eb7ca726 100644 --- a/draft-marchan-kdl2.md +++ b/draft-marchan-kdl2.md @@ -1117,10 +1117,9 @@ significand-initial = digit digit := [0-9] sign := '+' | '-' -bare-type-suffix := bare-type-suffix-initial identifier-char* -bare-type-suffix-initial := identifier-char - - '.' - ',' - - (('e' | 'E') (sign | digit)) +bare-type-suffix := unambiguous-bare-type-suffix | e-bare-type-suffix +unambiguous-bare-type-suffix := (identifier-char - ('.' | ',' | 'e' | 'E')) identifier-char* +e-bare-type-suffix := 'e' | 'E' | ( ((identifier-char - ('.' | ',')) identifier-char) - (('e' | 'E') (sign | digit)) ) identifier-char* explicit-type-suffix := '#' identifier-string hex := sign? '0x' hex-digit (hex-digit | '_')*