From 8d1f411405fbb6948d83134b937b22e3f2587a4f Mon Sep 17 00:00:00 2001 From: Haiming Gao Date: Tue, 5 Dec 2023 23:26:30 -0600 Subject: [PATCH 1/4] Adding feature to allow for skipping RS control char --- .../com/fasterxml/jackson/core/JsonParser.java | 14 ++++++++++++++ .../jackson/core/base/ParserMinimalBase.java | 1 + .../jackson/core/json/JsonReadFeature.java | 15 +++++++++++++++ .../jackson/core/json/ReaderBasedJsonParser.java | 8 +++++--- .../core/json/async/AsyncNonStdParsingTest.java | 1 + 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonParser.java b/src/main/java/com/fasterxml/jackson/core/JsonParser.java index 3dddc1cbf2..e111758483 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonParser.java @@ -138,6 +138,20 @@ public enum Feature { */ ALLOW_SINGLE_QUOTES(false), + /** + * Feature that determines whether parser will allow use + * of the RS control character (exhibited by 0x1E and parsed as a + * tab or /t) within parsed content or not. + *

+ * Since JSON specification requires use of double quotes for + * field names, + * this is a non-standard feature, and as such disabled by default. + *

+ * NOTE: while not technically deprecated, since 2.10 recommended to use + * {@link com.fasterxml.jackson.core.json.JsonReadFeature#ALLOW_SINGLE_QUOTES} instead. + */ + ALLOW_RS_CONTROL_CHAR(false), + /** * Feature that determines whether parser will allow * JSON Strings to contain unquoted control characters diff --git a/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java b/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java index ef69ba9508..d65a207906 100644 --- a/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java +++ b/src/main/java/com/fasterxml/jackson/core/base/ParserMinimalBase.java @@ -29,6 +29,7 @@ public abstract class ParserMinimalBase extends JsonParser protected final static int INT_LF = '\n'; protected final static int INT_CR = '\r'; protected final static int INT_SPACE = 0x0020; + protected final static int INT_RS = 0x001E; // Markup protected final static int INT_LBRACKET = '['; diff --git a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java index aa2a7d7073..e333133e4b 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java +++ b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java @@ -11,6 +11,21 @@ public enum JsonReadFeature implements FormatFeature { + // // // Support for non-standard data format constructs: whitespaces + + /** + * Feature that determines whether parser will allow use + * of the RS control character (exhibited by 0x1E and parsed as a + * tab or /t) within parsed content or not. + *

+ * Since JSON specification does not mention comments as legal + * construct, + * this is a non-standard feature. As such, feature is + * disabled by default for parsers and must be + * explicitly enabled. + */ + ALLOW_RS_CONTROL_CHAR(false, JsonParser.Feature.ALLOW_RS_CONTROL_CHAR), + // // // Support for non-standard data format constructs: comments /** diff --git a/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java index 13a8048d57..1566bc87d1 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java @@ -35,6 +35,8 @@ public class ReaderBasedJsonParser private final static int FEAT_MASK_ALLOW_JAVA_COMMENTS = Feature.ALLOW_COMMENTS.getMask(); private final static int FEAT_MASK_ALLOW_YAML_COMMENTS = Feature.ALLOW_YAML_COMMENTS.getMask(); + + private final static int FEAT_MASK_ALLOW_CONTROL_CHAR = Feature.ALLOW_RS_CONTROL_CHAR.getMask(); // Latin1 encoding is not supported, but we do use 8-bit subset for // pre-processing task, to simplify first pass, keep it fast. 
@@ -2538,8 +2540,8 @@ private final int _skipWSOrEnd() throws IOException _currInputRowStart = _inputPtr; } else if (i == INT_CR) { _skipCR(); - } else if (i != INT_TAB) { - _throwInvalidSpace(i); + } else if (i != INT_TAB && ((_features & FEAT_MASK_ALLOW_CONTROL_CHAR) != 0 && i != INT_RS)) { + _throwInvalidSpace(i); } } @@ -2558,7 +2560,7 @@ private final int _skipWSOrEnd() throws IOException _currInputRowStart = _inputPtr; } else if (i == INT_CR) { _skipCR(); - } else if (i != INT_TAB) { + } else if (i != INT_TAB && ((_features & FEAT_MASK_ALLOW_CONTROL_CHAR) != 0 && i != INT_RS)) { _throwInvalidSpace(i); } } diff --git a/src/test/java/com/fasterxml/jackson/core/json/async/AsyncNonStdParsingTest.java b/src/test/java/com/fasterxml/jackson/core/json/async/AsyncNonStdParsingTest.java index b9105c07e7..8db7a5006b 100644 --- a/src/test/java/com/fasterxml/jackson/core/json/async/AsyncNonStdParsingTest.java +++ b/src/test/java/com/fasterxml/jackson/core/json/async/AsyncNonStdParsingTest.java @@ -387,4 +387,5 @@ private AsyncReaderWrapper createParser(JsonFactory f, String doc, { return asyncForBytes(f, readSize, _jsonDoc(doc), offset); } + } From 21170ca6762ad8c5528714ac9807eae1bfb99ced Mon Sep 17 00:00:00 2001 From: Haiming Gao Date: Wed, 6 Dec 2023 13:58:42 -0600 Subject: [PATCH 2/4] fixing comments --- src/main/java/com/fasterxml/jackson/core/JsonParser.java | 6 +----- .../com/fasterxml/jackson/core/json/JsonReadFeature.java | 7 ++----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonParser.java b/src/main/java/com/fasterxml/jackson/core/JsonParser.java index e111758483..e9a7d51128 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonParser.java @@ -143,12 +143,8 @@ public enum Feature { * of the RS control character (exhibited by 0x1E and parsed as a * tab or /t) within parsed content or not. *

- * Since JSON specification requires use of double quotes for - * field names, + * Since JSON specification requires quoting for all control characters, * this is a non-standard feature, and as such disabled by default. - *

- * NOTE: while not technically deprecated, since 2.10 recommended to use - * {@link com.fasterxml.jackson.core.json.JsonReadFeature#ALLOW_SINGLE_QUOTES} instead. */ ALLOW_RS_CONTROL_CHAR(false), diff --git a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java index e333133e4b..1e6936fb25 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java +++ b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java @@ -18,11 +18,8 @@ public enum JsonReadFeature * of the RS control character (exhibited by 0x1E and parsed as a * tab or /t) within parsed content or not. *

- * Since JSON specification does not mention comments as legal - * construct, - * this is a non-standard feature. As such, feature is - * disabled by default for parsers and must be - * explicitly enabled. + * Since JSON specification requires quoting for all control characters, + * this is a non-standard feature, and as such disabled by default. */ ALLOW_RS_CONTROL_CHAR(false, JsonParser.Feature.ALLOW_RS_CONTROL_CHAR), From 6c446c980dfb764318ab49943e0765ddb110854c Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Wed, 6 Dec 2023 19:29:36 -0800 Subject: [PATCH 3/4] Fix logic for skipping RS chars --- .../jackson/core/json/ReaderBasedJsonParser.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java index 1566bc87d1..7a549d278e 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java @@ -36,7 +36,7 @@ public class ReaderBasedJsonParser private final static int FEAT_MASK_ALLOW_JAVA_COMMENTS = Feature.ALLOW_COMMENTS.getMask(); private final static int FEAT_MASK_ALLOW_YAML_COMMENTS = Feature.ALLOW_YAML_COMMENTS.getMask(); - private final static int FEAT_MASK_ALLOW_CONTROL_CHAR = Feature.ALLOW_RS_CONTROL_CHAR.getMask(); + private final static int FEAT_MASK_ALLOW_RS_CTRL_CHAR = Feature.ALLOW_RS_CONTROL_CHAR.getMask(); // Latin1 encoding is not supported, but we do use 8-bit subset for // pre-processing task, to simplify first pass, keep it fast. 
@@ -2540,8 +2540,9 @@ private final int _skipWSOrEnd() throws IOException _currInputRowStart = _inputPtr; } else if (i == INT_CR) { _skipCR(); - } else if (i != INT_TAB && ((_features & FEAT_MASK_ALLOW_CONTROL_CHAR) != 0 && i != INT_RS)) { - _throwInvalidSpace(i); + } else if (i != INT_TAB + && ((_features & FEAT_MASK_ALLOW_RS_CTRL_CHAR) == 0 || i != INT_RS)) { + _throwInvalidSpace(i); } } @@ -2560,7 +2561,7 @@ private final int _skipWSOrEnd() throws IOException _currInputRowStart = _inputPtr; } else if (i == INT_CR) { _skipCR(); - } else if (i != INT_TAB && ((_features & FEAT_MASK_ALLOW_CONTROL_CHAR) != 0 && i != INT_RS)) { + } else if (i != INT_TAB && ((_features & FEAT_MASK_ALLOW_RS_CTRL_CHAR) == 0 || i != INT_RS)) { _throwInvalidSpace(i); } } From 66961b541486057b1aef2d257435d4843c1e783c Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Wed, 6 Dec 2023 19:35:30 -0800 Subject: [PATCH 4/4] Add `@since` annotations, minor comment rewording --- .../java/com/fasterxml/jackson/core/JsonParser.java | 13 ++++++++++--- .../jackson/core/json/JsonReadFeature.java | 10 +++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/fasterxml/jackson/core/JsonParser.java b/src/main/java/com/fasterxml/jackson/core/JsonParser.java index e9a7d51128..f0520b2615 100644 --- a/src/main/java/com/fasterxml/jackson/core/JsonParser.java +++ b/src/main/java/com/fasterxml/jackson/core/JsonParser.java @@ -140,11 +140,18 @@ public enum Feature { /** * Feature that determines whether parser will allow use - * of the RS control character (exhibited by 0x1E and parsed as a - * tab or /t) within parsed content or not. + * of the RS control character ({@code 0x1E}) within ignorable + * whitespace portion of input content (similar to TAB which + * is an allowed control character). *

- * Since JSON specification requires quoting for all control characters, + * Since JSON specification only allows a small set of control characters + * as whitespace by default, * this is a non-standard feature, and as such disabled by default. + *

+ * NOTE: while not technically deprecated, it is recommended to use + * {@link com.fasterxml.jackson.core.json.JsonReadFeature#ALLOW_RS_CONTROL_CHAR} instead. + * + * @since 2.17 */ ALLOW_RS_CONTROL_CHAR(false), diff --git a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java index 1e6936fb25..d04a75010c 100644 --- a/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java +++ b/src/main/java/com/fasterxml/jackson/core/json/JsonReadFeature.java @@ -15,11 +15,15 @@ public enum JsonReadFeature /** * Feature that determines whether parser will allow use - * of the RS control character (exhibited by 0x1E and parsed as a - * tab or /t) within parsed content or not. + * of the RS control character ({@code 0x1E}) within ignorable + * whitespace portion of input content (similar to TAB which + * is an allowed control character). *

- * Since JSON specification requires quoting for all control characters, + * Since JSON specification only allows a small set of control characters + * as whitespace by default, * this is a non-standard feature, and as such disabled by default. + * + * @since 2.17 */ ALLOW_RS_CONTROL_CHAR(false, JsonParser.Feature.ALLOW_RS_CONTROL_CHAR),