-
-
Notifications
You must be signed in to change notification settings - Fork 815
ByteSourceJsonBootstrapper uses CharBufferReader for byte[] inputs #1079
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
package com.fasterxml.jackson.core.json; | ||
|
||
import java.io.*; | ||
import java.nio.ByteBuffer; | ||
import java.nio.charset.Charset; | ||
|
||
import com.fasterxml.jackson.core.*; | ||
import com.fasterxml.jackson.core.format.InputAccessor; | ||
|
@@ -230,6 +232,13 @@ public Reader constructReader() throws IOException | |
InputStream in = _in; | ||
|
||
if (in == null) { | ||
int size = _inputEnd - _inputPtr; | ||
if (size >= 0 && size <= 8192) { | ||
// [jackson-core#488] Avoid overhead of heap ByteBuffer allocated by InputStreamReader | ||
// when processing small inputs up to 8KiB. | ||
Charset charset = Charset.forName(enc.getJavaName()); | ||
return new CharBufferReader(charset.decode(ByteBuffer.wrap(_inputBuffer, _inputPtr, _inputEnd))); | ||
} | ||
in = new ByteArrayInputStream(_inputBuffer, _inputPtr, _inputEnd); | ||
Comment on lines
+235
to
242
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Open to thoughts here on threshold. This new path decoding from |
||
} else { | ||
/* Also, if we have any read but unused input (usually true), | ||
|
75 changes: 75 additions & 0 deletions
75
src/main/java/com/fasterxml/jackson/core/json/CharBufferReader.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package com.fasterxml.jackson.core.json; | ||
|
||
import java.io.Reader; | ||
import java.nio.CharBuffer; | ||
import java.nio.charset.Charset; | ||
|
||
/** | ||
* An adapter implementation from {@link CharBuffer} to {@link Reader}. | ||
* This is used by {@link ByteSourceJsonBootstrapper#constructReader()} when processing small inputs to | ||
* avoid the 8KiB {@link java.nio.ByteBuffer} allocated by {@link java.io.InputStreamReader}, see [jackson-core#488]. | ||
*/ | ||
final class CharBufferReader extends Reader { | ||
private final CharBuffer charBuffer; | ||
|
||
CharBufferReader(CharBuffer buffer) { | ||
this.charBuffer = buffer; | ||
} | ||
|
||
@Override | ||
public int read(char[] chars, int off, int len) { | ||
int remaining = this.charBuffer.remaining(); | ||
if (remaining <= 0) { | ||
return -1; | ||
} | ||
int length = Math.min(len, remaining); | ||
this.charBuffer.get(chars, off, length); | ||
return length; | ||
} | ||
|
||
@Override | ||
public int read() { | ||
if (this.charBuffer.hasRemaining()) { | ||
return this.charBuffer.get(); | ||
} | ||
return -1; | ||
} | ||
|
||
@Override | ||
public long skip(long n) { | ||
if (n < 0L) { | ||
throw new IllegalArgumentException("number of characters to skip cannot be negative"); | ||
} | ||
int skipped = Math.min(this.charBuffer.remaining(), (int) Math.min(n, Integer.MAX_VALUE)); | ||
this.charBuffer.position(this.charBuffer.position() + skipped); | ||
return skipped; | ||
} | ||
|
||
@Override | ||
public boolean ready() { | ||
return true; | ||
} | ||
|
||
@Override | ||
public boolean markSupported() { | ||
return true; | ||
} | ||
|
||
@Override | ||
public void mark(int readAheadLimit) { | ||
if (readAheadLimit < 0L) { | ||
throw new IllegalArgumentException("read ahead limit cannot be negative"); | ||
} | ||
this.charBuffer.mark(); | ||
} | ||
|
||
@Override | ||
public void reset() { | ||
this.charBuffer.reset(); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
this.charBuffer.position(this.charBuffer.limit()); | ||
} | ||
} |
108 changes: 108 additions & 0 deletions
108
src/test/java/com/fasterxml/jackson/core/json/CharBufferReaderTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
package com.fasterxml.jackson.core.json; | ||
|
||
import java.io.IOException; | ||
import java.io.Reader; | ||
import java.nio.CharBuffer; | ||
import java.util.Arrays; | ||
|
||
import static org.junit.Assert.assertArrayEquals; | ||
import static org.junit.Assert.assertThrows; | ||
|
||
public class CharBufferReaderTest extends com.fasterxml.jackson.core.BaseTest { | ||
|
||
public void testSingleCharRead() throws IOException { | ||
CharBuffer buffer = CharBuffer.wrap("aB\u00A0\u1AE9\uFFFC"); | ||
Reader charBufferReader = new CharBufferReader(buffer); | ||
try (Reader reader = charBufferReader) { | ||
assertEquals('a', reader.read()); | ||
assertEquals('B', reader.read()); | ||
assertEquals('\u00A0', reader.read()); | ||
assertEquals('\u1AE9', reader.read()); | ||
assertEquals('', reader.read()); | ||
assertEquals(-1, reader.read()); | ||
} | ||
assertEquals(-1, charBufferReader.read()); | ||
} | ||
|
||
public void testBulkRead() throws IOException { | ||
CharBuffer buffer = CharBuffer.wrap("abcdefghijklmnopqrst\u00A0"); | ||
char[] chars = new char[12]; | ||
Reader charBufferReader = new CharBufferReader(buffer); | ||
try (Reader reader = charBufferReader) { | ||
assertEquals(12, reader.read(chars)); | ||
assertArrayEquals("abcdefghijkl".toCharArray(), chars); | ||
assertEquals(9, reader.read(chars)); | ||
assertArrayEquals("mnopqrst\u00A0".toCharArray(), Arrays.copyOf(chars, 9)); | ||
assertEquals(-1, reader.read(chars)); | ||
} | ||
assertEquals(-1, charBufferReader.read(chars)); | ||
} | ||
|
||
public void testSkip() throws IOException { | ||
CharBuffer buffer = CharBuffer.wrap("abcdefghijklmnopqrst\u00A0"); | ||
Reader charBufferReader = new CharBufferReader(buffer); | ||
char[] chars = new char[12]; | ||
try (Reader reader = charBufferReader) { | ||
assertEquals(12, reader.read(chars)); | ||
assertArrayEquals("abcdefghijkl".toCharArray(), chars); | ||
assertEquals(4, reader.skip(4)); | ||
assertEquals(4, reader.read(chars, 3, 4)); | ||
assertArrayEquals("qrst".toCharArray(), Arrays.copyOfRange(chars, 3, 7)); | ||
assertEquals(1, reader.skip(Long.MAX_VALUE)); | ||
assertEquals(0, reader.skip(Integer.MAX_VALUE)); | ||
assertEquals(0, reader.skip(Long.MAX_VALUE)); | ||
assertEquals(0, reader.skip(0)); | ||
assertEquals(0, reader.skip(1)); | ||
} | ||
assertEquals(0, charBufferReader.skip(1)); | ||
} | ||
|
||
public void testInvalidSkip() throws IOException { | ||
try (Reader reader = new CharBufferReader(CharBuffer.wrap("test"))) { | ||
IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> reader.skip(-1)); | ||
assertEquals("number of characters to skip cannot be negative", exception.getMessage()); | ||
} | ||
} | ||
|
||
public void testReady() throws IOException { | ||
try (Reader reader = new CharBufferReader(CharBuffer.wrap("test"))) { | ||
assertEquals(true, reader.ready()); | ||
} | ||
} | ||
|
||
public void testMarkReset() throws IOException { | ||
char[] chars = new char[8]; | ||
try (Reader reader = new CharBufferReader(CharBuffer.wrap("test"))) { | ||
assertEquals(true, reader.markSupported()); | ||
reader.mark(3); | ||
assertEquals(3, reader.read(chars, 0, 3)); | ||
assertArrayEquals("tes".toCharArray(), Arrays.copyOf(chars, 3)); | ||
reader.reset(); | ||
reader.mark(Integer.MAX_VALUE); | ||
assertEquals(4, reader.read(chars)); | ||
assertArrayEquals("test".toCharArray(), Arrays.copyOf(chars, 4)); | ||
reader.reset(); | ||
Arrays.fill(chars, '\0'); | ||
assertEquals(4, reader.read(chars)); | ||
} | ||
} | ||
|
||
public void testInvalidMark() throws IOException { | ||
try (Reader reader = new CharBufferReader(CharBuffer.wrap("test"))) { | ||
IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> reader.mark(-1)); | ||
assertEquals("read ahead limit cannot be negative", exception.getMessage()); | ||
} | ||
} | ||
|
||
public void testClose() throws IOException { | ||
char[] chars = new char[2]; | ||
try (Reader reader = new CharBufferReader(CharBuffer.wrap("test"))) { | ||
assertEquals(2, reader.read(chars)); | ||
assertArrayEquals("te".toCharArray(), chars); | ||
reader.close(); | ||
Arrays.fill(chars, '\0'); | ||
assertEquals(-1, reader.read(chars)); | ||
assertArrayEquals("\0\0".toCharArray(), chars); | ||
} | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder how this compares with constructing a string:
The string methods tend to perform very well due to extensive optimization in the jdk, however when we use a reader, the
char[]
used byCharBufferReader
may perform better since it's not converting between bytes and chars on read.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good call, will benchmark and test this out. For Latin-1/ASCII compressed strings, we'll eat the cost of a byte array copy on string creation and decode should be no-op, while non-Latin-1 case may be a bit more expensive.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The JsonEncoding class only supports Unicode charsets. UTF8, variants of UTF16, variants of UTF32. 7 bit ASCII is a subset of UTF8. Latin-1 is not supported.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FasterXML/jackson-benchmarks#9 (comment) shows
new String
toStringReader
is better, so I created #1081 as an alternative to this PR.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I should have been more precise. The
Latin-1
was intended to reference the single byte encoding compact strings JEP 254, but I should have saidnon-ASCII UTF-8
will be a bit more expensive in terms of memory overhead.