Skip to content

Commit 5fbd92c

Browse files
committed
minor refactoring
1 parent 9d8ce91 commit 5fbd92c

File tree

1 file changed

+75
-76
lines changed

1 file changed

+75
-76
lines changed

smile/src/main/java/com/fasterxml/jackson/dataformat/smile/async/NonBlockingByteArrayParser.java

Lines changed: 75 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,10 @@
66
import java.math.BigInteger;
77
import java.util.Arrays;
88

9-
import com.fasterxml.jackson.core.JsonParseException;
109
import com.fasterxml.jackson.core.JsonToken;
1110
import com.fasterxml.jackson.core.async.ByteArrayFeeder;
1211
import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
1312
import com.fasterxml.jackson.core.io.IOContext;
14-
import com.fasterxml.jackson.core.json.JsonReadContext;
1513
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
1614
import com.fasterxml.jackson.core.util.VersionUtil;
1715

@@ -938,86 +936,19 @@ protected final JsonToken _startLongUnicode() throws IOException
938936

939937
private final JsonToken _finishLongUnicode() throws IOException
940938
{
939+
// First things first: did we have partially decoded multi-byte UTF-8 character?
940+
if (_inputCopyLen > 0) {
941+
if (!_finishPartialUnicodeChar()) {
942+
return JsonToken.NOT_AVAILABLE;
943+
}
944+
}
945+
941946
final int[] codes = SmileConstants.sUtf8UnitLengths;
942947
int c;
943948
final byte[] inputBuffer = _inputBuffer;
944949
char[] outBuf = _textBuffer.getBufferWithoutReset();
945950
int outPtr = _textBuffer.getCurrentSegmentSize();
946951

947-
// First things first: did we have partially decoded multi-byte UTF-8 character?
948-
if (_inputCopyLen > 0) {
949-
// NOTE: first byte stored in `_pending32` and we know we got one more byte for sure
950-
int next = _inputBuffer[_inputPtr++];
951-
switch (codes[_pending32]) { // type of UTF-8 sequence (length - 1)
952-
case 1: // 2-byte UTF
953-
c = _decodeUTF8_2(_pending32, next);
954-
break;
955-
case 2: // 3-byte UTF: did we have one or two bytes?
956-
if (_inputCopyLen == 1) {
957-
if (_inputPtr >= _inputEnd) {
958-
_inputCopy[0] = (byte) next;
959-
_inputCopyLen = 2;
960-
return JsonToken.NOT_AVAILABLE;
961-
}
962-
c = _decodeUTF8_3(_pending32, next, _inputBuffer[_inputPtr++]);
963-
} else {
964-
c = _decodeUTF8_3(_pending32, _inputCopy[0], next);
965-
}
966-
break;
967-
case 3: // 4-byte UTF; had 1/2/3 bytes, now got 2/3/4
968-
switch (_inputCopyLen) {
969-
case 1:
970-
if (_inputPtr >= _inputEnd) {
971-
_inputCopy[0] = (byte) next;
972-
_inputCopyLen = 2;
973-
return JsonToken.NOT_AVAILABLE;
974-
}
975-
int i3 = _inputBuffer[_inputPtr++];
976-
if (_inputPtr >= _inputEnd) {
977-
_inputCopy[0] = (byte) next;
978-
_inputCopy[1] = (byte) i3;
979-
_inputCopyLen = 3;
980-
return JsonToken.NOT_AVAILABLE;
981-
}
982-
c = _decodeUTF8_4(_pending32, next, i3, _inputBuffer[_inputPtr++]);
983-
break;
984-
case 2:
985-
if (_inputPtr >= _inputEnd) {
986-
_inputCopy[1] = (byte) next;
987-
_inputCopyLen = 3;
988-
return JsonToken.NOT_AVAILABLE;
989-
}
990-
c = _decodeUTF8_4(_pending32, _inputCopy[0], next, _inputBuffer[_inputPtr++]);
991-
break;
992-
case 3:
993-
default:
994-
c = _decodeUTF8_4(_pending32, _inputCopy[0], _inputCopy[1], next);
995-
break;
996-
}
997-
// Let's add first part right away:
998-
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
999-
if (outPtr >= outBuf.length) {
1000-
outBuf = _textBuffer.finishCurrentSegment();
1001-
outPtr = 0;
1002-
}
1003-
c = 0xDC00 | (c & 0x3FF);
1004-
// And let the other char output down below
1005-
break;
1006-
default:
1007-
// Is this good enough error message?
1008-
_reportInvalidInitial(_pending32);
1009-
c = 0;
1010-
}
1011-
_inputCopyLen = 0; // just for safety
1012-
// Need more room?
1013-
if (outPtr >= outBuf.length) {
1014-
outBuf = _textBuffer.finishCurrentSegment();
1015-
outPtr = 0;
1016-
}
1017-
// Ok, let's add char to output:
1018-
outBuf[outPtr++] = (char) c;
1019-
}
1020-
1021952
main_loop:
1022953
while (true) {
1023954
// First the tight ASCII loop:
@@ -1115,6 +1046,74 @@ private final JsonToken _finishLongUnicode() throws IOException
11151046
return JsonToken.NOT_AVAILABLE;
11161047
}
11171048

1049+
/**
 * Helper method that tries to complete decoding of a multi-byte UTF-8 character
 * that was split across input chunks, appending the decoded character(s)
 * to {@code _textBuffer}.
 *<p>
 * State on entry, as read by the code below: {@code _pending32} holds the first byte
 * of the sequence; {@code _inputCopyLen} is the number of bytes of the sequence seen
 * so far (including that first byte); {@code _inputCopy[0]} and {@code _inputCopy[1]}
 * hold the second and third bytes, if they were already seen.
 * At least one unread byte is expected to be available in {@code _inputBuffer}.
 *
 * @return {@code true} if the character was fully decoded and appended (in which case
 *   {@code _inputCopyLen} has been reset to 0); {@code false} if input ran out first,
 *   in which case the bytes consumed by this call have been stored in {@code _inputCopy}
 *   and {@code _inputCopyLen} updated so a later call can resume
 *
 * @throws IOException presumably on malformed input, via {@code _reportInvalidInitial}
 *   or the {@code _decodeUTF8_N} helpers (their bodies are not visible here -- confirm)
 */
private final boolean _finishPartialUnicodeChar() throws IOException
1050+
{
1051+
final int[] codes = SmileConstants.sUtf8UnitLengths;
1052+
int c;
1053+
1054+
// NOTE: first byte stored in `_pending32` and we know we got one more byte for sure
// (caller is expected to invoke this only when unread input is available, so the
// read below is not bounds-checked)
1055+
int next = _inputBuffer[_inputPtr++];
1056+
switch (codes[_pending32]) { // type of UTF-8 sequence (length - 1)
1057+
case 1: // 2-byte UTF
1058+
c = _decodeUTF8_2(_pending32, next);
1059+
break;
1060+
case 2: // 3-byte UTF: did we have one or two bytes?
1061+
if (_inputCopyLen == 1) {
1062+
// Only had the first byte: `next` is the second byte, third may not be available yet
if (_inputPtr >= _inputEnd) {
1063+
_inputCopy[0] = (byte) next;
1064+
_inputCopyLen = 2;
1065+
return false;
1066+
}
1067+
c = _decodeUTF8_3(_pending32, next, _inputBuffer[_inputPtr++]);
1068+
} else {
1069+
// Had two bytes already: `next` is the third and last byte
c = _decodeUTF8_3(_pending32, _inputCopy[0], next);
1070+
}
1071+
break;
1072+
case 3: // 4-byte UTF; had 1/2/3 bytes, now got 2/3/4
1073+
switch (_inputCopyLen) {
1074+
case 1:
1075+
// `next` is the second byte; may run out before the third or before the fourth
if (_inputPtr >= _inputEnd) {
1076+
_inputCopy[0] = (byte) next;
1077+
_inputCopyLen = 2;
1078+
return false;
1079+
}
1080+
int i3 = _inputBuffer[_inputPtr++];
1081+
if (_inputPtr >= _inputEnd) {
1082+
_inputCopy[0] = (byte) next;
1083+
_inputCopy[1] = (byte) i3;
1084+
_inputCopyLen = 3;
1085+
return false;
1086+
}
1087+
c = _decodeUTF8_4(_pending32, next, i3, _inputBuffer[_inputPtr++]);
1088+
break;
1089+
case 2:
1090+
// `next` is the third byte; second byte was saved earlier in `_inputCopy[0]`
if (_inputPtr >= _inputEnd) {
1091+
_inputCopy[1] = (byte) next;
1092+
_inputCopyLen = 3;
1093+
return false;
1094+
}
1095+
c = _decodeUTF8_4(_pending32, _inputCopy[0], next, _inputBuffer[_inputPtr++]);
1096+
break;
1097+
case 3:
1098+
default:
1099+
// `next` is the fourth and last byte
c = _decodeUTF8_4(_pending32, _inputCopy[0], _inputCopy[1], next);
1100+
break;
1101+
}
1102+
// Let's add first part right away:
// (two chars are output for a 4-byte sequence; presumably `_decodeUTF8_4` returns
// the code point already offset for surrogate-pair encoding -- confirm)
1103+
_textBuffer.append((char) (0xD800 | (c >> 10)));
1104+
c = 0xDC00 | (c & 0x3FF);
1105+
// And let the second char be appended after the switch, along with other cases
1106+
break;
1107+
default:
1108+
// Is this good enough error message?
1109+
_reportInvalidInitial(_pending32);
1110+
// assignment needed for definite assignment of `c`; presumably not reached
// if `_reportInvalidInitial` always throws -- confirm
c = 0;
1111+
}
1112+
_inputCopyLen = 0; // just for safety
1113+
_textBuffer.append((char) c);
1114+
return true;
1115+
}
1116+
11181117
/*
11191118
/**********************************************************************
11201119
/* Internal methods, UTF8 decoding

0 commit comments

Comments
 (0)