|
6 | 6 | import java.math.BigInteger;
|
7 | 7 | import java.util.Arrays;
|
8 | 8 |
|
9 |
| -import com.fasterxml.jackson.core.JsonParseException; |
10 | 9 | import com.fasterxml.jackson.core.JsonToken;
|
11 | 10 | import com.fasterxml.jackson.core.async.ByteArrayFeeder;
|
12 | 11 | import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
|
13 | 12 | import com.fasterxml.jackson.core.io.IOContext;
|
14 |
| -import com.fasterxml.jackson.core.json.JsonReadContext; |
15 | 13 | import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
|
16 | 14 | import com.fasterxml.jackson.core.util.VersionUtil;
|
17 | 15 |
|
@@ -938,86 +936,19 @@ protected final JsonToken _startLongUnicode() throws IOException
|
938 | 936 |
|
939 | 937 | private final JsonToken _finishLongUnicode() throws IOException
|
940 | 938 | {
|
| 939 | + // First things first: did we have partially decoded multi-byte UTF-8 character? |
| 940 | + if (_inputCopyLen > 0) { |
| 941 | + if (!_finishPartialUnicodeChar()) { |
| 942 | + return JsonToken.NOT_AVAILABLE; |
| 943 | + } |
| 944 | + } |
| 945 | + |
941 | 946 | final int[] codes = SmileConstants.sUtf8UnitLengths;
|
942 | 947 | int c;
|
943 | 948 | final byte[] inputBuffer = _inputBuffer;
|
944 | 949 | char[] outBuf = _textBuffer.getBufferWithoutReset();
|
945 | 950 | int outPtr = _textBuffer.getCurrentSegmentSize();
|
946 | 951 |
|
947 |
| - // First things first: did we have partially decoded multi-byte UTF-8 character? |
948 |
| - if (_inputCopyLen > 0) { |
949 |
| - // NOTE: first byte stored in `_pending32` and we know we got one more byte for sure |
950 |
| - int next = _inputBuffer[_inputPtr++]; |
951 |
| - switch (codes[_pending32]) { // type of UTF-8 sequence (length - 1) |
952 |
| - case 1: // 2-byte UTF |
953 |
| - c = _decodeUTF8_2(_pending32, next); |
954 |
| - break; |
955 |
| - case 2: // 3-byte UTF: did we have one or two bytes? |
956 |
| - if (_inputCopyLen == 1) { |
957 |
| - if (_inputPtr >= _inputEnd) { |
958 |
| - _inputCopy[0] = (byte) next; |
959 |
| - _inputCopyLen = 2; |
960 |
| - return JsonToken.NOT_AVAILABLE; |
961 |
| - } |
962 |
| - c = _decodeUTF8_3(_pending32, next, _inputBuffer[_inputPtr++]); |
963 |
| - } else { |
964 |
| - c = _decodeUTF8_3(_pending32, _inputCopy[0], next); |
965 |
| - } |
966 |
| - break; |
967 |
| - case 3: // 4-byte UTF; had 1/2/3 bytes, now got 2/3/4 |
968 |
| - switch (_inputCopyLen) { |
969 |
| - case 1: |
970 |
| - if (_inputPtr >= _inputEnd) { |
971 |
| - _inputCopy[0] = (byte) next; |
972 |
| - _inputCopyLen = 2; |
973 |
| - return JsonToken.NOT_AVAILABLE; |
974 |
| - } |
975 |
| - int i3 = _inputBuffer[_inputPtr++]; |
976 |
| - if (_inputPtr >= _inputEnd) { |
977 |
| - _inputCopy[0] = (byte) next; |
978 |
| - _inputCopy[1] = (byte) i3; |
979 |
| - _inputCopyLen = 3; |
980 |
| - return JsonToken.NOT_AVAILABLE; |
981 |
| - } |
982 |
| - c = _decodeUTF8_4(_pending32, next, i3, _inputBuffer[_inputPtr++]); |
983 |
| - break; |
984 |
| - case 2: |
985 |
| - if (_inputPtr >= _inputEnd) { |
986 |
| - _inputCopy[1] = (byte) next; |
987 |
| - _inputCopyLen = 3; |
988 |
| - return JsonToken.NOT_AVAILABLE; |
989 |
| - } |
990 |
| - c = _decodeUTF8_4(_pending32, _inputCopy[0], next, _inputBuffer[_inputPtr++]); |
991 |
| - break; |
992 |
| - case 3: |
993 |
| - default: |
994 |
| - c = _decodeUTF8_4(_pending32, _inputCopy[0], _inputCopy[1], next); |
995 |
| - break; |
996 |
| - } |
997 |
| - // Let's add first part right away: |
998 |
| - outBuf[outPtr++] = (char) (0xD800 | (c >> 10)); |
999 |
| - if (outPtr >= outBuf.length) { |
1000 |
| - outBuf = _textBuffer.finishCurrentSegment(); |
1001 |
| - outPtr = 0; |
1002 |
| - } |
1003 |
| - c = 0xDC00 | (c & 0x3FF); |
1004 |
| - // And let the other char output down below |
1005 |
| - break; |
1006 |
| - default: |
1007 |
| - // Is this good enough error message? |
1008 |
| - _reportInvalidInitial(_pending32); |
1009 |
| - c = 0; |
1010 |
| - } |
1011 |
| - _inputCopyLen = 0; // just for safety |
1012 |
| - // Need more room? |
1013 |
| - if (outPtr >= outBuf.length) { |
1014 |
| - outBuf = _textBuffer.finishCurrentSegment(); |
1015 |
| - outPtr = 0; |
1016 |
| - } |
1017 |
| - // Ok, let's add char to output: |
1018 |
| - outBuf[outPtr++] = (char) c; |
1019 |
| - } |
1020 |
| - |
1021 | 952 | main_loop:
|
1022 | 953 | while (true) {
|
1023 | 954 | // First the tight ASCII loop:
|
@@ -1115,6 +1046,74 @@ private final JsonToken _finishLongUnicode() throws IOException
|
1115 | 1046 | return JsonToken.NOT_AVAILABLE;
|
1116 | 1047 | }
|
1117 | 1048 |
|
| 1049 | + private final boolean _finishPartialUnicodeChar() throws IOException |
| 1050 | + { |
| 1051 | + final int[] codes = SmileConstants.sUtf8UnitLengths; |
| 1052 | + int c; |
| 1053 | + |
| 1054 | + // NOTE: first byte stored in `_pending32` and we know we got one more byte for sure |
| 1055 | + int next = _inputBuffer[_inputPtr++]; |
| 1056 | + switch (codes[_pending32]) { // type of UTF-8 sequence (length - 1) |
| 1057 | + case 1: // 2-byte UTF |
| 1058 | + c = _decodeUTF8_2(_pending32, next); |
| 1059 | + break; |
| 1060 | + case 2: // 3-byte UTF: did we have one or two bytes? |
| 1061 | + if (_inputCopyLen == 1) { |
| 1062 | + if (_inputPtr >= _inputEnd) { |
| 1063 | + _inputCopy[0] = (byte) next; |
| 1064 | + _inputCopyLen = 2; |
| 1065 | + return false; |
| 1066 | + } |
| 1067 | + c = _decodeUTF8_3(_pending32, next, _inputBuffer[_inputPtr++]); |
| 1068 | + } else { |
| 1069 | + c = _decodeUTF8_3(_pending32, _inputCopy[0], next); |
| 1070 | + } |
| 1071 | + break; |
| 1072 | + case 3: // 4-byte UTF; had 1/2/3 bytes, now got 2/3/4 |
| 1073 | + switch (_inputCopyLen) { |
| 1074 | + case 1: |
| 1075 | + if (_inputPtr >= _inputEnd) { |
| 1076 | + _inputCopy[0] = (byte) next; |
| 1077 | + _inputCopyLen = 2; |
| 1078 | + return false; |
| 1079 | + } |
| 1080 | + int i3 = _inputBuffer[_inputPtr++]; |
| 1081 | + if (_inputPtr >= _inputEnd) { |
| 1082 | + _inputCopy[0] = (byte) next; |
| 1083 | + _inputCopy[1] = (byte) i3; |
| 1084 | + _inputCopyLen = 3; |
| 1085 | + return false; |
| 1086 | + } |
| 1087 | + c = _decodeUTF8_4(_pending32, next, i3, _inputBuffer[_inputPtr++]); |
| 1088 | + break; |
| 1089 | + case 2: |
| 1090 | + if (_inputPtr >= _inputEnd) { |
| 1091 | + _inputCopy[1] = (byte) next; |
| 1092 | + _inputCopyLen = 3; |
| 1093 | + return false; |
| 1094 | + } |
| 1095 | + c = _decodeUTF8_4(_pending32, _inputCopy[0], next, _inputBuffer[_inputPtr++]); |
| 1096 | + break; |
| 1097 | + case 3: |
| 1098 | + default: |
| 1099 | + c = _decodeUTF8_4(_pending32, _inputCopy[0], _inputCopy[1], next); |
| 1100 | + break; |
| 1101 | + } |
| 1102 | + // Let's add first part right away: |
| 1103 | + _textBuffer.append((char) (0xD800 | (c >> 10))); |
| 1104 | + c = 0xDC00 | (c & 0x3FF); |
| 1105 | + // And let the other char output down below |
| 1106 | + break; |
| 1107 | + default: |
| 1108 | + // Is this good enough error message? |
| 1109 | + _reportInvalidInitial(_pending32); |
| 1110 | + c = 0; |
| 1111 | + } |
| 1112 | + _inputCopyLen = 0; // just for safety |
| 1113 | + _textBuffer.append((char) c); |
| 1114 | + return true; |
| 1115 | + } |
| 1116 | + |
1118 | 1117 | /*
|
1119 | 1118 | /**********************************************************************
|
1120 | 1119 | /* Internal methods, UTF8 decoding
|
|
0 commit comments