Skip to content

Commit 54857d5

Browse files
committed
Replace surrogate chars with U+FFFD
Also add some more Unicode input tests.
1 parent 38dd99e commit 54857d5

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

app/src/main/java/com/termux/terminal/TerminalEmulator.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ public final class TerminalEmulator {
218218
*/
219219
private int mScrollCounter = 0;
220220

221-
private int mUtf8ToFollow, mUtf8Index;
221+
private byte mUtf8ToFollow, mUtf8Index;
222222
private final byte[] mUtf8InputBuffer = new byte[4];
223223

224224
public final TerminalColors mColors = new TerminalColors();
@@ -424,7 +424,11 @@ private void processByte(byte byteToProcess) {
424424
processCodePoint(/* escape (hexadecimal=0x1B, octal=033): */27);
425425
processCodePoint((codePoint & 0x7F) + 0x40);
426426
} else {
427-
if (Character.UNASSIGNED == Character.getType(codePoint)) codePoint = UNICODE_REPLACEMENT_CHAR;
427+
switch (Character.getType(codePoint)) {
428+
case Character.UNASSIGNED:
429+
case Character.SURROGATE:
430+
codePoint = UNICODE_REPLACEMENT_CHAR;
431+
}
428432
processCodePoint(codePoint);
429433
}
430434
}

app/src/test/java/com/termux/terminal/UnicodeInputTest.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,47 @@ public void testIllFormedUtf8SuccessorByteNotConsumed() throws Exception {
1212
withTerminalSized(5, 5);
1313
mTerminal.append(new byte[]{(byte) 0b11101111, (byte) 'a'}, 2);
1414
assertLineIs(0, ((char) TerminalEmulator.UNICODE_REPLACEMENT_CHAR) + "a ");
15+
16+
// https://code.google.com/p/chromium/issues/detail?id=212704
17+
byte[] input = new byte[]{
18+
(byte) 0x61, (byte) 0xF1,
19+
(byte) 0x80, (byte) 0x80,
20+
(byte) 0xe1, (byte) 0x80,
21+
(byte) 0xc2, (byte) 0x62,
22+
(byte) 0x80, (byte) 0x63,
23+
(byte) 0x80, (byte) 0xbf,
24+
(byte) 0x64
25+
};
26+
withTerminalSized(10, 2);
27+
mTerminal.append(input, input.length);
28+
assertLinesAre("a\uFFFD\uFFFD\uFFFDb\uFFFDc\uFFFD\uFFFDd", " ");
29+
30+
// UTF-8-encoded surrogate code points (U+D800, U+DB7F, U+DB80, U+DFFF): each one is replaced by U+FFFD.
31+
withTerminalSized(5, 2);
32+
input = new byte[]{
33+
(byte) 0xed, (byte) 0xa0,
34+
(byte) 0x80, (byte) 0xed,
35+
(byte) 0xad, (byte) 0xbf,
36+
(byte) 0xed, (byte) 0xae,
37+
(byte) 0x80, (byte) 0xed,
38+
(byte) 0xbf, (byte) 0xbf
39+
};
40+
mTerminal.append(input, input.length);
41+
assertLinesAre("\uFFFD\uFFFD\uFFFD\uFFFD ", " ");
42+
43+
// https://bugzilla.mozilla.org/show_bug.cgi?id=746900: "with this patch 0xe0 0x80 is decoded as two U+FFFDs,
44+
// but 0xe0 0xa0 is decoded as a single U+FFFD, and this is correct according to the "Best Practices", but IE
45+
// and Chrome (Version 22.0.1229.94) decode both of them as two U+FFFDs. Opera 12.11 decodes both of them as
46+
// one U+FFFD".
47+
withTerminalSized(5, 2);
48+
input = new byte[]{(byte) 0xe0, (byte) 0xa0, ' '};
49+
mTerminal.append(input, input.length);
50+
assertLinesAre("\uFFFD ", " ");
51+
52+
// withTerminalSized(5, 2);
53+
// input = new byte[]{(byte) 0xe0, (byte) 0x80, 'a'};
54+
// mTerminal.append(input, input.length);
55+
// assertLinesAre("\uFFFD\uFFFDa ", " ");
1556
}
1657

1758
public void testUnassignedCodePoint() throws UnsupportedEncodingException {

0 commit comments

Comments
 (0)