Skip to content

Commit c54609c

Browse files
authored
[test] Cleanup UTF tests. NFC (#21876)
1 parent b8f54f8 commit c54609c

File tree

6 files changed

+94
-79
lines changed

6 files changed

+94
-79
lines changed

test/benchmark/benchmark_utf16.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
#include <cassert>
1111
#include <emscripten.h>
1212

13+
EM_JS_DEPS(deps, "$UTF16ToString");
14+
1315
double test(const unsigned short *str) {
1416
double res = EM_ASM_DOUBLE({
1517
var t0 = _emscripten_get_now();
16-
var str = Module.UTF16ToString($0);
18+
var str = UTF16ToString($0);
1719
var t1 = _emscripten_get_now();
1820
out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
1921
return (t1-t0);

test/core/test_utf16.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <vector>
1111
#include <wchar.h>
1212

13+
EM_JS_DEPS(deps, "$UTF16ToString,$stringToUTF16");
14+
1315
// Roundtrip a (non-)null-terminated string between C++ and JS.
1416
EM_JS(void, roundtripString, (const char16_t* str, int strBytes, char16_t* result, int resultBytes), {
1517
#if __wasm64__

test/test_core.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5670,17 +5670,14 @@ def test_utf(self):
56705670
self.do_core_test('test_utf.c')
56715671

56725672
def test_utf32(self):
5673-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF32ToString', 'stringToUTF32', 'lengthBytesUTF32'])
56745673
self.do_runf('utf32.cpp', 'OK.')
56755674
self.do_runf('utf32.cpp', 'OK.', args=['-fshort-wchar'])
56765675

56775676
@crossplatform
56785677
def test_utf16(self):
5679-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF16ToString', 'stringToUTF16'])
56805678
self.do_runf('core/test_utf16.cpp', 'OK.')
56815679

56825680
def test_utf8(self):
5683-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF8ToString', 'stringToUTF8', 'AsciiToString', 'stringToAscii'])
56845681
self.do_runf('utf8.cpp', 'OK.')
56855682

56865683
@also_with_wasm_bigint
@@ -5689,26 +5686,25 @@ def test_utf8_textdecoder(self):
56895686
self.do_runf('benchmark/benchmark_utf8.c', 'OK.')
56905687

56915688
# Test that invalid character in UTF8 does not cause decoding to crash.
5692-
def test_utf8_invalid(self):
5693-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF8ToString', 'stringToUTF8'])
5694-
for decoder_mode in [[], ['-sTEXTDECODER']]:
5695-
self.emcc_args += decoder_mode
5696-
print(str(decoder_mode))
5697-
self.do_runf('utf8_invalid.cpp', 'OK.')
5689+
@parameterized({
5690+
'': [[]],
5691+
'textdecoder': [['-sTEXTDECODER']],
5692+
})
5693+
def test_utf8_invalid(self, args):
5694+
self.do_runf('utf8_invalid.cpp', 'OK.', emcc_args=args)
56985695

56995696
# Test that invalid character in UTF8 does not cause decoding to crash.
57005697
@no_asan('TODO: ASan support in minimal runtime')
5701-
def test_minimal_runtime_utf8_invalid(self):
5702-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF8ToString', 'stringToUTF8'])
5698+
@parameterized({
5699+
'': [[]],
5700+
'textdecoder': [['-sTEXTDECODER']],
5701+
})
5702+
def test_minimal_runtime_utf8_invalid(self, args):
57035703
self.set_setting('MINIMAL_RUNTIME')
57045704
self.emcc_args += ['--pre-js', test_file('minimal_runtime_exit_handling.js')]
5705-
for decoder_mode in [0, 1]:
5706-
self.set_setting('TEXTDECODER', decoder_mode)
5707-
print(str(decoder_mode))
5708-
self.do_runf('utf8_invalid.cpp', 'OK.')
5705+
self.do_runf('utf8_invalid.cpp', 'OK.', emcc_args=args)
57095706

57105707
def test_utf16_textdecoder(self):
5711-
self.set_setting('EXPORTED_RUNTIME_METHODS', ['UTF16ToString', 'stringToUTF16', 'lengthBytesUTF16'])
57125708
self.emcc_args += ['--embed-file', test_file('utf16_corpus.txt') + '@/utf16_corpus.txt']
57135709
self.do_runf('benchmark/benchmark_utf16.cpp', 'OK.')
57145710

test/utf32.cpp

Lines changed: 59 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,67 +12,75 @@
1212
typedef unsigned int utf32;
1313
typedef unsigned short utf16;
1414

15+
EM_JS_DEPS(deps, "$UTF32ToString,$stringToUTF32");
16+
1517
// This code tests that Unicode std::wstrings can be marshalled between C++ and JS.
1618
int main() {
17-
std::wstring wstr = L"abc\u2603\u20AC\U0002007C123 --- abc\u2603\u20AC\U0002007C123"; // U+2603 is snowman, U+20AC is the Euro sign, U+2007C is a Chinese Han character that looks like three raindrops.
19+
// U+2603 is snowman,
20+
// U+20AC is the Euro sign,
21+
// U+2007C is a Chinese Han character that looks like three raindrops.
22+
std::wstring wstr = L"abc\u2603\u20AC\U0002007C123 --- abc\u2603\u20AC\U0002007C123";
23+
24+
printf("sizeof(wchar_t): %d.\n", (int)sizeof(wchar_t));
1825

19-
printf("sizeof(wchar_t): %d.\n", (int)sizeof(wchar_t));
26+
if (sizeof(wchar_t) == 4) {
27+
utf32 *memory = new utf32[wstr.length()+1];
2028

21-
if (sizeof(wchar_t) == 4) {
22-
utf32 *memory = new utf32[wstr.length()+1];
29+
EM_ASM({
30+
var str = UTF32ToString($0);
31+
out(str);
32+
var numBytesWritten = stringToUTF32(str, $1, Number($2));
33+
if (numBytesWritten != 23*4) throw 'stringToUTF32 wrote an invalid length ' + numBytesWritten;
34+
}, wstr.c_str(), memory, (wstr.length()+1)*sizeof(utf32));
2335

24-
EM_ASM({
25-
var str = UTF32ToString($0);
26-
out(str);
27-
var numBytesWritten = stringToUTF32(str, $1, Number($2));
28-
if (numBytesWritten != 23*4) throw 'stringToUTF32 wrote an invalid length ' + numBytesWritten;
29-
}, wstr.c_str(), memory, (wstr.length()+1)*sizeof(utf32));
36+
// Compare memory to confirm that the string is intact after taking a route
37+
// through JS side.
38+
const utf32 *srcPtr = reinterpret_cast<const utf32 *>(wstr.c_str());
39+
for (int i = 0;; ++i) {
40+
assert(memory[i] == srcPtr[i]);
41+
if (srcPtr[i] == 0)
42+
break;
43+
}
3044

31-
// Compare memory to confirm that the string is intact after taking a route through JS side.
32-
const utf32 *srcPtr = reinterpret_cast<const utf32 *>(wstr.c_str());
33-
for(int i = 0;; ++i) {
34-
assert(memory[i] == srcPtr[i]);
35-
if (srcPtr[i] == 0)
36-
break;
37-
}
45+
EM_ASM({
46+
var str = UTF32ToString($0);
47+
out(str);
48+
var numBytesWritten = stringToUTF32(str, $1, Number($2));
49+
if (numBytesWritten != 5*4) throw 'stringToUTF32 wrote an invalid length ' + numBytesWritten;
50+
}, wstr.c_str(), memory, 6*sizeof(utf32));
51+
assert(memory[5] == 0);
3852

39-
EM_ASM({
40-
var str = UTF32ToString($0);
41-
out(str);
42-
var numBytesWritten = stringToUTF32(str, $1, Number($2));
43-
if (numBytesWritten != 5*4) throw 'stringToUTF32 wrote an invalid length ' + numBytesWritten;
44-
}, wstr.c_str(), memory, 6*sizeof(utf32));
45-
assert(memory[5] == 0);
53+
delete[] memory;
54+
} else {
55+
// sizeof(wchar_t) == 2, and we're building with -fshort-wchar.
56+
utf16 *memory = new utf16[2*wstr.length()+1];
4657

47-
delete[] memory;
48-
} else { // sizeof(wchar_t) == 2, and we're building with -fshort-wchar.
49-
utf16 *memory = new utf16[2*wstr.length()+1];
58+
EM_ASM({
59+
var str = UTF16ToString($0);
60+
out(str);
61+
var numBytesWritten = stringToUTF16(str, $1, $2);
62+
if (numBytesWritten != 25*2) throw 'stringToUTF16 wrote an invalid length ' + numBytesWritten;
63+
}, wstr.c_str(), memory, (2*wstr.length()+1)*sizeof(utf16));
5064

51-
EM_ASM({
52-
var str = UTF16ToString($0);
53-
out(str);
54-
var numBytesWritten = stringToUTF16(str, $1, $2);
55-
if (numBytesWritten != 25*2) throw 'stringToUTF16 wrote an invalid length ' + numBytesWritten;
56-
}, wstr.c_str(), memory, (2*wstr.length()+1)*sizeof(utf16));
65+
// Compare memory to confirm that the string is intact after taking a route
66+
// through JS side.
67+
const utf16 *srcPtr = reinterpret_cast<const utf16 *>(wstr.c_str());
68+
for (int i = 0;; ++i) {
69+
assert(memory[i] == srcPtr[i]);
70+
if (srcPtr[i] == 0)
71+
break;
72+
}
5773

58-
// Compare memory to confirm that the string is intact after taking a route through JS side.
59-
const utf16 *srcPtr = reinterpret_cast<const utf16 *>(wstr.c_str());
60-
for(int i = 0;; ++i) {
61-
assert(memory[i] == srcPtr[i]);
62-
if (srcPtr[i] == 0)
63-
break;
64-
}
74+
EM_ASM({
75+
var str = UTF16ToString($0);
76+
out(str);
77+
var numBytesWritten = stringToUTF16(str, $1, $2);
78+
if (numBytesWritten != 5*2) throw 'stringToUTF16 wrote an invalid length ' + numBytesWritten;
79+
}, wstr.c_str(), memory, 6*sizeof(utf16));
80+
assert(memory[5] == 0);
6581

66-
EM_ASM({
67-
var str = UTF16ToString($0);
68-
out(str);
69-
var numBytesWritten = stringToUTF16(str, $1, $2);
70-
if (numBytesWritten != 5*2) throw 'stringToUTF16 wrote an invalid length ' + numBytesWritten;
71-
}, wstr.c_str(), memory, 6*sizeof(utf16));
72-
assert(memory[5] == 0);
73-
74-
delete[] memory;
75-
}
82+
delete[] memory;
83+
}
7684

77-
printf("OK.\n");
85+
printf("OK.\n");
7886
}

test/utf8.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include <cassert>
1111
#include <emscripten.h>
1212

13+
EM_JS_DEPS(deps, "$UTF8ToString,$stringToUTF8,$AsciiToString,$stringToAscii");
14+
1315
// This code tests that Unicode std::wstrings can be marshalled between C++ and JS.
1416
int main() {
1517
const char latin1String[] = "\x26\xA0\xF7";
@@ -45,9 +47,11 @@ int main() {
4547
if (numBytesWritten != 69) throw 'stringToUTF8 wrote an invalid length ' + numBytesWritten;
4648
}, utf8String, utf8String2, 128);
4749
assert(strlen(utf8String) == strlen(utf8String2));
48-
for(int i = 0; i < strlen(utf8String)+1; ++i)
49-
if (utf8String[i] != utf8String2[i])
50+
for (int i = 0; i < strlen(utf8String)+1; ++i) {
51+
if (utf8String[i] != utf8String2[i]) {
5052
printf("i=%d:%u,%u\n", i, (unsigned int)(unsigned char)utf8String[i], (unsigned int)(unsigned char)utf8String2[i]);
53+
}
54+
}
5155
assert(!strcmp(utf8String, utf8String2));
5256

5357
// Test that text gets properly cut off if output buffer is too small.

test/utf8_invalid.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
#include <emscripten/emscripten.h>
22
#include <stdio.h>
33

4-
int main()
5-
{
6-
char ch[256] = {};
7-
for(int i = 0; i < 255; ++i)
8-
ch[i] = i+1;
9-
int totalLen = 0;
10-
for(int i = 0; i < 256; ++i)
11-
totalLen += EM_ASM_INT({return UTF8ToString($0).length}, ch);
12-
printf("OK. Length: %d\n", totalLen);
4+
EM_JS_DEPS(deps, "$UTF8ToString");
5+
6+
int main() {
7+
char ch[256] = {};
8+
for (int i = 0; i < 255; ++i) {
9+
ch[i] = i+1;
10+
}
11+
int totalLen = 0;
12+
for (int i = 0; i < 256; ++i) {
13+
totalLen += EM_ASM_INT({return UTF8ToString($0).length}, ch);
14+
}
15+
printf("OK. Length: %d\n", totalLen);
1316
}

0 commit comments

Comments
 (0)