Skip to content

Commit 446ec01

Browse files
authored
Issue #170 Fix src/library/charset tests (#261)
1 parent bf899ce commit 446ec01

File tree

11 files changed

+194
-102
lines changed

11 files changed

+194
-102
lines changed

src/library/charset/ci_string_ut.cpp

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/library/charset/lite/ut/ya.make

Lines changed: 0 additions & 11 deletions
This file was deleted.

src/library/charset/ut/ya.make

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/library/charset/wide.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@ bool CanBeEncoded(std::u16string_view text, ECharset encoding) {
77
wchar16 decodeBuf[BUFSIZE];
88

99
while (!text.empty()) {
10-
std::u16string_view src = text;
11-
src.remove_prefix(LEN);
10+
std::u16string_view src = text.substr(0, LEN);
11+
text.remove_prefix(std::min(LEN, text.size()));
1212
std::string_view encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding);
1313
std::u16string_view decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding);
14-
if (decoded != src)
14+
if (decoded != src) {
1515
return false;
16+
}
1617
}
1718

1819
return true;

tests/library/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
add_subdirectory(cache)
22
add_subdirectory(case_insensitive_string)
33
add_subdirectory(cgiparam)
4+
add_subdirectory(charset)
45
add_subdirectory(containers)
56
add_subdirectory(coroutine)
67
add_subdirectory(json)

tests/library/charset/CMakeLists.txt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
add_ydb_test(NAME library-charset-ci_string_ut  # one test target per *_ut.cpp source in this directory
2+
SOURCES
3+
ci_string_ut.cpp
4+
INCLUDE_DIRS
5+
${YDB_SDK_SOURCE_DIR}/src/library/charset  # lets the test include the charset headers by bare name (e.g. "ci_string.h")
6+
LINK_LIBRARIES
7+
library-charset-lite  # the only target in this file linked against the -lite library; the others use library-charset
8+
cpp-testing-unittest_main
9+
)
10+
11+
add_ydb_test(NAME library-charset-codepage_ut
12+
SOURCES
13+
codepage_ut.cpp
14+
INCLUDE_DIRS
15+
${YDB_SDK_SOURCE_DIR}/src/library/charset
16+
LINK_LIBRARIES
17+
library-charset
18+
cpp-testing-unittest_main
19+
)
20+
21+
add_ydb_test(NAME library-charset-iconv_ut
22+
SOURCES
23+
iconv_ut.cpp
24+
INCLUDE_DIRS
25+
${YDB_SDK_SOURCE_DIR}/src/library/charset
26+
LINK_LIBRARIES
27+
library-charset
28+
cpp-testing-unittest_main
29+
)
30+
31+
add_ydb_test(NAME library-charset-recyr_int_ut
32+
SOURCES
33+
recyr_int_ut.cpp
34+
INCLUDE_DIRS
35+
${YDB_SDK_SOURCE_DIR}/src/library/charset
36+
LINK_LIBRARIES
37+
library-charset
38+
cpp-testing-unittest_main
39+
)
40+
41+
add_ydb_test(NAME library-charset-wide_ut
42+
SOURCES
43+
wide_ut.cpp
44+
INCLUDE_DIRS
45+
${YDB_SDK_SOURCE_DIR}/src/library/charset
46+
LINK_LIBRARIES
47+
library-charset
48+
cpp-testing-unittest_main
49+
)
tests/library/charset/ci_string_ut.cpp

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include "ci_string.h"
2+
3+
#include <src/library/testing/unittest/registar.h>
4+
5+
Y_UNIT_TEST_SUITE(TCiStringTest) { // Unit tests for TCiString: operators, case-insensitive comparison, hashing.
6+
Y_UNIT_TEST(TestOperators) { // Exercises +=, +, != and the ordering operators with TCiString, C-string and char operands.
7+
TCiString s("0123456");
8+
9+
const auto x = "x";
10+
const auto y = "y";
11+
const auto z = "z";
12+
13+
// operator += (TCiString, C string, then single char)
14+
s += TCiString(x);
15+
UNIT_ASSERT(s == "0123456x");
16+
17+
s += y;
18+
UNIT_ASSERT(s == "0123456xy");
19+
20+
s += *z; // *z dereferences the C string, so this appends the single char 'z'
21+
UNIT_ASSERT(s == "0123456xyz");
22+
23+
// operator + (same three operand kinds, starting again from "0123456")
24+
s = "0123456";
25+
s = s + TCiString(x);
26+
UNIT_ASSERT(s == "0123456x");
27+
28+
s = s + y;
29+
UNIT_ASSERT(s == "0123456xy");
30+
31+
s = s + *z;
32+
UNIT_ASSERT(s == "0123456xyz");
33+
34+
// operator != (checked with the C string on either side)
35+
s = "012345";
36+
const auto xyz = "xyz";
37+
UNIT_ASSERT(s != TCiString(xyz));
38+
UNIT_ASSERT(s != xyz);
39+
UNIT_ASSERT(xyz != s);
40+
41+
// operator < (from here on s is "012345" and xyz is "xyz")
42+
UNIT_ASSERT_EQUAL(s < TCiString(xyz), true);
43+
UNIT_ASSERT_EQUAL(s < xyz, true);
44+
UNIT_ASSERT_EQUAL(xyz < s, false);
45+
46+
// operator <=
47+
UNIT_ASSERT_EQUAL(s <= TCiString(xyz), true);
48+
UNIT_ASSERT_EQUAL(s <= xyz, true);
49+
UNIT_ASSERT_EQUAL(xyz <= s, false);
50+
51+
// operator >
52+
UNIT_ASSERT_EQUAL(s > TCiString(xyz), false);
53+
UNIT_ASSERT_EQUAL(s > xyz, false);
54+
UNIT_ASSERT_EQUAL(xyz > s, true);
55+
56+
// operator >=
57+
UNIT_ASSERT_EQUAL(s >= TCiString(xyz), false);
58+
UNIT_ASSERT_EQUAL(s >= xyz, false);
59+
UNIT_ASSERT_EQUAL(xyz >= s, true);
60+
}
61+
62+
Y_UNIT_TEST(TestOperatorsCI) { // Comparisons between an upper-case TCiString and lower-case C strings.
63+
TCiString s("ABCD");
64+
UNIT_ASSERT(s > "abc0123456xyz"); // presumably decided at index 3 ('D' vs '0') once case is ignored
65+
UNIT_ASSERT("abc0123456xyz" < s);
66+
UNIT_ASSERT(s == "abcd"); // equal despite differing letter case
67+
UNIT_ASSERT("abcd" == s);
68+
}
69+
70+
Y_UNIT_TEST(TestSpecial) { // Checks the THash<TCiString> specialization against a fixed expected value.
71+
TCiString ss = "0123456"; // type 'TCiString' is used as is
72+
size_t hash_val = THash<TCiString>{}(ss);
73+
UNIT_ASSERT(hash_val == 1489244); // NOTE(review): hard-coded constant ties this test to the current hash implementation — confirm it is stable across platforms
74+
}
75+
}

src/library/charset/codepage_ut.cpp renamed to tests/library/charset/codepage_ut.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#include "codepage.h"
2-
#include "wide.h"
32

4-
#include <src/library/testing/unittest/registar.h>
3+
#include <ydb-cpp-sdk/util/system/yassert.h>
54

65
#include <src/util/charset/utf8.h>
7-
#include <ydb-cpp-sdk/util/system/yassert.h>
6+
7+
#include <src/library/testing/unittest/registar.h>
88

99
#if defined(_MSC_VER)
1010
#pragma warning(disable : 4309) /*truncation of constant value*/
@@ -72,14 +72,15 @@ void TCodepageTest::TestUTF() {
7272
size_t rune_len;
7373
size_t ref_len = 0;
7474

75-
if (i < 0x80)
75+
if (i < 0x80) {
7676
ref_len = 1;
77-
else if (i < 0x800)
77+
} else if (i < 0x800) {
7878
ref_len = 2;
79-
else if (i < 0x10000)
79+
} else if (i < 0x10000) {
8080
ref_len = 3;
81-
else
81+
} else {
8282
ref_len = 4;
83+
}
8384

8485
RECODE_RESULT res = SafeWriteUTF8Char(i, rune_len, buffer, buffer + 32);
8586
UNIT_ASSERT(res == RECODE_OK);
@@ -143,7 +144,7 @@ void TCodepageTest::TestUTF() {
143144
};
144145
for (size_t i = 0; i < Y_ARRAY_SIZE(badStrings); ++i) {
145146
wchar32 rune;
146-
const ui8* p = (const ui8*)badStrings[i];
147+
const ui8* p = reinterpret_cast<const ui8*>(badStrings[i]);
147148
size_t len;
148149
RECODE_RESULT res = SafeReadUTF8Char(rune, len, p, p + strlen(badStrings[i]));
149150
UNIT_ASSERT(res == RECODE_BROKENSYMBOL);

src/library/charset/iconv_ut.cpp renamed to tests/library/charset/iconv_ut.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#include <src/library/testing/unittest/registar.h>
66

77
static void TestIconv(const std::string& utf8, const std::string& other, ECharset enc) {
8-
TUtf16String wide0 = CharToWide(utf8, CODES_UTF8);
9-
TUtf16String wide1 = CharToWide(other, enc);
8+
std::u16string wide0 = CharToWide(utf8, CODES_UTF8);
9+
std::u16string wide1 = CharToWide(other, enc);
1010

1111
UNIT_ASSERT(wide0 == wide1);
1212

@@ -25,13 +25,13 @@ static void TestIconv(const std::string& utf8, const std::string& other, ECharse
2525
size_t read = 0;
2626
size_t written = 0;
2727

28-
RECODE_RESULT res = RecodeToUnicode(enc, other.c_str(), wide1.begin(), other.size(), wide1.size(), read, written);
28+
RECODE_RESULT res = RecodeToUnicode(enc, other.c_str(), wide1.data(), other.size(), wide1.size(), read, written);
2929
UNIT_ASSERT(res == RECODE_OK);
3030
UNIT_ASSERT(read == other.size());
3131
UNIT_ASSERT(written == wide1.size());
3232
UNIT_ASSERT(wide0 == wide1);
3333

34-
res = RecodeFromUnicode(enc, wide0.c_str(), temp.begin(), wide0.size(), temp.size(), read, written);
34+
res = RecodeFromUnicode(enc, wide0.c_str(), temp.data(), wide0.size(), temp.size(), read, written);
3535
UNIT_ASSERT(res == RECODE_OK);
3636
UNIT_ASSERT(read == wide0.size());
3737
UNIT_ASSERT(written == other.size());

src/library/charset/recyr_int_ut.cpp renamed to tests/library/charset/recyr_int_ut.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
#include "recyr.hh"
33
#include "wide.h"
44

5-
#include <src/library/testing/unittest/registar.h>
5+
#include <ydb-cpp-sdk/util/system/yassert.h>
66

77
#include <src/util/charset/utf8.h>
8-
#include <ydb-cpp-sdk/util/system/yassert.h>
8+
9+
#include <src/library/testing/unittest/registar.h>
910

1011
class TRecyr_intTest: public TTestBase {
1112
private:
@@ -83,8 +84,9 @@ void TRecyr_intTest::TestUTFFromUnknownPlane() {
8384
DecodeUnknownPlane(charbuffer, charbufferend, CODES_UTF8);
8485

8586
UNIT_ASSERT(charbufferend == charbuffer + samplelen);
86-
for (size_t i = 0; i < samplelen; ++i)
87+
for (size_t i = 0; i < samplelen; ++i) {
8788
UNIT_ASSERT(sampletext[i] == charbuffer[i]);
89+
}
8890

8991
// Now, concatenate the thing with an explicit character and retest
9092
res = RecodeToUnicode(CODES_UNKNOWNPLANE, bytebuffer, charbuffer, writtenbytes, BUFFER_SIZE, readbytes, writtenchars);
@@ -114,9 +116,9 @@ void TRecyr_intTest::TestUTFFromUnknownPlane() {
114116

115117
// test TChar version
116118
// bytebuffer of len writtenbytes contains sampletext of len samplelen chars in utf8
117-
TUtf16String wtr = CharToWide(std::string_view(bytebuffer, writtenbytes), CODES_UNKNOWNPLANE);
118-
TChar* strend = wtr.begin() + wtr.size();
119-
DecodeUnknownPlane(wtr.begin(), strend, CODES_UTF8);
119+
std::u16string wtr = CharToWide(std::string_view(bytebuffer, writtenbytes), CODES_UNKNOWNPLANE);
120+
TChar* strend = wtr.data() + wtr.size();
121+
DecodeUnknownPlane(wtr.data(), strend, CODES_UTF8);
120122
wtr.resize(strend - wtr.data(), 'Q');
121123
UNIT_ASSERT_VALUES_EQUAL(wtr.size(), samplelen);
122124
for (size_t i = 0; i < wtr.size(); ++i) {

0 commit comments

Comments
 (0)