|
22 | 22 | *
|
23 | 23 | * It tries to provide a crude expanded form of array wrt any of the above supported types.
|
24 | 24 | * For this one needs to define keys using the pattern TheKeyName-0, TheKeyName-1, ....
|
| 25 | + * |
| 26 | + * ## Additional notes |
| 27 | + * |
| 28 | + * NativeCharSize encoded char refers to chars which fit within the size of char type in a given |
| 29 | + * type of c++ string or base bitsize of an encoding standard, like 1 byte in case of std::string, |
| 30 | + * utf-8, ... |
| 31 | + * * example: English alphabet letters in utf-8 encoding space are 1byte chars, in its variable length |
| 32 | + * encoding space. |
| 33 | + * |
| 34 | + * MultiNativeCharSize encoded char refers to chars which occupy multiple base-char-bit-size of |
| 35 | + * a c++ string type or char encoding standard. |
| 36 | + * * example: Indian script alphabets in utf-8 encoding space occupy multiple bytes in its variable |
| 37 | + * length encoding space. |
| 38 | + * |
| 39 | + * Sane variable length encoding - refers to encoding where the values of NativeCharSized chars of |
| 40 | + * a char encoding space can't overlap with values in NativeCharSize subparts of MultiNativeCharSized |
| 41 | + * chars of the same char encoding standard. |
| 42 | + * * utf-8 shows this behaviour |
| 43 | + * * chances are utf-16 and utf-32 also show this behaviour (need to cross check once) |
25 | 44 | */
|
26 | 45 |
|
27 | 46 | #include <map>
|
@@ -163,6 +182,7 @@ std::string str_trim_oversmart(std::string sIn, const std::string &trimChars=" \
|
163 | 182 | // NOTE: Chars being trimmed (ie in trimChars) need to be part of NativeCharSize
|
164 | 183 | // subset of the string's encoded char space, to avoid mix up when working with
|
165 | 184 | // strings which can be utf-8/utf-16/utf-32/sane-variable-length encoded strings.
|
| 185 | +// |
166 | 186 | // NOTE:UTF8: This will work provided the string being trimmed as well as the chars
|
167 | 187 | // being trimmed are made up of 1byte encoded chars in case of utf8 encoding space.
|
168 | 188 | // If the string being trimmed includes multibyte (ie MultiNativeCharSize) encoded
|
@@ -193,13 +213,17 @@ TString str_trim_single(TString sin, const TString& trimChars=" \t\n") {
|
193 | 213 | return sin;
|
194 | 214 | }
|
195 | 215 |
|
196 |
| -// This works for 1byte encoded chars, including in utf8 encoding space. |
| 216 | +// This works for NativeCharSize encoded chars, including in utf8 encoding space. |
197 | 217 | // This won't work for multibyte encoded chars.
|
198 |
| -std::string str_tolower(const std::string &sin) { |
199 |
| - std::string sout; |
| 218 | +template <typename TString> |
| 219 | +TString str_tolower(const TString &sin) { |
| 220 | + TString sout; |
200 | 221 | sout.resize(sin.size());
|
201 |
| - std::transform(sin.begin(), sin.end(), sout.begin(), [](char c)->char {return std::tolower(c);}); |
202 |
| - //LDBUG_LN("DBUG:%s:%s:%s", __func__, sin.c_str(), sout.c_str()); |
| 222 | + std::transform(sin.begin(), sin.end(), sout.begin(), [](auto c)->auto {return std::tolower(c);}); |
| 223 | +#ifdef SC_DEBUG_VERBOSE |
| 224 | + dumphex_string(sin, std::format("DBUG:{}:in:", __func__)); |
| 225 | + dumphex_string(sout, std::format("DBUG:{}:out:", __func__)); |
| 226 | +#endif |
203 | 227 | return sout;
|
204 | 228 | }
|
205 | 229 |
|
|
0 commit comments