@@ -97,30 +97,32 @@ void dumphex_string(const TString &sIn, const std::string &msgTag){
97
97
// belongs to one of the chars in trimChars.
98
98
//
99
99
// NOTE: This will work perfectly provided the string being trimmed as well as
100
- // chars being trimmed are made up of FixedSize chars from the same encoded space.
100
+ // chars being trimmed are made up of NativeCharSize chars from same encoded space.
101
101
// For utf-8, this means the ascii equivalent 1byteSized chars of utf8 and not
102
- // variable length ones.
103
- // NOTE: It will also work, if atleast either end of string have fixedSize chars
104
- // from their encoding space, rather than variable length based chars if any.
105
- // And the trimChars are also fixedSize encoded chars .
102
+ // variable length MultiNativeCharSize (ie multibyte in case of utf-8) ones.
103
+ // NOTE: It will also work, if at least either end of string as well as trimChars
104
+ // have NativeCharSize chars from their encoding space, rather than variable
105
+ // length MultiNativeCharSize based chars if any.
106
106
//
107
- // NOTE: Given the way UTF-8 char encoding is designed, where fixedSize 1byte
108
- // encoded chars are fully unique and dont overlap with any bytes from any of
109
- // the variable length encoded chars in the utf-8 space, so as long as the
110
- // trimChars belong to the fixedSize chars subset, the logic should work, even
111
- // if the string has a mixture of fixed and variable length encoded chars.
107
+ // NOTE: Given the way UTF-8 char encoding is designed, where NativeCharSize 1byte
108
+ // encoded chars are fully unique and dont overlap with any bytes from any of the
109
+ // variable length MultiNativeCharSize encoded chars in the utf-8 space, so as long as
110
+ // the trimChars belong to NativeCharSize chars subset, the logic should work, even
111
+ // if string has a mixture of NativeCharSize and MultiNativeCharSize encoded chars.
112
112
// Chances are utf-16 and utf-32 also have similar characteristics wrt their
113
- // fixedSize encoded chars, and so equivalent semantic applies to them also.
113
+ // NativeCharSize encoded chars (ie fully encoded within single 16bit and 32bit value
114
+ // respectively), and so equivalent semantic applies to them also.
114
115
//
115
- // ALERT: Given that this simple minded logic, works at individual bytes level
116
- // only, If trimChars involve variable length encoded chars, then
117
- // * because different bytes from different trim chars when clubbed together
118
- // can map to some other new char, if there is that new char at either end
119
- // of the string, it may get trimmed, because of the possibility of mix up
120
- // mentioned.
121
- // * given that different variable length encoded chars may have some common
122
- // bytes between them, if one of these chars is at either end of the string
123
- // and another char is in trimChars, then string may get partially trimmed.
116
+ // ALERT: Given that this simple minded logic works at individual NativeCharSize level
117
+ // only, If trimChars involve variable length MultiNativeCharSize encoded chars, then
118
+ // * because different NativeCharSize subparts (bytes in case of utf-8) from different
119
+ // MultiNativeCharSize trim chars when clubbed together can map to some other new char
120
+ // in a variable length encoded char space, if there is that new char at either end
121
+ // of the string, it may get trimmed, because of the possibility of mix up mentioned.
122
+ // * given that different variable length MultiNativeCharSize encoded chars may have
123
+ // some common NativeCharSize subparts (bytes in case of utf-8) between them, if one
124
+ // of these chars is at either end of the string and another char is in trimChars,
125
+ // then string may get partially trimmed.
124
126
//
125
127
template <typename TString>
126
128
TString str_trim_dumb (TString sin, const TString &trimChars=" \t\n " ) {
@@ -137,8 +139,10 @@ TString str_trim_dumb(TString sin, const TString &trimChars=" \t\n") {
137
139
// to one of the chars in trimChars.
138
140
// NOTE: Internally converts to wchar/wstring to try and support proper trimming,
139
141
// wrt possibly more languages, to some extent. IE even if the passed string
140
- // contains multibyte encoded characters in it in utf-8 space, it may get converted
141
- // to fixed size chars in the expanded wchar_t encoding space.
142
+ // contains multibyte encoded characters in it in utf-8 space (ie MultiNativeCharSize),
143
+ // it may get converted to NativeCharSize chars in the expanded wchar_t encoding space,
144
+ // thus leading to fixed NativeCharSize driven logic itself handling things sufficiently.
145
+ // Look at str_trim_dumb comments for additional aspects.
142
146
std::string str_trim_oversmart (std::string sIn , const std::string &trimChars=" \t\n " ) {
143
147
std::wstring wIn;
144
148
mbs_to_wcs (wIn, sIn );
@@ -152,8 +156,8 @@ std::string str_trim_oversmart(std::string sIn, const std::string &trimChars=" \
152
156
153
157
// Remove at most 1 char at the beginning and 1 char at the end of the passed string,
154
158
// provided the char belongs to one of the chars in trimChars.
155
- // NOTE: Chars being trimmed (ie in trimChars) needs to be 1byte encoded chars, to
156
- // avoid mix up when working utf-8/variable length encoded strings.
159
+ // NOTE: Chars being trimmed (ie in trimChars) need to be FixedSize encoded chars,
160
+ // to avoid mix up when working with strings which can be utf-8/variable length encoded.
157
161
// NOTE: This will work provided the string being trimmed as well as the chars being
158
162
// trimmed are made up of 1byte encoded chars including in utf8 encoding space.
159
163
// If the string being trimmed includes multibyte encoded characters at the end,
0 commit comments