Skip to content

Commit c646932

Browse files
authored
ENH: String Utilities Split API Optimization (#22)
* initial string functionality port
* Strip out unsupported optimizations [compiling, passing]
1 parent 17220a5 commit c646932

File tree

1 file changed

+137
-35
lines changed

1 file changed

+137
-35
lines changed

Source/EbsdLib/Utilities/EbsdStringUtils.hpp

Lines changed: 137 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -48,17 +48,25 @@
4848

4949
namespace EbsdStringUtils
5050
{
51-
52-
using StringTokenType = std::vector<std::string>;
53-
54-
// 5 statements
55-
template <class InputIt, class ForwardIt, class BinOp>
56-
void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, BinOp binary_op)
51+
namespace detail
52+
{
53+
template <bool ProcessEmptyV, class InputIt, class ForwardIt, typename TokenT>
54+
void tokenize(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_last, std::vector<TokenT>& tokens)
5755
{
5856
while(true)
5957
{
6058
const auto pos = std::find_first_of(first, last, s_first, s_last);
61-
binary_op(first, pos);
59+
if(first != pos)
60+
{
61+
tokens.emplace_back(std::string{first, pos});
62+
}
63+
else
64+
{
65+
if constexpr(ProcessEmptyV)
66+
{
67+
tokens.emplace_back("");
68+
}
69+
}
6270
if(pos == last)
6371
{
6472
break;
@@ -67,39 +75,137 @@ void for_each_token(InputIt first, InputIt last, ForwardIt s_first, ForwardIt s_
6775
}
6876
}
6977

70-
// 2 statements
71-
inline StringTokenType split(const std::string& str, char delim)
78+
template <bool ConsecutiveAsEmptyV, bool EmptyInitialV, bool EmptyFinalV>
79+
struct SplitTypeOptions
7280
{
73-
StringTokenType tokens;
74-
std::string temp(str);
75-
std::array<char, 1> delims = {delim};
76-
auto endPos = std::end(temp);
77-
for_each_token(std::begin(temp), endPos, std::cbegin(delims), std::cend(delims), [&endPos, &tokens](auto first, auto second) {
78-
if(first != second)
81+
static inline constexpr bool AllowConsecutiveAsEmpty = ConsecutiveAsEmptyV;
82+
static inline constexpr bool AllowEmptyInital = EmptyInitialV;
83+
static inline constexpr bool AllowEmptyFinal = EmptyFinalV;
84+
};
85+
86+
using SplitIgnoreEmpty = SplitTypeOptions<false, false, false>;
87+
using SplitAllowAll = SplitTypeOptions<true, true, true>;
88+
using SplitNoStripIgnoreConsecutive = SplitTypeOptions<false, true, true>;
89+
using SplitOnlyConsecutive = SplitTypeOptions<true, false, false>;
90+
using SplitAllowEmptyLeftAnalyze = SplitTypeOptions<true, true, false>;
91+
using SplitAllowEmptyRightAnalyze = SplitTypeOptions<true, false, true>;
92+
93+
template <class SplitTypeOptionsV = SplitIgnoreEmpty>
94+
inline std::vector<std::string> optimized_split(std::string_view str, std::vector<char>&& delimiters)
95+
{
96+
if(str.empty())
97+
{
98+
return {};
99+
}
100+
auto endPos = str.end();
101+
auto startPos = str.begin();
102+
103+
std::vector<std::string> tokens;
104+
tokens.reserve(str.size() / 2);
105+
106+
if constexpr(SplitTypeOptionsV::AllowEmptyInital)
107+
{
108+
if(std::find(delimiters.cbegin(), delimiters.cend(), str[0]) != delimiters.cend())
79109
{
80-
if(second != endPos)
110+
tokens.emplace_back("");
111+
startPos++;
112+
}
113+
}
114+
115+
if constexpr(!SplitTypeOptionsV::AllowEmptyFinal)
116+
{
117+
if(std::find(delimiters.cbegin(), delimiters.cend(), str[str.size() - 1]) != delimiters.cend())
118+
{
119+
endPos--;
120+
}
121+
}
122+
123+
if constexpr(!SplitTypeOptionsV::AllowConsecutiveAsEmpty)
124+
{
125+
tokenize<false>(startPos, endPos, delimiters.cbegin(), delimiters.cend(), tokens);
126+
if constexpr(SplitTypeOptionsV::AllowEmptyFinal)
127+
{
128+
if(std::find(delimiters.cbegin(), delimiters.cend(), str[str.size() - 1]) != delimiters.cend())
81129
{
82-
*second = '\0';
130+
tokens.emplace_back("");
83131
}
84-
tokens.push_back({&*first});
85132
}
86-
});
87-
// std::cout << "Tokens: " << tokens.size() << std::endl;
133+
}
134+
else
135+
{
136+
if constexpr(!SplitTypeOptionsV::AllowEmptyInital)
137+
{
138+
if(std::find(delimiters.cbegin(), delimiters.cend(), str[0]) != delimiters.cend())
139+
{
140+
startPos++;
141+
}
142+
}
143+
tokenize<true>(startPos, endPos, delimiters.cbegin(), delimiters.cend(), tokens);
144+
}
145+
146+
tokens.shrink_to_fit();
147+
148+
// No Delimiters found
149+
if(tokens.empty())
150+
{
151+
tokens.emplace_back(str);
152+
}
153+
88154
return tokens;
89155
}
156+
} // namespace detail
157+
158+
inline const std::string k_Whitespaces = " \t\f\v\n\r";
159+
160+
using StringTokenType = std::vector<std::string>;
90161

91-
inline StringTokenType split_2(const std::string& line, char delimiter)
162+
enum SplitType : uint8_t
92163
{
93-
std::stringstream ss(line);
164+
IgnoreEmpty,
165+
AllowAll,
166+
NoStripIgnoreConsecutive,
167+
OnlyConsecutive,
168+
AllowEmptyLeftAnalyze,
169+
AllowEmptyRightAnalyze
170+
};
94171

95-
StringTokenType tokens;
96-
std::string temp_str;
172+
inline std::vector<std::string> specific_split(std::string_view str, std::vector<char>&& delimiters, SplitType splitType)
173+
{
174+
switch(splitType)
175+
{
176+
case IgnoreEmpty:
177+
return detail::optimized_split<detail::SplitIgnoreEmpty>(str, std::move(delimiters));
178+
case AllowAll:
179+
return detail::optimized_split<detail::SplitAllowAll>(str, std::move(delimiters));
180+
case NoStripIgnoreConsecutive:
181+
return detail::optimized_split<detail::SplitNoStripIgnoreConsecutive>(str, std::move(delimiters));
182+
case OnlyConsecutive:
183+
return detail::optimized_split<detail::SplitOnlyConsecutive>(str, std::move(delimiters));
184+
case AllowEmptyLeftAnalyze:
185+
return detail::optimized_split<detail::SplitAllowEmptyLeftAnalyze>(str, std::move(delimiters));
186+
case AllowEmptyRightAnalyze:
187+
return detail::optimized_split<detail::SplitAllowEmptyRightAnalyze>(str, std::move(delimiters));
188+
}
97189

98-
while(getline(ss, temp_str, delimiter))
190+
return {};
191+
}
192+
193+
inline std::vector<std::string> split(std::string_view str, std::vector<char>&& delimiters, bool consecutiveDelimiters)
194+
{
195+
if(consecutiveDelimiters)
99196
{
100-
tokens.push_back(temp_str);
197+
// Split Allow All was selected to match QString's base split functionality
198+
return detail::optimized_split<detail::SplitAllowAll>(str, std::move(delimiters));
199+
}
200+
else
201+
{
202+
return detail::optimized_split<detail::SplitIgnoreEmpty>(str, std::move(delimiters));
101203
}
102-
return tokens;
204+
}
205+
206+
inline std::vector<std::string> split(std::string_view str, char delim)
207+
{
208+
return detail::optimized_split<detail::SplitIgnoreEmpty>(str, std::vector<char>{delim});
103209
}
104210

105211
inline std::string replace(std::string str, const std::string& from, const std::string& to)
@@ -120,8 +226,7 @@ inline std::string ltrim(const std::string& s)
120226
{
121227
return out;
122228
}
123-
std::string whitespaces(" \t\f\v\n\r");
124-
std::string::size_type front = out.find_first_not_of(whitespaces);
229+
std::string::size_type front = out.find_first_not_of(k_Whitespaces);
125230
if(front != std::string::npos)
126231
{
127232
out = out.substr(front);
@@ -140,8 +245,7 @@ inline std::string rtrim(const std::string& s)
140245
{
141246
return out;
142247
}
143-
std::string whitespaces(" \t\f\v\n\r");
144-
std::string::size_type back = out.find_last_not_of(whitespaces);
248+
std::string::size_type back = out.find_last_not_of(k_Whitespaces);
145249
if(back != std::string::npos)
146250
{
147251
out.erase(back + 1);
@@ -160,8 +264,7 @@ inline std::string trimmed(const std::string& s)
160264
{
161265
return out;
162266
}
163-
std::string whitespaces(" \t\f\v\n\r");
164-
std::string::size_type back = out.find_last_not_of(whitespaces);
267+
std::string::size_type back = out.find_last_not_of(k_Whitespaces);
165268
if(back != std::string::npos)
166269
{
167270
out.erase(back + 1);
@@ -170,7 +273,7 @@ inline std::string trimmed(const std::string& s)
170273
{
171274
out.clear();
172275
}
173-
std::string::size_type front = out.find_first_not_of(whitespaces);
276+
std::string::size_type front = out.find_first_not_of(k_Whitespaces);
174277
if(front != std::string::npos)
175278
{
176279
out = out.substr(front);
@@ -217,5 +320,4 @@ inline std::string simplified(const std::string& text)
217320
}
218321
return finalString;
219322
}
220-
221323
} // namespace EbsdStringUtils

0 commit comments

Comments
 (0)