@@ -86,52 +86,118 @@ impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
86
86
type Output = String;
87
87
88
88
/// Concatenates every element of the slice into a single `String`.
///
/// After this change the body is just `self.join("")`, i.e. a join with an empty separator.
/// The lines marked `-` below are the previous hand-written implementation being removed:
/// it pre-sized a `String` from the summed element lengths and `push_str`-ed each element.
fn concat(&self) -> String {
89
- if self.is_empty() {
90
- return String::new();
91
- }
92
-
93
- // `len` calculation may overflow but push_str will check boundaries
94
- let len = self.iter().map(|s| s.borrow().len()).sum();
95
- let mut result = String::with_capacity(len);
96
-
97
- for s in self {
98
- result.push_str(s.borrow())
99
- }
100
-
101
- result
89
+ self.join("")
102
90
}
103
91
104
92
/// Joins the elements of the slice into a single `String`, placing `sep` between
/// neighbouring elements.
///
/// The work is delegated to `join_generic_copy`, which operates on the raw bytes of the
/// elements and of `sep` and returns a `Vec<u8>`; that byte vector is then turned back into
/// a `String` without re-validating it.
fn join(&self, sep: &str) -> String {
105
- if self.is_empty() {
106
- return String::new();
93
// SAFETY (as far as this block shows): every byte handed to `join_generic_copy` comes from a
// `str` (the elements via `S: Borrow<str>`, the separator via `sep.as_bytes()`), and a
// concatenation of valid UTF-8 sequences is itself valid UTF-8. This relies on
// `join_generic_copy` returning exactly those bytes and nothing else.
+ unsafe {
94
+ String::from_utf8_unchecked( join_generic_copy(self, sep.as_bytes()) )
107
95
}
96
+ }
108
97
109
- // concat is faster
110
- if sep.is_empty() {
111
- return self.concat();
112
- }
98
/// Alternative name for `join`: forwards `sep` to `self.join` and returns its result unchanged.
+ fn connect(&self, sep: &str) -> String {
99
+ self.join(sep)
100
+ }
101
+ }
113
102
114
- // this is wrong without the guarantee that `self` is non-empty
115
- // `len` calculation may overflow but push_str but will check boundaries
116
- let len = sep.len() * (self.len() - 1) +
117
- self.iter().map(|s| s.borrow().len()).sum::<usize>();
118
- let mut result = String::with_capacity(len);
119
- let mut first = true;
103
// Copies the slice `$src` to the raw write cursor `$dst`, then advances the cursor and
// deducts the copied length from the remaining budget `$room`.
//
// Must be expanded inside an `unsafe` block. The caller guarantees that `$dst` points at
// `$room` writable, properly aligned elements that do not overlap `$src`.
macro_rules! copy_slice_and_advance {
    ($dst:ident, $room:ident, $src:expr) => {{
        let src: &[_] = $src;
        let count = src.len();
        // A misbehaving `Borrow`/`AsRef` impl may hand out a longer slice now than it did
        // while the buffer was being sized; never write past the reservation.
        assert!(count <= $room, "slice length changed between sizing and copying during join");
        src.as_ptr().copy_to_nonoverlapping($dst, count);
        $dst = $dst.add(count);
        $room -= count;
    }};
}

// Emits one copy loop per listed separator length plus a generic fallback.
//
// Inside a `$num` arm the separator length is a compile-time constant, so the separator
// copy has a hardcoded size; loops with hardcoded sizes run much faster. Listing `0` gives
// plain concatenation (the separator copy becomes a no-op).
macro_rules! specialize_for_lengths {
    ($separator:expr, $dst:ident, $room:ident, $iter:expr; $($num:expr),*) => {{
        let separator = $separator;
        let pieces = $iter;
        match separator.len() {
            $(
                // specialize the cases with small separator lengths
                $num => {
                    for piece in pieces {
                        copy_slice_and_advance!($dst, $room, separator);
                        copy_slice_and_advance!($dst, $room, piece.borrow().as_ref());
                    }
                },
            )*
            _ => {
                // arbitrary separator length fallback
                for piece in pieces {
                    copy_slice_and_advance!($dst, $room, separator);
                    copy_slice_and_advance!($dst, $room, piece.borrow().as_ref());
                }
            }
        }
    }};
}

// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec.
// Currently (2018-05-13) there is a bug with type inference and specialization (see issue
// #36262). For this reason SliceConcatExt<T> is not specialized for T: Copy and
// SliceConcatExt<str> is the only user of this function. It is left generic for the time
// when that is fixed.
//
// The bounds for String-join are S: Borrow<str> and for Vec-join S: Borrow<[T]>.
// Both `str` and `[T]` implement AsRef<[T]> for some T, so `s.borrow().as_ref()` always
// yields a slice.
//
// Returns an empty Vec for an empty `slice`; otherwise the elements of `slice` in order with
// a copy of `sep` between each neighbouring pair.
//
// Panics if the total length overflows `usize`, or if an element's `borrow()` returns a
// longer slice while copying than it did while the output was being sized.
fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
where
    T: Copy,
    B: AsRef<[T]> + ?Sized,
    S: Borrow<B>,
{
    let mut iter = slice.iter();

    // the first element is the only one that is not preceded by a separator
    let first = match iter.next() {
        Some(first) => first,
        None => return vec![],
    };

    // Exact length of the joined output (`slice` is non-empty here, so `len() - 1` is fine).
    // If the calculation overflows we panic: we would have run out of memory anyway, and the
    // copy loop below needs the whole output pre-allocated.
    let reserved_len = sep
        .len()
        .checked_mul(slice.len() - 1)
        .and_then(|n| {
            slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
        })
        .expect("attempt to join into collection with len > usize::MAX");

    // crucial for safety: one allocation that can hold the entire output
    let mut result = Vec::with_capacity(reserved_len);
    result.extend_from_slice(first.borrow().as_ref());

    // `first.borrow()` was called a second time above and may have returned more than was
    // accounted for; in that case there is simply no budget left for the raw copies.
    let pos = result.len();
    let budget = reserved_len.saturating_sub(pos);

    // SAFETY:
    // * `pos <= result.capacity()`, so `as_mut_ptr().add(pos)` stays inside (or one past the
    //   end of) the allocation, and `pos + budget <= max(pos, reserved_len) <= capacity`.
    // * `copy_slice_and_advance!` asserts every copy fits in the remaining budget, so all
    //   writes land in `[pos, pos + budget)`, memory owned exclusively by `result`; the
    //   sources are borrowed from the caller and cannot overlap it.
    // * `budget - room` elements were written contiguously starting at `pos`, so every
    //   element below the new length is initialised, even if the elements returned shorter
    //   slices than during sizing.
    unsafe {
        let mut dst = result.as_mut_ptr().add(pos);
        let mut room = budget;

        // copy separator and elements over through the raw cursor
        // generate loops with hardcoded sizes for small separators
        specialize_for_lengths!(sep, dst, room, iter; 0, 1, 2, 3, 4);

        result.set_len(pos + (budget - room));
    }
    result
}
136
202
137
203
#[stable(feature = "rust1", since = "1.0.0")]
0 commit comments