@@ -16,6 +16,11 @@ trait NeedleWithSize: Needle {
16
16
self . as_bytes ( ) . len ( )
17
17
}
18
18
}
19
+
20
+ #[ inline]
21
+ fn is_empty ( & self ) -> bool {
22
+ self . size ( ) == 0
23
+ }
19
24
}
20
25
21
26
impl < N : Needle + ?Sized > NeedleWithSize for N { }
@@ -145,36 +150,35 @@ impl<V: Vector> VectorHash<V> {
145
150
}
146
151
}
147
152
148
- /// Single-substring searcher using an AVX2 algorithm based on the
149
- /// "Generic SIMD" algorithm [presented by Wojciech
153
+ /// Single-substring searcher using an AVX2 algorithm based on the "Generic
154
+ /// SIMD" algorithm [presented by Wojciech
150
155
/// Muła](http://0x80.pl/articles/simd-strfind.html).
151
156
///
152
- /// It is similar to the Rabin-Karp algorithm, except that the hash is
153
- /// not rolling and is calculated for several lanes at once. It begins
154
- /// by picking the first byte in the needle and checking at which
155
- /// positions in the haystack it occurs. Any position where it does not
156
- /// can be immediately discounted as a potential match.
157
+ /// It is similar to the Rabin-Karp algorithm, except that the hash is not
158
+ /// rolling and is calculated for several lanes at once. It begins by picking
159
+ /// the first byte in the needle and checking at which positions in the haystack
160
+ /// it occurs. Any position where it does not can be immediately discounted as a
161
+ /// potential match.
157
162
///
158
163
/// We then repeat this idea with a second byte in the needle (where the
159
- /// haystack is suitably offset) and take a bitwise AND to further limit
160
- /// the possible positions the needle can match in. Any remaining
161
- /// positions are fully evaluated using an equality comparison with the
162
- /// needle.
164
+ /// haystack is suitably offset) and take a bitwise AND to further limit the
165
+ /// possible positions the needle can match in. Any remaining positions are
166
+ /// fully evaluated using an equality comparison with the needle.
163
167
///
164
- /// Originally, the algorithm always used the last byte for this second
165
- /// byte. Whilst this is often the most efficient option, it is
166
- /// vulnerable to a worst-case attack and so this implementation instead
167
- /// allows any byte (including a random one) to be chosen.
168
+ /// Originally, the algorithm always used the last byte for this second byte.
169
+ /// Whilst this is often the most efficient option, it is vulnerable to a
170
+ /// worst-case attack and so this implementation instead allows any byte
171
+ /// (including a random one) to be chosen.
168
172
///
169
- /// In the case where the needle is not a multiple of the number of SIMD
170
- /// lanes, the last chunk is made up of a partial overlap with the
171
- /// penultimate chunk to avoid reading random memory, differing from the
172
- /// original implementation. In this case, a mask is used to prevent
173
- /// performing an equality comparison on the same position twice.
173
+ /// In the case where the needle is not a multiple of the number of SIMD lanes,
174
+ /// the last chunk is made up of a partial overlap with the penultimate chunk to
175
+ /// avoid reading random memory, differing from the original implementation. In
176
+ /// this case, a mask is used to prevent performing an equality comparison on
177
+ /// the same position twice.
174
178
///
175
- /// When the haystack is too short for an AVX2 register, a similar SSE2
176
- /// fallback is used instead. Finally, for very short haystacks there is
177
- /// a scalar Rabin-Karp implementation.
179
+ /// When the haystack is too short for an AVX2 register, a similar SSE2 fallback
180
+ /// is used instead. Finally, for very short haystacks there is a scalar
181
+ /// Rabin-Karp implementation.
178
182
pub struct Avx2Searcher < N : Needle > {
179
183
position : usize ,
180
184
scalar_hash : ScalarHash ,
@@ -184,21 +188,35 @@ pub struct Avx2Searcher<N: Needle> {
184
188
}
185
189
186
190
impl < N : Needle > Avx2Searcher < N > {
187
- /// Creates a new searcher for `needle`. By default, `position` is
188
- /// set to the last character in the needle.
191
+ /// Creates a new searcher for `needle`. By default, `position` is set to
192
+ /// the last character in the needle.
193
+ ///
194
+ /// # Panics
195
+ ///
196
+ /// Panics if `needle` is empty or if the associated `SIZE` constant does
197
+ /// not correspond to the actual size of `needle`.
189
198
#[ target_feature( enable = "avx2" ) ]
190
199
pub unsafe fn new ( needle : N ) -> Self {
191
- let position = needle. size ( ) - 1 ;
200
+ // Wrapping prevents panicking on unsigned integer underflow when
201
+ // `needle` is empty.
202
+ let position = needle. size ( ) . wrapping_sub ( 1 ) ;
192
203
Self :: with_position ( needle, position)
193
204
}
194
205
195
- /// Same as `new` but allows additionally specifying the `position`
196
- /// to use.
206
+ /// Same as `new` but allows additionally specifying the `position` to use.
207
+ ///
208
+ /// # Panics
209
+ ///
210
+ /// Panics if `needle` is empty, if `position` is not a valid index for
211
+ /// `needle` or if the associated `SIZE` constant does not correspond to the
212
+ /// actual size of `needle`.
197
213
#[ target_feature( enable = "avx2" ) ]
198
214
pub unsafe fn with_position ( needle : N , position : usize ) -> Self {
215
+ // Implicitly checks that the needle is not empty because position is an
216
+ // unsized integer.
217
+ assert ! ( position < needle. size( ) ) ;
218
+
199
219
let bytes = needle. as_bytes ( ) ;
200
- assert ! ( !bytes. is_empty( ) ) ;
201
- assert ! ( position < bytes. len( ) ) ;
202
220
if let Some ( size) = N :: SIZE {
203
221
assert_eq ! ( size, bytes. len( ) ) ;
204
222
}
@@ -402,48 +420,72 @@ impl DynamicAvx2Searcher {
402
420
/// the last character in the needle.
403
421
#[ target_feature( enable = "avx2" ) ]
404
422
pub unsafe fn new ( needle : Box < [ u8 ] > ) -> Self {
405
- let position = needle. len ( ) - 1 ;
423
+ // Wrapping prevents panicking on unsigned integer underflow when
424
+ // `needle` is empty.
425
+ let position = needle. len ( ) . wrapping_sub ( 1 ) ;
406
426
Self :: with_position ( needle, position)
407
427
}
408
428
409
429
/// Same as `new` but allows additionally specifying the `position` to use.
430
+ ///
431
+ /// # Panics
432
+ ///
433
+ /// When `needle` is not empty, panics if `position` is not a valid index
434
+ /// for `needle`.
410
435
#[ target_feature( enable = "avx2" ) ]
411
436
pub unsafe fn with_position ( needle : Box < [ u8 ] > , position : usize ) -> Self {
412
- assert ! ( !needle. is_empty( ) ) ;
413
- assert ! ( position < needle. len( ) ) ;
414
-
415
437
match * needle {
416
438
[ ] => Self :: N0 ,
417
- [ c0] => Self :: N1 ( MemchrSearcher :: new ( c0) ) ,
418
- [ c0, c1] => Self :: N2 ( Avx2Searcher :: new ( [ c0, c1] ) ) ,
419
- [ c0, c1, c2] => Self :: N3 ( Avx2Searcher :: new ( [ c0, c1, c2] ) ) ,
420
- [ c0, c1, c2, c3] => Self :: N4 ( Avx2Searcher :: new ( [ c0, c1, c2, c3] ) ) ,
421
- [ c0, c1, c2, c3, c4] => Self :: N5 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4] ) ) ,
422
- [ c0, c1, c2, c3, c4, c5] => Self :: N6 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4, c5] ) ) ,
423
- [ c0, c1, c2, c3, c4, c5, c6] => {
424
- Self :: N7 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4, c5, c6] ) )
439
+ [ c0] => {
440
+ // Check that `position` is set correctly for consistency.
441
+ assert_eq ! ( position, 0 ) ;
442
+ Self :: N1 ( MemchrSearcher :: new ( c0) )
425
443
}
426
- [ c0, c1, c2, c3, c4, c5, c6, c7] => {
427
- Self :: N8 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4, c5, c6, c7] ) )
444
+ [ c0, c1] => Self :: N2 ( Avx2Searcher :: with_position ( [ c0, c1] , position) ) ,
445
+ [ c0, c1, c2] => Self :: N3 ( Avx2Searcher :: with_position ( [ c0, c1, c2] , position) ) ,
446
+ [ c0, c1, c2, c3] => Self :: N4 ( Avx2Searcher :: with_position ( [ c0, c1, c2, c3] , position) ) ,
447
+ [ c0, c1, c2, c3, c4] => {
448
+ Self :: N5 ( Avx2Searcher :: with_position ( [ c0, c1, c2, c3, c4] , position) )
428
449
}
429
- [ c0, c1, c2, c3, c4, c5, c6, c7, c8] => {
430
- Self :: N9 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4, c5, c6, c7, c8] ) )
450
+ [ c0, c1, c2, c3, c4, c5] => Self :: N6 ( Avx2Searcher :: with_position (
451
+ [ c0, c1, c2, c3, c4, c5] ,
452
+ position,
453
+ ) ) ,
454
+ [ c0, c1, c2, c3, c4, c5, c6] => Self :: N7 ( Avx2Searcher :: with_position (
455
+ [ c0, c1, c2, c3, c4, c5, c6] ,
456
+ position,
457
+ ) ) ,
458
+ [ c0, c1, c2, c3, c4, c5, c6, c7] => Self :: N8 ( Avx2Searcher :: with_position (
459
+ [ c0, c1, c2, c3, c4, c5, c6, c7] ,
460
+ position,
461
+ ) ) ,
462
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8] => Self :: N9 ( Avx2Searcher :: with_position (
463
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8] ,
464
+ position,
465
+ ) ) ,
466
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] => Self :: N10 ( Avx2Searcher :: with_position (
467
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] ,
468
+ position,
469
+ ) ) ,
470
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] => {
471
+ Self :: N11 ( Avx2Searcher :: with_position (
472
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] ,
473
+ position,
474
+ ) )
431
475
}
432
- [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] => {
433
- Self :: N10 ( Avx2Searcher :: new ( [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9] ) )
476
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] => {
477
+ Self :: N12 ( Avx2Searcher :: with_position (
478
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] ,
479
+ position,
480
+ ) )
434
481
}
435
- [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10] => Self :: N11 ( Avx2Searcher :: new ( [
436
- c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,
437
- ] ) ) ,
438
- [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11] => Self :: N12 ( Avx2Searcher :: new ( [
439
- c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11,
440
- ] ) ) ,
441
482
[ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12] => {
442
- Self :: N13 ( Avx2Searcher :: new ( [
443
- c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
444
- ] ) )
483
+ Self :: N13 ( Avx2Searcher :: with_position (
484
+ [ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12] ,
485
+ position,
486
+ ) )
445
487
}
446
- _ => Self :: N ( Avx2Searcher :: new ( needle) ) ,
488
+ _ => Self :: N ( Avx2Searcher :: with_position ( needle, position ) ) ,
447
489
}
448
490
}
449
491
0 commit comments