Coverage Report

Created: 2025-06-23 13:53

/build/cargo-vendor-dir/memchr-2.7.4/src/memmem/searcher.rs

All instrumented lines in this file reported an execution count of 0; the file is entirely uncovered.

use crate::arch::all::{
    packedpair::{HeuristicFrequencyRank, Pair},
    rabinkarp, twoway,
};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::neon::packedpair as neon;
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
use crate::arch::wasm32::simd128::packedpair as simd128;
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
use crate::arch::x86_64::{
    avx2::packedpair as avx2, sse2::packedpair as sse2,
};

/// A "meta" substring searcher.
///
/// To a first approximation, this chooses what it believes to be the "best"
/// substring search implementation based on the needle at construction time.
/// Then, every call to `find` will execute that particular implementation. To
/// a second approximation, multiple substring search algorithms may be used,
/// depending on the haystack. For example, for supremely short haystacks,
/// Rabin-Karp is typically used.
///
/// See the documentation on `Prefilter` for an explanation of the dispatching
/// mechanism. The quick summary is that an enum has too much overhead and
/// we can't use dynamic dispatch via traits because we need to work in a
/// core-only environment. (Dynamic dispatch works in core-only, but you
/// need `&dyn Trait` and we really need a `Box<dyn Trait>` here. The latter
/// requires `alloc`.) So instead, we use a union and an appropriately paired
/// free function to read from the correct field on the union and execute the
/// chosen substring search implementation.
#[derive(Clone)]
pub(crate) struct Searcher {
    call: SearcherKindFn,
    kind: SearcherKind,
    rabinkarp: rabinkarp::Finder,
}
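
// NOTE(editorial sketch, not part of memchr): a minimal, self-contained
// illustration of the union + paired-function-pointer dispatch described in
// the doc comment above. All names below (`Demo`, `DemoKind`, `demo_one`,
// `demo_pair`) are hypothetical and assume a non-empty needle; the real
// `Searcher` applies the same pattern with more variants and real search
// algorithms.

type DemoFn = unsafe fn(&Demo, &[u8]) -> Option<usize>;

#[derive(Clone, Copy)]
union DemoKind {
    one: u8,
    pair: [u8; 2],
}

#[derive(Clone, Copy)]
struct Demo {
    call: DemoFn,
    kind: DemoKind,
}

impl Demo {
    fn new(needle: &[u8]) -> Demo {
        // The constructor is the single place that pairs a union field with
        // the free function that knows how to read that field.
        if needle.len() == 1 {
            Demo { call: demo_one, kind: DemoKind { one: needle[0] } }
        } else {
            Demo {
                call: demo_pair,
                kind: DemoKind { pair: [needle[0], needle[1]] },
            }
        }
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: `call` was constructed alongside the matching union field,
        // which is the same invariant `Searcher` maintains.
        unsafe { (self.call)(self, haystack) }
    }
}

// SAFETY: callers must ensure `kind.one` is the active field.
unsafe fn demo_one(d: &Demo, haystack: &[u8]) -> Option<usize> {
    let b = d.kind.one;
    haystack.iter().position(|&x| x == b)
}

// SAFETY: callers must ensure `kind.pair` is the active field.
unsafe fn demo_pair(d: &Demo, haystack: &[u8]) -> Option<usize> {
    let p = d.kind.pair;
    haystack.windows(2).position(|w| w[0] == p[0] && w[1] == p[1])
}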

impl Searcher {
    /// Creates a new "meta" substring searcher that attempts to choose the
    /// best algorithm based on the needle, heuristics and what the current
    /// target supports.
    #[inline]
    pub(crate) fn new<R: HeuristicFrequencyRank>(
        prefilter: PrefilterConfig,
        ranker: R,
        needle: &[u8],
    ) -> Searcher {
        let rabinkarp = rabinkarp::Finder::new(needle);
        if needle.len() <= 1 {
            return if needle.is_empty() {
                trace!("building empty substring searcher");
                Searcher {
                    call: searcher_kind_empty,
                    kind: SearcherKind { empty: () },
                    rabinkarp,
                }
            } else {
                trace!("building one-byte substring searcher");
                debug_assert_eq!(1, needle.len());
                Searcher {
                    call: searcher_kind_one_byte,
                    kind: SearcherKind { one_byte: needle[0] },
                    rabinkarp,
                }
            };
        }
        let pair = match Pair::with_ranker(needle, &ranker) {
            Some(pair) => pair,
            None => return Searcher::twoway(needle, rabinkarp, None),
        };
        debug_assert_ne!(
            pair.index1(),
            pair.index2(),
            "pair offsets should not be equivalent"
        );
        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
        {
            if let Some(pp) = avx2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 AVX2 substring searcher");
                    let kind = SearcherKind { avx2: pp };
                    Searcher { call: searcher_kind_avx2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::avx2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building x86_64 SSE2 substring searcher");
                    let kind = SearcherKind { sse2: pp };
                    Searcher { call: searcher_kind_sse2, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::sse2(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                // We're pretty unlikely to get to this point, but it is
                // possible to be running on x86_64 without SSE2. Namely, it's
                // really up to the OS whether it wants to support vector
                // registers or not.
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
        {
            if let Some(pp) = simd128::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building wasm32 simd128 substring searcher");
                    let kind = SearcherKind { simd128: pp };
                    Searcher { call: searcher_kind_simd128, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::simd128(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            if let Some(pp) = neon::Finder::with_pair(needle, pair) {
                if do_packed_search(needle) {
                    trace!("building aarch64 neon substring searcher");
                    let kind = SearcherKind { neon: pp };
                    Searcher { call: searcher_kind_neon, kind, rabinkarp }
                } else if prefilter.is_none() {
                    Searcher::twoway(needle, rabinkarp, None)
                } else {
                    let prestrat = Prefilter::neon(pp, needle);
                    Searcher::twoway(needle, rabinkarp, Some(prestrat))
                }
            } else if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
        #[cfg(not(any(
            all(target_arch = "x86_64", target_feature = "sse2"),
            all(target_arch = "wasm32", target_feature = "simd128"),
            target_arch = "aarch64"
        )))]
        {
            if prefilter.is_none() {
                Searcher::twoway(needle, rabinkarp, None)
            } else {
                let prestrat = Prefilter::fallback(ranker, pair, needle);
                Searcher::twoway(needle, rabinkarp, prestrat)
            }
        }
    }

    /// Creates a new searcher that always uses the Two-Way algorithm. This is
    /// typically used when vector algorithms are unavailable or inappropriate.
    /// (For example, when the needle is "too long.")
    ///
    /// If a prefilter is given, then the searcher returned will be accelerated
    /// by the prefilter.
    #[inline]
    fn twoway(
        needle: &[u8],
        rabinkarp: rabinkarp::Finder,
        prestrat: Option<Prefilter>,
    ) -> Searcher {
        let finder = twoway::Finder::new(needle);
        match prestrat {
            None => {
                trace!("building scalar two-way substring searcher");
                let kind = SearcherKind { two_way: finder };
                Searcher { call: searcher_kind_two_way, kind, rabinkarp }
            }
            Some(prestrat) => {
                trace!(
                    "building scalar two-way \
                     substring searcher with a prefilter"
                );
                let two_way_with_prefilter =
                    TwoWayWithPrefilter { finder, prestrat };
                let kind = SearcherKind { two_way_with_prefilter };
                Searcher {
                    call: searcher_kind_two_way_with_prefilter,
                    kind,
                    rabinkarp,
                }
            }
        }
    }

    /// Searches the given haystack for the given needle. The needle given
    /// should be the same as the needle that this finder was initialized
    /// with.
    ///
    /// Inlining this can lead to big wins for latency, and #[inline] doesn't
    /// seem to be enough in some cases.
    #[inline(always)]
    pub(crate) fn find(
        &self,
        prestate: &mut PrefilterState,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            None
        } else {
            // SAFETY: By construction, we've ensured that the function
            // in `self.call` is properly paired with the union used in
            // `self.kind`.
            unsafe { (self.call)(self, prestate, haystack, needle) }
        }
    }
}
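
// NOTE(editorial sketch, not part of memchr): `Searcher::find` reports the
// first match in the haystack it is given, so enumerating every occurrence
// is done by the caller re-slicing the haystack and offsetting the result.
// `find_all` below is hypothetical and stands in for the iterator glue in
// the `memmem` module; `find` is any function with `Searcher::find`'s
// haystack/needle shape. (It uses `Vec` for brevity, unlike this alloc-free
// file.)

fn find_all(
    haystack: &[u8],
    needle: &[u8],
    find: impl Fn(&[u8], &[u8]) -> Option<usize>,
) -> Vec<usize> {
    let mut starts = Vec::new();
    let mut at = 0;
    while at <= haystack.len() {
        match find(&haystack[at..], needle) {
            None => break,
            Some(i) => {
                starts.push(at + i);
                // Advance past this match for non-overlapping results; an
                // empty needle still advances by one so the loop terminates.
                at += i + needle.len().max(1);
            }
        }
    }
    starts
}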

impl core::fmt::Debug for Searcher {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("Searcher")
            .field("call", &"<searcher function>")
            .field("kind", &"<searcher kind union>")
            .field("rabinkarp", &self.rabinkarp)
            .finish()
    }
}

/// A union indicating one of several possible substring search implementations
/// that are in active use.
///
/// This union should only be read by one of the functions prefixed with
/// `searcher_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union SearcherKind {
    empty: (),
    one_byte: u8,
    two_way: twoway::Finder,
    two_way_with_prefilter: TwoWayWithPrefilter,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}

/// A two-way substring searcher with a prefilter.
#[derive(Copy, Clone, Debug)]
struct TwoWayWithPrefilter {
    finder: twoway::Finder,
    prestrat: Prefilter,
}

/// The type of a substring search function.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in the `SearcherKind` union.
type SearcherKindFn = unsafe fn(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;

/// Reads from the `empty` field of `SearcherKind` to handle the case of
/// searching for the empty needle. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.empty` union field is set.
unsafe fn searcher_kind_empty(
    _searcher: &Searcher,
    _prestate: &mut PrefilterState,
    _haystack: &[u8],
    _needle: &[u8],
) -> Option<usize> {
    Some(0)
}

/// Reads from the `one_byte` field of `SearcherKind` to handle the case of
/// searching for a single byte needle. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.one_byte` union field is set.
unsafe fn searcher_kind_one_byte(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    _needle: &[u8],
) -> Option<usize> {
    let needle = searcher.kind.one_byte;
    crate::memchr(needle, haystack)
}

/// Reads from the `two_way` field of `SearcherKind` to handle the case of
/// searching for an arbitrary needle without prefilter acceleration. Works on
/// all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.two_way` union field is set.
unsafe fn searcher_kind_two_way(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    if rabinkarp::is_fast(haystack, needle) {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        searcher.kind.two_way.find(haystack, needle)
    }
}

/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle
/// the case of searching for an arbitrary needle with prefilter acceleration.
/// Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union
/// field is set.
unsafe fn searcher_kind_two_way_with_prefilter(
    searcher: &Searcher,
    prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    if rabinkarp::is_fast(haystack, needle) {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        let TwoWayWithPrefilter { ref finder, ref prestrat } =
            searcher.kind.two_way_with_prefilter;
        let pre = Pre { prestate, prestrat };
        finder.find_with_prefilter(Some(pre), haystack, needle)
    }
}

/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.sse2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn searcher_kind_sse2(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.sse2;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.avx2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn searcher_kind_avx2(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.avx2;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32
/// simd128 vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.simd128` union field is set.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
unsafe fn searcher_kind_simd128(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.simd128;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon
/// vectorized substring search implementation.
///
/// # Safety
///
/// Callers must ensure that the `searcher.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn searcher_kind_neon(
    searcher: &Searcher,
    _prestate: &mut PrefilterState,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let finder = &searcher.kind.neon;
    if haystack.len() < finder.min_haystack_len() {
        searcher.rabinkarp.find(haystack, needle)
    } else {
        finder.find(haystack, needle)
    }
}

/// A reverse substring searcher.
#[derive(Clone, Debug)]
pub(crate) struct SearcherRev {
    kind: SearcherRevKind,
    rabinkarp: rabinkarp::FinderRev,
}

/// The kind of the reverse searcher.
///
/// For the reverse case, we don't do any SIMD acceleration or prefilters.
/// There is no specific technical reason why we don't; rather, it's not
/// clear it's worth the extra code to do so. If you have a use case for it,
/// please file an issue.
///
/// We also don't do the union trick as we do with the forward case and
/// prefilters. Basically for the same reason we don't have prefilters or
/// vector algorithms for reverse searching: it's not clear it's worth doing.
/// Please file an issue if you have a compelling use case for fast reverse
/// substring search.
#[derive(Clone, Debug)]
enum SearcherRevKind {
    Empty,
    OneByte { needle: u8 },
    TwoWay { finder: twoway::FinderRev },
}

impl SearcherRev {
    /// Creates a new searcher for finding occurrences of the given needle in
    /// reverse. That is, it reports the last (instead of the first) occurrence
    /// of a needle in a haystack.
    #[inline]
    pub(crate) fn new(needle: &[u8]) -> SearcherRev {
        let kind = if needle.len() <= 1 {
            if needle.is_empty() {
                trace!("building empty reverse substring searcher");
                SearcherRevKind::Empty
            } else {
                trace!("building one-byte reverse substring searcher");
                debug_assert_eq!(1, needle.len());
                SearcherRevKind::OneByte { needle: needle[0] }
            }
        } else {
            trace!("building scalar two-way reverse substring searcher");
            let finder = twoway::FinderRev::new(needle);
            SearcherRevKind::TwoWay { finder }
        };
        let rabinkarp = rabinkarp::FinderRev::new(needle);
        SearcherRev { kind, rabinkarp }
    }

    /// Searches the given haystack for the last occurrence of the given
    /// needle. The needle given should be the same as the needle that this
    /// finder was initialized with.
    #[inline]
    pub(crate) fn rfind(
        &self,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        if haystack.len() < needle.len() {
            return None;
        }
        match self.kind {
            SearcherRevKind::Empty => Some(haystack.len()),
            SearcherRevKind::OneByte { needle } => {
                crate::memrchr(needle, haystack)
            }
            SearcherRevKind::TwoWay { ref finder } => {
                if rabinkarp::is_fast(haystack, needle) {
                    self.rabinkarp.rfind(haystack, needle)
                } else {
                    finder.rfind(haystack, needle)
                }
            }
        }
    }
}

/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very quickly,
/// and then confirming whether those candidates are full matches. This
/// idea can be quite effective since it's often the case that looking for
/// candidates can be a lot faster than running a complete substring search
/// over the entire input. Namely, looking for candidates can be done with
/// extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat rare
/// relative to the frequency of full matches. That is, if a lot of false
/// positives are generated, then it's possible for search time to be worse
/// than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your use
/// case.
///
/// Prefilters in this implementation use a heuristic to detect when a
/// prefilter might not be carrying its weight, and will dynamically disable
/// it. Nevertheless, this configuration option gives callers the ability to
/// disable prefilters if they know prefilters won't be useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum PrefilterConfig {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}
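
// NOTE(editorial sketch, not part of memchr): the candidate/confirm split
// that the doc comment above describes, written out as scalar code assuming
// a non-empty needle. The "prefilter" here is a plain scan for one rare
// byte; the real prefilters below do the same job with vector instructions.
// `rare` and `rare_offset` are assumed to come from needle frequency
// analysis (cf. `Pair`).

fn prefiltered_find(
    haystack: &[u8],
    needle: &[u8],
    rare: u8,
    rare_offset: usize,
) -> Option<usize> {
    let mut at = 0;
    while at + needle.len() <= haystack.len() {
        // Prefilter: find the next occurrence of the rare byte and map it
        // back to the position where the needle would have to start.
        let i = at + haystack[at..].iter().position(|&b| b == rare)?;
        let start = i.saturating_sub(rare_offset);
        // Confirm: a candidate may be a false positive, never a false
        // negative, so a full comparison settles it.
        if start + needle.len() <= haystack.len()
            && &haystack[start..start + needle.len()] == needle
        {
            return Some(start);
        }
        // False positive: resume scanning just past this rare-byte hit.
        at = i + 1;
    }
    None
}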

impl Default for PrefilterConfig {
    fn default() -> PrefilterConfig {
        PrefilterConfig::Auto
    }
}

impl PrefilterConfig {
    /// Returns true when this prefilter is set to the `None` variant.
    fn is_none(&self) -> bool {
        matches!(*self, PrefilterConfig::None)
    }
}

/// The implementation of a prefilter.
///
/// This type encapsulates dispatch to one of several possible choices for a
/// prefilter. Generally speaking, all prefilters have the same approximate
/// algorithm: they choose a couple of bytes from the needle that are believed
/// to be rare, use a fast vector algorithm to look for those bytes and return
/// positions as candidates for some substring search algorithm (currently only
/// Two-Way) to confirm as a match or not.
///
/// The differences between the algorithms are actually at the vector
/// implementation level. Namely, we need different routines based on both
/// which target architecture we're on and what CPU features are supported.
///
/// The straightforwardly obvious approach here is to use an enum, and make
/// `Prefilter::find` do case analysis to determine which algorithm was
/// selected and invoke it. However, I've observed that this leads to poor
/// codegen in some cases, especially in latency-sensitive benchmarks. That is,
/// this approach comes with overhead that I wasn't able to eliminate.
///
/// The second obvious approach is to use dynamic dispatch with traits. Doing
/// that in this context where `Prefilter` owns the selection generally
/// requires heap allocation, and this code is designed to run in core-only
/// environments.
///
/// So we settle on using a union (that's `PrefilterKind`) and a function
/// pointer (that's `PrefilterKindFn`). We select the right function pointer
/// based on which field in the union we set, and that function in turn
/// knows which field of the union to access. The downside of this approach
/// is that it forces us to think about safety, but the upside is that
/// there are some nice latency improvements to benchmarks. (Especially the
/// `memmem/sliceslice/short` benchmark.)
///
/// In cases where we've selected a vector algorithm and the haystack given
/// is too short, we fall back to the scalar version of `memchr` on the
/// `rarest_byte`. (The scalar version of `memchr` is still better than a naive
/// byte-at-a-time loop because it will read in `usize`-sized chunks at a
/// time.)
#[derive(Clone, Copy)]
struct Prefilter {
    call: PrefilterKindFn,
    kind: PrefilterKind,
    rarest_byte: u8,
    rarest_offset: u8,
}

impl Prefilter {
    /// Return a "fallback" prefilter, but only if it is believed to be
    /// effective.
    #[inline]
    fn fallback<R: HeuristicFrequencyRank>(
        ranker: R,
        pair: Pair,
        needle: &[u8],
    ) -> Option<Prefilter> {
        /// The maximum frequency rank permitted for the fallback prefilter.
        /// If the rarest byte in the needle has a frequency rank above this
        /// value, then no prefilter is used if the fallback prefilter would
        /// otherwise be selected.
        const MAX_FALLBACK_RANK: u8 = 250;

        trace!("building fallback prefilter");
        let rarest_offset = pair.index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        let rarest_rank = ranker.rank(rarest_byte);
        if rarest_rank > MAX_FALLBACK_RANK {
            None
        } else {
            let finder = crate::arch::all::packedpair::Finder::with_pair(
                needle,
                pair.clone(),
            )?;
            let call = prefilter_kind_fallback;
            let kind = PrefilterKind { fallback: finder };
            Some(Prefilter { call, kind, rarest_byte, rarest_offset })
        }
    }

    /// Return a prefilter using an x86_64 SSE2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 SSE2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_sse2,
            kind: PrefilterKind { sse2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using an x86_64 AVX2 vector algorithm.
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    #[inline]
    fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter {
        trace!("building x86_64 AVX2 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_avx2,
            kind: PrefilterKind { avx2: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using a wasm32 simd128 vector algorithm.
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
    #[inline]
    fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter {
        trace!("building wasm32 simd128 prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_simd128,
            kind: PrefilterKind { simd128: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a prefilter using an aarch64 neon vector algorithm.
    #[cfg(target_arch = "aarch64")]
    #[inline]
    fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter {
        trace!("building aarch64 neon prefilter");
        let rarest_offset = finder.pair().index1();
        let rarest_byte = needle[usize::from(rarest_offset)];
        Prefilter {
            call: prefilter_kind_neon,
            kind: PrefilterKind { neon: finder },
            rarest_byte,
            rarest_offset,
        }
    }

    /// Return a *candidate* position for a match.
    ///
    /// When this returns an offset, it implies that a match could begin at
    /// that offset, but it may not. That is, it is possible for a false
    /// positive to be returned.
    ///
    /// When `None` is returned, then it is guaranteed that there are no
    /// matches for the needle in the given haystack. That is, it is impossible
    /// for a false negative to be returned.
    ///
    /// The purpose of this routine is to look for candidate matching positions
    /// as quickly as possible before running a (likely) slower confirmation
    /// step.
    #[inline]
    fn find(&self, haystack: &[u8]) -> Option<usize> {
        // SAFETY: By construction, we've ensured that the function in
        // `self.call` is properly paired with the union used in `self.kind`.
        unsafe { (self.call)(self, haystack) }
    }

    /// A "simple" prefilter that just looks for the occurrence of the rarest
    /// byte from the needle. This is generally only used for very small
    /// haystacks.
    #[inline]
    fn find_simple(&self, haystack: &[u8]) -> Option<usize> {
        // We don't use crate::memchr here because the haystack should be small
        // enough that memchr won't be able to use vector routines anyway. So
        // we just skip straight to the fallback implementation which is likely
        // faster. (A byte-at-a-time loop is only used when the haystack is
        // smaller than `size_of::<usize>()`.)
        crate::arch::all::memchr::One::new(self.rarest_byte)
            .find(haystack)
            .map(|i| i.saturating_sub(usize::from(self.rarest_offset)))
    }
}
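
// NOTE(editorial sketch, not part of memchr): the offset arithmetic in
// `find_simple` above, in concrete numbers. `simple_candidate` mirrors the
// body of `find_simple`, with a scalar byte scan standing in for the
// fallback `memchr`.

fn simple_candidate(
    haystack: &[u8],
    rarest_byte: u8,
    rarest_offset: usize,
) -> Option<usize> {
    haystack
        .iter()
        .position(|&b| b == rarest_byte)
        .map(|i| i.saturating_sub(rarest_offset))
}

// With a needle like b"xqz" whose (assumed) rarest byte is b'q' at offset 1,
// and a haystack b"aaaaaaaxqz", b'q' first occurs at index 8, so the
// candidate start is 8 - 1 = 7, exactly where b"xqz" would have to begin.
// Near the start of the haystack, `saturating_sub` clamps the candidate to 0
// rather than underflowing.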

impl core::fmt::Debug for Prefilter {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("Prefilter")
            .field("call", &"<prefilter function>")
            .field("kind", &"<prefilter kind union>")
            .field("rarest_byte", &self.rarest_byte)
            .field("rarest_offset", &self.rarest_offset)
            .finish()
    }
}

/// A union indicating one of several possible prefilters that are in active
/// use.
///
/// This union should only be read by one of the functions prefixed with
/// `prefilter_kind_`. Namely, the correct function is meant to be paired with
/// the union by the caller, such that the function always reads from the
/// designated union field.
#[derive(Clone, Copy)]
union PrefilterKind {
    fallback: crate::arch::all::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    sse2: crate::arch::x86_64::sse2::packedpair::Finder,
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
    avx2: crate::arch::x86_64::avx2::packedpair::Finder,
    #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
    simd128: crate::arch::wasm32::simd128::packedpair::Finder,
    #[cfg(target_arch = "aarch64")]
    neon: crate::arch::aarch64::neon::packedpair::Finder,
}

/// The type of a prefilter function.
///
/// # Safety
///
/// When using a function of this type, callers must ensure that the correct
/// function is paired with the value populated in the `PrefilterKind` union.
type PrefilterKindFn =
    unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option<usize>;

/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback
/// prefilter. Works on all platforms.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.fallback` union field is set.
unsafe fn prefilter_kind_fallback(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    strat.kind.fallback.find_prefilter(haystack)
}

/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.sse2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn prefilter_kind_sse2(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.sse2;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.avx2` union field is set.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
unsafe fn prefilter_kind_avx2(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.avx2;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32
/// simd128 prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.simd128` union field is set.
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
unsafe fn prefilter_kind_simd128(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.simd128;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon
/// prefilter.
///
/// # Safety
///
/// Callers must ensure that the `strat.kind.neon` union field is set.
#[cfg(target_arch = "aarch64")]
unsafe fn prefilter_kind_neon(
    strat: &Prefilter,
    haystack: &[u8],
) -> Option<usize> {
    let finder = &strat.kind.neon;
    if haystack.len() < finder.min_haystack_len() {
        strat.find_simple(haystack)
    } else {
        finder.find_prefilter(haystack)
    }
}

/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.) A prefilter state should only be
/// created from a `Freqy`. e.g., an inert `Freqy` will produce an inert
/// `PrefilterState`.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that have been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: u32 = 50;

    /// The minimum number of bytes that skipping must average.
    ///
    /// This value was chosen based on varying it and checking
    /// the microbenchmarks. In particular, this can impact the
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
    /// too low.
    const MIN_SKIP_BYTES: u32 = 8;

    /// Create a fresh prefilter state.
    #[inline]
    pub(crate) fn new() -> PrefilterState {
        PrefilterState { skips: 1, skipped: 0 }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    fn update(&mut self, skipped: usize) {
        self.skips = self.skips.saturating_add(1);
        // We need to do this dance since it's technically possible for
        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
        // size of a prefilter state.)
        self.skipped = match u32::try_from(skipped) {
            Err(_) => core::u32::MAX,
            Ok(skipped) => self.skipped.saturating_add(skipped),
        };
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    #[inline]
    fn is_effective(&mut self) -> bool {
        if self.is_inert() {
            return false;
        }
        if self.skips() < PrefilterState::MIN_SKIPS {
            return true;
        }
        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
            return true;
        }

        // We're inert.
        self.skips = 0;
        false
    }

    /// Returns true if the prefilter this state represents should no longer
    /// be used.
    #[inline]
    fn is_inert(&self) -> bool {
        self.skips == 0
    }

    /// Returns the total number of times the prefilter has been used.
    #[inline]
    fn skips(&self) -> u32 {
        // Remember, `0` is a sentinel value indicating inertness, so we
        // always need to subtract `1` to get our actual number of skips.
        self.skips.saturating_sub(1)
    }
}
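
// NOTE(editorial sketch, not part of memchr): the effectiveness heuristic
// above in concrete numbers, ignoring the `skips` "+1" sentinel encoding for
// clarity. With MIN_SKIPS = 50 and MIN_SKIP_BYTES = 8, a prefilter that has
// run 50 times must have skipped at least 50 * 8 = 400 bytes in total (an
// average of 8 bytes per invocation) to stay enabled; otherwise
// `is_effective` zeroes `skips` and the prefilter is inert from then on.

fn still_effective(skips: u32, skipped_bytes: u32) -> bool {
    const MIN_SKIPS: u32 = 50;
    const MIN_SKIP_BYTES: u32 = 8;
    // Either too few samples to judge, or the running average meets the bar.
    skips < MIN_SKIPS || skipped_bytes >= MIN_SKIP_BYTES * skips
}

// still_effective(49, 0)   == true   (not enough samples yet)
// still_effective(50, 400) == true   (average is exactly 8 bytes/skip)
// still_effective(50, 399) == false  (average below threshold: go inert)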

/// A combination of prefilter effectiveness state and the prefilter itself.
#[derive(Debug)]
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    prestate: &'a mut PrefilterState,
    /// The actual prefilter.
    prestrat: &'a Prefilter,
}

impl<'a> Pre<'a> {
    /// Call this prefilter on the given haystack with the given needle.
    #[inline]
    pub(crate) fn find(&mut self, haystack: &[u8]) -> Option<usize> {
        let result = self.prestrat.find(haystack);
        self.prestate.update(result.unwrap_or(haystack.len()));
        result
    }

    /// Return true if and only if this prefilter should be used.
    #[inline]
    pub(crate) fn is_effective(&mut self) -> bool {
        self.prestate.is_effective()
    }
}

/// Returns true if the needle has the right characteristics for a vector
/// algorithm to handle the entirety of substring search.
///
/// Vector algorithms can be used for prefilters for other substring search
/// algorithms (like Two-Way), but they can also be used for substring search
/// on their own. When used for substring search, vector algorithms will
/// quickly identify candidate match positions (just like in the prefilter
/// case), but instead of returning the candidate position they will try to
/// confirm the match themselves. Confirmation happens via `memcmp`. This
/// works well for short needles, but can break down when many false candidate
/// positions are generated for large needles. Thus, we only permit vector
/// algorithms to own substring search when the needle is of a certain length.
#[inline]
fn do_packed_search(needle: &[u8]) -> bool {
    /// The minimum length of a needle required for this algorithm. The minimum
    /// is 2 since a length of 1 should just use memchr and a length of 0 isn't
    /// a case handled by this searcher.
    const MIN_LEN: usize = 2;

    /// The maximum needle length permitted for this algorithm.
    ///
    /// In reality, there is no hard max here. The code below can handle any
    /// length needle. (Perhaps that suggests there are missing optimizations.)
    /// Instead, this is a heuristic and a bound guaranteeing our linear time
    /// complexity.
    ///
    /// It is a heuristic because when a candidate match is found, memcmp is
    /// run. For very large needles with lots of false positives, memcmp can
    /// make the code run quite slow.
    ///
    /// It is a bound because the worst case behavior with memcmp is
    /// multiplicative in the size of the needle and haystack, and we want
    /// to keep that additive. This cap ensures we still meet that bound
    /// theoretically, since it's just a constant. We aren't acting in bad
    /// faith here: memcmp on tiny needles is so fast that even in pathological
    /// cases (see pathological vector benchmarks), this is still just as fast
    /// or faster in practice.
    ///
    /// This specific number was chosen by tweaking a bit and running
    /// benchmarks. The rare-medium-needle benchmark, for example, gets about
    /// 5% faster by using this algorithm instead of a prefilter-accelerated
    /// Two-Way. There's also a theoretical desire to keep this number
    /// reasonably low, to mitigate the impact of pathological cases. I did try
    /// 64, and some benchmarks got a little better, and others (particularly
    /// the pathological ones) got a lot worse. So... 32 it is?
    const MAX_LEN: usize = 32;

    MIN_LEN <= needle.len() && needle.len() <= MAX_LEN
}
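
// NOTE(editorial sketch, not part of memchr): the needle-length window in
// concrete terms. Packed (vector-owned) search applies only to needles of
// length 2 through 32 inclusive; shorter needles are handled by memchr (or
// the empty-needle searcher), and longer ones fall back to Two-Way, with a
// prefilter when one is available.

fn packed_search_window_examples() {
    assert!(!do_packed_search(b""));         // empty: handled earlier
    assert!(!do_packed_search(b"a"));        // one byte: memchr instead
    assert!(do_packed_search(b"ab"));        // MIN_LEN = 2
    assert!(do_packed_search(&[b'a'; 32]));  // MAX_LEN = 32
    assert!(!do_packed_search(&[b'a'; 33])); // too long: Two-Way instead
}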