Coverage Report

Created: 2025-06-23 13:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/cargo-vendor-dir/regex-automata-0.4.9/src/meta/strategy.rs
Line
Count
Source
1
use core::{
2
    fmt::Debug,
3
    panic::{RefUnwindSafe, UnwindSafe},
4
};
5
6
use alloc::sync::Arc;
7
8
use regex_syntax::hir::{literal, Hir};
9
10
use crate::{
11
    meta::{
12
        error::{BuildError, RetryError, RetryFailError, RetryQuadraticError},
13
        regex::{Cache, RegexInfo},
14
        reverse_inner, wrappers,
15
    },
16
    nfa::thompson::{self, WhichCaptures, NFA},
17
    util::{
18
        captures::{Captures, GroupInfo},
19
        look::LookMatcher,
20
        prefilter::{self, Prefilter, PrefilterI},
21
        primitives::{NonMaxUsize, PatternID},
22
        search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet},
23
    },
24
};
25
26
/// A trait that represents a single meta strategy. Its main utility is in
27
/// providing a way to do dynamic dispatch over a few choices.
28
///
29
/// Why dynamic dispatch? I actually don't have a super compelling reason, and
30
/// importantly, I have not benchmarked it with the main alternative: an enum.
31
/// I went with dynamic dispatch initially because the regex engine search code
32
/// really can't be inlined into caller code in most cases because it's just
33
/// too big. In other words, it is already expected that every regex search
34
/// will entail at least the cost of a function call.
35
///
36
/// I do wonder whether using enums would result in better codegen overall
37
/// though. It's a worthwhile experiment to try. Probably the most interesting
38
/// benchmark to run in such a case would be one with a high match count. That
39
/// is, a benchmark to test the overall latency of a search call.
40
pub(super) trait Strategy:
41
    Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static
42
{
43
    fn group_info(&self) -> &GroupInfo;
44
45
    fn create_cache(&self) -> Cache;
46
47
    fn reset_cache(&self, cache: &mut Cache);
48
49
    fn is_accelerated(&self) -> bool;
50
51
    fn memory_usage(&self) -> usize;
52
53
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match>;
54
55
    fn search_half(
56
        &self,
57
        cache: &mut Cache,
58
        input: &Input<'_>,
59
    ) -> Option<HalfMatch>;
60
61
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool;
62
63
    fn search_slots(
64
        &self,
65
        cache: &mut Cache,
66
        input: &Input<'_>,
67
        slots: &mut [Option<NonMaxUsize>],
68
    ) -> Option<PatternID>;
69
70
    fn which_overlapping_matches(
71
        &self,
72
        cache: &mut Cache,
73
        input: &Input<'_>,
74
        patset: &mut PatternSet,
75
    );
76
}
77
78
2
pub(super) fn new(
79
2
    info: &RegexInfo,
80
2
    hirs: &[&Hir],
81
2
) -> Result<Arc<dyn Strategy>, BuildError> {
82
    // At this point, we're committed to a regex engine of some kind. So pull
83
    // out a prefilter if we can, which will feed to each of the constituent
84
    // regex engines.
85
2
    let pre = if info.is_always_anchored_start() {
86
        // PERF: I'm not sure we necessarily want to do this... We may want to
87
        // run a prefilter for quickly rejecting in some cases. The problem
88
        // is that anchored searches overlap quite a bit with the use case
89
        // of "run a regex on every line to extract data." In that case, the
90
        // regex always matches, so running a prefilter doesn't really help us
91
        // there. The main place where a prefilter helps in an anchored search
92
        // is if the anchored search is not expected to match frequently. That
93
        // is, the prefilter gives us a way to possibly reject a haystack very
94
        // quickly.
95
        //
96
        // Maybe we should do use a prefilter, but only for longer haystacks?
97
        // Or maybe we should only use a prefilter when we think it's "fast"?
98
        //
99
        // Interestingly, I think we currently lack the infrastructure for
100
        // disabling a prefilter based on haystack length. That would probably
101
        // need to be a new 'Input' option. (Interestingly, an 'Input' used to
102
        // carry a 'Prefilter' with it, but I moved away from that.)
103
        debug!("skipping literal extraction since regex is anchored");
104
1
        None
105
1
    } else if let Some(
pre0
) = info.config().get_prefilter() {
106
        debug!(
107
            "skipping literal extraction since the caller provided a prefilter"
108
        );
109
0
        Some(pre.clone())
110
1
    } else if info.config().get_auto_prefilter() {
111
1
        let kind = info.config().get_match_kind();
112
1
        let prefixes = crate::util::prefilter::prefixes(kind, hirs);
113
        // If we can build a full `Strategy` from just the extracted prefixes,
114
        // then we can short-circuit and avoid building a regex engine at all.
115
1
        if let Some(
pre0
) = Pre::from_prefixes(info, &prefixes) {
116
            debug!(
117
                "found that the regex can be broken down to a literal \
118
                 search, avoiding the regex engine entirely",
119
            );
120
0
            return Ok(pre);
121
1
        }
122
        // This now attempts another short-circuit of the regex engine: if we
123
        // have a huge alternation of just plain literals, then we can just use
124
        // Aho-Corasick for that and avoid the regex engine entirely.
125
        //
126
        // You might think this case would just be handled by
127
        // `Pre::from_prefixes`, but that technique relies on heuristic literal
128
        // extraction from the corresponding `Hir`. That works, but part of
129
        // heuristics limit the size and number of literals returned. This case
130
        // will specifically handle patterns with very large alternations.
131
        //
132
        // One wonders if we should just roll this our heuristic literal
133
        // extraction, and then I think this case could disappear entirely.
134
1
        if let Some(
pre0
) = Pre::from_alternation_literals(info, hirs) {
135
            debug!(
136
                "found plain alternation of literals, \
137
                 avoiding regex engine entirely and using Aho-Corasick"
138
            );
139
0
            return Ok(pre);
140
1
        }
141
1
        prefixes.literals().and_then(|strings| {
142
0
            debug!(
143
0
                "creating prefilter from {} literals: {:?}",
144
0
                strings.len(),
145
0
                strings,
146
0
            );
147
0
            Prefilter::new(kind, strings)
148
1
        })
149
    } else {
150
        debug!("skipping literal extraction since prefilters were disabled");
151
0
        None
152
    };
153
2
    let mut core = Core::new(info.clone(), pre.clone(), hirs)
?0
;
154
    // Now that we have our core regex engines built, there are a few cases
155
    // where we can do a little bit better than just a normal "search forward
156
    // and maybe use a prefilter when in a start state." However, these cases
157
    // may not always work or otherwise build on top of the Core searcher.
158
    // For example, the reverse anchored optimization seems like it might
159
    // always work, but only the DFAs support reverse searching and the DFAs
160
    // might give up or quit for reasons. If we had, e.g., a PikeVM that
161
    // supported reverse searching, then we could avoid building a full Core
162
    // engine for this case.
163
2
    core = match ReverseAnchored::new(core) {
164
2
        Err(core) => core,
165
0
        Ok(ra) => {
166
0
            debug!("using reverse anchored strategy");
167
0
            return Ok(Arc::new(ra));
168
        }
169
    };
170
2
    core = match ReverseSuffix::new(core, hirs) {
171
2
        Err(core) => core,
172
0
        Ok(rs) => {
173
0
            debug!("using reverse suffix strategy");
174
0
            return Ok(Arc::new(rs));
175
        }
176
    };
177
2
    core = match ReverseInner::new(core, hirs) {
178
2
        Err(core) => core,
179
0
        Ok(ri) => {
180
0
            debug!("using reverse inner strategy");
181
0
            return Ok(Arc::new(ri));
182
        }
183
    };
184
    debug!("using core strategy");
185
2
    Ok(Arc::new(core))
186
2
}
187
188
#[derive(Clone, Debug)]
189
struct Pre<P> {
190
    pre: P,
191
    group_info: GroupInfo,
192
}
193
194
impl<P: PrefilterI> Pre<P> {
195
0
    fn new(pre: P) -> Arc<dyn Strategy> {
196
0
        // The only thing we support when we use prefilters directly as a
197
0
        // strategy is the start and end of the overall match for a single
198
0
        // pattern. In other words, exactly one implicit capturing group. Which
199
0
        // is exactly what we use here for a GroupInfo.
200
0
        let group_info = GroupInfo::new([[None::<&str>]]).unwrap();
201
0
        Arc::new(Pre { pre, group_info })
202
0
    }
203
}
204
205
// This is a little weird, but we don't actually care about the type parameter
206
// here because we're selecting which underlying prefilter to use. So we just
207
// define it on an arbitrary type.
208
impl Pre<()> {
209
    /// Given a sequence of prefixes, attempt to return a full `Strategy` using
210
    /// just the prefixes.
211
    ///
212
    /// Basically, this occurs when the prefixes given not just prefixes,
213
    /// but an enumeration of the entire language matched by the regular
214
    /// expression.
215
    ///
216
    /// A number of other conditions need to be true too. For example, there
217
    /// can be only one pattern, the number of explicit capture groups is 0, no
218
    /// look-around assertions and so on.
219
    ///
220
    /// Note that this ignores `Config::get_auto_prefilter` because if this
221
    /// returns something, then it isn't a prefilter but a matcher itself.
222
    /// Therefore, it shouldn't suffer from the problems typical to prefilters
223
    /// (such as a high false positive rate).
224
1
    fn from_prefixes(
225
1
        info: &RegexInfo,
226
1
        prefixes: &literal::Seq,
227
1
    ) -> Option<Arc<dyn Strategy>> {
228
1
        let kind = info.config().get_match_kind();
229
1
        // Check to see if our prefixes are exact, which means we might be
230
1
        // able to bypass the regex engine entirely and just rely on literal
231
1
        // searches.
232
1
        if !prefixes.is_exact() {
233
1
            return None;
234
0
        }
235
0
        // We also require that we have a single regex pattern. Namely,
236
0
        // we reuse the prefilter infrastructure to implement search and
237
0
        // prefilters only report spans. Prefilters don't know about pattern
238
0
        // IDs. The multi-regex case isn't a lost cause, we might still use
239
0
        // Aho-Corasick and we might still just use a regular prefilter, but
240
0
        // that's done below.
241
0
        if info.pattern_len() != 1 {
242
0
            return None;
243
0
        }
244
0
        // We can't have any capture groups either. The literal engines don't
245
0
        // know how to deal with things like '(foo)(bar)'. In that case, a
246
0
        // prefilter will just be used and then the regex engine will resolve
247
0
        // the capture groups.
248
0
        if info.props()[0].explicit_captures_len() != 0 {
249
0
            return None;
250
0
        }
251
0
        // We also require that it has zero look-around assertions. Namely,
252
0
        // literal extraction treats look-around assertions as if they match
253
0
        // *every* empty string. But of course, that isn't true. So for
254
0
        // example, 'foo\bquux' never matches anything, but 'fooquux' is
255
0
        // extracted from that as an exact literal. Such cases should just run
256
0
        // the regex engine. 'fooquux' will be used as a normal prefilter, and
257
0
        // then the regex engine will try to look for an actual match.
258
0
        if !info.props()[0].look_set().is_empty() {
259
0
            return None;
260
0
        }
261
0
        // Finally, currently, our prefilters are all oriented around
262
0
        // leftmost-first match semantics, so don't try to use them if the
263
0
        // caller asked for anything else.
264
0
        if kind != MatchKind::LeftmostFirst {
265
0
            return None;
266
0
        }
267
0
        // The above seems like a lot of requirements to meet, but it applies
268
0
        // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet
269
0
        // the above criteria, for example.
270
0
        //
271
0
        // Note that this is effectively a latency optimization. If we didn't
272
0
        // do this, then the extracted literals would still get bundled into
273
0
        // a prefilter, and every regex engine capable of running unanchored
274
0
        // searches supports prefilters. So this optimization merely sidesteps
275
0
        // having to run the regex engine at all to confirm the match. Thus, it
276
0
        // decreases the latency of a match.
277
0
278
0
        // OK because we know the set is exact and thus finite.
279
0
        let prefixes = prefixes.literals().unwrap();
280
        debug!(
281
            "trying to bypass regex engine by creating \
282
             prefilter from {} literals: {:?}",
283
            prefixes.len(),
284
            prefixes,
285
        );
286
0
        let choice = match prefilter::Choice::new(kind, prefixes) {
287
0
            Some(choice) => choice,
288
            None => {
289
                debug!(
290
                    "regex bypass failed because no prefilter could be built"
291
                );
292
0
                return None;
293
            }
294
        };
295
0
        let strat: Arc<dyn Strategy> = match choice {
296
0
            prefilter::Choice::Memchr(pre) => Pre::new(pre),
297
0
            prefilter::Choice::Memchr2(pre) => Pre::new(pre),
298
0
            prefilter::Choice::Memchr3(pre) => Pre::new(pre),
299
0
            prefilter::Choice::Memmem(pre) => Pre::new(pre),
300
0
            prefilter::Choice::Teddy(pre) => Pre::new(pre),
301
0
            prefilter::Choice::ByteSet(pre) => Pre::new(pre),
302
0
            prefilter::Choice::AhoCorasick(pre) => Pre::new(pre),
303
        };
304
0
        Some(strat)
305
1
    }
306
307
    /// Attempts to extract an alternation of literals, and if it's deemed
308
    /// worth doing, returns an Aho-Corasick prefilter as a strategy.
309
    ///
310
    /// And currently, this only returns something when 'hirs.len() == 1'. This
311
    /// could in theory do something if there are multiple HIRs where all of
312
    /// them are alternation of literals, but I haven't had the time to go down
313
    /// that path yet.
314
1
    fn from_alternation_literals(
315
1
        info: &RegexInfo,
316
1
        hirs: &[&Hir],
317
1
    ) -> Option<Arc<dyn Strategy>> {
318
        use crate::util::prefilter::AhoCorasick;
319
320
1
        let 
lits0
= crate::meta::literal::alternation_literals(info, hirs)?;
321
0
        let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?;
322
0
        Some(Pre::new(ac))
323
1
    }
324
}
325
326
// This implements Strategy for anything that implements PrefilterI.
327
//
328
// Note that this must only be used for regexes of length 1. Multi-regexes
329
// don't work here. The prefilter interface only provides the span of a match
330
// and not the pattern ID. (I did consider making it more expressive, but I
331
// couldn't figure out how to tie everything together elegantly.) Thus, so long
332
// as the regex only contains one pattern, we can simply assume that a match
333
// corresponds to PatternID::ZERO. And indeed, that's what we do here.
334
//
335
// In practice, since this impl is used to report matches directly and thus
336
// completely bypasses the regex engine, we only wind up using this under the
337
// following restrictions:
338
//
339
// * There must be only one pattern. As explained above.
340
// * The literal sequence must be finite and only contain exact literals.
341
// * There must not be any look-around assertions. If there are, the literals
342
// extracted might be exact, but a match doesn't necessarily imply an overall
343
// match. As a trivial example, 'foo\bbar' does not match 'foobar'.
344
// * The pattern must not have any explicit capturing groups. If it does, the
345
// caller might expect them to be resolved. e.g., 'foo(bar)'.
346
//
347
// So when all of those things are true, we use a prefilter directly as a
348
// strategy.
349
//
350
// In the case where the number of patterns is more than 1, we don't use this
351
// but do use a special Aho-Corasick strategy if all of the regexes are just
352
// simple literals or alternations of literals. (We also use the Aho-Corasick
353
// strategy when len(patterns)==1 if the number of literals is large. In that
354
// case, literal extraction gives up and will return an infinite set.)
355
impl<P: PrefilterI> Strategy for Pre<P> {
356
    #[cfg_attr(feature = "perf-inline", inline(always))]
357
0
    fn group_info(&self) -> &GroupInfo {
358
0
        &self.group_info
359
0
    }
360
361
0
    fn create_cache(&self) -> Cache {
362
0
        Cache {
363
0
            capmatches: Captures::all(self.group_info().clone()),
364
0
            pikevm: wrappers::PikeVMCache::none(),
365
0
            backtrack: wrappers::BoundedBacktrackerCache::none(),
366
0
            onepass: wrappers::OnePassCache::none(),
367
0
            hybrid: wrappers::HybridCache::none(),
368
0
            revhybrid: wrappers::ReverseHybridCache::none(),
369
0
        }
370
0
    }
371
372
0
    fn reset_cache(&self, _cache: &mut Cache) {}
373
374
0
    fn is_accelerated(&self) -> bool {
375
0
        self.pre.is_fast()
376
0
    }
377
378
0
    fn memory_usage(&self) -> usize {
379
0
        self.pre.memory_usage()
380
0
    }
381
382
    #[cfg_attr(feature = "perf-inline", inline(always))]
383
0
    fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
384
0
        if input.is_done() {
385
0
            return None;
386
0
        }
387
0
        if input.get_anchored().is_anchored() {
388
0
            return self
389
0
                .pre
390
0
                .prefix(input.haystack(), input.get_span())
391
0
                .map(|sp| Match::new(PatternID::ZERO, sp));
392
0
        }
393
0
        self.pre
394
0
            .find(input.haystack(), input.get_span())
395
0
            .map(|sp| Match::new(PatternID::ZERO, sp))
396
0
    }
397
398
    #[cfg_attr(feature = "perf-inline", inline(always))]
399
0
    fn search_half(
400
0
        &self,
401
0
        cache: &mut Cache,
402
0
        input: &Input<'_>,
403
0
    ) -> Option<HalfMatch> {
404
0
        self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end()))
405
0
    }
406
407
    #[cfg_attr(feature = "perf-inline", inline(always))]
408
0
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
409
0
        self.search(cache, input).is_some()
410
0
    }
411
412
    #[cfg_attr(feature = "perf-inline", inline(always))]
413
0
    fn search_slots(
414
0
        &self,
415
0
        cache: &mut Cache,
416
0
        input: &Input<'_>,
417
0
        slots: &mut [Option<NonMaxUsize>],
418
0
    ) -> Option<PatternID> {
419
0
        let m = self.search(cache, input)?;
420
0
        if let Some(slot) = slots.get_mut(0) {
421
0
            *slot = NonMaxUsize::new(m.start());
422
0
        }
423
0
        if let Some(slot) = slots.get_mut(1) {
424
0
            *slot = NonMaxUsize::new(m.end());
425
0
        }
426
0
        Some(m.pattern())
427
0
    }
428
429
    #[cfg_attr(feature = "perf-inline", inline(always))]
430
0
    fn which_overlapping_matches(
431
0
        &self,
432
0
        cache: &mut Cache,
433
0
        input: &Input<'_>,
434
0
        patset: &mut PatternSet,
435
0
    ) {
436
0
        if self.search(cache, input).is_some() {
437
0
            patset.insert(PatternID::ZERO);
438
0
        }
439
0
    }
440
}
441
442
#[derive(Debug)]
443
struct Core {
444
    info: RegexInfo,
445
    pre: Option<Prefilter>,
446
    nfa: NFA,
447
    nfarev: Option<NFA>,
448
    pikevm: wrappers::PikeVM,
449
    backtrack: wrappers::BoundedBacktracker,
450
    onepass: wrappers::OnePass,
451
    hybrid: wrappers::Hybrid,
452
    dfa: wrappers::DFA,
453
}
454
455
impl Core {
456
2
    fn new(
457
2
        info: RegexInfo,
458
2
        pre: Option<Prefilter>,
459
2
        hirs: &[&Hir],
460
2
    ) -> Result<Core, BuildError> {
461
2
        let mut lookm = LookMatcher::new();
462
2
        lookm.set_line_terminator(info.config().get_line_terminator());
463
2
        let thompson_config = thompson::Config::new()
464
2
            .utf8(info.config().get_utf8_empty())
465
2
            .nfa_size_limit(info.config().get_nfa_size_limit())
466
2
            .shrink(false)
467
2
            .which_captures(info.config().get_which_captures())
468
2
            .look_matcher(lookm);
469
2
        let nfa = thompson::Compiler::new()
470
2
            .configure(thompson_config.clone())
471
2
            .build_many_from_hir(hirs)
472
2
            .map_err(BuildError::nfa)
?0
;
473
        // It's possible for the PikeVM or the BB to fail to build, even though
474
        // at this point, we already have a full NFA in hand. They can fail
475
        // when a Unicode word boundary is used but where Unicode word boundary
476
        // support is disabled at compile time, thus making it impossible to
477
        // match. (Construction can also fail if the NFA was compiled without
478
        // captures, but we always enable that above.)
479
2
        let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)
?0
;
480
2
        let backtrack =
481
2
            wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)
?0
;
482
        // The onepass engine can of course fail to build, but we expect it to
483
        // fail in many cases because it is an optimization that doesn't apply
484
        // to all regexes. The 'OnePass' wrapper encapsulates this failure (and
485
        // logs a message if it occurs).
486
2
        let onepass = wrappers::OnePass::new(&info, &nfa);
487
        // We try to encapsulate whether a particular regex engine should be
488
        // used within each respective wrapper, but the DFAs need a reverse NFA
489
        // to build itself, and we really do not want to build a reverse NFA if
490
        // we know we aren't going to use the lazy DFA. So we do a config check
491
        // up front, which is in practice the only way we won't try to use the
492
        // DFA.
493
2
        let (nfarev, hybrid, dfa) =
494
2
            if !info.config().get_hybrid() && 
!info.config().get_dfa()0
{
495
0
                (None, wrappers::Hybrid::none(), wrappers::DFA::none())
496
            } else {
497
                // FIXME: Technically, we don't quite yet KNOW that we need
498
                // a reverse NFA. It's possible for the DFAs below to both
499
                // fail to build just based on the forward NFA. In which case,
500
                // building the reverse NFA was totally wasted work. But...
501
                // fixing this requires breaking DFA construction apart into
502
                // two pieces: one for the forward part and another for the
503
                // reverse part. Quite annoying. Making it worse, when building
504
                // both DFAs fails, it's quite likely that the NFA is large and
505
                // that it will take quite some time to build the reverse NFA
506
                // too. So... it's really probably worth it to do this!
507
2
                let nfarev = thompson::Compiler::new()
508
2
                    // Currently, reverse NFAs don't support capturing groups,
509
2
                    // so we MUST disable them. But even if we didn't have to,
510
2
                    // we would, because nothing in this crate does anything
511
2
                    // useful with capturing groups in reverse. And of course,
512
2
                    // the lazy DFA ignores capturing groups in all cases.
513
2
                    .configure(
514
2
                        thompson_config
515
2
                            .clone()
516
2
                            .which_captures(WhichCaptures::None)
517
2
                            .reverse(true),
518
2
                    )
519
2
                    .build_many_from_hir(hirs)
520
2
                    .map_err(BuildError::nfa)
?0
;
521
2
                let dfa = if !info.config().get_dfa() {
522
2
                    wrappers::DFA::none()
523
                } else {
524
0
                    wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
525
                };
526
2
                let hybrid = if !info.config().get_hybrid() {
527
0
                    wrappers::Hybrid::none()
528
2
                } else if dfa.is_some() {
529
                    debug!("skipping lazy DFA because we have a full DFA");
530
0
                    wrappers::Hybrid::none()
531
                } else {
532
2
                    wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
533
                };
534
2
                (Some(nfarev), hybrid, dfa)
535
            };
536
2
        Ok(Core {
537
2
            info,
538
2
            pre,
539
2
            nfa,
540
2
            nfarev,
541
2
            pikevm,
542
2
            backtrack,
543
2
            onepass,
544
2
            hybrid,
545
2
            dfa,
546
2
        })
547
2
    }
548
549
    #[cfg_attr(feature = "perf-inline", inline(always))]
550
0
    fn try_search_mayfail(
551
0
        &self,
552
0
        cache: &mut Cache,
553
0
        input: &Input<'_>,
554
0
    ) -> Option<Result<Option<Match>, RetryFailError>> {
555
0
        if let Some(e) = self.dfa.get(input) {
556
            trace!("using full DFA for search at {:?}", input.get_span());
557
0
            Some(e.try_search(input))
558
0
        } else if let Some(e) = self.hybrid.get(input) {
559
            trace!("using lazy DFA for search at {:?}", input.get_span());
560
0
            Some(e.try_search(&mut cache.hybrid, input))
561
        } else {
562
0
            None
563
        }
564
0
    }
565
566
0
    fn search_nofail(
567
0
        &self,
568
0
        cache: &mut Cache,
569
0
        input: &Input<'_>,
570
0
    ) -> Option<Match> {
571
0
        let caps = &mut cache.capmatches;
572
0
        caps.set_pattern(None);
573
        // We manually inline 'try_search_slots_nofail' here because we need to
574
        // borrow from 'cache.capmatches' in this method, but if we do, then
575
        // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a
576
        // classic example of how the borrow checker inhibits decomposition.
577
        // There are of course work-arounds (more types and/or interior
578
        // mutability), but that's more annoying than this IMO.
579
0
        let pid = if let Some(ref e) = self.onepass.get(input) {
580
            trace!("using OnePass for search at {:?}", input.get_span());
581
0
            e.search_slots(&mut cache.onepass, input, caps.slots_mut())
582
0
        } else if let Some(ref e) = self.backtrack.get(input) {
583
            trace!(
584
                "using BoundedBacktracker for search at {:?}",
585
                input.get_span()
586
            );
587
0
            e.search_slots(&mut cache.backtrack, input, caps.slots_mut())
588
        } else {
589
            trace!("using PikeVM for search at {:?}", input.get_span());
590
0
            let e = self.pikevm.get();
591
0
            e.search_slots(&mut cache.pikevm, input, caps.slots_mut())
592
        };
593
0
        caps.set_pattern(pid);
594
0
        caps.get_match()
595
0
    }
596
597
0
    fn search_half_nofail(
598
0
        &self,
599
0
        cache: &mut Cache,
600
0
        input: &Input<'_>,
601
0
    ) -> Option<HalfMatch> {
602
        // Only the lazy/full DFA returns half-matches, since the DFA requires
603
        // a reverse scan to find the start position. These fallback regex
604
        // engines can find the start and end in a single pass, so we just do
605
        // that and throw away the start offset to conform to the API.
606
0
        let m = self.search_nofail(cache, input)?;
607
0
        Some(HalfMatch::new(m.pattern(), m.end()))
608
0
    }
609
610
0
    fn search_slots_nofail(
611
0
        &self,
612
0
        cache: &mut Cache,
613
0
        input: &Input<'_>,
614
0
        slots: &mut [Option<NonMaxUsize>],
615
0
    ) -> Option<PatternID> {
616
0
        if let Some(ref e) = self.onepass.get(input) {
617
            trace!(
618
                "using OnePass for capture search at {:?}",
619
                input.get_span()
620
            );
621
0
            e.search_slots(&mut cache.onepass, input, slots)
622
0
        } else if let Some(ref e) = self.backtrack.get(input) {
623
            trace!(
624
                "using BoundedBacktracker for capture search at {:?}",
625
                input.get_span()
626
            );
627
0
            e.search_slots(&mut cache.backtrack, input, slots)
628
        } else {
629
            trace!(
630
                "using PikeVM for capture search at {:?}",
631
                input.get_span()
632
            );
633
0
            let e = self.pikevm.get();
634
0
            e.search_slots(&mut cache.pikevm, input, slots)
635
        }
636
0
    }
637
638
0
    fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
639
0
        if let Some(ref e) = self.onepass.get(input) {
640
            trace!(
641
                "using OnePass for is-match search at {:?}",
642
                input.get_span()
643
            );
644
0
            e.search_slots(&mut cache.onepass, input, &mut []).is_some()
645
0
        } else if let Some(ref e) = self.backtrack.get(input) {
646
            trace!(
647
                "using BoundedBacktracker for is-match search at {:?}",
648
                input.get_span()
649
            );
650
0
            e.is_match(&mut cache.backtrack, input)
651
        } else {
652
            trace!(
653
                "using PikeVM for is-match search at {:?}",
654
                input.get_span()
655
            );
656
0
            let e = self.pikevm.get();
657
0
            e.is_match(&mut cache.pikevm, input)
658
        }
659
0
    }
660
661
0
    fn is_capture_search_needed(&self, slots_len: usize) -> bool {
662
0
        slots_len > self.nfa.group_info().implicit_slot_len()
663
0
    }
664
}
665
666
impl Strategy for Core {
667
    #[cfg_attr(feature = "perf-inline", inline(always))]
668
2
    fn group_info(&self) -> &GroupInfo {
669
2
        self.nfa.group_info()
670
2
    }
671
672
    #[cfg_attr(feature = "perf-inline", inline(always))]
673
2
    fn create_cache(&self) -> Cache {
674
2
        Cache {
675
2
            capmatches: Captures::all(self.group_info().clone()),
676
2
            pikevm: self.pikevm.create_cache(),
677
2
            backtrack: self.backtrack.create_cache(),
678
2
            onepass: self.onepass.create_cache(),
679
2
            hybrid: self.hybrid.create_cache(),
680
2
            revhybrid: wrappers::ReverseHybridCache::none(),
681
2
        }
682
2
    }
683
684
    #[cfg_attr(feature = "perf-inline", inline(always))]
685
0
    fn reset_cache(&self, cache: &mut Cache) {
686
0
        cache.pikevm.reset(&self.pikevm);
687
0
        cache.backtrack.reset(&self.backtrack);
688
0
        cache.onepass.reset(&self.onepass);
689
0
        cache.hybrid.reset(&self.hybrid);
690
0
    }
691
692
0
    fn is_accelerated(&self) -> bool {
693
0
        self.pre.as_ref().map_or(false, |pre| pre.is_fast())
694
0
    }
695
696
0
    fn memory_usage(&self) -> usize {
697
0
        self.info.memory_usage()
698
0
            + self.pre.as_ref().map_or(0, |pre| pre.memory_usage())
699
0
            + self.nfa.memory_usage()
700
0
            + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage())
701
0
            + self.onepass.memory_usage()
702
0
            + self.dfa.memory_usage()
703
0
    }
704
705
    #[cfg_attr(feature = "perf-inline", inline(always))]
706
0
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
707
        // We manually inline try_search_mayfail here because letting the
708
        // compiler do it seems to produce pretty crappy codegen.
709
0
        return if let Some(e) = self.dfa.get(input) {
710
            trace!("using full DFA for full search at {:?}", input.get_span());
711
0
            match e.try_search(input) {
712
0
                Ok(x) => x,
713
0
                Err(_err) => {
714
0
                    trace!("full DFA search failed: {}", _err);
715
0
                    self.search_nofail(cache, input)
716
                }
717
            }
718
0
        } else if let Some(e) = self.hybrid.get(input) {
719
            trace!("using lazy DFA for full search at {:?}", input.get_span());
720
0
            match e.try_search(&mut cache.hybrid, input) {
721
0
                Ok(x) => x,
722
0
                Err(_err) => {
723
0
                    trace!("lazy DFA search failed: {}", _err);
724
0
                    self.search_nofail(cache, input)
725
                }
726
            }
727
        } else {
728
0
            self.search_nofail(cache, input)
729
        };
730
0
    }
731
732
    #[cfg_attr(feature = "perf-inline", inline(always))]
733
300
    fn search_half(
734
300
        &self,
735
300
        cache: &mut Cache,
736
300
        input: &Input<'_>,
737
300
    ) -> Option<HalfMatch> {
738
        // The main difference with 'search' is that if we're using a DFA, we
739
        // can use a single forward scan without needing to run the reverse
740
        // DFA.
741
300
        if let Some(
e0
) = self.dfa.get(input) {
742
            trace!("using full DFA for half search at {:?}", input.get_span());
743
0
            match e.try_search_half_fwd(input) {
744
0
                Ok(x) => x,
745
0
                Err(_err) => {
746
0
                    trace!("full DFA half search failed: {}", _err);
747
0
                    self.search_half_nofail(cache, input)
748
                }
749
            }
750
300
        } else if let Some(e) = self.hybrid.get(input) {
751
            trace!("using lazy DFA for half search at {:?}", input.get_span());
752
300
            match e.try_search_half_fwd(&mut cache.hybrid, input) {
753
300
                Ok(x) => x,
754
0
                Err(_err) => {
755
0
                    trace!("lazy DFA half search failed: {}", _err);
756
0
                    self.search_half_nofail(cache, input)
757
                }
758
            }
759
        } else {
760
0
            self.search_half_nofail(cache, input)
761
        }
762
300
    }
763
764
    #[cfg_attr(feature = "perf-inline", inline(always))]
765
0
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
766
0
        if let Some(e) = self.dfa.get(input) {
767
            trace!(
768
                "using full DFA for is-match search at {:?}",
769
                input.get_span()
770
            );
771
0
            match e.try_search_half_fwd(input) {
772
0
                Ok(x) => x.is_some(),
773
0
                Err(_err) => {
774
0
                    trace!("full DFA half search failed: {}", _err);
775
0
                    self.is_match_nofail(cache, input)
776
                }
777
            }
778
0
        } else if let Some(e) = self.hybrid.get(input) {
779
            trace!(
780
                "using lazy DFA for is-match search at {:?}",
781
                input.get_span()
782
            );
783
0
            match e.try_search_half_fwd(&mut cache.hybrid, input) {
784
0
                Ok(x) => x.is_some(),
785
0
                Err(_err) => {
786
0
                    trace!("lazy DFA half search failed: {}", _err);
787
0
                    self.is_match_nofail(cache, input)
788
                }
789
            }
790
        } else {
791
0
            self.is_match_nofail(cache, input)
792
        }
793
0
    }
794
795
    #[cfg_attr(feature = "perf-inline", inline(always))]
796
0
    fn search_slots(
797
0
        &self,
798
0
        cache: &mut Cache,
799
0
        input: &Input<'_>,
800
0
        slots: &mut [Option<NonMaxUsize>],
801
0
    ) -> Option<PatternID> {
802
0
        // Even if the regex has explicit capture groups, if the caller didn't
803
0
        // provide any explicit slots, then it doesn't make sense to try and do
804
0
        // extra work to get offsets for those slots. Ideally the caller should
805
0
        // realize this and not call this routine in the first place, but alas,
806
0
        // we try to save the caller from themselves if they do.
807
0
        if !self.is_capture_search_needed(slots.len()) {
808
            trace!("asked for slots unnecessarily, trying fast path");
809
0
            let m = self.search(cache, input)?;
810
0
            copy_match_to_slots(m, slots);
811
0
            return Some(m.pattern());
812
0
        }
813
0
        // If the onepass DFA is available for this search (which only happens
814
0
        // when it's anchored), then skip running a fallible DFA. The onepass
815
0
        // DFA isn't as fast as a full or lazy DFA, but it is typically quite
816
0
        // a bit faster than the backtracker or the PikeVM. So it isn't as
817
0
        // advantageous to try and do a full/lazy DFA scan first.
818
0
        //
819
0
        // We still theorize that it's better to do a full/lazy DFA scan, even
820
0
        // when it's anchored, because it's usually much faster and permits us
821
0
        // to say "no match" much more quickly. This does hurt the case of,
822
0
        // say, parsing each line in a log file into capture groups, because
823
0
        // in that case, the line always matches. So the lazy DFA scan is
824
0
        // usually just wasted work. But, the lazy DFA is usually quite fast
825
0
        // and doesn't cost too much here.
826
0
        if self.onepass.get(&input).is_some() {
827
0
            return self.search_slots_nofail(cache, &input, slots);
828
0
        }
829
0
        let m = match self.try_search_mayfail(cache, input) {
830
0
            Some(Ok(Some(m))) => m,
831
0
            Some(Ok(None)) => return None,
832
0
            Some(Err(_err)) => {
833
0
                trace!("fast capture search failed: {}", _err);
834
0
                return self.search_slots_nofail(cache, input, slots);
835
            }
836
            None => {
837
0
                return self.search_slots_nofail(cache, input, slots);
838
            }
839
        };
840
        // At this point, now that we've found the bounds of the
841
        // match, we need to re-run something that can resolve
842
        // capturing groups. But we only need to run on it on the
843
        // match bounds and not the entire haystack.
844
        trace!(
845
            "match found at {}..{} in capture search, \
846
         using another engine to find captures",
847
            m.start(),
848
            m.end(),
849
        );
850
0
        let input = input
851
0
            .clone()
852
0
            .span(m.start()..m.end())
853
0
            .anchored(Anchored::Pattern(m.pattern()));
854
0
        Some(
855
0
            self.search_slots_nofail(cache, &input, slots)
856
0
                .expect("should find a match"),
857
0
        )
858
0
    }
859
860
    #[cfg_attr(feature = "perf-inline", inline(always))]
861
0
    fn which_overlapping_matches(
862
0
        &self,
863
0
        cache: &mut Cache,
864
0
        input: &Input<'_>,
865
0
        patset: &mut PatternSet,
866
0
    ) {
867
0
        if let Some(e) = self.dfa.get(input) {
868
            trace!(
869
                "using full DFA for overlapping search at {:?}",
870
                input.get_span()
871
            );
872
0
            let _err = match e.try_which_overlapping_matches(input, patset) {
873
0
                Ok(()) => return,
874
0
                Err(err) => err,
875
            };
876
            trace!("fast overlapping search failed: {}", _err);
877
0
        } else if let Some(e) = self.hybrid.get(input) {
878
            trace!(
879
                "using lazy DFA for overlapping search at {:?}",
880
                input.get_span()
881
            );
882
0
            let _err = match e.try_which_overlapping_matches(
883
0
                &mut cache.hybrid,
884
0
                input,
885
0
                patset,
886
0
            ) {
887
                Ok(()) => {
888
0
                    return;
889
                }
890
0
                Err(err) => err,
891
            };
892
            trace!("fast overlapping search failed: {}", _err);
893
0
        }
894
        trace!(
895
            "using PikeVM for overlapping search at {:?}",
896
            input.get_span()
897
        );
898
0
        let e = self.pikevm.get();
899
0
        e.which_overlapping_matches(&mut cache.pikevm, input, patset)
900
0
    }
901
}
902
903
#[derive(Debug)]
904
struct ReverseAnchored {
905
    core: Core,
906
}
907
908
impl ReverseAnchored {
909
2
    fn new(core: Core) -> Result<ReverseAnchored, Core> {
910
2
        if !core.info.is_always_anchored_end() {
911
            debug!(
912
                "skipping reverse anchored optimization because \
913
         the regex is not always anchored at the end"
914
            );
915
1
            return Err(core);
916
1
        }
917
1
        // Note that the caller can still request an anchored search even when
918
1
        // the regex isn't anchored at the start. We detect that case in the
919
1
        // search routines below and just fallback to the core engine. This
920
1
        // is fine because both searches are anchored. It's just a matter of
921
1
        // picking one. Falling back to the core engine is a little simpler,
922
1
        // since if we used the reverse anchored approach, we'd have to add an
923
1
        // extra check to ensure the match reported starts at the place where
924
1
        // the caller requested the search to start.
925
1
        if core.info.is_always_anchored_start() {
926
            debug!(
927
                "skipping reverse anchored optimization because \
928
         the regex is also anchored at the start"
929
            );
930
1
            return Err(core);
931
0
        }
932
0
        // Only DFAs can do reverse searches (currently), so we need one of
933
0
        // them in order to do this optimization. It's possible (although
934
0
        // pretty unlikely) that we have neither and need to give up.
935
0
        if !core.hybrid.is_some() && !core.dfa.is_some() {
936
            debug!(
937
                "skipping reverse anchored optimization because \
938
         we don't have a lazy DFA or a full DFA"
939
            );
940
0
            return Err(core);
941
0
        }
942
0
        Ok(ReverseAnchored { core })
943
2
    }
944
945
    #[cfg_attr(feature = "perf-inline", inline(always))]
946
0
    fn try_search_half_anchored_rev(
947
0
        &self,
948
0
        cache: &mut Cache,
949
0
        input: &Input<'_>,
950
0
    ) -> Result<Option<HalfMatch>, RetryFailError> {
951
0
        // We of course always want an anchored search. In theory, the
952
0
        // underlying regex engines should automatically enable anchored
953
0
        // searches since the regex is itself anchored, but this more clearly
954
0
        // expresses intent and is always correct.
955
0
        let input = input.clone().anchored(Anchored::Yes);
956
0
        if let Some(e) = self.core.dfa.get(&input) {
957
            trace!(
958
                "using full DFA for reverse anchored search at {:?}",
959
                input.get_span()
960
            );
961
0
            e.try_search_half_rev(&input)
962
0
        } else if let Some(e) = self.core.hybrid.get(&input) {
963
            trace!(
964
                "using lazy DFA for reverse anchored search at {:?}",
965
                input.get_span()
966
            );
967
0
            e.try_search_half_rev(&mut cache.hybrid, &input)
968
        } else {
969
0
            unreachable!("ReverseAnchored always has a DFA")
970
        }
971
0
    }
972
}
973
974
// Note that in this impl, we don't check that 'input.end() ==
975
// input.haystack().len()'. In particular, when that condition is false, a
976
// match is always impossible because we know that the regex is always anchored
977
// at the end (or else 'ReverseAnchored' won't be built). We don't check that
978
// here because the 'Regex' wrapper actually does that for us in all cases.
979
// Thus, in this impl, we can actually assume that the end position in 'input'
980
// is equivalent to the length of the haystack.
981
impl Strategy for ReverseAnchored {
982
    #[cfg_attr(feature = "perf-inline", inline(always))]
983
0
    fn group_info(&self) -> &GroupInfo {
984
0
        self.core.group_info()
985
0
    }
986
987
    #[cfg_attr(feature = "perf-inline", inline(always))]
988
0
    fn create_cache(&self) -> Cache {
989
0
        self.core.create_cache()
990
0
    }
991
992
    #[cfg_attr(feature = "perf-inline", inline(always))]
993
0
    fn reset_cache(&self, cache: &mut Cache) {
994
0
        self.core.reset_cache(cache);
995
0
    }
996
997
0
    fn is_accelerated(&self) -> bool {
998
0
        // Since this is anchored at the end, a reverse anchored search is
999
0
        // almost certainly guaranteed to result in a much faster search than
1000
0
        // a standard forward search.
1001
0
        true
1002
0
    }
1003
1004
0
    fn memory_usage(&self) -> usize {
1005
0
        self.core.memory_usage()
1006
0
    }
1007
1008
    #[cfg_attr(feature = "perf-inline", inline(always))]
1009
0
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1010
0
        if input.get_anchored().is_anchored() {
1011
0
            return self.core.search(cache, input);
1012
0
        }
1013
0
        match self.try_search_half_anchored_rev(cache, input) {
1014
0
            Err(_err) => {
1015
0
                trace!("fast reverse anchored search failed: {}", _err);
1016
0
                self.core.search_nofail(cache, input)
1017
            }
1018
0
            Ok(None) => None,
1019
0
            Ok(Some(hm)) => {
1020
0
                Some(Match::new(hm.pattern(), hm.offset()..input.end()))
1021
            }
1022
        }
1023
0
    }
1024
1025
    #[cfg_attr(feature = "perf-inline", inline(always))]
1026
0
    fn search_half(
1027
0
        &self,
1028
0
        cache: &mut Cache,
1029
0
        input: &Input<'_>,
1030
0
    ) -> Option<HalfMatch> {
1031
0
        if input.get_anchored().is_anchored() {
1032
0
            return self.core.search_half(cache, input);
1033
0
        }
1034
0
        match self.try_search_half_anchored_rev(cache, input) {
1035
0
            Err(_err) => {
1036
0
                trace!("fast reverse anchored search failed: {}", _err);
1037
0
                self.core.search_half_nofail(cache, input)
1038
            }
1039
0
            Ok(None) => None,
1040
0
            Ok(Some(hm)) => {
1041
0
                // Careful here! 'try_search_half' is a *forward* search that
1042
0
                // only cares about the *end* position of a match. But
1043
0
                // 'hm.offset()' is actually the start of the match. So we
1044
0
                // actually just throw that away here and, since we know we
1045
0
                // have a match, return the only possible position at which a
1046
0
                // match can occur: input.end().
1047
0
                Some(HalfMatch::new(hm.pattern(), input.end()))
1048
            }
1049
        }
1050
0
    }
1051
1052
    #[cfg_attr(feature = "perf-inline", inline(always))]
1053
0
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1054
0
        if input.get_anchored().is_anchored() {
1055
0
            return self.core.is_match(cache, input);
1056
0
        }
1057
0
        match self.try_search_half_anchored_rev(cache, input) {
1058
0
            Err(_err) => {
1059
0
                trace!("fast reverse anchored search failed: {}", _err);
1060
0
                self.core.is_match_nofail(cache, input)
1061
            }
1062
0
            Ok(None) => false,
1063
0
            Ok(Some(_)) => true,
1064
        }
1065
0
    }
1066
1067
    #[cfg_attr(feature = "perf-inline", inline(always))]
1068
0
    fn search_slots(
1069
0
        &self,
1070
0
        cache: &mut Cache,
1071
0
        input: &Input<'_>,
1072
0
        slots: &mut [Option<NonMaxUsize>],
1073
0
    ) -> Option<PatternID> {
1074
0
        if input.get_anchored().is_anchored() {
1075
0
            return self.core.search_slots(cache, input, slots);
1076
0
        }
1077
0
        match self.try_search_half_anchored_rev(cache, input) {
1078
0
            Err(_err) => {
1079
0
                trace!("fast reverse anchored search failed: {}", _err);
1080
0
                self.core.search_slots_nofail(cache, input, slots)
1081
            }
1082
0
            Ok(None) => None,
1083
0
            Ok(Some(hm)) => {
1084
0
                if !self.core.is_capture_search_needed(slots.len()) {
1085
                    trace!("asked for slots unnecessarily, skipping captures");
1086
0
                    let m = Match::new(hm.pattern(), hm.offset()..input.end());
1087
0
                    copy_match_to_slots(m, slots);
1088
0
                    return Some(m.pattern());
1089
0
                }
1090
0
                let start = hm.offset();
1091
0
                let input = input
1092
0
                    .clone()
1093
0
                    .span(start..input.end())
1094
0
                    .anchored(Anchored::Pattern(hm.pattern()));
1095
0
                self.core.search_slots_nofail(cache, &input, slots)
1096
            }
1097
        }
1098
0
    }
1099
1100
    #[cfg_attr(feature = "perf-inline", inline(always))]
1101
0
    fn which_overlapping_matches(
1102
0
        &self,
1103
0
        cache: &mut Cache,
1104
0
        input: &Input<'_>,
1105
0
        patset: &mut PatternSet,
1106
0
    ) {
1107
0
        // It seems like this could probably benefit from a reverse anchored
1108
0
        // optimization, perhaps by doing an overlapping reverse search (which
1109
0
        // the DFAs do support). I haven't given it much thought though, and
1110
0
        // I'm currently focus more on the single pattern case.
1111
0
        self.core.which_overlapping_matches(cache, input, patset)
1112
0
    }
1113
}
1114
1115
#[derive(Debug)]
1116
struct ReverseSuffix {
1117
    core: Core,
1118
    pre: Prefilter,
1119
}
1120
1121
impl ReverseSuffix {
1122
2
    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseSuffix, Core> {
1123
2
        if !core.info.config().get_auto_prefilter() {
1124
            debug!(
1125
                "skipping reverse suffix optimization because \
1126
                 automatic prefilters are disabled"
1127
            );
1128
0
            return Err(core);
1129
2
        }
1130
2
        // Like the reverse inner optimization, we don't do this for regexes
1131
2
        // that are always anchored. It could lead to scanning too much, but
1132
2
        // could say "no match" much more quickly than running the regex
1133
2
        // engine if the initial literal scan doesn't match. With that said,
1134
2
        // the reverse suffix optimization has lower overhead, since it only
1135
2
        // requires a reverse scan after a literal match to confirm or reject
1136
2
        // the match. (Although, in the case of confirmation, it then needs to
1137
2
        // do another forward scan to find the end position.)
1138
2
        //
1139
2
        // Note that the caller can still request an anchored search even
1140
2
        // when the regex isn't anchored. We detect that case in the search
1141
2
        // routines below and just fallback to the core engine. Currently this
1142
2
        // optimization assumes all searches are unanchored, so if we do want
1143
2
        // to enable this optimization for anchored searches, it will need a
1144
2
        // little work to support it.
1145
2
        if core.info.is_always_anchored_start() {
1146
            debug!(
1147
                "skipping reverse suffix optimization because \
1148
         the regex is always anchored at the start",
1149
            );
1150
1
            return Err(core);
1151
1
        }
1152
1
        // Only DFAs can do reverse searches (currently), so we need one of
1153
1
        // them in order to do this optimization. It's possible (although
1154
1
        // pretty unlikely) that we have neither and need to give up.
1155
1
        if !core.hybrid.is_some() && 
!core.dfa.is_some()0
{
1156
            debug!(
1157
                "skipping reverse suffix optimization because \
1158
         we don't have a lazy DFA or a full DFA"
1159
            );
1160
0
            return Err(core);
1161
1
        }
1162
1
        if core.pre.as_ref().map_or(false, |p| 
p.is_fast()0
) {
1163
            debug!(
1164
                "skipping reverse suffix optimization because \
1165
         we already have a prefilter that we think is fast"
1166
            );
1167
0
            return Err(core);
1168
1
        }
1169
1
        let kind = core.info.config().get_match_kind();
1170
1
        let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171
1
        let 
lcs0
= match suffixes.longest_common_suffix() {
1172
            None => {
1173
                debug!(
1174
                    "skipping reverse suffix optimization because \
1175
                     a longest common suffix could not be found",
1176
                );
1177
1
                return Err(core);
1178
            }
1179
0
            Some(lcs) if lcs.is_empty() => {
1180
0
                debug!(
1181
0
                    "skipping reverse suffix optimization because \
1182
0
                     the longest common suffix is the empty string",
1183
0
                );
1184
0
                return Err(core);
1185
            }
1186
0
            Some(lcs) => lcs,
1187
        };
1188
0
        let pre = match Prefilter::new(kind, &[lcs]) {
1189
0
            Some(pre) => pre,
1190
            None => {
1191
                debug!(
1192
                    "skipping reverse suffix optimization because \
1193
                     a prefilter could not be constructed from the \
1194
                     longest common suffix",
1195
                );
1196
0
                return Err(core);
1197
            }
1198
        };
1199
0
        if !pre.is_fast() {
1200
            debug!(
1201
                "skipping reverse suffix optimization because \
1202
         while we have a suffix prefilter, it is not \
1203
         believed to be 'fast'"
1204
            );
1205
0
            return Err(core);
1206
0
        }
1207
0
        Ok(ReverseSuffix { core, pre })
1208
2
    }
1209
1210
    #[cfg_attr(feature = "perf-inline", inline(always))]
1211
0
    fn try_search_half_start(
1212
0
        &self,
1213
0
        cache: &mut Cache,
1214
0
        input: &Input<'_>,
1215
0
    ) -> Result<Option<HalfMatch>, RetryError> {
1216
0
        let mut span = input.get_span();
1217
0
        let mut min_start = 0;
1218
        loop {
1219
0
            let litmatch = match self.pre.find(input.haystack(), span) {
1220
0
                None => return Ok(None),
1221
0
                Some(span) => span,
1222
0
            };
1223
0
            trace!("reverse suffix scan found suffix match at {:?}", litmatch);
1224
0
            let revinput = input
1225
0
                .clone()
1226
0
                .anchored(Anchored::Yes)
1227
0
                .span(input.start()..litmatch.end);
1228
0
            match self
1229
0
                .try_search_half_rev_limited(cache, &revinput, min_start)?
1230
            {
1231
                None => {
1232
0
                    if span.start >= span.end {
1233
0
                        break;
1234
0
                    }
1235
0
                    span.start = litmatch.start.checked_add(1).unwrap();
1236
                }
1237
0
                Some(hm) => return Ok(Some(hm)),
1238
            }
1239
0
            min_start = litmatch.end;
1240
        }
1241
0
        Ok(None)
1242
0
    }
1243
1244
    #[cfg_attr(feature = "perf-inline", inline(always))]
1245
0
    fn try_search_half_fwd(
1246
0
        &self,
1247
0
        cache: &mut Cache,
1248
0
        input: &Input<'_>,
1249
0
    ) -> Result<Option<HalfMatch>, RetryFailError> {
1250
0
        if let Some(e) = self.core.dfa.get(&input) {
1251
            trace!(
1252
                "using full DFA for forward reverse suffix search at {:?}",
1253
                input.get_span()
1254
            );
1255
0
            e.try_search_half_fwd(&input)
1256
0
        } else if let Some(e) = self.core.hybrid.get(&input) {
1257
            trace!(
1258
                "using lazy DFA for forward reverse suffix search at {:?}",
1259
                input.get_span()
1260
            );
1261
0
            e.try_search_half_fwd(&mut cache.hybrid, &input)
1262
        } else {
1263
0
            unreachable!("ReverseSuffix always has a DFA")
1264
        }
1265
0
    }
1266
1267
    #[cfg_attr(feature = "perf-inline", inline(always))]
1268
0
    fn try_search_half_rev_limited(
1269
0
        &self,
1270
0
        cache: &mut Cache,
1271
0
        input: &Input<'_>,
1272
0
        min_start: usize,
1273
0
    ) -> Result<Option<HalfMatch>, RetryError> {
1274
0
        if let Some(e) = self.core.dfa.get(&input) {
1275
            trace!(
1276
                "using full DFA for reverse suffix search at {:?}, \
1277
                 but will be stopped at {} to avoid quadratic behavior",
1278
                input.get_span(),
1279
                min_start,
1280
            );
1281
0
            e.try_search_half_rev_limited(&input, min_start)
1282
0
        } else if let Some(e) = self.core.hybrid.get(&input) {
1283
            trace!(
1284
                "using lazy DFA for reverse suffix search at {:?}, \
1285
                 but will be stopped at {} to avoid quadratic behavior",
1286
                input.get_span(),
1287
                min_start,
1288
            );
1289
0
            e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start)
1290
        } else {
1291
0
            unreachable!("ReverseSuffix always has a DFA")
1292
        }
1293
0
    }
1294
}
1295
1296
impl Strategy for ReverseSuffix {
1297
    #[cfg_attr(feature = "perf-inline", inline(always))]
1298
0
    fn group_info(&self) -> &GroupInfo {
1299
0
        self.core.group_info()
1300
0
    }
1301
1302
    #[cfg_attr(feature = "perf-inline", inline(always))]
1303
0
    fn create_cache(&self) -> Cache {
1304
0
        self.core.create_cache()
1305
0
    }
1306
1307
    #[cfg_attr(feature = "perf-inline", inline(always))]
1308
0
    fn reset_cache(&self, cache: &mut Cache) {
1309
0
        self.core.reset_cache(cache);
1310
0
    }
1311
1312
0
    fn is_accelerated(&self) -> bool {
1313
0
        self.pre.is_fast()
1314
0
    }
1315
1316
0
    fn memory_usage(&self) -> usize {
1317
0
        self.core.memory_usage() + self.pre.memory_usage()
1318
0
    }
1319
1320
    #[cfg_attr(feature = "perf-inline", inline(always))]
1321
0
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1322
0
        if input.get_anchored().is_anchored() {
1323
0
            return self.core.search(cache, input);
1324
0
        }
1325
0
        match self.try_search_half_start(cache, input) {
1326
0
            Err(RetryError::Quadratic(_err)) => {
1327
0
                trace!("reverse suffix optimization failed: {}", _err);
1328
0
                self.core.search(cache, input)
1329
            }
1330
0
            Err(RetryError::Fail(_err)) => {
1331
0
                trace!("reverse suffix reverse fast search failed: {}", _err);
1332
0
                self.core.search_nofail(cache, input)
1333
            }
1334
0
            Ok(None) => None,
1335
0
            Ok(Some(hm_start)) => {
1336
0
                let fwdinput = input
1337
0
                    .clone()
1338
0
                    .anchored(Anchored::Pattern(hm_start.pattern()))
1339
0
                    .span(hm_start.offset()..input.end());
1340
0
                match self.try_search_half_fwd(cache, &fwdinput) {
1341
0
                    Err(_err) => {
1342
0
                        trace!(
1343
0
                            "reverse suffix forward fast search failed: {}",
1344
0
                            _err
1345
0
                        );
1346
0
                        self.core.search_nofail(cache, input)
1347
                    }
1348
                    Ok(None) => {
1349
0
                        unreachable!(
1350
0
                            "suffix match plus reverse match implies \
1351
0
                 there must be a match",
1352
0
                        )
1353
                    }
1354
0
                    Ok(Some(hm_end)) => Some(Match::new(
1355
0
                        hm_start.pattern(),
1356
0
                        hm_start.offset()..hm_end.offset(),
1357
0
                    )),
1358
                }
1359
            }
1360
        }
1361
0
    }
1362
1363
    #[cfg_attr(feature = "perf-inline", inline(always))]
1364
0
    fn search_half(
1365
0
        &self,
1366
0
        cache: &mut Cache,
1367
0
        input: &Input<'_>,
1368
0
    ) -> Option<HalfMatch> {
1369
0
        if input.get_anchored().is_anchored() {
1370
0
            return self.core.search_half(cache, input);
1371
0
        }
1372
0
        match self.try_search_half_start(cache, input) {
1373
0
            Err(RetryError::Quadratic(_err)) => {
1374
0
                trace!("reverse suffix half optimization failed: {}", _err);
1375
0
                self.core.search_half(cache, input)
1376
            }
1377
0
            Err(RetryError::Fail(_err)) => {
1378
0
                trace!(
1379
0
                    "reverse suffix reverse fast half search failed: {}",
1380
0
                    _err
1381
0
                );
1382
0
                self.core.search_half_nofail(cache, input)
1383
            }
1384
0
            Ok(None) => None,
1385
0
            Ok(Some(hm_start)) => {
1386
0
                // This is a bit subtle. It is tempting to just stop searching
1387
0
                // at this point and return a half-match with an offset
1388
0
                // corresponding to where the suffix was found. But the suffix
1389
0
                // match does not necessarily correspond to the end of the
1390
0
                // proper leftmost-first match. Consider /[a-z]+ing/ against
1391
0
                // 'tingling'. The first suffix match is the first 'ing', and
1392
0
                // the /[a-z]+/ matches the 't'. So if we stopped here, then
1393
0
                // we'd report 'ting' as the match. But 'tingling' is the
1394
0
                // correct match because of greediness.
1395
0
                let fwdinput = input
1396
0
                    .clone()
1397
0
                    .anchored(Anchored::Pattern(hm_start.pattern()))
1398
0
                    .span(hm_start.offset()..input.end());
1399
0
                match self.try_search_half_fwd(cache, &fwdinput) {
1400
0
                    Err(_err) => {
1401
0
                        trace!(
1402
0
                            "reverse suffix forward fast search failed: {}",
1403
0
                            _err
1404
0
                        );
1405
0
                        self.core.search_half_nofail(cache, input)
1406
                    }
1407
                    Ok(None) => {
1408
0
                        unreachable!(
1409
0
                            "suffix match plus reverse match implies \
1410
0
                 there must be a match",
1411
0
                        )
1412
                    }
1413
0
                    Ok(Some(hm_end)) => Some(hm_end),
1414
                }
1415
            }
1416
        }
1417
0
    }
1418
1419
    #[cfg_attr(feature = "perf-inline", inline(always))]
1420
0
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1421
0
        if input.get_anchored().is_anchored() {
1422
0
            return self.core.is_match(cache, input);
1423
0
        }
1424
0
        match self.try_search_half_start(cache, input) {
1425
0
            Err(RetryError::Quadratic(_err)) => {
1426
0
                trace!("reverse suffix half optimization failed: {}", _err);
1427
0
                self.core.is_match_nofail(cache, input)
1428
            }
1429
0
            Err(RetryError::Fail(_err)) => {
1430
0
                trace!(
1431
0
                    "reverse suffix reverse fast half search failed: {}",
1432
0
                    _err
1433
0
                );
1434
0
                self.core.is_match_nofail(cache, input)
1435
            }
1436
0
            Ok(None) => false,
1437
0
            Ok(Some(_)) => true,
1438
        }
1439
0
    }
1440
1441
    #[cfg_attr(feature = "perf-inline", inline(always))]
1442
0
    fn search_slots(
1443
0
        &self,
1444
0
        cache: &mut Cache,
1445
0
        input: &Input<'_>,
1446
0
        slots: &mut [Option<NonMaxUsize>],
1447
0
    ) -> Option<PatternID> {
1448
0
        if input.get_anchored().is_anchored() {
1449
0
            return self.core.search_slots(cache, input, slots);
1450
0
        }
1451
0
        if !self.core.is_capture_search_needed(slots.len()) {
1452
            trace!("asked for slots unnecessarily, trying fast path");
1453
0
            let m = self.search(cache, input)?;
1454
0
            copy_match_to_slots(m, slots);
1455
0
            return Some(m.pattern());
1456
0
        }
1457
0
        let hm_start = match self.try_search_half_start(cache, input) {
1458
0
            Err(RetryError::Quadratic(_err)) => {
1459
0
                trace!(
1460
0
                    "reverse suffix captures optimization failed: {}",
1461
0
                    _err
1462
0
                );
1463
0
                return self.core.search_slots(cache, input, slots);
1464
            }
1465
0
            Err(RetryError::Fail(_err)) => {
1466
0
                trace!(
1467
0
                    "reverse suffix reverse fast captures search failed: {}",
1468
0
                    _err
1469
0
                );
1470
0
                return self.core.search_slots_nofail(cache, input, slots);
1471
            }
1472
0
            Ok(None) => return None,
1473
0
            Ok(Some(hm_start)) => hm_start,
1474
0
        };
1475
0
        trace!(
1476
0
            "match found at {}..{} in capture search, \
1477
0
         using another engine to find captures",
1478
0
            hm_start.offset(),
1479
0
            input.end(),
1480
0
        );
1481
0
        let start = hm_start.offset();
1482
0
        let input = input
1483
0
            .clone()
1484
0
            .span(start..input.end())
1485
0
            .anchored(Anchored::Pattern(hm_start.pattern()));
1486
0
        self.core.search_slots_nofail(cache, &input, slots)
1487
0
    }
1488
1489
    #[cfg_attr(feature = "perf-inline", inline(always))]
1490
0
    fn which_overlapping_matches(
1491
0
        &self,
1492
0
        cache: &mut Cache,
1493
0
        input: &Input<'_>,
1494
0
        patset: &mut PatternSet,
1495
0
    ) {
1496
0
        self.core.which_overlapping_matches(cache, input, patset)
1497
0
    }
1498
}
1499
1500
#[derive(Debug)]
1501
struct ReverseInner {
1502
    core: Core,
1503
    preinner: Prefilter,
1504
    nfarev: NFA,
1505
    hybrid: wrappers::ReverseHybrid,
1506
    dfa: wrappers::ReverseDFA,
1507
}
1508
1509
impl ReverseInner {
1510
2
    fn new(core: Core, hirs: &[&Hir]) -> Result<ReverseInner, Core> {
1511
2
        if !core.info.config().get_auto_prefilter() {
1512
            debug!(
1513
                "skipping reverse inner optimization because \
1514
                 automatic prefilters are disabled"
1515
            );
1516
0
            return Err(core);
1517
2
        }
1518
2
        // Currently we hard-code the assumption of leftmost-first match
1519
2
        // semantics. This isn't a huge deal because 'all' semantics tend to
1520
2
        // only be used for forward overlapping searches with multiple regexes,
1521
2
        // and this optimization only supports a single pattern at the moment.
1522
2
        if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
1523
            debug!(
1524
                "skipping reverse inner optimization because \
1525
         match kind is {:?} but this only supports leftmost-first",
1526
                core.info.config().get_match_kind(),
1527
            );
1528
0
            return Err(core);
1529
2
        }
1530
2
        // It's likely that a reverse inner scan has too much overhead for it
1531
2
        // to be worth it when the regex is anchored at the start. It is
1532
2
        // possible for it to be quite a bit faster if the initial literal
1533
2
        // scan fails to detect a match, in which case, we can say "no match"
1534
2
        // very quickly. But this could be undesirable, e.g., scanning too far
1535
2
        // or when the literal scan matches. If it matches, then confirming the
1536
2
        // match requires a reverse scan followed by a forward scan to confirm
1537
2
        // or reject, which is a fair bit of work.
1538
2
        //
1539
2
        // Note that the caller can still request an anchored search even
1540
2
        // when the regex isn't anchored. We detect that case in the search
1541
2
        // routines below and just fallback to the core engine. Currently this
1542
2
        // optimization assumes all searches are unanchored, so if we do want
1543
2
        // to enable this optimization for anchored searches, it will need a
1544
2
        // little work to support it.
1545
2
        if core.info.is_always_anchored_start() {
1546
            debug!(
1547
                "skipping reverse inner optimization because \
1548
         the regex is always anchored at the start",
1549
            );
1550
1
            return Err(core);
1551
1
        }
1552
1
        // Only DFAs can do reverse searches (currently), so we need one of
1553
1
        // them in order to do this optimization. It's possible (although
1554
1
        // pretty unlikely) that we have neither and need to give up.
1555
1
        if !core.hybrid.is_some() && 
!core.dfa.is_some()0
{
1556
            debug!(
1557
                "skipping reverse inner optimization because \
1558
         we don't have a lazy DFA or a full DFA"
1559
            );
1560
0
            return Err(core);
1561
1
        }
1562
1
        if core.pre.as_ref().map_or(false, |p| 
p.is_fast()0
) {
1563
            debug!(
1564
                "skipping reverse inner optimization because \
1565
         we already have a prefilter that we think is fast"
1566
            );
1567
0
            return Err(core);
1568
1
        } else if core.pre.is_some() {
1569
0
            debug!(
1570
0
                "core engine has a prefix prefilter, but it is \
1571
0
                 probably not fast, so continuing with attempt to \
1572
0
                 use reverse inner prefilter"
1573
0
            );
1574
1
        }
1575
1
        let (
concat_prefix, preinner0
) = match reverse_inner::extract(hirs) {
1576
0
            Some(x) => x,
1577
            // N.B. the 'extract' function emits debug messages explaining
1578
            // why we bailed out here.
1579
1
            None => return Err(core),
1580
        };
1581
        debug!("building reverse NFA for prefix before inner literal");
1582
0
        let mut lookm = LookMatcher::new();
1583
0
        lookm.set_line_terminator(core.info.config().get_line_terminator());
1584
0
        let thompson_config = thompson::Config::new()
1585
0
            .reverse(true)
1586
0
            .utf8(core.info.config().get_utf8_empty())
1587
0
            .nfa_size_limit(core.info.config().get_nfa_size_limit())
1588
0
            .shrink(false)
1589
0
            .which_captures(WhichCaptures::None)
1590
0
            .look_matcher(lookm);
1591
0
        let result = thompson::Compiler::new()
1592
0
            .configure(thompson_config)
1593
0
            .build_from_hir(&concat_prefix);
1594
0
        let nfarev = match result {
1595
0
            Ok(nfarev) => nfarev,
1596
0
            Err(_err) => {
1597
0
                debug!(
1598
0
                    "skipping reverse inner optimization because the \
1599
0
           reverse NFA failed to build: {}",
1600
0
                    _err,
1601
0
                );
1602
0
                return Err(core);
1603
            }
1604
        };
1605
        debug!("building reverse DFA for prefix before inner literal");
1606
0
        let dfa = if !core.info.config().get_dfa() {
1607
0
            wrappers::ReverseDFA::none()
1608
        } else {
1609
0
            wrappers::ReverseDFA::new(&core.info, &nfarev)
1610
        };
1611
0
        let hybrid = if !core.info.config().get_hybrid() {
1612
0
            wrappers::ReverseHybrid::none()
1613
0
        } else if dfa.is_some() {
1614
            debug!(
1615
                "skipping lazy DFA for reverse inner optimization \
1616
         because we have a full DFA"
1617
            );
1618
0
            wrappers::ReverseHybrid::none()
1619
        } else {
1620
0
            wrappers::ReverseHybrid::new(&core.info, &nfarev)
1621
        };
1622
0
        Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa })
1623
2
    }
1624
1625
    #[cfg_attr(feature = "perf-inline", inline(always))]
1626
0
    fn try_search_full(
1627
0
        &self,
1628
0
        cache: &mut Cache,
1629
0
        input: &Input<'_>,
1630
0
    ) -> Result<Option<Match>, RetryError> {
1631
0
        let mut span = input.get_span();
1632
0
        let mut min_match_start = 0;
1633
0
        let mut min_pre_start = 0;
1634
        loop {
1635
0
            let litmatch = match self.preinner.find(input.haystack(), span) {
1636
0
                None => return Ok(None),
1637
0
                Some(span) => span,
1638
0
            };
1639
0
            if litmatch.start < min_pre_start {
1640
                trace!(
1641
                    "found inner prefilter match at {:?}, which starts \
1642
           before the end of the last forward scan at {}, \
1643
           quitting to avoid quadratic behavior",
1644
                    litmatch,
1645
                    min_pre_start,
1646
                );
1647
0
                return Err(RetryError::Quadratic(RetryQuadraticError::new()));
1648
0
            }
1649
0
            trace!("reverse inner scan found inner match at {:?}", litmatch);
1650
0
            let revinput = input
1651
0
                .clone()
1652
0
                .anchored(Anchored::Yes)
1653
0
                .span(input.start()..litmatch.start);
1654
0
            // Note that in addition to the literal search above scanning past
1655
0
            // our minimum start point, this routine can also return an error
1656
0
            // as a result of detecting possible quadratic behavior if the
1657
0
            // reverse scan goes past the minimum start point. That is, the
1658
0
            // literal search might not, but the reverse regex search for the
1659
0
            // prefix might!
1660
0
            match self.try_search_half_rev_limited(
1661
0
                cache,
1662
0
                &revinput,
1663
0
                min_match_start,
1664
0
            )? {
1665
                None => {
1666
0
                    if span.start >= span.end {
1667
0
                        break;
1668
0
                    }
1669
0
                    span.start = litmatch.start.checked_add(1).unwrap();
1670
                }
1671
0
                Some(hm_start) => {
1672
0
                    let fwdinput = input
1673
0
                        .clone()
1674
0
                        .anchored(Anchored::Pattern(hm_start.pattern()))
1675
0
                        .span(hm_start.offset()..input.end());
1676
0
                    match self.try_search_half_fwd_stopat(cache, &fwdinput)? {
1677
0
                        Err(stopat) => {
1678
0
                            min_pre_start = stopat;
1679
0
                            span.start =
1680
0
                                litmatch.start.checked_add(1).unwrap();
1681
0
                        }
1682
0
                        Ok(hm_end) => {
1683
0
                            return Ok(Some(Match::new(
1684
0
                                hm_start.pattern(),
1685
0
                                hm_start.offset()..hm_end.offset(),
1686
0
                            )))
1687
                        }
1688
                    }
1689
                }
1690
            }
1691
0
            min_match_start = litmatch.end;
1692
        }
1693
0
        Ok(None)
1694
0
    }
1695
1696
    #[cfg_attr(feature = "perf-inline", inline(always))]
1697
0
    fn try_search_half_fwd_stopat(
1698
0
        &self,
1699
0
        cache: &mut Cache,
1700
0
        input: &Input<'_>,
1701
0
    ) -> Result<Result<HalfMatch, usize>, RetryFailError> {
1702
0
        if let Some(e) = self.core.dfa.get(&input) {
1703
            trace!(
1704
                "using full DFA for forward reverse inner search at {:?}",
1705
                input.get_span()
1706
            );
1707
0
            e.try_search_half_fwd_stopat(&input)
1708
0
        } else if let Some(e) = self.core.hybrid.get(&input) {
1709
            trace!(
1710
                "using lazy DFA for forward reverse inner search at {:?}",
1711
                input.get_span()
1712
            );
1713
0
            e.try_search_half_fwd_stopat(&mut cache.hybrid, &input)
1714
        } else {
1715
0
            unreachable!("ReverseInner always has a DFA")
1716
        }
1717
0
    }
1718
1719
    #[cfg_attr(feature = "perf-inline", inline(always))]
1720
0
    fn try_search_half_rev_limited(
1721
0
        &self,
1722
0
        cache: &mut Cache,
1723
0
        input: &Input<'_>,
1724
0
        min_start: usize,
1725
0
    ) -> Result<Option<HalfMatch>, RetryError> {
1726
0
        if let Some(e) = self.dfa.get(&input) {
1727
            trace!(
1728
                "using full DFA for reverse inner search at {:?}, \
1729
                 but will be stopped at {} to avoid quadratic behavior",
1730
                input.get_span(),
1731
                min_start,
1732
            );
1733
0
            e.try_search_half_rev_limited(&input, min_start)
1734
0
        } else if let Some(e) = self.hybrid.get(&input) {
1735
            trace!(
1736
                "using lazy DFA for reverse inner search at {:?}, \
1737
                 but will be stopped at {} to avoid quadratic behavior",
1738
                input.get_span(),
1739
                min_start,
1740
            );
1741
0
            e.try_search_half_rev_limited(
1742
0
                &mut cache.revhybrid,
1743
0
                &input,
1744
0
                min_start,
1745
0
            )
1746
        } else {
1747
0
            unreachable!("ReverseInner always has a DFA")
1748
        }
1749
0
    }
1750
}
1751
1752
impl Strategy for ReverseInner {
1753
    #[cfg_attr(feature = "perf-inline", inline(always))]
1754
0
    fn group_info(&self) -> &GroupInfo {
1755
0
        self.core.group_info()
1756
0
    }
1757
1758
    #[cfg_attr(feature = "perf-inline", inline(always))]
1759
0
    fn create_cache(&self) -> Cache {
1760
0
        let mut cache = self.core.create_cache();
1761
0
        cache.revhybrid = self.hybrid.create_cache();
1762
0
        cache
1763
0
    }
1764
1765
    #[cfg_attr(feature = "perf-inline", inline(always))]
1766
0
    fn reset_cache(&self, cache: &mut Cache) {
1767
0
        self.core.reset_cache(cache);
1768
0
        cache.revhybrid.reset(&self.hybrid);
1769
0
    }
1770
1771
0
    fn is_accelerated(&self) -> bool {
1772
0
        self.preinner.is_fast()
1773
0
    }
1774
1775
0
    fn memory_usage(&self) -> usize {
1776
0
        self.core.memory_usage()
1777
0
            + self.preinner.memory_usage()
1778
0
            + self.nfarev.memory_usage()
1779
0
            + self.dfa.memory_usage()
1780
0
    }
1781
1782
    #[cfg_attr(feature = "perf-inline", inline(always))]
1783
0
    fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
1784
0
        if input.get_anchored().is_anchored() {
1785
0
            return self.core.search(cache, input);
1786
0
        }
1787
0
        match self.try_search_full(cache, input) {
1788
0
            Err(RetryError::Quadratic(_err)) => {
1789
0
                trace!("reverse inner optimization failed: {}", _err);
1790
0
                self.core.search(cache, input)
1791
            }
1792
0
            Err(RetryError::Fail(_err)) => {
1793
0
                trace!("reverse inner fast search failed: {}", _err);
1794
0
                self.core.search_nofail(cache, input)
1795
            }
1796
0
            Ok(matornot) => matornot,
1797
        }
1798
0
    }
1799
1800
    #[cfg_attr(feature = "perf-inline", inline(always))]
1801
0
    fn search_half(
1802
0
        &self,
1803
0
        cache: &mut Cache,
1804
0
        input: &Input<'_>,
1805
0
    ) -> Option<HalfMatch> {
1806
0
        if input.get_anchored().is_anchored() {
1807
0
            return self.core.search_half(cache, input);
1808
0
        }
1809
0
        match self.try_search_full(cache, input) {
1810
0
            Err(RetryError::Quadratic(_err)) => {
1811
0
                trace!("reverse inner half optimization failed: {}", _err);
1812
0
                self.core.search_half(cache, input)
1813
            }
1814
0
            Err(RetryError::Fail(_err)) => {
1815
0
                trace!("reverse inner fast half search failed: {}", _err);
1816
0
                self.core.search_half_nofail(cache, input)
1817
            }
1818
0
            Ok(None) => None,
1819
0
            Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())),
1820
        }
1821
0
    }
1822
1823
    #[cfg_attr(feature = "perf-inline", inline(always))]
1824
0
    fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool {
1825
0
        if input.get_anchored().is_anchored() {
1826
0
            return self.core.is_match(cache, input);
1827
0
        }
1828
0
        match self.try_search_full(cache, input) {
1829
0
            Err(RetryError::Quadratic(_err)) => {
1830
0
                trace!("reverse inner half optimization failed: {}", _err);
1831
0
                self.core.is_match_nofail(cache, input)
1832
            }
1833
0
            Err(RetryError::Fail(_err)) => {
1834
0
                trace!("reverse inner fast half search failed: {}", _err);
1835
0
                self.core.is_match_nofail(cache, input)
1836
            }
1837
0
            Ok(None) => false,
1838
0
            Ok(Some(_)) => true,
1839
        }
1840
0
    }
1841
1842
    #[cfg_attr(feature = "perf-inline", inline(always))]
1843
0
    fn search_slots(
1844
0
        &self,
1845
0
        cache: &mut Cache,
1846
0
        input: &Input<'_>,
1847
0
        slots: &mut [Option<NonMaxUsize>],
1848
0
    ) -> Option<PatternID> {
1849
0
        if input.get_anchored().is_anchored() {
1850
0
            return self.core.search_slots(cache, input, slots);
1851
0
        }
1852
0
        if !self.core.is_capture_search_needed(slots.len()) {
1853
            trace!("asked for slots unnecessarily, trying fast path");
1854
0
            let m = self.search(cache, input)?;
1855
0
            copy_match_to_slots(m, slots);
1856
0
            return Some(m.pattern());
1857
0
        }
1858
0
        let m = match self.try_search_full(cache, input) {
1859
0
            Err(RetryError::Quadratic(_err)) => {
1860
0
                trace!("reverse inner captures optimization failed: {}", _err);
1861
0
                return self.core.search_slots(cache, input, slots);
1862
            }
1863
0
            Err(RetryError::Fail(_err)) => {
1864
0
                trace!("reverse inner fast captures search failed: {}", _err);
1865
0
                return self.core.search_slots_nofail(cache, input, slots);
1866
            }
1867
0
            Ok(None) => return None,
1868
0
            Ok(Some(m)) => m,
1869
0
        };
1870
0
        trace!(
1871
0
            "match found at {}..{} in capture search, \
1872
0
         using another engine to find captures",
1873
0
            m.start(),
1874
0
            m.end(),
1875
0
        );
1876
0
        let input = input
1877
0
            .clone()
1878
0
            .span(m.start()..m.end())
1879
0
            .anchored(Anchored::Pattern(m.pattern()));
1880
0
        self.core.search_slots_nofail(cache, &input, slots)
1881
0
    }
1882
1883
    #[cfg_attr(feature = "perf-inline", inline(always))]
1884
0
    fn which_overlapping_matches(
1885
0
        &self,
1886
0
        cache: &mut Cache,
1887
0
        input: &Input<'_>,
1888
0
        patset: &mut PatternSet,
1889
0
    ) {
1890
0
        self.core.which_overlapping_matches(cache, input, patset)
1891
0
    }
1892
}
1893
1894
/// Copies the offsets in the given match to the corresponding positions in
1895
/// `slots`.
1896
///
1897
/// In effect, this sets the slots corresponding to the implicit group for the
1898
/// pattern in the given match. If the indices for the corresponding slots do
1899
/// not exist, then no slots are set.
1900
///
1901
/// This is useful when the caller provides slots (or captures), but you use a
1902
/// regex engine that doesn't operate on slots (like a lazy DFA). This function
1903
/// lets you map the match you get back to the slots provided by the caller.
1904
#[cfg_attr(feature = "perf-inline", inline(always))]
1905
0
fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
1906
0
    let slot_start = m.pattern().as_usize() * 2;
1907
0
    let slot_end = slot_start + 1;
1908
0
    if let Some(slot) = slots.get_mut(slot_start) {
1909
0
        *slot = NonMaxUsize::new(m.start());
1910
0
    }
1911
0
    if let Some(slot) = slots.get_mut(slot_end) {
1912
0
        *slot = NonMaxUsize::new(m.end());
1913
0
    }
1914
0
}