vendor/regex-automata-0.3.9/src/nfa/thompson/pikevm.rs - toolchain/rustc - Git at Google

 /*!
 An NFA backed Pike VM for executing regex searches with capturing groups.

 This module provides a [`PikeVM`] that works by simulating an NFA and
 resolving all spans of capturing groups that participate in a match.
 */

 #[cfg(feature = "internal-instrument-pikevm")]
 use core::cell::RefCell;

 use alloc::{vec, vec::Vec};

 use crate::{
     nfa::thompson::{self, BuildError, State, NFA},
     util::{
         captures::Captures,
         empty, iter,
         prefilter::Prefilter,
         primitives::{NonMaxUsize, PatternID, SmallIndex, StateID},
         search::{
             Anchored, HalfMatch, Input, Match, MatchKind, PatternSet, Span,
         },
         sparse_set::SparseSet,
     },
 };

 /// A simple macro for conditionally executing instrumentation logic when
 /// the 'trace' log level is enabled. This is a compile-time no-op when the
 /// 'internal-instrument-pikevm' feature isn't enabled. The intent here is that
 /// this makes it easier to avoid doing extra work when instrumentation isn't
 /// enabled.
 ///
 /// This macro accepts a closure of type `|&mut Counters|`. The closure can
 /// then increment counters (or whatever) in accordance with what one wants
 /// to track.
 macro_rules! instrument {
     ($fun:expr) => {
         #[cfg(feature = "internal-instrument-pikevm")]
         {
             let fun: &mut dyn FnMut(&mut Counters) = &mut $fun;
             COUNTERS.with(|c: &RefCell<Counters>| fun(&mut *c.borrow_mut()));
         }
     };
 }

 #[cfg(feature = "internal-instrument-pikevm")]
 std::thread_local! {
     /// Effectively global state used to keep track of instrumentation
     /// counters. The "proper" way to do this is to thread it through the
     /// PikeVM, but it makes the code quite icky. Since this is just a
     /// debugging feature, we're content to relegate it to thread local
     /// state. When instrumentation is enabled, the counters are reset at the
     /// beginning of every search and printed (with the 'trace' log level) at
     /// the end of every search.
     static COUNTERS: RefCell<Counters> = RefCell::new(Counters::empty());
 }

 /// The configuration used for building a [`PikeVM`].
 ///
 /// A PikeVM configuration is a simple data object that is typically used with
 /// [`Builder::configure`]. It can be cheaply cloned.
 ///
 /// A default configuration can be created either with `Config::new`, or
 /// perhaps more conveniently, with [`PikeVM::config`].
 #[derive(Clone, Debug, Default)]
 pub struct Config {
     match_kind: Option<MatchKind>,
     pre: Option<Option<Prefilter>>,
 }

 impl Config {
     /// Return a new default PikeVM configuration.
     pub fn new() -> Config {
         Config::default()
     }

     /// Set the desired match semantics.
     ///
     /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the
     /// match semantics of Perl-like regex engines. That is, when multiple
     /// patterns would match at the same leftmost position, the pattern that
     /// appears first in the concrete syntax is chosen.
     ///
     /// Currently, the only other kind of match semantics supported is
     /// [`MatchKind::All`]. This corresponds to "classical DFA" construction
     /// where all possible matches are visited in the NFA by the `PikeVM`.
     ///
     /// Typically, `All` is used when one wants to execute an overlapping
     /// search and `LeftmostFirst` otherwise. In particular, it rarely makes
     /// sense to use `All` with the various "leftmost" find routines, since the
     /// leftmost routines depend on the `LeftmostFirst` automata construction
     /// strategy. Specifically, `LeftmostFirst` results in the `PikeVM`
     /// simulating dead states as a way to terminate the search and report a
     /// match. `LeftmostFirst` also supports non-greedy matches using this
     /// strategy where as `All` does not.
     pub fn match_kind(mut self, kind: MatchKind) -> Config {
         self.match_kind = Some(kind);
         self
     }

     /// Set a prefilter to be used whenever a start state is entered.
     ///
     /// A [`Prefilter`] in this context is meant to accelerate searches by
     /// looking for literal prefixes that every match for the corresponding
     /// pattern (or patterns) must start with. Once a prefilter produces a
     /// match, the underlying search routine continues on to try and confirm
     /// the match.
     ///
     /// Be warned that setting a prefilter does not guarantee that the search
     /// will be faster. While it's usually a good bet, if the prefilter
     /// produces a lot of false positive candidates (i.e., positions matched
     /// by the prefilter but not by the regex), then the overall result can
     /// be slower than if you had just executed the regex engine without any
     /// prefilters.
     ///
     /// By default no prefilter is set.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{
     ///     nfa::thompson::pikevm::PikeVM,
     ///     util::prefilter::Prefilter,
     ///     Input, Match, MatchKind,
     /// };
     ///
     /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]);
     /// let re = PikeVM::builder()
     ///     .configure(PikeVM::config().prefilter(pre))
     ///     .build(r"(foo|bar)[a-z]+")?;
     /// let mut cache = re.create_cache();
     /// let input = Input::new("foo1 barfox bar");
     /// assert_eq!(Some(Match::must(0, 5..11)), re.find(&mut cache, input));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// Be warned though that an incorrect prefilter can lead to incorrect
     /// results!
     ///
     /// ```
     /// use regex_automata::{
     ///     nfa::thompson::pikevm::PikeVM,
     ///     util::prefilter::Prefilter,
     ///     Input, HalfMatch, MatchKind,
     /// };
     ///
     /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]);
     /// let re = PikeVM::builder()
     ///     .configure(PikeVM::config().prefilter(pre))
     ///     .build(r"(foo|bar)[a-z]+")?;
     /// let mut cache = re.create_cache();
     /// let input = Input::new("foo1 barfox bar");
     /// // No match reported even though there clearly is one!
     /// assert_eq!(None, re.find(&mut cache, input));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config {
         self.pre = Some(pre);
         self
     }

     /// Returns the match semantics set in this configuration.
     pub fn get_match_kind(&self) -> MatchKind {
         self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
     }

     /// Returns the prefilter set in this configuration, if one at all.
     pub fn get_prefilter(&self) -> Option<&Prefilter> {
         self.pre.as_ref().unwrap_or(&None).as_ref()
     }

     /// Overwrite the default configuration such that the options in `o` are
     /// always used. If an option in `o` is not set, then the corresponding
     /// option in `self` is used. If it's not set in `self` either, then it
     /// remains not set.
     pub(crate) fn overwrite(&self, o: Config) -> Config {
         Config {
             match_kind: o.match_kind.or(self.match_kind),
             pre: o.pre.or_else(|| self.pre.clone()),
         }
     }
 }

 /// A builder for a `PikeVM`.
 ///
 /// This builder permits configuring options for the syntax of a pattern,
 /// the NFA construction and the `PikeVM` construction. This builder is
 /// different from a general purpose regex builder in that it permits fine
 /// grain configuration of the construction process. The trade off for this is
 /// complexity, and the possibility of setting a configuration that might not
 /// make sense. For example, there are two different UTF-8 modes:
 ///
 /// * [`util::syntax::Config::utf8`](crate::util::syntax::Config::utf8)
 /// controls whether the pattern itself can contain sub-expressions that match
 /// invalid UTF-8.
 /// * [`thompson::Config::utf8`] controls whether empty matches that split a
 /// Unicode codepoint are reported or not.
 ///
 /// Generally speaking, callers will want to either enable all of these or
 /// disable all of these.
 ///
 /// # Example
 ///
 /// This example shows how to disable UTF-8 mode in the syntax and the regex
 /// itself. This is generally what you want for matching on arbitrary bytes.
 ///
 /// ```
 /// use regex_automata::{
 ///     nfa::thompson::{self, pikevm::PikeVM},
 ///     util::syntax,
 ///     Match,
 /// };
 ///
 /// let re = PikeVM::builder()
 ///     .syntax(syntax::Config::new().utf8(false))
 ///     .thompson(thompson::Config::new().utf8(false))
 ///     .build(r"foo(?-u:[^b])ar.*")?;
 /// let mut cache = re.create_cache();
 ///
 /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
 /// let expected = Some(Match::must(0, 1..9));
 /// let got = re.find_iter(&mut cache, haystack).next();
 /// assert_eq!(expected, got);
 /// // Notice that `(?-u:[^b])` matches invalid UTF-8,
 /// // but the subsequent `.*` does not! Disabling UTF-8
 /// // on the syntax permits this.
 /// //
 /// // N.B. This example does not show the impact of
 /// // disabling UTF-8 mode on a PikeVM Config, since that
 /// // only impacts regexes that can produce matches of
 /// // length 0.
 /// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]);
 ///
 /// # Ok::<(), Box<dyn std::error::Error>>(())
 /// ```
 #[derive(Clone, Debug)]
 pub struct Builder {
     config: Config,
     #[cfg(feature = "syntax")]
     thompson: thompson::Compiler,
 }

 impl Builder {
     /// Create a new PikeVM builder with its default configuration.
     pub fn new() -> Builder {
         Builder {
             config: Config::default(),
             #[cfg(feature = "syntax")]
             thompson: thompson::Compiler::new(),
         }
     }

     /// Build a `PikeVM` from the given pattern.
     ///
     /// If there was a problem parsing or compiling the pattern, then an error
     /// is returned.
     #[cfg(feature = "syntax")]
     pub fn build(&self, pattern: &str) -> Result<PikeVM, BuildError> {
         self.build_many(&[pattern])
     }

     /// Build a `PikeVM` from the given patterns.
     #[cfg(feature = "syntax")]
     pub fn build_many<P: AsRef<str>>(
         &self,
         patterns: &[P],
     ) -> Result<PikeVM, BuildError> {
         let nfa = self.thompson.build_many(patterns)?;
         self.build_from_nfa(nfa)
     }

     /// Build a `PikeVM` directly from its NFA.
     ///
     /// Note that when using this method, any configuration that applies to the
     /// construction of the NFA itself will of course be ignored, since the NFA
     /// given here is already built.
     pub fn build_from_nfa(&self, nfa: NFA) -> Result<PikeVM, BuildError> {
         nfa.look_set_any().available().map_err(BuildError::word)?;
         Ok(PikeVM { config: self.config.clone(), nfa })
     }

     /// Apply the given `PikeVM` configuration options to this builder.
     pub fn configure(&mut self, config: Config) -> &mut Builder {
         self.config = self.config.overwrite(config);
         self
     }

     /// Set the syntax configuration for this builder using
     /// [`syntax::Config`](crate::util::syntax::Config).
     ///
     /// This permits setting things like case insensitivity, Unicode and multi
     /// line mode.
     ///
     /// These settings only apply when constructing a PikeVM directly from a
     /// pattern.
     #[cfg(feature = "syntax")]
     pub fn syntax(
         &mut self,
         config: crate::util::syntax::Config,
     ) -> &mut Builder {
         self.thompson.syntax(config);
         self
     }

     /// Set the Thompson NFA configuration for this builder using
     /// [`nfa::thompson::Config`](crate::nfa::thompson::Config).
     ///
     /// This permits setting things like if additional time should be spent
     /// shrinking the size of the NFA.
     ///
     /// These settings only apply when constructing a PikeVM directly from a
     /// pattern.
     #[cfg(feature = "syntax")]
     pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder {
         self.thompson.configure(config);
         self
     }
 }

 /// A virtual machine for executing regex searches with capturing groups.
 ///
 /// # Infallible APIs
 ///
 /// Unlike most other regex engines in this crate, a `PikeVM` never returns an
 /// error at search time. It supports all [`Anchored`] configurations, never
 /// quits and works on haystacks of arbitrary length.
 ///
 /// There are two caveats to mention though:
 ///
 /// * If an invalid pattern ID is given to a search via [`Anchored::Pattern`],
 /// then the PikeVM will report "no match." This is consistent with all other
 /// regex engines in this crate.
 /// * When using [`PikeVM::which_overlapping_matches`] with a [`PatternSet`]
 /// that has insufficient capacity to store all valid pattern IDs, then if a
 /// match occurs for a `PatternID` that cannot be inserted, it is silently
 /// dropped as if it did not match.
 ///
 /// # Advice
 ///
 /// The `PikeVM` is generally the most "powerful" regex engine in this crate.
 /// "Powerful" in this context means that it can handle any regular expression
 /// that is parseable by `regex-syntax` and any size haystack. Regretably,
 /// the `PikeVM` is also simultaneously often the _slowest_ regex engine in
 /// practice. This results in an annoying situation where one generally tries
 /// to pick any other regex engine (or perhaps none at all) before being
 /// forced to fall back to a `PikeVM`.
 ///
 /// For example, a common strategy for dealing with capturing groups is to
 /// actually look for the overall match of the regex using a faster regex
 /// engine, like a [lazy DFA](crate::hybrid::regex::Regex). Once the overall
 /// match is found, one can then run the `PikeVM` on just the match span to
 /// find the spans of the capturing groups. In this way, the faster regex
 /// engine does the majority of the work, while the `PikeVM` only lends its
 /// power in a more limited role.
 ///
 /// Unfortunately, this isn't always possible because the faster regex engines
 /// don't support all of the regex features in `regex-syntax`. This notably
 /// includes (and is currently limited to) Unicode word boundaries. So if
 /// your pattern has Unicode word boundaries, you typically can't use a
 /// DFA-based regex engine at all (unless you [enable heuristic support for
 /// it](crate::hybrid::dfa::Config::unicode_word_boundary)). (The [one-pass
 /// DFA](crate::dfa::onepass::DFA) can handle Unicode word boundaries for
 /// anchored searches only, but in a cruel sort of joke, many Unicode features
 /// tend to result in making the regex _not_ one-pass.)
 ///
 /// # Example
 ///
 /// This example shows that the `PikeVM` implements Unicode word boundaries
 /// correctly by default.
 ///
 /// ```
 /// # if cfg!(miri) { return Ok(()); } // miri takes too long
 /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
 ///
 /// let re = PikeVM::new(r"\b\w+\b")?;
 /// let mut cache = re.create_cache();
 ///
 /// let mut it = re.find_iter(&mut cache, "Шерлок Холмс");
 /// assert_eq!(Some(Match::must(0, 0..12)), it.next());
 /// assert_eq!(Some(Match::must(0, 13..23)), it.next());
 /// assert_eq!(None, it.next());
 /// # Ok::<(), Box<dyn std::error::Error>>(())
 /// ```
 #[derive(Clone, Debug)]
 pub struct PikeVM {
     config: Config,
     nfa: NFA,
 }

 impl PikeVM {
     /// Parse the given regular expression using the default configuration and
     /// return the corresponding `PikeVM`.
     ///
     /// If you want a non-default configuration, then use the [`Builder`] to
     /// set your own configuration.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re = PikeVM::new("foo[0-9]+bar")?;
     /// let mut cache = re.create_cache();
     /// assert_eq!(
     ///     Some(Match::must(0, 3..14)),
     ///     re.find_iter(&mut cache, "zzzfoo12345barzzz").next(),
     /// );
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[cfg(feature = "syntax")]
     pub fn new(pattern: &str) -> Result<PikeVM, BuildError> {
         PikeVM::builder().build(pattern)
     }

     /// Like `new`, but parses multiple patterns into a single "multi regex."
     /// This similarly uses the default regex configuration.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re = PikeVM::new_many(&["[a-z]+", "[0-9]+"])?;
     /// let mut cache = re.create_cache();
     ///
     /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux");
     /// assert_eq!(Some(Match::must(0, 0..3)), it.next());
     /// assert_eq!(Some(Match::must(1, 4..5)), it.next());
     /// assert_eq!(Some(Match::must(0, 6..9)), it.next());
     /// assert_eq!(Some(Match::must(1, 10..14)), it.next());
     /// assert_eq!(Some(Match::must(1, 15..16)), it.next());
     /// assert_eq!(Some(Match::must(0, 17..21)), it.next());
     /// assert_eq!(None, it.next());
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[cfg(feature = "syntax")]
     pub fn new_many<P: AsRef<str>>(
         patterns: &[P],
     ) -> Result<PikeVM, BuildError> {
         PikeVM::builder().build_many(patterns)
     }

     /// Like `new`, but builds a PikeVM directly from an NFA. This is useful
     /// if you already have an NFA, or even if you hand-assembled the NFA.
     ///
     /// # Example
     ///
     /// This shows how to hand assemble a regular expression via its HIR,
     /// compile an NFA from it and build a PikeVM from the NFA.
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match};
     /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange};
     ///
     /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![
     ///     ClassBytesRange::new(b'0', b'9'),
     ///     ClassBytesRange::new(b'A', b'Z'),
     ///     ClassBytesRange::new(b'_', b'_'),
     ///     ClassBytesRange::new(b'a', b'z'),
     /// ])));
     ///
     /// let config = NFA::config().nfa_size_limit(Some(1_000));
     /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?;
     ///
     /// let re = PikeVM::new_from_nfa(nfa)?;
     /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
     /// let expected = Some(Match::must(0, 3..4));
     /// re.captures(&mut cache, "!@#A#@!", &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn new_from_nfa(nfa: NFA) -> Result<PikeVM, BuildError> {
         PikeVM::builder().build_from_nfa(nfa)
     }

     /// Create a new `PikeVM` that matches every input.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re = PikeVM::always_match()?;
     /// let mut cache = re.create_cache();
     ///
     /// let expected = Match::must(0, 0..0);
     /// assert_eq!(Some(expected), re.find_iter(&mut cache, "").next());
     /// assert_eq!(Some(expected), re.find_iter(&mut cache, "foo").next());
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn always_match() -> Result<PikeVM, BuildError> {
         let nfa = thompson::NFA::always_match();
         PikeVM::new_from_nfa(nfa)
     }

     /// Create a new `PikeVM` that never matches any input.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::nfa::thompson::pikevm::PikeVM;
     ///
     /// let re = PikeVM::never_match()?;
     /// let mut cache = re.create_cache();
     ///
     /// assert_eq!(None, re.find_iter(&mut cache, "").next());
     /// assert_eq!(None, re.find_iter(&mut cache, "foo").next());
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn never_match() -> Result<PikeVM, BuildError> {
         let nfa = thompson::NFA::never_match();
         PikeVM::new_from_nfa(nfa)
     }

     /// Return a default configuration for a `PikeVM`.
     ///
     /// This is a convenience routine to avoid needing to import the `Config`
     /// type when customizing the construction of a `PikeVM`.
     ///
     /// # Example
     ///
     /// This example shows how to disable UTF-8 mode. When UTF-8 mode is
     /// disabled, zero-width matches that split a codepoint are allowed.
     /// Otherwise they are never reported.
     ///
     /// In the code below, notice that `""` is permitted to match positions
     /// that split the encoding of a codepoint.
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::{self, pikevm::PikeVM}, Match};
     ///
     /// let re = PikeVM::builder()
     ///     .thompson(thompson::Config::new().utf8(false))
     ///     .build(r"")?;
     /// let mut cache = re.create_cache();
     ///
     /// let haystack = "a☃z";
     /// let mut it = re.find_iter(&mut cache, haystack);
     /// assert_eq!(Some(Match::must(0, 0..0)), it.next());
     /// assert_eq!(Some(Match::must(0, 1..1)), it.next());
     /// assert_eq!(Some(Match::must(0, 2..2)), it.next());
     /// assert_eq!(Some(Match::must(0, 3..3)), it.next());
     /// assert_eq!(Some(Match::must(0, 4..4)), it.next());
     /// assert_eq!(Some(Match::must(0, 5..5)), it.next());
     /// assert_eq!(None, it.next());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn config() -> Config {
         Config::new()
     }

     /// Return a builder for configuring the construction of a `PikeVM`.
     ///
     /// This is a convenience routine to avoid needing to import the
     /// [`Builder`] type in common cases.
     ///
     /// # Example
     ///
     /// This example shows how to use the builder to disable UTF-8 mode
     /// everywhere.
     ///
     /// ```
     /// use regex_automata::{
     ///     nfa::thompson::{self, pikevm::PikeVM},
     ///     util::syntax,
     ///     Match,
     /// };
     ///
     /// let re = PikeVM::builder()
     ///     .syntax(syntax::Config::new().utf8(false))
     ///     .thompson(thompson::Config::new().utf8(false))
     ///     .build(r"foo(?-u:[^b])ar.*")?;
     /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
     ///
     /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n";
     /// let expected = Some(Match::must(0, 1..9));
     /// re.captures(&mut cache, haystack, &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn builder() -> Builder {
         Builder::new()
     }

     /// Create a new empty set of capturing groups that is guaranteed to be
     /// valid for the search APIs on this `PikeVM`.
     ///
     /// A `Captures` value created for a specific `PikeVM` cannot be used with
     /// any other `PikeVM`.
     ///
     /// This is a convenience function for [`Captures::all`]. See the
     /// [`Captures`] documentation for an explanation of its alternative
     /// constructors that permit the `PikeVM` to do less work during a search,
     /// and thus might make it faster.
     pub fn create_captures(&self) -> Captures {
         Captures::all(self.get_nfa().group_info().clone())
     }

     /// Create a new cache for this `PikeVM`.
     ///
     /// The cache returned should only be used for searches for this
     /// `PikeVM`. If you want to reuse the cache for another `PikeVM`, then
     /// you must call [`Cache::reset`] with that `PikeVM` (or, equivalently,
     /// [`PikeVM::reset_cache`]).
     pub fn create_cache(&self) -> Cache {
         Cache::new(self)
     }

     /// Reset the given cache such that it can be used for searching with the
     /// this `PikeVM` (and only this `PikeVM`).
     ///
     /// A cache reset permits reusing memory already allocated in this cache
     /// with a different `PikeVM`.
     ///
     /// # Example
     ///
     /// This shows how to re-purpose a cache for use with a different `PikeVM`.
     ///
     /// ```
     /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re1 = PikeVM::new(r"\w")?;
     /// let re2 = PikeVM::new(r"\W")?;
     ///
     /// let mut cache = re1.create_cache();
     /// assert_eq!(
     ///     Some(Match::must(0, 0..2)),
     ///     re1.find_iter(&mut cache, "Δ").next(),
     /// );
     ///
     /// // Using 'cache' with re2 is not allowed. It may result in panics or
     /// // incorrect results. In order to re-purpose the cache, we must reset
     /// // it with the PikeVM we'd like to use it with.
     /// //
     /// // Similarly, after this reset, using the cache with 're1' is also not
     /// // allowed.
     /// re2.reset_cache(&mut cache);
     /// assert_eq!(
     ///     Some(Match::must(0, 0..3)),
     ///     re2.find_iter(&mut cache, "☃").next(),
     /// );
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn reset_cache(&self, cache: &mut Cache) {
         cache.reset(self);
     }

     /// Returns the total number of patterns compiled into this `PikeVM`.
     ///
     /// In the case of a `PikeVM` that contains no patterns, this returns `0`.
     ///
     /// # Example
     ///
     /// This example shows the pattern length for a `PikeVM` that never
     /// matches:
     ///
     /// ```
     /// use regex_automata::nfa::thompson::pikevm::PikeVM;
     ///
     /// let re = PikeVM::never_match()?;
     /// assert_eq!(re.pattern_len(), 0);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// And another example for a `PikeVM` that matches at every position:
     ///
     /// ```
     /// use regex_automata::nfa::thompson::pikevm::PikeVM;
     ///
     /// let re = PikeVM::always_match()?;
     /// assert_eq!(re.pattern_len(), 1);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// And finally, a `PikeVM` that was constructed from multiple patterns:
     ///
     /// ```
     /// use regex_automata::nfa::thompson::pikevm::PikeVM;
     ///
     /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?;
     /// assert_eq!(re.pattern_len(), 3);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn pattern_len(&self) -> usize {
         self.nfa.pattern_len()
     }

     /// Return the config for this `PikeVM`.
     #[inline]
     pub fn get_config(&self) -> &Config {
         &self.config
     }

     /// Returns a reference to the underlying NFA.
     #[inline]
     pub fn get_nfa(&self) -> &NFA {
         &self.nfa
     }
 }

 impl PikeVM {
     /// Returns true if and only if this `PikeVM` matches the given haystack.
     ///
     /// This routine may short circuit if it knows that scanning future
     /// input will never lead to a different result. In particular, if the
     /// underlying NFA enters a match state, then this routine will return
     /// `true` immediately without inspecting any future input. (Consider how
     /// this might make a difference given the regex `a+` on the haystack
     /// `aaaaaaaaaaaaaaa`. This routine can stop after it sees the first `a`,
     /// but routines like `find` need to continue searching because `+` is
     /// greedy by default.)
     ///
     /// # Example
     ///
     /// This shows basic usage:
     ///
     /// ```
     /// use regex_automata::nfa::thompson::pikevm::PikeVM;
     ///
     /// let re = PikeVM::new("foo[0-9]+bar")?;
     /// let mut cache = re.create_cache();
     ///
     /// assert!(re.is_match(&mut cache, "foo12345bar"));
     /// assert!(!re.is_match(&mut cache, "foobar"));
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// # Example: consistency with search APIs
     ///
     /// `is_match` is guaranteed to return `true` whenever `find` returns a
     /// match. This includes searches that are executed entirely within a
     /// codepoint:
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Input};
     ///
     /// let re = PikeVM::new("a*")?;
     /// let mut cache = re.create_cache();
     ///
     /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2)));
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// Notice that when UTF-8 mode is disabled, then the above reports a
     /// match because the restriction against zero-width matches that split a
     /// codepoint has been lifted:
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::{pikevm::PikeVM, NFA}, Input};
     ///
     /// let re = PikeVM::builder()
     ///     .thompson(NFA::config().utf8(false))
     ///     .build("a*")?;
     /// let mut cache = re.create_cache();
     ///
     /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2)));
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn is_match<'h, I: Into<Input<'h>>>(
         &self,
         cache: &mut Cache,
         input: I,
     ) -> bool {
         let input = input.into().earliest(true);
         self.search_slots(cache, &input, &mut []).is_some()
     }

     /// Executes a leftmost forward search and returns a `Match` if one exists.
     ///
     /// This routine only includes the overall match span. To get access to the
     /// individual spans of each capturing group, use [`PikeVM::captures`].
     ///
     /// # Example
     ///
     /// Leftmost first match semantics corresponds to the match with the
     /// smallest starting offset, but where the end offset is determined by
     /// preferring earlier branches in the original regular expression. For
     /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam`
     /// will match `Samwise` in `Samwise`.
     ///
     /// Generally speaking, the "leftmost first" match is how most backtracking
     /// regular expressions tend to work. This is in contrast to POSIX-style
     /// regular expressions that yield "leftmost longest" matches. Namely,
     /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using
     /// leftmost longest semantics. (This crate does not currently support
     /// leftmost longest semantics.)
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re = PikeVM::new("foo[0-9]+")?;
     /// let mut cache = re.create_cache();
     /// let expected = Match::must(0, 0..8);
     /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345"));
     ///
     /// // Even though a match is found after reading the first byte (`a`),
     /// // the leftmost first match semantics demand that we find the earliest
     /// // match that prefers earlier parts of the pattern over later parts.
     /// let re = PikeVM::new("abc|a")?;
     /// let mut cache = re.create_cache();
     /// let expected = Match::must(0, 0..3);
     /// assert_eq!(Some(expected), re.find(&mut cache, "abc"));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn find<'h, I: Into<Input<'h>>>(
         &self,
         cache: &mut Cache,
         input: I,
     ) -> Option<Match> {
         let input = input.into();
         if self.get_nfa().pattern_len() == 1 {
             let mut slots = [None, None];
             let pid = self.search_slots(cache, &input, &mut slots)?;
             let start = slots[0]?.get();
             let end = slots[1]?.get();
             return Some(Match::new(pid, Span { start, end }));
         }
         let ginfo = self.get_nfa().group_info();
         let slots_len = ginfo.implicit_slot_len();
         let mut slots = vec![None; slots_len];
         let pid = self.search_slots(cache, &input, &mut slots)?;
         let start = slots[pid.as_usize() * 2]?.get();
         let end = slots[pid.as_usize() * 2 + 1]?.get();
         Some(Match::new(pid, Span { start, end }))
     }

     /// Executes a leftmost forward search and writes the spans of capturing
     /// groups that participated in a match into the provided [`Captures`]
     /// value. If no match was found, then [`Captures::is_match`] is guaranteed
     /// to return `false`.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
     ///
     /// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?;
     /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
     ///
     /// re.captures(&mut cache, "2010-03-14", &mut caps);
     /// assert!(caps.is_match());
     /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1));
     /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2));
     /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn captures<'h, I: Into<Input<'h>>>(
         &self,
         cache: &mut Cache,
         input: I,
         caps: &mut Captures,
     ) {
         self.search(cache, &input.into(), caps)
     }

     /// Returns an iterator over all non-overlapping leftmost matches in the
     /// given bytes. If no match exists, then the iterator yields no elements.
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re = PikeVM::new("foo[0-9]+")?;
     /// let mut cache = re.create_cache();
     ///
     /// let text = "foo1 foo12 foo123";
     /// let matches: Vec<Match> = re.find_iter(&mut cache, text).collect();
     /// assert_eq!(matches, vec![
     ///     Match::must(0, 0..4),
     ///     Match::must(0, 5..10),
     ///     Match::must(0, 11..17),
     /// ]);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn find_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
         &'r self,
         cache: &'c mut Cache,
         input: I,
     ) -> FindMatches<'r, 'c, 'h> {
         let caps = Captures::matches(self.get_nfa().group_info().clone());
         let it = iter::Searcher::new(input.into());
         FindMatches { re: self, cache, caps, it }
     }

     /// Returns an iterator over all non-overlapping `Captures` values. If no
     /// match exists, then the iterator yields no elements.
     ///
     /// This yields the same matches as [`PikeVM::find_iter`], but it includes
     /// the spans of all capturing groups that participate in each match.
     ///
     /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for
     /// how to correctly iterate over all matches in a haystack while avoiding
     /// the creation of a new `Captures` value for every match. (Which you are
     /// forced to do with an `Iterator`.)
     ///
     /// # Example
     ///
     /// ```
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span};
     ///
     /// let re = PikeVM::new("foo(?P<numbers>[0-9]+)")?;
     /// let mut cache = re.create_cache();
     ///
     /// let text = "foo1 foo12 foo123";
     /// let matches: Vec<Span> = re
     ///     .captures_iter(&mut cache, text)
     ///     // The unwrap is OK since 'numbers' matches if the pattern matches.
     ///     .map(|caps| caps.get_group_by_name("numbers").unwrap())
     ///     .collect();
     /// assert_eq!(matches, vec![
     ///     Span::from(3..4),
     ///     Span::from(8..10),
     ///     Span::from(14..17),
     /// ]);
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn captures_iter<'r, 'c, 'h, I: Into<Input<'h>>>(
         &'r self,
         cache: &'c mut Cache,
         input: I,
     ) -> CapturesMatches<'r, 'c, 'h> {
         let caps = self.create_captures();
         let it = iter::Searcher::new(input.into());
         CapturesMatches { re: self, cache, caps, it }
     }
 }

 impl PikeVM {
     /// Executes a leftmost forward search and writes the spans of capturing
     /// groups that participated in a match into the provided [`Captures`]
     /// value. If no match was found, then [`Captures::is_match`] is guaranteed
     /// to return `false`.
     ///
     /// This is like [`PikeVM::captures`], but it accepts a concrete `&Input`
     /// instead of an `Into<Input>`.
     ///
     /// # Example: specific pattern search
     ///
     /// This example shows how to build a multi-PikeVM that permits searching
     /// for specific patterns.
     ///
     /// ```
     /// use regex_automata::{
     ///     nfa::thompson::pikevm::PikeVM,
     ///     Anchored, Match, PatternID, Input,
     /// };
     ///
     /// let re = PikeVM::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?;
     /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
     /// let haystack = "foo123";
     ///
     /// // Since we are using the default leftmost-first match and both
     /// // patterns match at the same starting position, only the first pattern
     /// // will be returned in this case when doing a search for any of the
     /// // patterns.
     /// let expected = Some(Match::must(0, 0..6));
     /// re.search(&mut cache, &Input::new(haystack), &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// // But if we want to check whether some other pattern matches, then we
     /// // can provide its pattern ID.
     /// let expected = Some(Match::must(1, 0..6));
     /// let input = Input::new(haystack)
     ///     .anchored(Anchored::Pattern(PatternID::must(1)));
     /// re.search(&mut cache, &input, &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     ///
     /// # Example: specifying the bounds of a search
     ///
     /// This example shows how providing the bounds of a search can produce
     /// different results than simply sub-slicing the haystack.
     ///
     /// ```
     /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input};
     ///
     /// let re = PikeVM::new(r"\b[0-9]{3}\b")?;
     /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures());
     /// let haystack = "foo123bar";
     ///
     /// // Since we sub-slice the haystack, the search doesn't know about
     /// // the larger context and assumes that `123` is surrounded by word
     /// // boundaries. And of course, the match position is reported relative
     /// // to the sub-slice as well, which means we get `0..3` instead of
     /// // `3..6`.
     /// let expected = Some(Match::must(0, 0..3));
     /// re.search(&mut cache, &Input::new(&haystack[3..6]), &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// // But if we provide the bounds of the search within the context of the
     /// // entire haystack, then the search can take the surrounding context
     /// // into account. (And if we did find a match, it would be reported
     /// // as a valid offset into `haystack` instead of its sub-slice.)
     /// let expected = None;
     /// let input = Input::new(haystack).range(3..6);
     /// re.search(&mut cache, &input, &mut caps);
     /// assert_eq!(expected, caps.get_match());
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn search(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         caps: &mut Captures,
     ) {
         caps.set_pattern(None);
         let pid = self.search_slots(cache, input, caps.slots_mut());
         caps.set_pattern(pid);
     }

     /// Executes a leftmost forward search and writes the spans of capturing
     /// groups that participated in a match into the provided `slots`, and
     /// returns the matching pattern ID. The contents of the slots for patterns
     /// other than the matching pattern are unspecified. If no match was found,
     /// then `None` is returned and the contents of `slots` is unspecified.
     ///
     /// This is like [`PikeVM::search`], but it accepts a raw slots slice
     /// instead of a `Captures` value. This is useful in contexts where you
     /// don't want or need to allocate a `Captures`.
     ///
     /// It is legal to pass _any_ number of slots to this routine. If the regex
     /// engine would otherwise write a slot offset that doesn't fit in the
     /// provided slice, then it is simply skipped. In general though, there are
     /// usually three slice lengths you might want to use:
     ///
     /// * An empty slice, if you only care about which pattern matched.
     /// * A slice with
     /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len)
     /// slots, if you only care about the overall match spans for each matching
     /// pattern.
     /// * A slice with
     /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which
     /// permits recording match offsets for every capturing group in every
     /// pattern.
     ///
     /// # Example
     ///
     /// This example shows how to find the overall match offsets in a
     /// multi-pattern search without allocating a `Captures` value. Indeed, we
     /// can put our slots right on the stack.
     ///
     /// ```
     /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID, Input};
     ///
     /// let re = PikeVM::new_many(&[
     ///     r"\pL+",
     ///     r"\d+",
     /// ])?;
     /// let mut cache = re.create_cache();
     /// let input = Input::new("!@#123");
     ///
     /// // We only care about the overall match offsets here, so we just
     /// // allocate two slots for each pattern. Each slot records the start
     /// // and end of the match.
     /// let mut slots = [None; 4];
     /// let pid = re.search_slots(&mut cache, &input, &mut slots);
     /// assert_eq!(Some(PatternID::must(1)), pid);
     ///
     /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'.
     /// // See 'GroupInfo' for more details on the mapping between groups and
     /// // slot indices.
     /// let slot_start = pid.unwrap().as_usize() * 2;
     /// let slot_end = slot_start + 1;
     /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get()));
     /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get()));
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn search_slots(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         slots: &mut [Option<NonMaxUsize>],
     ) -> Option<PatternID> {
         let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
         if !utf8empty {
             let hm = self.search_slots_imp(cache, input, slots)?;
             return Some(hm.pattern());
         }
         // There is an unfortunate special case where if the regex can
         // match the empty string and UTF-8 mode is enabled, the search
         // implementation requires that the slots have at least as much space
         // to report the bounds of any match. This is so zero-width matches
         // that split a codepoint can be filtered out.
         //
         // Note that if utf8empty is true, we specialize the case for when
         // the number of patterns is 1. In that case, we can just use a stack
         // allocation. Otherwise we resort to a heap allocation, which we
         // convince ourselves we're fine with due to the pathological nature of
         // this case.
         let min = self.get_nfa().group_info().implicit_slot_len();
         if slots.len() >= min {
             let hm = self.search_slots_imp(cache, input, slots)?;
             return Some(hm.pattern());
         }
         if self.get_nfa().pattern_len() == 1 {
             let mut enough = [None, None];
             let got = self.search_slots_imp(cache, input, &mut enough);
             // This is OK because we know `enough` is strictly bigger than
             // `slots`, otherwise this special case isn't reached.
             slots.copy_from_slice(&enough[..slots.len()]);
             return got.map(|hm| hm.pattern());
         }
         let mut enough = vec![None; min];
         let got = self.search_slots_imp(cache, input, &mut enough);
         // This is OK because we know `enough` is strictly bigger than `slots`,
         // otherwise this special case isn't reached.
         slots.copy_from_slice(&enough[..slots.len()]);
         got.map(|hm| hm.pattern())
     }

     /// This is the actual implementation of `search_slots_imp` that
     /// doesn't account for the special case when 1) the NFA has UTF-8 mode
     /// enabled, 2) the NFA can match the empty string and 3) the caller has
     /// provided an insufficient number of slots to record match offsets.
     #[inline(never)]
     fn search_slots_imp(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         slots: &mut [Option<NonMaxUsize>],
     ) -> Option<HalfMatch> {
         let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
         let hm = match self.search_imp(cache, input, slots) {
             None => return None,
             Some(hm) if !utf8empty => return Some(hm),
             Some(hm) => hm,
         };
         empty::skip_splits_fwd(input, hm, hm.offset(), |input| {
             Ok(self
                 .search_imp(cache, input, slots)
                 .map(|hm| (hm, hm.offset())))
         })
         // OK because the PikeVM never errors.
         .unwrap()
     }

     /// Writes the set of patterns that match anywhere in the given search
     /// configuration to `patset`. If multiple patterns match at the same
     /// position and this `PikeVM` was configured with [`MatchKind::All`]
     /// semantics, then all matching patterns are written to the given set.
     ///
     /// Unless all of the patterns in this `PikeVM` are anchored, then
     /// generally speaking, this will visit every byte in the haystack.
     ///
     /// This search routine *does not* clear the pattern set. This gives some
     /// flexibility to the caller (e.g., running multiple searches with the
     /// same pattern set), but does make the API bug-prone if you're reusing
     /// the same pattern set for multiple searches but intended them to be
     /// independent.
     ///
     /// If a pattern ID matched but the given `PatternSet` does not have
     /// sufficient capacity to store it, then it is not inserted and silently
     /// dropped.
     ///
     /// # Example
     ///
     /// This example shows how to find all matching patterns in a haystack,
     /// even when some patterns match at the same position as other patterns.
     ///
     /// ```
     /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{
     ///     nfa::thompson::pikevm::PikeVM,
     ///     Input, MatchKind, PatternSet,
     /// };
     ///
     /// let patterns = &[
     ///     r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar",
     /// ];
     /// let re = PikeVM::builder()
     ///     .configure(PikeVM::config().match_kind(MatchKind::All))
     ///     .build_many(patterns)?;
     /// let mut cache = re.create_cache();
     ///
     /// let input = Input::new("foobar");
     /// let mut patset = PatternSet::new(re.pattern_len());
     /// re.which_overlapping_matches(&mut cache, &input, &mut patset);
     /// let expected = vec![0, 2, 3, 4, 6];
     /// let got: Vec<usize> = patset.iter().map(|p| p.as_usize()).collect();
     /// assert_eq!(expected, got);
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     #[inline]
     pub fn which_overlapping_matches(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         patset: &mut PatternSet,
     ) {
         self.which_overlapping_imp(cache, input, patset)
     }
 }

 impl PikeVM {
     /// The implementation of standard leftmost search.
     ///
     /// Capturing group spans are written to `slots`, but only if requested.
     /// `slots` can be any length. Any slot in the NFA that is activated but
     /// which is out of bounds for the given `slots` is ignored.
     fn search_imp(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         slots: &mut [Option<NonMaxUsize>],
     ) -> Option<HalfMatch> {
         cache.setup_search(slots.len());
         if input.is_done() {
             return None;
         }
         // Why do we even care about this? Well, in our 'Captures'
         // representation, we use usize::MAX as a sentinel to indicate "no
         // match." This isn't problematic so long as our haystack doesn't have
         // a maximal length. Byte slices are guaranteed by Rust to have a
         // length that fits into isize, and so this assert should always pass.
         // But we put it here to make our assumption explicit.
         assert!(
             input.haystack().len() < core::usize::MAX,
             "byte slice lengths must be less than usize MAX",
         );
         instrument!(|c| c.reset(&self.nfa));

         // Whether we want to visit all match states instead of emulating the
         // 'leftmost' semantics of typical backtracking regex engines.
         let allmatches =
             self.config.get_match_kind().continue_past_first_match();
         let (anchored, start_id) = match self.start_config(input) {
             None => return None,
             Some(config) => config,
         };

         let pre =
             if anchored { None } else { self.get_config().get_prefilter() };
         let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
         let mut hm = None;
         // Yes, our search doesn't end at input.end(), but includes it. This
         // is necessary because matches are delayed by one byte, just like
         // how the DFA engines work. The delay is used to handle look-behind
         // assertions. In the case of the PikeVM, the delay is implemented
         // by not considering a match to exist until it is visited in
         // 'steps'. Technically, we know a match exists in the previous
         // iteration via 'epsilon_closure'. (It's the same thing in NFA-to-DFA
         // determinization. We don't mark a DFA state as a match state if it
         // contains an NFA match state, but rather, whether the DFA state was
         // generated by a transition from a DFA state that contains an NFA
         // match state.)
         let mut at = input.start();
         while at <= input.end() {
             // If we have no states left to visit, then there are some cases
             // where we know we can quit early or even skip ahead.
             if curr.set.is_empty() {
                 // We have a match and we haven't been instructed to continue
                 // on even after finding a match, so we can quit.
                 if hm.is_some() && !allmatches {
                     break;
                 }
                 // If we're running an anchored search and we've advanced
                 // beyond the start position with no other states to try, then
                 // we will never observe a match and thus can stop.
                 if anchored && at > input.start() {
                     break;
                 }
                 // If there no states left to explore at this position and we
                 // know we can't terminate early, then we are effectively at
                 // the starting state of the NFA. If we fell through here,
                 // we'd end up adding our '(?s-u:.)*?' prefix and it would be
                 // the only thing in 'curr'. So we might as well just skip
                 // ahead until we find something that we know might advance us
                 // forward.
                 if let Some(ref pre) = pre {
                     let span = Span::from(at..input.end());
                     match pre.find(input.haystack(), span) {
                         None => break,
                         Some(ref span) => at = span.start,
                     }
                 }
             }
             // Instead of using the NFA's unanchored start state, we actually
             // always use its anchored starting state. As a result, when doing
             // an unanchored search, we need to simulate our own '(?s-u:.)*?'
             // prefix, to permit a match to appear anywhere.
             //
             // Now, we don't *have* to do things this way. We could use the
             // NFA's unanchored starting state and do one 'epsilon_closure'
             // call from that starting state before the main loop here. And
             // that is just as correct. However, it turns out to be slower
             // than our approach here because it slightly increases the cost
             // of processing each byte by requiring us to visit more NFA
             // states to deal with the additional NFA states in the unanchored
             // prefix. By simulating it explicitly here, we lower those costs
             // substantially. The cost is itself small, but it adds up for
             // large haystacks.
             //
             // In order to simulate the '(?s-u:.)*?' prefix---which is not
             // greedy---we are careful not to perform an epsilon closure on
             // the start state if we already have a match. Namely, if we
             // did otherwise, we would never reach a terminating condition
             // because there would always be additional states to process.
             // In effect, the exclusion of running 'epsilon_closure' when
             // we have a match corresponds to the "dead" states we have in
             // our DFA regex engines. Namely, in a DFA, match states merely
             // instruct the search execution to record the current offset as
             // the most recently seen match. It is the dead state that actually
             // indicates when to stop the search (other than EOF or quit
             // states).
             //
             // However, when 'allmatches' is true, the caller has asked us to
             // leave in every possible match state. This tends not to make a
             // whole lot of sense in unanchored searches, because it means the
             // search really cannot terminate until EOF. And often, in that
             // case, you wind up skipping over a bunch of matches and are left
             // with the "last" match. Arguably, it just doesn't make a lot of
             // sense to run a 'leftmost' search (which is what this routine is)
             // with 'allmatches' set to true. But the DFAs support it and this
             // matches their behavior. (Generally, 'allmatches' is useful for
             // overlapping searches or leftmost anchored searches to find the
             // longest possible match by ignoring match priority.)
             //
             // Additionally, when we're running an anchored search, this
             // epsilon closure should only be computed at the beginning of the
             // search. If we re-computed it at every position, we would be
             // simulating an unanchored search when we were tasked to perform
             // an anchored search.
             if (!hm.is_some() || allmatches)
                 && (!anchored || at == input.start())
             {
                 // Since we are adding to the 'curr' active states and since
                 // this is for the start ID, we use a slots slice that is
                 // guaranteed to have the right length but where every element
                 // is absent. This is exactly what we want, because this
                 // epsilon closure is responsible for simulating an unanchored
                 // '(?s:.)*?' prefix. It is specifically outside of any
                 // capturing groups, and thus, using slots that are always
                 // absent is correct.
                 //
                 // Note though that we can't just use '&mut []' here, since
                 // this epsilon closure may traverse through 'Captures' epsilon
                 // transitions, and thus must be able to write offsets to the
                 // slots given which are later copied to slot values in 'curr'.
                 let slots = next.slot_table.all_absent();
                 self.epsilon_closure(stack, slots, curr, input, at, start_id);
             }
             if let Some(pid) = self.nexts(stack, curr, next, input, at, slots)
             {
                 hm = Some(HalfMatch::new(pid, at));
             }
             // Unless the caller asked us to return early, we need to mush on
             // to see if we can extend our match. (But note that 'nexts' will
             // quit right after seeing a match when match_kind==LeftmostFirst,
             // as is consistent with leftmost-first match priority.)
             if input.get_earliest() && hm.is_some() {
                 break;
             }
             core::mem::swap(curr, next);
             next.set.clear();
             at += 1;
         }
         instrument!(|c| c.eprint(&self.nfa));
         hm
     }

     /// The implementation for the 'which_overlapping_matches' API. Basically,
     /// we do a single scan through the entire haystack (unless our regex
     /// or search is anchored) and record every pattern that matched. In
     /// particular, when MatchKind::All is used, this supports overlapping
     /// matches. So if we have the regexes 'sam' and 'samwise', they will
     /// *both* be reported in the pattern set when searching the haystack
     /// 'samwise'.
     fn which_overlapping_imp(
         &self,
         cache: &mut Cache,
         input: &Input<'_>,
         patset: &mut PatternSet,
     ) {
         // NOTE: This is effectively a copy of 'search_imp' above, but with no
         // captures support and instead writes patterns that matched directly
         // to 'patset'. See that routine for better commentary about what's
         // going on in this routine. We probably could unify the routines using
         // generics or more helper routines, but I'm not sure it's worth it.
         //
         // NOTE: We somewhat go out of our way here to support things like
         // 'input.get_earliest()' and 'leftmost-first' match semantics. Neither
         // of those seem particularly relevant to this routine, but they are
         // both supported by the DFA analogs of this routine by construction
         // and composition, so it seems like good sense to have the PikeVM
         // match that behavior.

         cache.setup_search(0);
         if input.is_done() {
             return;
         }
         assert!(
             input.haystack().len() < core::usize::MAX,
             "byte slice lengths must be less than usize MAX",
         );
         instrument!(|c| c.reset(&self.nfa));

         let allmatches =
             self.config.get_match_kind().continue_past_first_match();
         let (anchored, start_id) = match self.start_config(input) {
             None => return,
             Some(config) => config,
         };

         let Cache { ref mut stack, ref mut curr, ref mut next } = cache;
         for at in input.start()..=input.end() {
             let any_matches = !patset.is_empty();
             if curr.set.is_empty() {
                 if any_matches && !allmatches {
                     break;
                 }
                 if anchored && at > input.start() {
                     break;
                 }
             }
             if !any_matches || allmatches {
                 let slots = &mut [];
                 self.epsilon_closure(stack, slots, curr, input, at, start_id);
             }
             self.nexts_overlapping(stack, curr, next, input, at, patset);
             // If we found a match and filled our set, then there is no more
             // additional info that we can provide. Thus, we can quit. We also
             // quit if the caller asked us to stop at the earliest point that
             // we know a match exists.
             if patset.is_full() || input.get_earliest() {
                 break;
             }
             core::mem::swap(curr, next);
             next.set.clear();
         }
         instrument!(|c| c.eprint(&self.nfa));
     }

     /// Process the active states in 'curr' to find the states (written to
     /// 'next') we should process for the next byte in the haystack.
     ///
     /// 'stack' is used to perform a depth first traversal of the NFA when
     /// computing an epsilon closure.
     ///
     /// When a match is found, the slots for that match state (in 'curr') are
     /// copied to 'caps'. Moreover, once a match is seen, processing for 'curr'
     /// stops (unless the PikeVM was configured with MatchKind::All semantics).
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn nexts(
         &self,
         stack: &mut Vec<FollowEpsilon>,
         curr: &mut ActiveStates,
         next: &mut ActiveStates,
         input: &Input<'_>,
         at: usize,
         slots: &mut [Option<NonMaxUsize>],
     ) -> Option<PatternID> {
         instrument!(|c| c.record_state_set(&curr.set));
         let mut pid = None;
         let ActiveStates { ref set, ref mut slot_table } = *curr;
         for sid in set.iter() {
             pid = match self.next(stack, slot_table, next, input, at, sid) {
                 None => continue,
                 Some(pid) => Some(pid),
             };
             slots.copy_from_slice(slot_table.for_state(sid));
             if !self.config.get_match_kind().continue_past_first_match() {
                 break;
             }
         }
         pid
     }

     /// Like 'nexts', but for the overlapping case. This doesn't write any
     /// slots, and instead just writes which pattern matched in 'patset'.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn nexts_overlapping(
         &self,
         stack: &mut Vec<FollowEpsilon>,
         curr: &mut ActiveStates,
         next: &mut ActiveStates,
         input: &Input<'_>,
         at: usize,
         patset: &mut PatternSet,
     ) {
         instrument!(|c| c.record_state_set(&curr.set));
         let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8();
         let ActiveStates { ref set, ref mut slot_table } = *curr;
         for sid in set.iter() {
             let pid = match self.next(stack, slot_table, next, input, at, sid)
             {
                 None => continue,
                 Some(pid) => pid,
             };
             // This handles the case of finding a zero-width match that splits
             // a codepoint. Namely, if we're in UTF-8 mode AND we know we can
             // match the empty string, then the only valid way of getting to
             // this point with an offset that splits a codepoint is when we
             // have an empty match. Such matches, in UTF-8 mode, must not be
             // reported. So we just skip them here and pretend as if we did
             // not see a match.
             if utf8empty && !input.is_char_boundary(at) {
                 continue;
             }
             let _ = patset.try_insert(pid);
             if !self.config.get_match_kind().continue_past_first_match() {
                 break;
             }
         }
     }

     /// Starting from 'sid', if the position 'at' in the 'input' haystack has a
     /// transition defined out of 'sid', then add the state transitioned to and
     /// its epsilon closure to the 'next' set of states to explore.
     ///
     /// 'stack' is used by the epsilon closure computation to perform a depth
     /// first traversal of the NFA.
     ///
     /// 'curr_slot_table' should be the table of slots for the current set of
     /// states being explored. If there is a transition out of 'sid', then
     /// sid's row in the slot table is used to perform the epsilon closure.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn next(
         &self,
         stack: &mut Vec<FollowEpsilon>,
         curr_slot_table: &mut SlotTable,
         next: &mut ActiveStates,
         input: &Input<'_>,
         at: usize,
         sid: StateID,
     ) -> Option<PatternID> {
         instrument!(|c| c.record_step(sid));
         match *self.nfa.state(sid) {
             State::Fail
             | State::Look { .. }
             | State::Union { .. }
             | State::BinaryUnion { .. }
             | State::Capture { .. } => None,
             State::ByteRange { ref trans } => {
                 if trans.matches(input.haystack(), at) {
                     let slots = curr_slot_table.for_state(sid);
                     // OK because 'at <= haystack.len() < usize::MAX', so
                     // adding 1 will never wrap.
                     let at = at.wrapping_add(1);
                     self.epsilon_closure(
                         stack, slots, next, input, at, trans.next,
                     );
                 }
                 None
             }
             State::Sparse(ref sparse) => {
                 if let Some(next_sid) = sparse.matches(input.haystack(), at) {
                     let slots = curr_slot_table.for_state(sid);
                     // OK because 'at <= haystack.len() < usize::MAX', so
                     // adding 1 will never wrap.
                     let at = at.wrapping_add(1);
                     self.epsilon_closure(
                         stack, slots, next, input, at, next_sid,
                     );
                 }
                 None
             }
             State::Dense(ref dense) => {
                 if let Some(next_sid) = dense.matches(input.haystack(), at) {
                     let slots = curr_slot_table.for_state(sid);
                     // OK because 'at <= haystack.len() < usize::MAX', so
                     // adding 1 will never wrap.
                     let at = at.wrapping_add(1);
                     self.epsilon_closure(
                         stack, slots, next, input, at, next_sid,
                     );
                 }
                 None
             }
             State::Match { pattern_id } => Some(pattern_id),
         }
     }

     /// Compute the epsilon closure of 'sid', writing the closure into 'next'
     /// while copying slot values from 'curr_slots' into corresponding states
     /// in 'next'. 'curr_slots' should be the slot values corresponding to
     /// 'sid'.
     ///
     /// The given 'stack' is used to perform a depth first traversal of the
     /// NFA by recursively following all epsilon transitions out of 'sid'.
     /// Conditional epsilon transitions are followed if and only if they are
     /// satisfied for the position 'at' in the 'input' haystack.
     ///
     /// While this routine may write to 'curr_slots', once it returns, any
     /// writes are undone and the original values (even if absent) are
     /// restored.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn epsilon_closure(
         &self,
         stack: &mut Vec<FollowEpsilon>,
         curr_slots: &mut [Option<NonMaxUsize>],
         next: &mut ActiveStates,
         input: &Input<'_>,
         at: usize,
         sid: StateID,
     ) {
         instrument!(|c| {
             c.record_closure(sid);
             c.record_stack_push(sid);
         });
         stack.push(FollowEpsilon::Explore(sid));
         while let Some(frame) = stack.pop() {
             match frame {
                 FollowEpsilon::RestoreCapture { slot, offset: pos } => {
                     curr_slots[slot] = pos;
                 }
                 FollowEpsilon::Explore(sid) => {
                     self.epsilon_closure_explore(
                         stack, curr_slots, next, input, at, sid,
                     );
                 }
             }
         }
     }

     /// Explore all of the epsilon transitions out of 'sid'. This is mostly
     /// split out from 'epsilon_closure' in order to clearly delineate
     /// the actual work of computing an epsilon closure from the stack
     /// book-keeping.
     ///
     /// This will push any additional explorations needed on to 'stack'.
     ///
     /// 'curr_slots' should refer to the slots for the currently active NFA
     /// state. That is, the current state we are stepping through. These
     /// slots are mutated in place as new 'Captures' states are traversed
     /// during epsilon closure, but the slots are restored to their original
     /// values once the full epsilon closure is completed. The ultimate use of
     /// 'curr_slots' is to copy them to the corresponding 'next_slots', so that
     /// the capturing group spans are forwarded from the currently active state
     /// to the next.
     ///
     /// 'next' refers to the next set of active states. Computing an epsilon
     /// closure may increase the next set of active states.
     ///
     /// 'input' refers to the caller's input configuration and 'at' refers to
     /// the current position in the haystack. These are used to check whether
     /// conditional epsilon transitions (like look-around) are satisfied at
     /// the current position. If they aren't, then the epsilon closure won't
     /// include them.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn epsilon_closure_explore(
         &self,
         stack: &mut Vec<FollowEpsilon>,
         curr_slots: &mut [Option<NonMaxUsize>],
         next: &mut ActiveStates,
         input: &Input<'_>,
         at: usize,
         mut sid: StateID,
     ) {
         // We can avoid pushing some state IDs on to our stack in precisely
         // the cases where a 'push(x)' would be immediately followed by a 'x
         // = pop()'. This is achieved by this outer-loop. We simply set 'sid'
         // to be the next state ID we want to explore once we're done with
         // our initial exploration. In practice, this avoids a lot of stack
         // thrashing.
         loop {
             instrument!(|c| c.record_set_insert(sid));
             // Record this state as part of our next set of active states. If
             // we've already explored it, then no need to do it again.
             if !next.set.insert(sid) {
                 return;
             }
             match *self.nfa.state(sid) {
                 State::Fail
                 | State::Match { .. }
                 | State::ByteRange { .. }
                 | State::Sparse { .. }
                 | State::Dense { .. } => {
                     next.slot_table.for_state(sid).copy_from_slice(curr_slots);
                     return;
                 }
                 State::Look { look, next } => {
                     // OK because we don't permit building a searcher with a
                     // Unicode word boundary if the requisite Unicode data is
                     // unavailable.
                     if !self.nfa.look_matcher().matches_inline(
                         look,
                         input.haystack(),
                         at,
                     ) {
                         return;
                     }
                     sid = next;
                 }
                 State::Union { ref alternates } => {
                     sid = match alternates.get(0) {
                         None => return,
                         Some(&sid) => sid,
                     };
                     instrument!(|c| {
                         for &alt in &alternates[1..] {
                             c.record_stack_push(alt);
                         }
                     });
                     stack.extend(
                         alternates[1..]
                             .iter()
                             .copied()
                             .rev()
                             .map(FollowEpsilon::Explore),
                     );
                 }
                 State::BinaryUnion { alt1, alt2 } => {
                     sid = alt1;
                     instrument!(|c| c.record_stack_push(sid));
                     stack.push(FollowEpsilon::Explore(alt2));
                 }
                 State::Capture { next, slot, .. } => {
                     // There's no need to do anything with slots that
                     // ultimately won't be copied into the caller-provided
                     // 'Captures' value. So we just skip dealing with them at
                     // all.
                     if slot.as_usize() < curr_slots.len() {
                         instrument!(|c| c.record_stack_push(sid));
                         stack.push(FollowEpsilon::RestoreCapture {
                             slot,
                             offset: curr_slots[slot],
                         });
                         // OK because length of a slice must fit into an isize.
                         curr_slots[slot] = Some(NonMaxUsize::new(at).unwrap());
                     }
                     sid = next;
                 }
             }
         }
     }

     /// Return the starting configuration of a PikeVM search.
     ///
     /// The "start config" is basically whether the search should be anchored
     /// or not and the NFA state ID at which to begin the search. The state ID
     /// returned always corresponds to an anchored starting state even when the
     /// search is unanchored. This is because the PikeVM search loop deals with
     /// unanchored searches with an explicit epsilon closure out of the start
     /// state.
     ///
     /// This routine accounts for both the caller's `Input` configuration
     /// and the pattern itself. For example, even if the caller asks for an
     /// unanchored search, if the pattern itself is anchored, then this will
     /// always return 'true' because implementing an unanchored search in that
     /// case would be incorrect.
     ///
     /// Similarly, if the caller requests an anchored search for a particular
     /// pattern, then the starting state ID returned will reflect that.
     ///
     /// If a pattern ID is given in the input configuration that is not in
     /// this regex, then `None` is returned.
     fn start_config(&self, input: &Input<'_>) -> Option<(bool, StateID)> {
         match input.get_anchored() {
             // Only way we're unanchored is if both the caller asked for an
             // unanchored search *and* the pattern is itself not anchored.
             Anchored::No => Some((
                 self.nfa.is_always_start_anchored(),
                 self.nfa.start_anchored(),
             )),
             Anchored::Yes => Some((true, self.nfa.start_anchored())),
             Anchored::Pattern(pid) => {
                 Some((true, self.nfa.start_pattern(pid)?))
             }
         }
     }
 }

 /// An iterator over all non-overlapping matches for a particular search.
 ///
 /// The iterator yields a [`Match`] value until no more matches could be found.
 ///
 /// The lifetime parameters are as follows:
 ///
 /// * `'r` represents the lifetime of the PikeVM.
 /// * `'c` represents the lifetime of the PikeVM's cache.
 /// * `'h` represents the lifetime of the haystack being searched.
 ///
 /// This iterator can be created with the [`PikeVM::find_iter`] method.
 #[derive(Debug)]
 pub struct FindMatches<'r, 'c, 'h> {
     re: &'r PikeVM,
     cache: &'c mut Cache,
     caps: Captures,
     it: iter::Searcher<'h>,
 }

 impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> {
     type Item = Match;

     #[inline]
     fn next(&mut self) -> Option<Match> {
         // Splitting 'self' apart seems necessary to appease borrowck.
         let FindMatches { re, ref mut cache, ref mut caps, ref mut it } =
             *self;
         // 'advance' converts errors into panics, which is OK here because
         // the PikeVM can never return an error.
         it.advance(|input| {
             re.search(cache, input, caps);
             Ok(caps.get_match())
         })
     }
 }

 /// An iterator over all non-overlapping leftmost matches, with their capturing
 /// groups, for a particular search.
 ///
 /// The iterator yields a [`Captures`] value until no more matches could be
 /// found.
 ///
 /// The lifetime parameters are as follows:
 ///
 /// * `'r` represents the lifetime of the PikeVM.
 /// * `'c` represents the lifetime of the PikeVM's cache.
 /// * `'h` represents the lifetime of the haystack being searched.
 ///
 /// This iterator can be created with the [`PikeVM::captures_iter`] method.
 #[derive(Debug)]
 pub struct CapturesMatches<'r, 'c, 'h> {
     re: &'r PikeVM,
     cache: &'c mut Cache,
     caps: Captures,
     it: iter::Searcher<'h>,
 }

 impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> {
     type Item = Captures;

     #[inline]
     fn next(&mut self) -> Option<Captures> {
         // Splitting 'self' apart seems necessary to appease borrowck.
         let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } =
             *self;
         // 'advance' converts errors into panics, which is OK here because
         // the PikeVM can never return an error.
         it.advance(|input| {
             re.search(cache, input, caps);
             Ok(caps.get_match())
         });
         if caps.is_match() {
             Some(caps.clone())
         } else {
             None
         }
     }
 }

 /// A cache represents mutable state that a [`PikeVM`] requires during a
 /// search.
 ///
 /// For a given [`PikeVM`], its corresponding cache may be created either via
 /// [`PikeVM::create_cache`], or via [`Cache::new`]. They are equivalent in
 /// every way, except the former does not require explicitly importing `Cache`.
 ///
 /// A particular `Cache` is coupled with the [`PikeVM`] from which it
 /// was created. It may only be used with that `PikeVM`. A cache and its
 /// allocations may be re-purposed via [`Cache::reset`], in which case, it can
 /// only be used with the new `PikeVM` (and not the old one).
 #[derive(Clone, Debug)]
 pub struct Cache {
     /// Stack used while computing epsilon closure. This effectively lets us
     /// move what is more naturally expressed through recursion to a stack
     /// on the heap.
     stack: Vec<FollowEpsilon>,
     /// The current active states being explored for the current byte in the
     /// haystack.
     curr: ActiveStates,
     /// The next set of states we're building that will be explored for the
     /// next byte in the haystack.
     next: ActiveStates,
 }

 impl Cache {
     /// Create a new [`PikeVM`] cache.
     ///
     /// A potentially more convenient routine to create a cache is
     /// [`PikeVM::create_cache`], as it does not require also importing the
     /// `Cache` type.
     ///
     /// If you want to reuse the returned `Cache` with some other `PikeVM`,
     /// then you must call [`Cache::reset`] with the desired `PikeVM`.
     pub fn new(re: &PikeVM) -> Cache {
         Cache {
             stack: vec![],
             curr: ActiveStates::new(re),
             next: ActiveStates::new(re),
         }
     }

     /// Reset this cache such that it can be used for searching with a
     /// different [`PikeVM`].
     ///
     /// A cache reset permits reusing memory already allocated in this cache
     /// with a different `PikeVM`.
     ///
     /// # Example
     ///
     /// This shows how to re-purpose a cache for use with a different `PikeVM`.
     ///
     /// ```
     /// # if cfg!(miri) { return Ok(()); } // miri takes too long
     /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match};
     ///
     /// let re1 = PikeVM::new(r"\w")?;
     /// let re2 = PikeVM::new(r"\W")?;
     ///
     /// let mut cache = re1.create_cache();
     /// assert_eq!(
     ///     Some(Match::must(0, 0..2)),
     ///     re1.find_iter(&mut cache, "Δ").next(),
     /// );
     ///
     /// // Using 'cache' with re2 is not allowed. It may result in panics or
     /// // incorrect results. In order to re-purpose the cache, we must reset
     /// // it with the PikeVM we'd like to use it with.
     /// //
     /// // Similarly, after this reset, using the cache with 're1' is also not
     /// // allowed.
     /// cache.reset(&re2);
     /// assert_eq!(
     ///     Some(Match::must(0, 0..3)),
     ///     re2.find_iter(&mut cache, "☃").next(),
     /// );
     ///
     /// # Ok::<(), Box<dyn std::error::Error>>(())
     /// ```
     pub fn reset(&mut self, re: &PikeVM) {
         self.curr.reset(re);
         self.next.reset(re);
     }

     /// Returns the heap memory usage, in bytes, of this cache.
     ///
     /// This does **not** include the stack size used up by this cache. To
     /// compute that, use `std::mem::size_of::<Cache>()`.
     pub fn memory_usage(&self) -> usize {
         use core::mem::size_of;
         (self.stack.len() * size_of::<FollowEpsilon>())
             + self.curr.memory_usage()
             + self.next.memory_usage()
     }

     /// Clears this cache. This should be called at the start of every search
     /// to ensure we start with a clean slate.
     ///
     /// This also sets the length of the capturing groups used in the current
     /// search. This permits an optimization where by 'SlotTable::for_state'
     /// only returns the number of slots equivalent to the number of slots
     /// given in the 'Captures' value. This may be less than the total number
     /// of possible slots, e.g., when one only wants to track overall match
     /// offsets. This in turn permits less copying of capturing group spans
     /// in the PikeVM.
     fn setup_search(&mut self, captures_slot_len: usize) {
         self.stack.clear();
         self.curr.setup_search(captures_slot_len);
         self.next.setup_search(captures_slot_len);
     }
 }

 /// A set of active states used to "simulate" the execution of an NFA via the
 /// PikeVM.
 ///
 /// There are two sets of these used during NFA simulation. One set corresponds
 /// to the "current" set of states being traversed for the current position
 /// in a haystack. The other set corresponds to the "next" set of states being
 /// built, which will become the new "current" set for the next position in the
 /// haystack. These two sets correspond to CLIST and NLIST in Thompson's
 /// original paper regexes: https://dl.acm.org/doi/pdf/10.1145/363347.363387
 ///
 /// In addition to representing a set of NFA states, this also maintains slot
 /// values for each state. These slot values are what turn the NFA simulation
 /// into the "Pike VM." Namely, they track capturing group values for each
 /// state. During the computation of epsilon closure, we copy slot values from
 /// states in the "current" set to the "next" set. Eventually, once a match
 /// is found, the slot values for that match state are what we write to the
 /// caller provided 'Captures' value.
 #[derive(Clone, Debug)]
 struct ActiveStates {
     /// The set of active NFA states. This set preserves insertion order, which
     /// is critical for simulating the match semantics of backtracking regex
     /// engines.
     set: SparseSet,
     /// The slots for every NFA state, where each slot stores a (possibly
     /// absent) offset. Every capturing group has two slots. One for a start
     /// offset and one for an end offset.
     slot_table: SlotTable,
 }

 impl ActiveStates {
     /// Create a new set of active states for the given PikeVM. The active
     /// states returned may only be used with the given PikeVM. (Use 'reset'
     /// to re-purpose the allocation for a different PikeVM.)
     fn new(re: &PikeVM) -> ActiveStates {
         let mut active = ActiveStates {
             set: SparseSet::new(0),
             slot_table: SlotTable::new(),
         };
         active.reset(re);
         active
     }

     /// Reset this set of active states such that it can be used with the given
     /// PikeVM (and only that PikeVM).
     fn reset(&mut self, re: &PikeVM) {
         self.set.resize(re.get_nfa().states().len());
         self.slot_table.reset(re);
     }

     /// Return the heap memory usage, in bytes, used by this set of active
     /// states.
     ///
     /// This does not include the stack size of this value.
     fn memory_usage(&self) -> usize {
         self.set.memory_usage() + self.slot_table.memory_usage()
     }

     /// Setup this set of active states for a new search. The given slot
     /// length should be the number of slots in a caller provided 'Captures'
     /// (and may be zero).
     fn setup_search(&mut self, captures_slot_len: usize) {
         self.set.clear();
         self.slot_table.setup_search(captures_slot_len);
     }
 }

 /// A table of slots, where each row represent a state in an NFA. Thus, the
 /// table has room for storing slots for every single state in an NFA.
 ///
 /// This table is represented with a single contiguous allocation. In general,
 /// the notion of "capturing group" doesn't really exist at this level of
 /// abstraction, hence the name "slot" instead. (Indeed, every capturing group
 /// maps to a pair of slots, one for the start offset and one for the end
 /// offset.) Slots are indexed by the 'Captures' NFA state.
 ///
 /// N.B. Not every state actually needs a row of slots. Namely, states that
 /// only have epsilon transitions currently never have anything written to
 /// their rows in this table. Thus, the table is somewhat wasteful in its heap
 /// usage. However, it is important to maintain fast random access by state
 /// ID, which means one giant table tends to work well. RE2 takes a different
 /// approach here and allocates each row as its own reference counted thing.
 /// I explored such a strategy at one point here, but couldn't get it to work
 /// well using entirely safe code. (To the ambitious reader: I encourage you to
 /// re-litigate that experiment.) I very much wanted to stick to safe code, but
 /// could be convinced otherwise if there was a solid argument and the safety
 /// was encapsulated well.
 #[derive(Clone, Debug)]
 struct SlotTable {
     /// The actual table of offsets.
     table: Vec<Option<NonMaxUsize>>,
     /// The number of slots per state, i.e., the table's stride or the length
     /// of each row.
     slots_per_state: usize,
     /// The number of slots in the caller-provided 'Captures' value for the
     /// current search. Setting this to 'slots_per_state' is always correct,
     /// but may be wasteful.
     slots_for_captures: usize,
 }

 impl SlotTable {
     /// Create a new slot table.
     ///
     /// One should call 'reset' with the corresponding PikeVM before use.
     fn new() -> SlotTable {
         SlotTable { table: vec![], slots_for_captures: 0, slots_per_state: 0 }
     }

     /// Reset this slot table such that it can be used with the given PikeVM
     /// (and only that PikeVM).
     fn reset(&mut self, re: &PikeVM) {
         let nfa = re.get_nfa();
         self.slots_per_state = nfa.group_info().slot_len();
         // This is always correct, but may be reduced for a particular search
         // if a 'Captures' has fewer slots, e.g., none at all or only slots
         // for tracking the overall match instead of all slots for every
         // group.
         self.slots_for_captures = core::cmp::max(
             self.slots_per_state,
             nfa.pattern_len().checked_mul(2).unwrap(),
         );
         let len = nfa
             .states()
             .len()
             .checked_mul(self.slots_per_state)
             // Add space to account for scratch space used during a search.
             .and_then(|x| x.checked_add(self.slots_for_captures))
             // It seems like this could actually panic on legitimate inputs on
             // 32-bit targets, and very likely to panic on 16-bit. Should we
             // somehow convert this to an error? What about something similar
             // for the lazy DFA cache? If you're tripping this assert, please
             // file a bug.
             .expect("slot table length doesn't overflow");
         // This happens about as often as a regex is compiled, so it probably
         // should be at debug level, but I found it quite distracting and not
         // particularly useful.
         trace!(
             "resizing PikeVM active states table to {} entries \
              (slots_per_state={})",
             len,
             self.slots_per_state,
         );
         self.table.resize(len, None);
     }

     /// Return the heap memory usage, in bytes, used by this slot table.
     ///
     /// This does not include the stack size of this value.
     fn memory_usage(&self) -> usize {
         self.table.len() * core::mem::size_of::<Option<NonMaxUsize>>()
     }

     /// Perform any per-search setup for this slot table.
     ///
     /// In particular, this sets the length of the number of slots used in the
     /// 'Captures' given by the caller (if any at all). This number may be
     /// smaller than the total number of slots available, e.g., when the caller
     /// is only interested in tracking the overall match and not the spans of
     /// every matching capturing group. Only tracking the overall match can
     /// save a substantial amount of time copying capturing spans during a
     /// search.
     fn setup_search(&mut self, captures_slot_len: usize) {
         self.slots_for_captures = captures_slot_len;
     }

     /// Return a mutable slice of the slots for the given state.
     ///
     /// Note that the length of the slice returned may be less than the total
     /// number of slots available for this state. In particular, the length
     /// always matches the number of slots indicated via 'setup_search'.
     fn for_state(&mut self, sid: StateID) -> &mut [Option<NonMaxUsize>] {
         let i = sid.as_usize() * self.slots_per_state;
         &mut self.table[i..i + self.slots_for_captures]
     }

     /// Return a slice of slots of appropriate length where every slot offset
     /// is guaranteed to be absent. This is useful in cases where you need to
     /// compute an epsilon closure outside of the user supplied regex, and thus
     /// never want it to have any capturing slots set.
     fn all_absent(&mut self) -> &mut [Option<NonMaxUsize>] {
         let i = self.table.len() - self.slots_for_captures;
         &mut self.table[i..i + self.slots_for_captures]
     }
 }

 /// Represents a stack frame for use while computing an epsilon closure.
 ///
 /// (An "epsilon closure" refers to the set of reachable NFA states from a
 /// single state without consuming any input. That is, the set of all epsilon
 /// transitions not only from that single state, but from every other state
 /// reachable by an epsilon transition as well. This is why it's called a
 /// "closure." Computing an epsilon closure is also done during DFA
 /// determinization! Compare and contrast the epsilon closure here in this
 /// PikeVM and the one used for determinization in crate::util::determinize.)
 ///
 /// Computing the epsilon closure in a Thompson NFA proceeds via a depth
 /// first traversal over all epsilon transitions from a particular state.
 /// (A depth first traversal is important because it emulates the same priority
 /// of matches that is typically found in backtracking regex engines.) This
 /// depth first traversal is naturally expressed using recursion, but to avoid
 /// a call stack size proportional to the size of a regex, we put our stack on
 /// the heap instead.
 ///
 /// This stack thus consists of call frames. The typical call frame is
 /// `Explore`, which instructs epsilon closure to explore the epsilon
 /// transitions from that state. (Subsequent epsilon transitions are then
 /// pushed on to the stack as more `Explore` frames.) If the state ID being
 /// explored has no epsilon transitions, then the capturing group slots are
 /// copied from the original state that sparked the epsilon closure (from the
 /// 'step' routine) to the state ID being explored. This way, capturing group
 /// slots are forwarded from the previous state to the next.
 ///
 /// The other stack frame, `RestoreCaptures`, instructs the epsilon closure to
 /// set the position for a particular slot back to some particular offset. This
 /// frame is pushed when `Explore` sees a `Capture` transition. `Explore` will
 /// set the offset of the slot indicated in `Capture` to the current offset,
 /// and then push the old offset on to the stack as a `RestoreCapture` frame.
 /// Thus, the new offset is only used until the epsilon closure reverts back to
 /// the `RestoreCapture` frame. In effect, this gives the `Capture` epsilon
 /// transition its "scope" to only states that come "after" it during depth
 /// first traversal.
 #[derive(Clone, Debug)]
 enum FollowEpsilon {
     /// Explore the epsilon transitions from a state ID.
     Explore(StateID),
     /// Reset the given `slot` to the given `offset` (which might be `None`).
     RestoreCapture { slot: SmallIndex, offset: Option<NonMaxUsize> },
 }

 /// A set of counters that "instruments" a PikeVM search. To enable this, you
 /// must enable the 'internal-instrument-pikevm' feature. Then run your Rust
 /// program with RUST_LOG=regex_automata::nfa::thompson::pikevm=trace set in
 /// the environment. The metrics collected will be dumped automatically for
 /// every search executed by the PikeVM.
 ///
 /// NOTE: When 'internal-instrument-pikevm' is enabled, it will likely cause an
 /// absolute decrease in wall-clock performance, even if the 'trace' log level
 /// isn't enabled. (Although, we do try to avoid extra costs when 'trace' isn't
 /// enabled.) The main point of instrumentation is to get counts of various
 /// events that occur during the PikeVM's execution.
 ///
 /// This is a somewhat hacked together collection of metrics that are useful
 /// to gather from a PikeVM search. In particular, it lets us scrutinize the
 /// performance profile of a search beyond what general purpose profiling tools
 /// give us. Namely, we orient the profiling data around the specific states of
 /// the NFA.
 ///
 /// In other words, this lets us see which parts of the NFA graph are most
 /// frequently activated. This then provides direction for optimization
 /// opportunities.
 ///
 /// The really sad part about this is that it absolutely clutters up the PikeVM
 /// implementation. :'( Another approach would be to just manually add this
 /// code in whenever I want this kind of profiling data, but it's complicated
 /// and tedious enough that I went with this approach... for now.
 ///
 /// When instrumentation is enabled (which also turns on 'logging'), then a
 /// `Counters` is initialized for every search and `trace`'d just before the
 /// search returns to the caller.
 ///
 /// Tip: When debugging performance problems with the PikeVM, it's best to try
 /// to work with an NFA that is as small as possible. Otherwise the state graph
 /// is likely to be too big to digest.
 #[cfg(feature = "internal-instrument-pikevm")]
 #[derive(Clone, Debug)]
 struct Counters {
     /// The number of times the NFA is in a particular permutation of states.
     state_sets: alloc::collections::BTreeMap<Vec<StateID>, u64>,
     /// The number of times 'step' is called for a particular state ID (which
     /// indexes this array).
     steps: Vec<u64>,
     /// The number of times an epsilon closure was computed for a state.
     closures: Vec<u64>,
     /// The number of times a particular state ID is pushed on to a stack while
     /// computing an epsilon closure.
     stack_pushes: Vec<u64>,
     /// The number of times a particular state ID is inserted into a sparse set
     /// while computing an epsilon closure.
     set_inserts: Vec<u64>,
 }

 #[cfg(feature = "internal-instrument-pikevm")]
 impl Counters {
     fn empty() -> Counters {
         Counters {
             state_sets: alloc::collections::BTreeMap::new(),
             steps: vec![],
             closures: vec![],
             stack_pushes: vec![],
             set_inserts: vec![],
         }
     }

     fn reset(&mut self, nfa: &NFA) {
         let len = nfa.states().len();

         self.state_sets.clear();

         self.steps.clear();
         self.steps.resize(len, 0);

         self.closures.clear();
         self.closures.resize(len, 0);

         self.stack_pushes.clear();
         self.stack_pushes.resize(len, 0);

         self.set_inserts.clear();
         self.set_inserts.resize(len, 0);
     }

     fn eprint(&self, nfa: &NFA) {
         trace!("===== START PikeVM Instrumentation Output =====");
         // We take the top-K most occurring state sets. Otherwise the output
         // is likely to be overwhelming. And we probably only care about the
         // most frequently occurring ones anyway.
         const LIMIT: usize = 20;
         let mut set_counts =
             self.state_sets.iter().collect::<Vec<(&Vec<StateID>, &u64)>>();
         set_counts.sort_by_key(|(_, &count)| core::cmp::Reverse(count));
         trace!("## PikeVM frequency of state sets (top {})", LIMIT);
         for (set, count) in set_counts.iter().take(LIMIT) {
             trace!("{:?}: {}", set, count);
         }
         if set_counts.len() > LIMIT {
             trace!(
                 "... {} sets omitted (out of {} total)",
                 set_counts.len() - LIMIT,
                 set_counts.len(),
             );
         }

         trace!("");
         trace!("## PikeVM total frequency of events");
         trace!(
             "steps: {}, closures: {}, stack-pushes: {}, set-inserts: {}",
             self.steps.iter().copied().sum::<u64>(),
             self.closures.iter().copied().sum::<u64>(),
             self.stack_pushes.iter().copied().sum::<u64>(),
             self.set_inserts.iter().copied().sum::<u64>(),
         );

         trace!("");
         trace!("## PikeVM frequency of events broken down by state");
         for sid in 0..self.steps.len() {
             trace!(
                 "{:06}: steps: {}, closures: {}, \
                  stack-pushes: {}, set-inserts: {}",
                 sid,
                 self.steps[sid],
                 self.closures[sid],
                 self.stack_pushes[sid],
                 self.set_inserts[sid],
             );
         }

         trace!("");
         trace!("## NFA debug display");
         trace!("{:?}", nfa);
         trace!("===== END PikeVM Instrumentation Output =====");
     }

     fn record_state_set(&mut self, set: &SparseSet) {
         let set = set.iter().collect::<Vec<StateID>>();
         *self.state_sets.entry(set).or_insert(0) += 1;
     }

     fn record_step(&mut self, sid: StateID) {
         self.steps[sid] += 1;
     }

     fn record_closure(&mut self, sid: StateID) {
         self.closures[sid] += 1;
     }

     fn record_stack_push(&mut self, sid: StateID) {
         self.stack_pushes[sid] += 1;
     }

     fn record_set_insert(&mut self, sid: StateID) {
         self.set_inserts[sid] += 1;
     }
 }