vendor/regex-lite/src/hir/mod.rs - toolchain/rustc - Git at Google

 use alloc::{boxed::Box, string::String, vec, vec::Vec};

 use crate::{error::Error, utf8};

 mod parse;

 /// Escapes all regular expression meta characters in `pattern`.
 ///
 /// The string returned may be safely used as a literal in a regular
 /// expression.
 pub fn escape(pattern: &str) -> String {
     let mut buf = String::new();
     buf.reserve(pattern.len());
     for ch in pattern.chars() {
         if is_meta_character(ch) {
             buf.push('\\');
         }
         buf.push(ch);
     }
     buf
 }

 /// Returns true if the given character has significance in a regex.
 ///
 /// Generally speaking, these are the only characters which _must_ be escaped
 /// in order to match their literal meaning. For example, to match a literal
 /// `|`, one could write `\|`. Sometimes escaping isn't always necessary. For
 /// example, `-` is treated as a meta character because of its significance
 /// for writing ranges inside of character classes, but the regex `-` will
 /// match a literal `-` because `-` has no special meaning outside of character
 /// classes.
 ///
 /// In order to determine whether a character may be escaped at all, the
 /// [`is_escapeable_character`] routine should be used. The difference between
 /// `is_meta_character` and `is_escapeable_character` is that the latter will
 /// return true for some characters that are _not_ meta characters. For
 /// example, `%` and `\%` both match a literal `%` in all contexts. In other
 /// words, `is_escapeable_character` includes "superfluous" escapes.
 ///
 /// Note that the set of characters for which this function returns `true` or
 /// `false` is fixed and won't change in a semver compatible release. (In this
 /// case, "semver compatible release" actually refers to the `regex` crate
 /// itself, since reducing or expanding the set of meta characters would be a
 /// breaking change for not just `regex-syntax` but also `regex` itself.)
 fn is_meta_character(c: char) -> bool {
     match c {
         '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{'
         | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
         _ => false,
     }
 }

 /// Returns true if the given character can be escaped in a regex.
 ///
 /// This returns true in all cases that `is_meta_character` returns true, but
 /// also returns true in some cases where `is_meta_character` returns false.
 /// For example, `%` is not a meta character, but it is escapeable. That is,
 /// `%` and `\%` both match a literal `%` in all contexts.
 ///
 /// The purpose of this routine is to provide knowledge about what characters
 /// may be escaped. Namely, most regex engines permit "superfluous" escapes
 /// where characters without any special significance may be escaped even
 /// though there is no actual _need_ to do so.
 ///
 /// This will return false for some characters. For example, `e` is not
 /// escapeable. Therefore, `\e` will either result in a parse error (which is
 /// true today), or it could backwards compatibly evolve into a new construct
 /// with its own meaning. Indeed, that is the purpose of banning _some_
 /// superfluous escapes: it provides a way to evolve the syntax in a compatible
 /// manner.
 fn is_escapeable_character(c: char) -> bool {
     // Certainly escapeable if it's a meta character.
     if is_meta_character(c) {
         return true;
     }
     // Any character that isn't ASCII is definitely not escapeable. There's
     // no real need to allow things like \☃ right?
     if !c.is_ascii() {
         return false;
     }
     // Otherwise, we basically say that everything is escapeable unless it's a
     // letter or digit. Things like \3 are either octal (when enabled) or an
     // error, and we should keep it that way. Otherwise, letters are reserved
     // for adding new syntax in a backwards compatible way.
     match c {
         '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
         // While not currently supported, we keep these as not escapeable to
         // give us some flexibility with respect to supporting the \< and
         // \> word boundary assertions in the future. By rejecting them as
         // escapeable, \< and \> will result in a parse error. Thus, we can
         // turn them into something else in the future without it being a
         // backwards incompatible change.
         '<' | '>' => false,
         _ => true,
     }
 }

 /// The configuration for a regex parser.
 #[derive(Clone, Copy, Debug)]
 pub(crate) struct Config {
     /// The maximum number of times we're allowed to recurse.
     ///
     /// Note that unlike the regex-syntax parser, we actually use recursion in
     /// this parser for simplicity. My hope is that by setting a conservative
     /// default call limit and providing a way to configure it, that we can
     /// keep this simplification. But if we must, we can re-work the parser to
     /// put the call stack on the heap like regex-syntax does.
     pub(crate) nest_limit: u32,
     /// Various flags that control how a pattern is interpreted.
     pub(crate) flags: Flags,
 }

 impl Default for Config {
     fn default() -> Config {
         Config { nest_limit: 50, flags: Flags::default() }
     }
 }

 /// Various flags that control the interpretation of the pattern.
 ///
 /// These can be set via explicit configuration in code, or change dynamically
 /// during parsing via inline flags. For example, `foo(?i:bar)baz` will match
 /// `foo` and `baz` case sensitiviely and `bar` case insensitively (assuming a
 /// default configuration).
 #[derive(Clone, Copy, Debug, Default)]
 pub(crate) struct Flags {
     /// Whether to match case insensitively.
     ///
     /// This is the `i` flag.
     pub(crate) case_insensitive: bool,
     /// Whether `^` and `$` should be treated as line anchors or not.
     ///
     /// This is the `m` flag.
     pub(crate) multi_line: bool,
     /// Whether `.` should match line terminators or not.
     ///
     /// This is the `s` flag.
     pub(crate) dot_matches_new_line: bool,
     /// Whether to swap the meaning of greedy and non-greedy operators.
     ///
     /// This is the `U` flag.
     pub(crate) swap_greed: bool,
     /// Whether to enable CRLF mode.
     ///
     /// This is the `R` flag.
     pub(crate) crlf: bool,
     /// Whether to ignore whitespace. i.e., verbose mode.
     ///
     /// This is the `x` flag.
     pub(crate) ignore_whitespace: bool,
 }

 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) struct Hir {
     kind: HirKind,
     is_start_anchored: bool,
     is_match_empty: bool,
     static_explicit_captures_len: Option<usize>,
 }

 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) enum HirKind {
     Empty,
     Char(char),
     Class(Class),
     Look(Look),
     Repetition(Repetition),
     Capture(Capture),
     Concat(Vec<Hir>),
     Alternation(Vec<Hir>),
 }

 impl Hir {
     /// Parses the given pattern string with the given configuration into a
     /// structured representation. If the pattern is invalid, then an error
     /// is returned.
     pub(crate) fn parse(config: Config, pattern: &str) -> Result<Hir, Error> {
         self::parse::Parser::new(config, pattern).parse()
     }

     /// Returns the underlying kind of this high-level intermediate
     /// representation.
     ///
     /// Note that there is explicitly no way to build an `Hir` directly from
     /// an `HirKind`. If you need to do that, then you must do case analysis
     /// on the `HirKind` and call the appropriate smart constructor on `Hir`.
     pub(crate) fn kind(&self) -> &HirKind {
         &self.kind
     }

     /// Returns true if and only if this Hir expression can only match at the
     /// beginning of a haystack.
     pub(crate) fn is_start_anchored(&self) -> bool {
         self.is_start_anchored
     }

     /// Returns true if and only if this Hir expression can match the empty
     /// string.
     pub(crate) fn is_match_empty(&self) -> bool {
         self.is_match_empty
     }

     /// If the pattern always reports the same number of matching capture groups
     /// for every match, then this returns the number of those groups. This
     /// doesn't include the implicit group found in every pattern.
     pub(crate) fn static_explicit_captures_len(&self) -> Option<usize> {
         self.static_explicit_captures_len
     }

     fn fail() -> Hir {
         let kind = HirKind::Class(Class { ranges: vec![] });
         Hir {
             kind,
             is_start_anchored: false,
             is_match_empty: false,
             static_explicit_captures_len: Some(0),
         }
     }

     fn empty() -> Hir {
         let kind = HirKind::Empty;
         Hir {
             kind,
             is_start_anchored: false,
             is_match_empty: true,
             static_explicit_captures_len: Some(0),
         }
     }

     fn char(ch: char) -> Hir {
         let kind = HirKind::Char(ch);
         Hir {
             kind,
             is_start_anchored: false,
             is_match_empty: false,
             static_explicit_captures_len: Some(0),
         }
     }

     fn class(class: Class) -> Hir {
         let kind = HirKind::Class(class);
         Hir {
             kind,
             is_start_anchored: false,
             is_match_empty: false,
             static_explicit_captures_len: Some(0),
         }
     }

     fn look(look: Look) -> Hir {
         let kind = HirKind::Look(look);
         Hir {
             kind,
             is_start_anchored: matches!(look, Look::Start),
             is_match_empty: true,
             static_explicit_captures_len: Some(0),
         }
     }

     fn repetition(rep: Repetition) -> Hir {
         if rep.min == 0 && rep.max == Some(0) {
             return Hir::empty();
         } else if rep.min == 1 && rep.max == Some(1) {
             return *rep.sub;
         }
         let is_start_anchored = rep.min > 0 && rep.sub.is_start_anchored;
         let is_match_empty = rep.min == 0 || rep.sub.is_match_empty;
         let mut static_explicit_captures_len =
             rep.sub.static_explicit_captures_len;
         // If the static captures len of the sub-expression is not known or
         // is greater than zero, then it automatically propagates to the
         // repetition, regardless of the repetition. Otherwise, it might
         // change, but only when the repetition can match 0 times.
         if rep.min == 0
             && static_explicit_captures_len.map_or(false, |len| len > 0)
         {
             // If we require a match 0 times, then our captures len is
             // guaranteed to be zero. Otherwise, if we *can* match the empty
             // string, then it's impossible to know how many captures will be
             // in the resulting match.
             if rep.max == Some(0) {
                 static_explicit_captures_len = Some(0);
             } else {
                 static_explicit_captures_len = None;
             }
         }
         Hir {
             kind: HirKind::Repetition(rep),
             is_start_anchored,
             is_match_empty,
             static_explicit_captures_len,
         }
     }

     fn capture(cap: Capture) -> Hir {
         let is_start_anchored = cap.sub.is_start_anchored;
         let is_match_empty = cap.sub.is_match_empty;
         let static_explicit_captures_len = cap
             .sub
             .static_explicit_captures_len
             .map(|len| len.saturating_add(1));
         let kind = HirKind::Capture(cap);
         Hir {
             kind,
             is_start_anchored,
             is_match_empty,
             static_explicit_captures_len,
         }
     }

     fn concat(mut subs: Vec<Hir>) -> Hir {
         if subs.is_empty() {
             Hir::empty()
         } else if subs.len() == 1 {
             subs.pop().unwrap()
         } else {
             let is_start_anchored = subs[0].is_start_anchored;
             let mut is_match_empty = true;
             let mut static_explicit_captures_len = Some(0usize);
             for sub in subs.iter() {
                 is_match_empty = is_match_empty && sub.is_match_empty;
                 static_explicit_captures_len = static_explicit_captures_len
                     .and_then(|len1| {
                         Some((len1, sub.static_explicit_captures_len?))
                     })
                     .and_then(|(len1, len2)| Some(len1.saturating_add(len2)));
             }
             Hir {
                 kind: HirKind::Concat(subs),
                 is_start_anchored,
                 is_match_empty,
                 static_explicit_captures_len,
             }
         }
     }

     fn alternation(mut subs: Vec<Hir>) -> Hir {
         if subs.is_empty() {
             Hir::fail()
         } else if subs.len() == 1 {
             subs.pop().unwrap()
         } else {
             let mut it = subs.iter().peekable();
             let mut is_start_anchored =
                 it.peek().map_or(false, |sub| sub.is_start_anchored);
             let mut is_match_empty =
                 it.peek().map_or(false, |sub| sub.is_match_empty);
             let mut static_explicit_captures_len =
                 it.peek().and_then(|sub| sub.static_explicit_captures_len);
             for sub in it {
                 is_start_anchored = is_start_anchored && sub.is_start_anchored;
                 is_match_empty = is_match_empty || sub.is_match_empty;
                 if static_explicit_captures_len
                     != sub.static_explicit_captures_len
                 {
                     static_explicit_captures_len = None;
                 }
             }
             Hir {
                 kind: HirKind::Alternation(subs),
                 is_start_anchored,
                 is_match_empty,
                 static_explicit_captures_len,
             }
         }
     }
 }

 impl HirKind {
     /// Returns a slice of this kind's sub-expressions, if any.
     fn subs(&self) -> &[Hir] {
         use core::slice::from_ref;

         match *self {
             HirKind::Empty
             | HirKind::Char(_)
             | HirKind::Class(_)
             | HirKind::Look(_) => &[],
             HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub),
             HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub),
             HirKind::Concat(ref subs) => subs,
             HirKind::Alternation(ref subs) => subs,
         }
     }
 }

 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) struct Class {
     pub(crate) ranges: Vec<ClassRange>,
 }

 impl Class {
     /// Create a new class from the given ranges. The ranges may be provided
     /// in any order or may even overlap. They will be automatically
     /// canonicalized.
     fn new<I: IntoIterator<Item = ClassRange>>(ranges: I) -> Class {
         let mut class = Class { ranges: ranges.into_iter().collect() };
         class.canonicalize();
         class
     }

     /// Expand this class such that it matches the ASCII codepoints in this set
     /// case insensitively.
     fn ascii_case_fold(&mut self) {
         let len = self.ranges.len();
         for i in 0..len {
             if let Some(folded) = self.ranges[i].ascii_case_fold() {
                 self.ranges.push(folded);
             }
         }
         self.canonicalize();
     }

     /// Negate this set.
     ///
     /// For all `x` where `x` is any element, if `x` was in this set, then it
     /// will not be in this set after negation.
     fn negate(&mut self) {
         const MIN: char = '\x00';
         const MAX: char = char::MAX;

         if self.ranges.is_empty() {
             self.ranges.push(ClassRange { start: MIN, end: MAX });
             return;
         }

         // There should be a way to do this in-place with constant memory,
         // but I couldn't figure out a simple way to do it. So just append
         // the negation to the end of this range, and then drain it before
         // we're done.
         let drain_end = self.ranges.len();

         // If our class doesn't start the minimum possible char, then negation
         // needs to include all codepoints up to the minimum in this set.
         if self.ranges[0].start > MIN {
             self.ranges.push(ClassRange {
                 start: MIN,
                 // OK because we know it's bigger than MIN.
                 end: prev_char(self.ranges[0].start).unwrap(),
             });
         }
         for i in 1..drain_end {
             // let lower = self.ranges[i - 1].upper().increment();
             // let upper = self.ranges[i].lower().decrement();
             // self.ranges.push(I::create(lower, upper));
             self.ranges.push(ClassRange {
                 // OK because we know i-1 is never the last range and therefore
                 // there must be a range greater than it. It therefore follows
                 // that 'end' can never be char::MAX, and thus there must be
                 // a next char.
                 start: next_char(self.ranges[i - 1].end).unwrap(),
                 // Since 'i' is guaranteed to never be the first range, it
                 // follows that there is always a range before this and thus
                 // 'start' can never be '\x00'. Thus, there must be a previous
                 // char.
                 end: prev_char(self.ranges[i].start).unwrap(),
             });
         }
         if self.ranges[drain_end - 1].end < MAX {
             // let lower = self.ranges[drain_end - 1].upper().increment();
             // self.ranges.push(I::create(lower, I::Bound::max_value()));
             self.ranges.push(ClassRange {
                 // OK because we know 'end' is less than char::MAX, and thus
                 // there is a next char.
                 start: next_char(self.ranges[drain_end - 1].end).unwrap(),
                 end: MAX,
             });
         }
         self.ranges.drain(..drain_end);
         // We don't need to canonicalize because we processed the ranges above
         // in canonical order and the new ranges we added based on those are
         // also necessarily in canonical order.
     }

     /// Converts this set into a canonical ordering.
     fn canonicalize(&mut self) {
         if self.is_canonical() {
             return;
         }
         self.ranges.sort();
         assert!(!self.ranges.is_empty());

         // Is there a way to do this in-place with constant memory? I couldn't
         // figure out a way to do it. So just append the canonicalization to
         // the end of this range, and then drain it before we're done.
         let drain_end = self.ranges.len();
         for oldi in 0..drain_end {
             // If we've added at least one new range, then check if we can
             // merge this range in the previously added range.
             if self.ranges.len() > drain_end {
                 let (last, rest) = self.ranges.split_last_mut().unwrap();
                 if let Some(union) = last.union(&rest[oldi]) {
                     *last = union;
                     continue;
                 }
             }
             self.ranges.push(self.ranges[oldi]);
         }
         self.ranges.drain(..drain_end);
     }

     /// Returns true if and only if this class is in a canonical ordering.
     fn is_canonical(&self) -> bool {
         for pair in self.ranges.windows(2) {
             if pair[0] >= pair[1] {
                 return false;
             }
             if pair[0].is_contiguous(&pair[1]) {
                 return false;
             }
         }
         true
     }
 }

 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
 pub(crate) struct ClassRange {
     pub(crate) start: char,
     pub(crate) end: char,
 }

 impl ClassRange {
     /// Apply simple case folding to this byte range. Only ASCII case mappings
     /// (for A-Za-z) are applied.
     ///
     /// Additional ranges are appended to the given vector. Canonical ordering
     /// is *not* maintained in the given vector.
     fn ascii_case_fold(&self) -> Option<ClassRange> {
         if !(ClassRange { start: 'a', end: 'z' }).is_intersection_empty(self) {
             let start = core::cmp::max(self.start, 'a');
             let end = core::cmp::min(self.end, 'z');
             return Some(ClassRange {
                 start: char::try_from(u32::from(start) - 32).unwrap(),
                 end: char::try_from(u32::from(end) - 32).unwrap(),
             });
         }
         if !(ClassRange { start: 'A', end: 'Z' }).is_intersection_empty(self) {
             let start = core::cmp::max(self.start, 'A');
             let end = core::cmp::min(self.end, 'Z');
             return Some(ClassRange {
                 start: char::try_from(u32::from(start) + 32).unwrap(),
                 end: char::try_from(u32::from(end) + 32).unwrap(),
             });
         }
         None
     }

     /// Union the given overlapping range into this range.
     ///
     /// If the two ranges aren't contiguous, then this returns `None`.
     fn union(&self, other: &ClassRange) -> Option<ClassRange> {
         if !self.is_contiguous(other) {
             return None;
         }
         let start = core::cmp::min(self.start, other.start);
         let end = core::cmp::max(self.end, other.end);
         Some(ClassRange { start, end })
     }

     /// Returns true if and only if the two ranges are contiguous. Two ranges
     /// are contiguous if and only if the ranges are either overlapping or
     /// adjacent.
     fn is_contiguous(&self, other: &ClassRange) -> bool {
         let (s1, e1) = (u32::from(self.start), u32::from(self.end));
         let (s2, e2) = (u32::from(other.start), u32::from(other.end));
         core::cmp::max(s1, s2) <= core::cmp::min(e1, e2).saturating_add(1)
     }

     /// Returns true if and only if the intersection of this range and the
     /// other range is empty.
     fn is_intersection_empty(&self, other: &ClassRange) -> bool {
         let (s1, e1) = (self.start, self.end);
         let (s2, e2) = (other.start, other.end);
         core::cmp::max(s1, s2) > core::cmp::min(e1, e2)
     }
 }

 /// The high-level intermediate representation for a look-around assertion.
 ///
 /// An assertion match is always zero-length. Also called an "empty match."
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub(crate) enum Look {
     /// Match the beginning of text. Specifically, this matches at the starting
     /// position of the input.
     Start = 1 << 0,
     /// Match the end of text. Specifically, this matches at the ending
     /// position of the input.
     End = 1 << 1,
     /// Match the beginning of a line or the beginning of text. Specifically,
     /// this matches at the starting position of the input, or at the position
     /// immediately following a `\n` character.
     StartLF = 1 << 2,
     /// Match the end of a line or the end of text. Specifically, this matches
     /// at the end position of the input, or at the position immediately
     /// preceding a `\n` character.
     EndLF = 1 << 3,
     /// Match the beginning of a line or the beginning of text. Specifically,
     /// this matches at the starting position of the input, or at the position
     /// immediately following either a `\r` or `\n` character, but never after
     /// a `\r` when a `\n` follows.
     StartCRLF = 1 << 4,
     /// Match the end of a line or the end of text. Specifically, this matches
     /// at the end position of the input, or at the position immediately
     /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
     /// precedes it.
     EndCRLF = 1 << 5,
     /// Match an ASCII-only word boundary. That is, this matches a position
     /// where the left adjacent character and right adjacent character
     /// correspond to a word and non-word or a non-word and word character.
     Word = 1 << 6,
     /// Match an ASCII-only negation of a word boundary.
     WordNegate = 1 << 7,
     /// Match the start of an ASCII-only word boundary. That is, this matches a
     /// position at either the beginning of the haystack or where the previous
     /// character is not a word character and the following character is a word
     /// character.
     WordStart = 1 << 8,
     /// Match the end of an ASCII-only word boundary. That is, this matches
     /// a position at either the end of the haystack or where the previous
     /// character is a word character and the following character is not a word
     /// character.
     WordEnd = 1 << 9,
     /// Match the start half of an ASCII-only word boundary. That is, this
     /// matches a position at either the beginning of the haystack or where the
     /// previous character is not a word character.
     WordStartHalf = 1 << 10,
     /// Match the end half of an ASCII-only word boundary. That is, this
     /// matches a position at either the end of the haystack or where the
     /// following character is not a word character.
     WordEndHalf = 1 << 11,
 }

 impl Look {
     /// Returns true if the given position in the given haystack matches this
     /// look-around assertion.
     pub(crate) fn is_match(&self, haystack: &[u8], at: usize) -> bool {
         use self::Look::*;

         match *self {
             Start => at == 0,
             End => at == haystack.len(),
             StartLF => at == 0 || haystack[at - 1] == b'\n',
             EndLF => at == haystack.len() || haystack[at] == b'\n',
             StartCRLF => {
                 at == 0
                     || haystack[at - 1] == b'\n'
                     || (haystack[at - 1] == b'\r'
                         && (at >= haystack.len() || haystack[at] != b'\n'))
             }
             EndCRLF => {
                 at == haystack.len()
                     || haystack[at] == b'\r'
                     || (haystack[at] == b'\n'
                         && (at == 0 || haystack[at - 1] != b'\r'))
             }
             Word => {
                 let word_before =
                     at > 0 && utf8::is_word_byte(haystack[at - 1]);
                 let word_after =
                     at < haystack.len() && utf8::is_word_byte(haystack[at]);
                 word_before != word_after
             }
             WordNegate => {
                 let word_before =
                     at > 0 && utf8::is_word_byte(haystack[at - 1]);
                 let word_after =
                     at < haystack.len() && utf8::is_word_byte(haystack[at]);
                 word_before == word_after
             }
             WordStart => {
                 let word_before =
                     at > 0 && utf8::is_word_byte(haystack[at - 1]);
                 let word_after =
                     at < haystack.len() && utf8::is_word_byte(haystack[at]);
                 !word_before && word_after
             }
             WordEnd => {
                 let word_before =
                     at > 0 && utf8::is_word_byte(haystack[at - 1]);
                 let word_after =
                     at < haystack.len() && utf8::is_word_byte(haystack[at]);
                 word_before && !word_after
             }
             WordStartHalf => {
                 let word_before =
                     at > 0 && utf8::is_word_byte(haystack[at - 1]);
                 !word_before
             }
             WordEndHalf => {
                 let word_after =
                     at < haystack.len() && utf8::is_word_byte(haystack[at]);
                 !word_after
             }
         }
     }
 }

 /// The high-level intermediate representation of a repetition operator.
 ///
 /// A repetition operator permits the repetition of an arbitrary
 /// sub-expression.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) struct Repetition {
     /// The minimum range of the repetition.
     ///
     /// Note that special cases like `?`, `+` and `*` all get translated into
     /// the ranges `{0,1}`, `{1,}` and `{0,}`, respectively.
     ///
     /// When `min` is zero, this expression can match the empty string
     /// regardless of what its sub-expression is.
     pub(crate) min: u32,
     /// The maximum range of the repetition.
     ///
     /// Note that when `max` is `None`, `min` acts as a lower bound but where
     /// there is no upper bound. For something like `x{5}` where the min and
     /// max are equivalent, `min` will be set to `5` and `max` will be set to
     /// `Some(5)`.
     pub(crate) max: Option<u32>,
     /// Whether this repetition operator is greedy or not. A greedy operator
     /// will match as much as it can. A non-greedy operator will match as
     /// little as it can.
     ///
     /// Typically, operators are greedy by default and are only non-greedy when
     /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
     /// not. However, this can be inverted via the `U` "ungreedy" flag.
     pub(crate) greedy: bool,
     /// The expression being repeated.
     pub(crate) sub: Box<Hir>,
 }

 /// The high-level intermediate representation for a capturing group.
 ///
 /// A capturing group always has an index and a child expression. It may
 /// also have a name associated with it (e.g., `(?P<foo>\w)`), but it's not
 /// necessary.
 ///
 /// Note that there is no explicit representation of a non-capturing group
 /// in a `Hir`. Instead, non-capturing grouping is handled automatically by
 /// the recursive structure of the `Hir` itself.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub(crate) struct Capture {
     /// The capture index of the capture.
     pub(crate) index: u32,
     /// The name of the capture, if it exists.
     pub(crate) name: Option<Box<str>>,
     /// The expression inside the capturing group, which may be empty.
     pub(crate) sub: Box<Hir>,
 }

 fn next_char(ch: char) -> Option<char> {
     // Skip over the surrogate range.
     if ch == '\u{D7FF}' {
         return Some('\u{E000}');
     }
     // OK because char::MAX < u32::MAX and we handle U+D7FF above.
     char::from_u32(u32::from(ch).checked_add(1).unwrap())
 }

 fn prev_char(ch: char) -> Option<char> {
     // Skip over the surrogate range.
     if ch == '\u{E000}' {
         return Some('\u{D7FF}');
     }
     // OK because subtracting 1 from any valid scalar value other than 0
     // and U+E000 yields a valid scalar value.
     Some(char::from_u32(u32::from(ch).checked_sub(1)?).unwrap())
 }

 impl Drop for Hir {
     fn drop(&mut self) {
         use core::mem;

         match *self.kind() {
             HirKind::Empty
             | HirKind::Char(_)
             | HirKind::Class(_)
             | HirKind::Look(_) => return,
             HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return,
             HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => {
                 return
             }
             HirKind::Concat(ref x) if x.is_empty() => return,
             HirKind::Alternation(ref x) if x.is_empty() => return,
             _ => {}
         }

         let mut stack = vec![mem::replace(self, Hir::empty())];
         while let Some(mut expr) = stack.pop() {
             match expr.kind {
                 HirKind::Empty
                 | HirKind::Char(_)
                 | HirKind::Class(_)
                 | HirKind::Look(_) => {}
                 HirKind::Capture(ref mut x) => {
                     stack.push(mem::replace(&mut x.sub, Hir::empty()));
                 }
                 HirKind::Repetition(ref mut x) => {
                     stack.push(mem::replace(&mut x.sub, Hir::empty()));
                 }
                 HirKind::Concat(ref mut x) => {
                     stack.extend(x.drain(..));
                 }
                 HirKind::Alternation(ref mut x) => {
                     stack.extend(x.drain(..));
                 }
             }
         }
     }
 }