use crate::{
hybrid::{
dfa::{Cache, DFA},
id::{LazyStateID, OverlappingState, StateMatch},
},
nfa::thompson,
util::{
id::PatternID,
matchtypes::{HalfMatch, MatchError},
prefilter, MATCH_OFFSET,
},
};
#[inline(never)]
pub(crate) fn find_earliest_fwd(
pre: Option<&mut prefilter::Scanner>,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
// Searching with a pattern ID is always anchored, so we should never use
// a prefilter.
if pre.is_some() && pattern_id.is_none() {
find_fwd(pre, true, dfa, cache, pattern_id, bytes, start, end)
} else {
find_fwd(None, true, dfa, cache, pattern_id, bytes, start, end)
}
}
#[inline(never)]
pub(crate) fn find_leftmost_fwd(
pre: Option<&mut prefilter::Scanner>,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
// Searching with a pattern ID is always anchored, so we should never use
// a prefilter.
if pre.is_some() && pattern_id.is_none() {
find_fwd(pre, false, dfa, cache, pattern_id, bytes, start, end)
} else {
find_fwd(None, false, dfa, cache, pattern_id, bytes, start, end)
}
}
#[inline(always)]
fn find_fwd(
mut pre: Option<&mut prefilter::Scanner>,
earliest: bool,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
haystack: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
assert!(start <= end);
assert!(start <= haystack.len());
assert!(end <= haystack.len());
// Why do this? This lets 'bytes[at]' work without bounds checks below.
// It seems the assert on 'end <= haystack.len()' above is otherwise
// not enough. Why not just make 'bytes' scoped this way anyway? Well,
// 'eoi_fwd' (below) might actually want to try to access the byte at 'end'
// for resolving look-ahead.
let bytes = &haystack[..end];
let mut sid = init_fwd(dfa, cache, pattern_id, haystack, start, end)?;
let mut last_match = None;
let mut at = start;
if let Some(ref mut pre) = pre {
// If a prefilter doesn't report false positives, then we don't need to
// touch the DFA at all. However, since all matches include the pattern
// ID, and the prefilter infrastructure doesn't report pattern IDs, we
// limit this optimization to cases where there is exactly one pattern.
// In that case, any match must be the 0th pattern.
if dfa.pattern_count() == 1 && !pre.reports_false_positives() {
return Ok(pre.next_candidate(bytes, at).into_option().map(
|offset| HalfMatch { pattern: PatternID::ZERO, offset },
));
} else if pre.is_effective(at) {
match pre.next_candidate(bytes, at).into_option() {
None => return Ok(None),
Some(i) => {
at = i;
}
}
}
}
while at < end {
if sid.is_tagged() {
sid = dfa
.next_state(cache, sid, bytes[at])
.map_err(|_| gave_up(at))?;
at += 1;
} else {
// SAFETY: There are two safety invariants we need to uphold
// here in the loop below: that 'sid' is a valid state ID for
// this DFA, and that 'at' is a valid index into 'bytes'. For
// the former, we rely on the invariant that next_state* and
            // start_state_forward always return a valid state ID (given a
// valid state ID in the former case), and that we are only at this
// place in the code if 'sid' is untagged. Moreover, every call to
// next_state_untagged_unchecked below is guarded by a check that
// sid is untagged. For the latter safety invariant, we always
// guard unchecked access with a check that 'at' is less than
// 'end', where 'end == bytes.len()'.
//
            // For justification: eliding these bounds checks gives us a ~10%
            // improvement in search time. This benchmark was used:
//
// regex-cli find hybrid regex @/some/big/file '(?m)^.+$' -UBb
//
// With bounds checked: ~881.4ms. Without: ~775ms. For input, I
// used OpenSubtitles2018.raw.sample.medium.en.
let mut prev_sid = sid;
while at < end {
prev_sid = sid;
sid = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
at += 1;
if sid.is_tagged() {
break;
}
                // SAFETY: we make four unguarded accesses to 'bytes[at]'
                // below, and each is safe because 'at + 4 < end' guarantees
                // that all four indices are in bounds. Moreover, while we
                // don't check whether 'sid' is untagged directly, we know
                // it is because of the check above.
// And the unrolled loop below quits when the next state is not
// equal to the previous state.
//
// PERF: For justification for eliminating bounds checks,
// see above. For justification for the unrolling, we use
// two tests. The one above with regex '(?m)^.+$', and also
// '(?m)^.{40}$'. The former is kinda the best case for
                // unrolling, and gives a 1.67x boost primarily because the DFA
// spends most of its time munching through the input in the
// same state. But the latter pattern rarely spends time in the
// same state through subsequent transitions, so unrolling is
// pretty much always ineffective in that it craps out on the
// first 'sid != next' check below. However, without unrolling,
// search is only 1.03 times faster than with unrolling on the
// latter pattern, which we deem to be an acceptable loss in
// favor of optimizing the more common case of having a "hot"
// state somewhere in the DFA.
while at + 4 < end {
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at += 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at += 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at += 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at += 1;
}
}
if sid.is_unknown() {
sid = dfa
.next_state(cache, prev_sid, bytes[at - 1])
.map_err(|_| gave_up(at - 1))?;
}
}
if sid.is_tagged() {
if sid.is_start() {
if let Some(ref mut pre) = pre {
if pre.is_effective(at) {
match pre.next_candidate(bytes, at).into_option() {
None => return Ok(None),
Some(i) => {
at = i;
}
}
}
}
} else if sid.is_match() {
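                // Matches are delayed by MATCH_OFFSET bytes (1 in this
                // version), so the match itself ended at 'at - MATCH_OFFSET'.
                // For example, searching for 'a' in "abc" enters the match
                // state on the transition for 'b' (when 'at' becomes 2), and
                // the end of the match is correctly reported as offset 1.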
last_match = Some(HalfMatch {
pattern: dfa.match_pattern(cache, sid, 0),
offset: at - MATCH_OFFSET,
});
if earliest {
return Ok(last_match);
}
} else if sid.is_dead() {
return Ok(last_match);
} else if sid.is_quit() {
if last_match.is_some() {
return Ok(last_match);
}
let offset = at - 1;
return Err(MatchError::Quit { byte: bytes[offset], offset });
} else {
debug_assert!(sid.is_unknown());
unreachable!("sid being unknown is a bug");
}
}
}
// We are careful to use 'haystack' here, which contains the full context
// that we might want to inspect.
Ok(eoi_fwd(dfa, cache, haystack, end, &mut sid)?.or(last_match))
}
#[inline(never)]
pub(crate) fn find_earliest_rev(
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
find_rev(true, dfa, cache, pattern_id, bytes, start, end)
}
#[inline(never)]
pub(crate) fn find_leftmost_rev(
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
find_rev(false, dfa, cache, pattern_id, bytes, start, end)
}
#[inline(always)]
fn find_rev(
earliest: bool,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
haystack: &[u8],
start: usize,
end: usize,
) -> Result<Option<HalfMatch>, MatchError> {
assert!(start <= end);
assert!(start <= haystack.len());
assert!(end <= haystack.len());
    // Why do this? This lets 'bytes[at]' work without bounds checks below,
    // since 'at' is always less than 'end - start'. Why not just make
    // 'bytes' scoped this way anyway? Well, 'eoi_rev' (below) might actually
    // want to try to access the byte at 'start - 1' for resolving
    // look-behind.
let bytes = &haystack[start..];
let mut sid = init_rev(dfa, cache, pattern_id, haystack, start, end)?;
let mut last_match = None;
let mut at = end - start;
while at > 0 {
if sid.is_tagged() {
at -= 1;
sid = dfa
.next_state(cache, sid, bytes[at])
                .map_err(|_| gave_up(start + at))?;
} else {
// SAFETY: See comments in 'find_fwd' for both a safety argument
// and a justification from a performance perspective as to 1) why
// we elide bounds checks and 2) why we do a specialized version of
// unrolling below.
let mut prev_sid = sid;
while at > 0 && !sid.is_tagged() {
prev_sid = sid;
at -= 1;
while at > 3 {
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at -= 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at -= 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at -= 1;
let next = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
if sid != next {
break;
}
at -= 1;
}
sid = unsafe {
dfa.next_state_untagged_unchecked(
cache,
sid,
*bytes.get_unchecked(at),
)
};
}
if sid.is_unknown() {
sid = dfa
.next_state(cache, prev_sid, bytes[at])
                    .map_err(|_| gave_up(start + at))?;
}
}
if sid.is_tagged() {
if sid.is_start() {
continue;
} else if sid.is_match() {
last_match = Some(HalfMatch {
pattern: dfa.match_pattern(cache, sid, 0),
offset: start + at + MATCH_OFFSET,
});
if earliest {
return Ok(last_match);
}
} else if sid.is_dead() {
return Ok(last_match);
} else {
debug_assert!(sid.is_quit());
if last_match.is_some() {
return Ok(last_match);
}
                return Err(MatchError::Quit {
                    byte: bytes[at],
                    offset: start + at,
                });
}
}
}
Ok(eoi_rev(dfa, cache, haystack, start, sid)?.or(last_match))
}
#[inline(never)]
pub(crate) fn find_overlapping_fwd(
pre: Option<&mut prefilter::Scanner>,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
caller_state: &mut OverlappingState,
) -> Result<Option<HalfMatch>, MatchError> {
// Searching with a pattern ID is always anchored, so we should only ever
// use a prefilter when no pattern ID is given.
if pre.is_some() && pattern_id.is_none() {
find_overlapping_fwd_imp(
pre,
dfa,
cache,
pattern_id,
bytes,
start,
end,
caller_state,
)
} else {
find_overlapping_fwd_imp(
None,
dfa,
cache,
pattern_id,
bytes,
start,
end,
caller_state,
)
}
}
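
// A sketch of the caller-side protocol for the overlapping search above,
// assuming in-scope 'dfa', 'cache' and 'haystack' values and a hypothetical
// 'handle' routine (the real iterators live elsewhere in this crate). The
// crucial bits are that the same 'OverlappingState' is threaded through
// every call and that each search resumes from the previous match's end
// offset; the MATCH_OFFSET bump in the implementation below then guarantees
// forward progress, even for empty matches:
//
//     let mut state = OverlappingState::start();
//     let mut start = 0;
//     while let Some(m) = find_overlapping_fwd(
//         None, &dfa, &mut cache, None, haystack, start, haystack.len(),
//         &mut state,
//     )? {
//         handle(m);
//         start = m.offset();
//     }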
#[inline(always)]
fn find_overlapping_fwd_imp(
mut pre: Option<&mut prefilter::Scanner>,
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
mut start: usize,
end: usize,
caller_state: &mut OverlappingState,
) -> Result<Option<HalfMatch>, MatchError> {
assert!(start <= end);
assert!(start <= bytes.len());
assert!(end <= bytes.len());
let mut sid = match caller_state.id() {
None => init_fwd(dfa, cache, pattern_id, bytes, start, end)?,
Some(sid) => {
if let Some(last) = caller_state.last_match() {
let match_count = dfa.match_count(cache, sid);
if last.match_index < match_count {
let m = HalfMatch {
pattern: dfa.match_pattern(
cache,
sid,
last.match_index,
),
offset: last.offset,
};
last.match_index += 1;
return Ok(Some(m));
}
}
// This is a subtle but critical detail. If the caller provides a
// non-None state ID, then it must be the case that the state ID
// corresponds to one set by this function. The state ID therefore
// corresponds to a match state, a dead state or some other state.
// However, "some other" state _only_ occurs when the input has
// been exhausted because the only way to stop before then is to
// see a match or a dead/quit state.
//
// If the input is exhausted or if it's a dead state, then
            // incrementing the starting position has no bearing on
// correctness, since the loop below will either not execute
// at all or will immediately stop due to being in a dead state.
// (Once in a dead state it is impossible to leave it.)
//
// Therefore, the only case we need to consider is when
// caller_state is a match state. In this case, since our machines
// support the ability to delay a match by a certain number of
// bytes (to support look-around), it follows that we actually
// consumed that many additional bytes on our previous search. When
// the caller resumes their search to find subsequent matches, they
// will use the ending location from the previous match as the next
            // starting point, which is `MATCH_OFFSET` bytes PRIOR to where
// we scanned to on the previous search. Therefore, we need to
// compensate by bumping `start` up by `MATCH_OFFSET` bytes.
//
// Incidentally, since MATCH_OFFSET is non-zero, this also makes
// dealing with empty matches convenient. Namely, callers needn't
// special case them when implementing an iterator. Instead, this
// ensures that forward progress is always made.
start += MATCH_OFFSET;
sid
}
};
let mut at = start;
while at < end {
let byte = bytes[at];
sid = dfa.next_state(cache, sid, byte).map_err(|_| gave_up(at))?;
at += 1;
if sid.is_tagged() {
caller_state.set_id(sid);
if sid.is_start() {
if let Some(ref mut pre) = pre {
if pre.is_effective(at) {
match pre.next_candidate(bytes, at).into_option() {
None => return Ok(None),
Some(i) => {
at = i;
}
}
}
}
} else if sid.is_match() {
let offset = at - MATCH_OFFSET;
caller_state
.set_last_match(StateMatch { match_index: 1, offset });
return Ok(Some(HalfMatch {
pattern: dfa.match_pattern(cache, sid, 0),
offset,
}));
} else if sid.is_dead() {
return Ok(None);
} else {
debug_assert!(sid.is_quit());
return Err(MatchError::Quit { byte, offset: at - 1 });
}
}
}
let result = eoi_fwd(dfa, cache, bytes, end, &mut sid);
caller_state.set_id(sid);
if let Ok(Some(ref last_match)) = result {
caller_state.set_last_match(StateMatch {
// '1' is always correct here since if we get to this point, this
// always corresponds to the first (index '0') match discovered at
// this position. So the next match to report at this position (if
// it exists) is at index '1'.
match_index: 1,
offset: last_match.offset(),
});
}
result
}
#[inline(always)]
fn init_fwd(
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<LazyStateID, MatchError> {
let sid = dfa
.start_state_forward(cache, pattern_id, bytes, start, end)
.map_err(|_| gave_up(start))?;
// Start states can never be match states, since all matches are delayed
// by 1 byte.
assert!(!sid.is_match());
Ok(sid)
}
#[inline(always)]
fn init_rev(
dfa: &DFA,
cache: &mut Cache,
pattern_id: Option<PatternID>,
bytes: &[u8],
start: usize,
end: usize,
) -> Result<LazyStateID, MatchError> {
let sid = dfa
.start_state_reverse(cache, pattern_id, bytes, start, end)
.map_err(|_| gave_up(end))?;
// Start states can never be match states, since all matches are delayed
// by 1 byte.
assert!(!sid.is_match());
Ok(sid)
}
#[inline(always)]
fn eoi_fwd(
dfa: &DFA,
cache: &mut Cache,
bytes: &[u8],
end: usize,
sid: &mut LazyStateID,
) -> Result<Option<HalfMatch>, MatchError> {
match bytes.get(end) {
Some(&b) => {
*sid = dfa.next_state(cache, *sid, b).map_err(|_| gave_up(end))?;
if sid.is_match() {
Ok(Some(HalfMatch {
pattern: dfa.match_pattern(cache, *sid, 0),
offset: end,
}))
} else {
Ok(None)
}
}
None => {
*sid = dfa
.next_eoi_state(cache, *sid)
.map_err(|_| gave_up(bytes.len()))?;
if sid.is_match() {
Ok(Some(HalfMatch {
pattern: dfa.match_pattern(cache, *sid, 0),
offset: bytes.len(),
}))
} else {
Ok(None)
}
}
}
}
#[inline(always)]
fn eoi_rev(
dfa: &DFA,
cache: &mut Cache,
bytes: &[u8],
start: usize,
state: LazyStateID,
) -> Result<Option<HalfMatch>, MatchError> {
if start > 0 {
let sid = dfa
.next_state(cache, state, bytes[start - 1])
.map_err(|_| gave_up(start))?;
if sid.is_match() {
Ok(Some(HalfMatch {
pattern: dfa.match_pattern(cache, sid, 0),
offset: start,
}))
} else {
Ok(None)
}
} else {
let sid =
dfa.next_eoi_state(cache, state).map_err(|_| gave_up(start))?;
if sid.is_match() {
Ok(Some(HalfMatch {
pattern: dfa.match_pattern(cache, sid, 0),
offset: 0,
}))
} else {
Ok(None)
}
}
}
/// A convenience routine for constructing a "gave up" match error.
#[inline(always)]
fn gave_up(offset: usize) -> MatchError {
MatchError::GaveUp { offset }
}
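
// A minimal end-to-end sketch of how the search routines in this module are
// driven, assuming the crate's public 'DFA::new' and 'DFA::create_cache'
// constructors from the hybrid API. The pattern and haystack are arbitrary.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn leftmost_fwd_smoke() -> Result<(), Box<dyn std::error::Error>> {
        let dfa = DFA::new(r"[a-z]+")?;
        let mut cache = dfa.create_cache();
        let haystack = b"123 abc 456";
        // An unanchored search over the whole haystack, with no prefilter
        // and no specific pattern ID.
        let m = find_leftmost_fwd(
            None, &dfa, &mut cache, None, haystack, 0, haystack.len(),
        )?
        .expect("expected a match");
        // Forward searches report the end of a match, so 'abc' (spanning
        // 4..7) is reported with offset 7.
        assert_eq!(m.offset(), 7);
        Ok(())
    }
}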