| use alloc::{boxed::Box, vec, vec::Vec}; |
| |
| /// A set of "packed pair" test seeds. Each seed serves as the base for the |
| /// generation of many other tests. In essence, the seed captures the pair of |
| /// bytes we used for a predicate and first byte among our needle. The tests |
| /// generated from each seed essentially vary the length of the needle and |
| /// haystack, while using the rare/first byte configuration from the seed. |
| /// |
| /// The purpose of this is to test many different needle/haystack lengths. |
| /// In particular, some of the vector optimizations might only have bugs |
| /// in haystacks of a certain size. |
| const SEEDS: &[Seed] = &[ |
| // Why not use different 'first' bytes? It seemed like a good idea to be |
| // able to configure it, but when I wrote the test generator below, it |
| // didn't seem necessary to use for reasons that I forget. |
| Seed { first: b'x', index1: b'y', index2: b'z' }, |
| Seed { first: b'x', index1: b'x', index2: b'z' }, |
| Seed { first: b'x', index1: b'y', index2: b'x' }, |
| Seed { first: b'x', index1: b'x', index2: b'x' }, |
| Seed { first: b'x', index1: b'y', index2: b'y' }, |
| ]; |
| |
| /// Runs a host of "packed pair" search tests. |
| /// |
| /// These tests specifically look for the occurrence of a possible substring |
| /// match based on a pair of bytes matching at the right offsets. |
| pub(crate) struct Runner { |
| fwd: Option< |
| Box< |
| dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
| >, |
| >, |
| } |
| |
| impl Runner { |
| /// Create a new test runner for "packed pair" substring search. |
| pub(crate) fn new() -> Runner { |
| Runner { fwd: None } |
| } |
| |
| /// Run all tests. This panics on the first failure. |
| /// |
| /// If the implementation being tested returns `None` for a particular |
| /// haystack/needle combination, then that test is skipped. |
| /// |
| /// This runs tests on both the forward and reverse implementations given. |
| /// If either (or both) are missing, then tests for that implementation are |
| /// skipped. |
| pub(crate) fn run(self) { |
| if let Some(mut fwd) = self.fwd { |
| for seed in SEEDS.iter() { |
| for t in seed.generate() { |
| match fwd(&t.haystack, &t.needle, t.index1, t.index2) { |
| None => continue, |
| Some(result) => { |
| assert_eq!( |
| t.fwd, result, |
| "FORWARD, needle: {:?}, haystack: {:?}, \ |
| index1: {:?}, index2: {:?}", |
| t.needle, t.haystack, t.index1, t.index2, |
| ) |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| /// Set the implementation for forward "packed pair" substring search. |
| /// |
| /// If the closure returns `None`, then it is assumed that the given |
| /// test cannot be applied to the particular implementation and it is |
| /// skipped. For example, if a particular implementation only supports |
| /// needles or haystacks for some minimum length. |
| /// |
| /// If this is not set, then forward "packed pair" search is not tested. |
| pub(crate) fn fwd( |
| mut self, |
| search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
| ) -> Runner { |
| self.fwd = Some(Box::new(search)); |
| self |
| } |
| } |
| |
| /// A test that represents the input and expected output to a "packed pair" |
| /// search function. The test should be able to run with any "packed pair" |
| /// implementation and get the expected output. |
| struct Test { |
| haystack: Vec<u8>, |
| needle: Vec<u8>, |
| index1: u8, |
| index2: u8, |
| fwd: Option<usize>, |
| } |
| |
| impl Test { |
| /// Create a new "packed pair" test from a seed and some given offsets to |
| /// the pair of bytes to use as a predicate in the seed's needle. |
| /// |
| /// If a valid test could not be constructed, then None is returned. |
| /// (Currently, we take the approach of massaging tests to be valid |
| /// instead of rejecting them outright.) |
| fn new( |
| seed: Seed, |
| index1: usize, |
| index2: usize, |
| haystack_len: usize, |
| needle_len: usize, |
| fwd: Option<usize>, |
| ) -> Option<Test> { |
| let mut index1: u8 = index1.try_into().unwrap(); |
| let mut index2: u8 = index2.try_into().unwrap(); |
| // The '#' byte is never used in a haystack (unless we're expecting |
| // a match), while the '@' byte is never used in a needle. |
| let mut haystack = vec![b'@'; haystack_len]; |
| let mut needle = vec![b'#'; needle_len]; |
| needle[0] = seed.first; |
| needle[index1 as usize] = seed.index1; |
| needle[index2 as usize] = seed.index2; |
| // If we're expecting a match, then make sure the needle occurs |
| // in the haystack at the expected position. |
| if let Some(i) = fwd { |
| haystack[i..i + needle.len()].copy_from_slice(&needle); |
| } |
| // If the operations above lead to rare offsets pointing to the |
| // non-first occurrence of a byte, then adjust it. This might lead |
| // to redundant tests, but it's simpler than trying to change the |
| // generation process I think. |
| if let Some(i) = crate::memchr(seed.index1, &needle) { |
| index1 = u8::try_from(i).unwrap(); |
| } |
| if let Some(i) = crate::memchr(seed.index2, &needle) { |
| index2 = u8::try_from(i).unwrap(); |
| } |
| Some(Test { haystack, needle, index1, index2, fwd }) |
| } |
| } |
| |
| /// Data that describes a single prefilter test seed. |
| #[derive(Clone, Copy)] |
| struct Seed { |
| first: u8, |
| index1: u8, |
| index2: u8, |
| } |
| |
| impl Seed { |
| const NEEDLE_LENGTH_LIMIT: usize = { |
| #[cfg(not(miri))] |
| { |
| 33 |
| } |
| #[cfg(miri)] |
| { |
| 5 |
| } |
| }; |
| |
| const HAYSTACK_LENGTH_LIMIT: usize = { |
| #[cfg(not(miri))] |
| { |
| 65 |
| } |
| #[cfg(miri)] |
| { |
| 8 |
| } |
| }; |
| |
| /// Generate a series of prefilter tests from this seed. |
| fn generate(self) -> impl Iterator<Item = Test> { |
| let len_start = 2; |
| // The iterator below generates *a lot* of tests. The number of |
| // tests was chosen somewhat empirically to be "bearable" when |
| // running the test suite. |
| // |
| // We use an iterator here because the collective haystacks of all |
| // these test cases add up to enough memory to OOM a conservative |
| // sandbox or a small laptop. |
| (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { |
| let index_start = len_start - 1; |
| (index_start..needle_len).flat_map(move |index1| { |
| (index1..needle_len).flat_map(move |index2| { |
| (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( |
| move |haystack_len| { |
| Test::new( |
| self, |
| index1, |
| index2, |
| haystack_len, |
| needle_len, |
| None, |
| ) |
| .into_iter() |
| .chain( |
| (0..=(haystack_len - needle_len)).flat_map( |
| move |output| { |
| Test::new( |
| self, |
| index1, |
| index2, |
| haystack_len, |
| needle_len, |
| Some(output), |
| ) |
| }, |
| ), |
| ) |
| }, |
| ) |
| }) |
| }) |
| }) |
| } |
| } |