blob: 0564ad4fbb8a19fd269b1aa658a6e93c6b9c633b [file] [log] [blame]
use alloc::{
string::{String, ToString},
vec,
vec::Vec,
};
use crate::ext::Byte;
pub(crate) mod naive;
#[macro_use]
pub(crate) mod prop;
const SEEDS: &'static [Seed] = &[
Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
Seed { haystack: "", needles: &[b'a'], positions: &[] },
Seed { haystack: "z", needles: &[b'a'], positions: &[] },
Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
Seed {
haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
needles: &[b'a'],
positions: &[32],
},
// two needles (applied to memchr2 + memchr3)
Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
// three needles (applied to memchr3)
Seed {
haystack: "xyz",
needles: &[b'x', b'y', b'z'],
positions: &[0, 1, 2],
},
Seed {
haystack: "zxy",
needles: &[b'x', b'y', b'z'],
positions: &[0, 1, 2],
},
Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
];
/// Runs a host of substring search tests.
///
/// This has support for "partial" substring search implementations only work
/// for a subset of needles/haystacks. For example, the "packed pair" substring
/// search implementation only works for haystacks of some minimum length based
/// of the pair of bytes selected and the size of the vector used.
pub(crate) struct Runner {
needle_len: usize,
}
impl Runner {
/// Create a new test runner for forward and reverse byte search
/// implementations.
///
/// The `needle_len` given must be at most `3` and at least `1`. It
/// corresponds to the number of needle bytes to search for.
pub(crate) fn new(needle_len: usize) -> Runner {
assert!(needle_len >= 1, "needle_len must be at least 1");
assert!(needle_len <= 3, "needle_len must be at most 3");
Runner { needle_len }
}
/// Run all tests. This panics on the first failure.
///
/// If the implementation being tested returns `None` for a particular
/// haystack/needle combination, then that test is skipped.
pub(crate) fn forward_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let results = match test(t.haystack.as_bytes(), &t.needles) {
None => continue,
Some(results) => results,
};
assert_eq!(
t.expected,
results,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Run all tests in the reverse direction. This panics on the first
/// failure.
///
/// If the implementation being tested returns `None` for a particular
/// haystack/needle combination, then that test is skipped.
pub(crate) fn reverse_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let mut results = match test(t.haystack.as_bytes(), &t.needles)
{
None => continue,
Some(results) => results,
};
results.reverse();
assert_eq!(
t.expected,
results,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Run all tests as counting tests. This panics on the first failure.
///
/// That is, this only checks that the number of matches is correct and
/// not whether the offsets of each match are.
pub(crate) fn count_iter<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
{
for seed in SEEDS.iter() {
if seed.needles.len() > self.needle_len {
continue;
}
for t in seed.generate() {
let got = match test(t.haystack.as_bytes(), &t.needles) {
None => continue,
Some(got) => got,
};
assert_eq!(
t.expected.len(),
got,
"needles: {:?}, haystack: {:?}",
t.needles
.iter()
.map(|&b| b.to_char())
.collect::<Vec<char>>(),
t.haystack,
);
}
}
}
/// Like `Runner::forward`, but for a function that returns only the next
/// match and not all matches.
///
/// If the function returns `None`, then it is skipped.
pub(crate) fn forward_oneshot<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
{
self.forward_iter(move |haystack, needles| {
let mut start = 0;
let mut results = vec![];
while let Some(i) = test(&haystack[start..], needles)? {
results.push(start + i);
start += i + 1;
}
Some(results)
})
}
/// Like `Runner::reverse`, but for a function that returns only the last
/// match and not all matches.
///
/// If the function returns `None`, then it is skipped.
pub(crate) fn reverse_oneshot<F>(self, mut test: F)
where
F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
{
self.reverse_iter(move |haystack, needles| {
let mut end = haystack.len();
let mut results = vec![];
while let Some(i) = test(&haystack[..end], needles)? {
results.push(i);
end = i;
}
Some(results)
})
}
}
/// A single test for memr?chr{,2,3}.
#[derive(Clone, Debug)]
struct Test {
/// The string to search in.
haystack: String,
/// The needles to look for.
needles: Vec<u8>,
/// The offsets that are expected to be found for all needles in the
/// forward direction.
expected: Vec<usize>,
}
impl Test {
fn new(seed: &Seed) -> Test {
Test {
haystack: seed.haystack.to_string(),
needles: seed.needles.to_vec(),
expected: seed.positions.to_vec(),
}
}
}
/// Data that can be expanded into many memchr tests by padding out the corpus.
#[derive(Clone, Debug)]
struct Seed {
/// The thing to search. We use `&str` instead of `&[u8]` because they
/// are nicer to write in tests, and we don't miss much since memchr
/// doesn't care about UTF-8.
///
/// Corpora cannot contain either '%' or '#'. We use these bytes when
/// expanding test cases into many test cases, and we assume they are not
/// used. If they are used, `memchr_tests` will panic.
haystack: &'static str,
/// The needles to search for. This is intended to be an alternation of
/// needles. The number of needles may cause this test to be skipped for
/// some memchr variants. For example, a test with 2 needles cannot be used
/// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
/// However, a test with only 1 needle can be used to test all of `memchr`,
/// `memchr2` and `memchr3`. We achieve this by filling in the needles with
/// bytes that we never used in the corpus (such as '#').
needles: &'static [u8],
/// The positions expected to match for all of the needles.
positions: &'static [usize],
}
impl Seed {
/// Controls how much we expand the haystack on either side for each test.
/// We lower this on Miri because otherwise running the tests would take
/// forever.
const EXPAND_LEN: usize = {
#[cfg(not(miri))]
{
515
}
#[cfg(miri)]
{
6
}
};
/// Expand this test into many variations of the same test.
///
/// In particular, this will generate more tests with larger corpus sizes.
/// The expected positions are updated to maintain the integrity of the
/// test.
///
/// This is important in testing a memchr implementation, because there are
/// often different cases depending on the length of the corpus.
///
/// Note that we extend the corpus by adding `%` bytes, which we
/// don't otherwise use as a needle.
fn generate(&self) -> impl Iterator<Item = Test> {
let mut more = vec![];
// Add bytes to the start of the corpus.
for i in 0..Seed::EXPAND_LEN {
let mut t = Test::new(self);
let mut new: String = core::iter::repeat('%').take(i).collect();
new.push_str(&t.haystack);
t.haystack = new;
t.expected = t.expected.into_iter().map(|p| p + i).collect();
more.push(t);
}
// Add bytes to the end of the corpus.
for i in 1..Seed::EXPAND_LEN {
let mut t = Test::new(self);
let padding: String = core::iter::repeat('%').take(i).collect();
t.haystack.push_str(&padding);
more.push(t);
}
more.into_iter()
}
}