/build/cargo-vendor-dir/memchr-2.7.1/src/arch/all/packedpair/mod.rs

Source
/*!
Provides an architecture independent implementation of the "packed pair"
algorithm.

The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
difference is that it (by default) uses a background distribution of byte
frequencies to heuristically select the pair of bytes to search for. Note that
this module provides an architecture independent version that doesn't do as
good of a job keeping the search for candidates inside a SIMD hot path. It
however can be good enough in many circumstances.

[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
*/

use crate::memchr;

mod default_rank;

/// An architecture independent "packed pair" finder.
///
/// This finder picks two bytes that it believes have high predictive power for
/// indicating an overall match of a needle. At search time, it reports offsets
/// where the needle could match based on whether the pair of bytes it chose
/// match.
///
/// This is architecture independent because it utilizes `memchr` to find the
/// occurrence of one of the bytes in the pair, and then checks whether the
/// second byte matches. If it does, in the case of [`Finder::find_prefilter`],
/// the location at which the needle could match is returned.
///
/// It is generally preferred to use architecture specific routines for a
/// "packed pair" prefilter, but this can be a useful fallback when the
/// architecture independent routines are unavailable.
#[derive(Clone, Copy, Debug)]
pub struct Finder {
    pair: Pair,
    byte1: u8,
    byte2: u8,
}

impl Finder {
    /// Create a new prefilter that reports possible locations where the given
    /// needle matches.
    #[inline]
    pub fn new(needle: &[u8]) -> Option<Finder> {
        Finder::with_pair(needle, Pair::new(needle)?)
    }

    /// Create a new prefilter using the pair given.
    ///
    /// If the prefilter could not be constructed, then `None` is returned.
    ///
    /// This constructor permits callers to control precisely which pair of
    /// bytes is used as a predicate.
    #[inline]
    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
        let byte1 = needle[usize::from(pair.index1())];
        let byte2 = needle[usize::from(pair.index2())];
        // Currently this can never fail so we could just return a Finder,
        // but it's conceivable this could change.
        Some(Finder { pair, byte1, byte2 })
    }

    /// Run this finder on the given haystack as a prefilter.
    ///
    /// If a candidate match is found, then an offset where the needle *could*
    /// begin in the haystack is returned.
    #[inline]
    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
        let mut i = 0;
        let index1 = usize::from(self.pair.index1());
        let index2 = usize::from(self.pair.index2());
        loop {
            // Use a fast vectorized implementation to skip to the next
            // occurrence of the rarest byte (heuristically chosen) in the
            // needle.
            i += memchr(self.byte1, &haystack[i..])?;
            let found = i;
            i += 1;

            // If we can't align our first byte match with the haystack, then a
            // match is impossible.
            let aligned1 = match found.checked_sub(index1) {
                None => continue,
                Some(aligned1) => aligned1,
            };

            // Now align the second byte match with the haystack. A mismatch
            // means that a match is impossible.
            let aligned2 = match aligned1.checked_add(index2) {
                None => continue,
                Some(aligned_index2) => aligned_index2,
            };
            if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) {
                continue;
            }

            // We've done what we can. There might be a match here.
            return Some(aligned1);
        }
    }

    /// Returns the pair of offsets (into the needle) used to check as a
    /// predicate before confirming whether a needle exists at a particular
    /// position.
    #[inline]
    pub fn pair(&self) -> &Pair {
        &self.pair
    }
}

/// A pair of byte offsets into a needle to use as a predicate.
///
/// This pair is used as a predicate to quickly filter out positions in a
/// haystack in which a needle cannot match. In some cases, this pair can even
/// be used in vector algorithms such that the vector algorithm only switches
/// over to scalar code once this pair has been found.
///
/// A pair of offsets can be used in both substring search implementations and
/// in prefilters. The former will report matches of a needle in a haystack
/// where as the latter will only report possible matches of a needle.
///
/// The offsets are limited each to a maximum of 255 to keep memory usage low.
/// Moreover, it's rarely advantageous to create a predicate using offsets
/// greater than 255 anyway.
///
/// The only guarantee enforced on the pair of offsets is that they are not
/// equivalent. It is not necessarily the case that `index1 < index2` for
/// example. By convention, `index1` corresponds to the byte in the needle
/// that is believed to be most the predictive. Note also that because of the
/// requirement that the indices be both valid for the needle used to build
/// the pair and not equal, it follows that a pair can only be constructed for
/// needles with length at least 2.
#[derive(Clone, Copy, Debug)]
pub struct Pair {
    index1: u8,
    index2: u8,
}

impl Pair {
    /// Create a new pair of offsets from the given needle.
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    ///
    /// This chooses the pair in the needle that is believed to be as
    /// predictive of an overall match of the needle as possible.
    #[inline]
    pub fn new(needle: &[u8]) -> Option<Pair> {
        Pair::with_ranker(needle, DefaultFrequencyRank)
    }

    /// Create a new pair of offsets from the given needle and ranker.
    ///
    /// This permits the caller to choose a background frequency distribution
    /// with which bytes are selected. The idea is to select a pair of bytes
    /// that is believed to strongly predict a match in the haystack. This
    /// usually means selecting bytes that occur rarely in a haystack.
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    #[inline]
    pub fn with_ranker<R: HeuristicFrequencyRank>(
        needle: &[u8],
        ranker: R,
    ) -> Option<Pair> {
        if needle.len() <= 1 {
            return None;
        }
        // Find the rarest two bytes. We make them distinct indices by
        // construction. (The actual byte value may be the same in degenerate
        // cases, but that's OK.)
        let (mut rare1, mut index1) = (needle[0], 0);
        let (mut rare2, mut index2) = (needle[1], 1);
        if ranker.rank(rare2) < ranker.rank(rare1) {
            core::mem::swap(&mut rare1, &mut rare2);
            core::mem::swap(&mut index1, &mut index2);
        }
        let max = usize::from(core::u8::MAX);
        for (i, &b) in needle.iter().enumerate().take(max).skip(2) {
            if ranker.rank(b) < ranker.rank(rare1) {
                rare2 = rare1;
                index2 = index1;
                rare1 = b;
                index1 = u8::try_from(i).unwrap();
            } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) {
                rare2 = b;
                index2 = u8::try_from(i).unwrap();
            }
        }
        // While not strictly required for how a Pair is normally used, we
        // really don't want these to be equivalent. If they were, it would
        // reduce the effectiveness of candidate searching using these rare
        // bytes by increasing the rate of false positives.
        assert_ne!(index1, index2);
        Some(Pair { index1, index2 })
    }

    /// Create a new pair using the offsets given for the needle given.
    ///
    /// This bypasses any sort of heuristic process for choosing the offsets
    /// and permits the caller to choose the offsets themselves.
    ///
    /// Indices are limited to valid `u8` values so that a `Pair` uses less
    /// memory. It is not possible to create a `Pair` with offsets bigger than
    /// `u8::MAX`. It's likely that such a thing is not needed, but if it is,
    /// it's suggested to build your own bespoke algorithm because you're
    /// likely working on a very niche case. (File an issue if this suggestion
    /// does not make sense to you.)
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    #[inline]
    pub fn with_indices(
        needle: &[u8],
        index1: u8,
        index2: u8,
    ) -> Option<Pair> {
        // While not strictly required for how a Pair is normally used, we
        // really don't want these to be equivalent. If they were, it would
        // reduce the effectiveness of candidate searching using these rare
        // bytes by increasing the rate of false positives.
        if index1 == index2 {
            return None;
        }
        // Similarly, invalid indices means the Pair is invalid too.
        if usize::from(index1) >= needle.len() {
            return None;
        }
        if usize::from(index2) >= needle.len() {
            return None;
        }
        Some(Pair { index1, index2 })
    }

    /// Returns the first offset of the pair.
    #[inline]
    pub fn index1(&self) -> u8 {
        self.index1
    }

    /// Returns the second offset of the pair.
    #[inline]
    pub fn index2(&self) -> u8 {
        self.index2
    }
}

/// This trait allows the user to customize the heuristic used to determine the
/// relative frequency of a given byte in the dataset being searched.
///
/// The use of this trait can have a dramatic impact on performance depending
/// on the type of data being searched. The details of why are explained in the
/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses
/// a prefilter to quickly identify candidate matches that are later verified
/// more slowly. This prefilter is implemented in terms of trying to find
/// `rare` bytes at specific offsets that will occur less frequently in the
/// dataset. While the concept of a `rare` byte is similar for most datasets,
/// there are some specific datasets (like binary executables) that have
/// dramatically different byte distributions. For these datasets customizing
/// the byte frequency heuristic can have a massive impact on performance, and
/// might even need to be done at runtime.
///
/// The default implementation of `HeuristicFrequencyRank` reads from the
/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This
/// is optimal for most inputs, so if you are unsure of the impact of using a
/// custom `HeuristicFrequencyRank` you should probably just use the default.
///
/// # Example
///
/// ```
/// use memchr::{
///     arch::all::packedpair::HeuristicFrequencyRank,
///     memmem::FinderBuilder,
/// };
///
/// /// A byte-frequency table that is good for scanning binary executables.
/// struct Binary;
///
/// impl HeuristicFrequencyRank for Binary {
///     fn rank(&self, byte: u8) -> u8 {
///         const TABLE: [u8; 256] = [
///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
///             89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
///             9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
///             4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
///             1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
///             0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
///             2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
///             3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
///             8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
///         ];
///         TABLE[byte as usize]
///     }
/// }
/// // Create a new finder with the custom heuristic.
/// let finder = FinderBuilder::new()
///     .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
/// // Find needle with custom heuristic.
/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
/// ```
pub trait HeuristicFrequencyRank {
    /// Return the heuristic frequency rank of the given byte. A lower rank
    /// means the byte is believed to occur less frequently in the haystack.
    ///
    /// Some uses of this heuristic may treat arbitrary absolute rank values as
    /// significant. For example, an implementation detail in this crate may
    /// determine that heuristic prefilters are inappropriate if every byte in
    /// the needle has a "high" rank.
    fn rank(&self, byte: u8) -> u8;
}

/// The default byte frequency heuristic that is good for most haystacks.
pub(crate) struct DefaultFrequencyRank;

impl HeuristicFrequencyRank for DefaultFrequencyRank {
    fn rank(&self, byte: u8) -> u8 {
        self::default_rank::RANK[usize::from(byte)]
    }
}

/// This permits passing any implementation of `HeuristicFrequencyRank` as a
/// borrowed version of itself.
impl<'a, R> HeuristicFrequencyRank for &'a R
where
    R: HeuristicFrequencyRank,
{
    fn rank(&self, byte: u8) -> u8 {
        (**self).rank(byte)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn forward_packedpair() {
        fn find(
            haystack: &[u8],
            needle: &[u8],
            _index1: u8,
            _index2: u8,
        ) -> Option<Option<usize>> {
            // We ignore the index positions requested since it winds up making
            // this test too slow overall.
            let f = Finder::new(needle)?;
            Some(f.find_prefilter(haystack))
        }
        crate::tests::packedpair::Runner::new().fwd(find).run()
    }
}

Coverage Report

Created: 2025-03-22 00:52

Line	Count	Source
1		/*!
2		Provides an architecture independent implementation of the "packed pair"
3		algorithm.
4
5		The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
6		difference is that it (by default) uses a background distribution of byte
7		frequencies to heuristically select the pair of bytes to search for. Note that
8		this module provides an architecture independent version that doesn't do as
9		good of a job keeping the search for candidates inside a SIMD hot path. It
10		however can be good enough in many circumstances.
11
12		[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
13		*/
14
15		use crate::memchr;
16
17		mod default_rank;
18
19		/// An architecture independent "packed pair" finder.
20		///
21		/// This finder picks two bytes that it believes have high predictive power for
22		/// indicating an overall match of a needle. At search time, it reports offsets
23		/// where the needle could match based on whether the pair of bytes it chose
24		/// match.
25		///
26		/// This is architecture independent because it utilizes `memchr` to find the
27		/// occurrence of one of the bytes in the pair, and then checks whether the
28		/// second byte matches. If it does, in the case of [`Finder::find_prefilter`],
29		/// the location at which the needle could match is returned.
30		///
31		/// It is generally preferred to use architecture specific routines for a
32		/// "packed pair" prefilter, but this can be a useful fallback when the
33		/// architecture independent routines are unavailable.
34		#[derive(Clone, Copy, Debug)]
35		pub struct Finder {
36		pair: Pair,
37		byte1: u8,
38		byte2: u8,
39		}
40
41		impl Finder {
42		/// Create a new prefilter that reports possible locations where the given
43		/// needle matches.
44		#[inline]
45	0	pub fn new(needle: &[u8]) -> Option<Finder> {
46	0	Finder::with_pair(needle, Pair::new(needle)?)
47	0	}
48
49		/// Create a new prefilter using the pair given.
50		///
51		/// If the prefilter could not be constructed, then `None` is returned.
52		///
53		/// This constructor permits callers to control precisely which pair of
54		/// bytes is used as a predicate.
55		#[inline]
56	0	pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
57	0	let byte1 = needle[usize::from(pair.index1())];
58	0	let byte2 = needle[usize::from(pair.index2())];
59	0	// Currently this can never fail so we could just return a Finder,
60	0	// but it's conceivable this could change.
61	0	Some(Finder { pair, byte1, byte2 })
62	0	}
63
64		/// Run this finder on the given haystack as a prefilter.
65		///
66		/// If a candidate match is found, then an offset where the needle could
67		/// begin in the haystack is returned.
68		#[inline]
69	0	pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
70	0	let mut i = 0;
71	0	let index1 = usize::from(self.pair.index1());
72	0	let index2 = usize::from(self.pair.index2());
73		loop {
74		// Use a fast vectorized implementation to skip to the next
75		// occurrence of the rarest byte (heuristically chosen) in the
76		// needle.
77	0	i += memchr(self.byte1, &haystack[i..])?;
78	0	let found = i;
79	0	i += 1;
80
81		// If we can't align our first byte match with the haystack, then a
82		// match is impossible.
83	0	let aligned1 = match found.checked_sub(index1) {
84	0	None => continue,
85	0	Some(aligned1) => aligned1,
86		};
87
88		// Now align the second byte match with the haystack. A mismatch
89		// means that a match is impossible.
90	0	let aligned2 = match aligned1.checked_add(index2) {
91	0	None => continue,
92	0	Some(aligned_index2) => aligned_index2,
93	0	};
94	0	if haystack.get(aligned2).map_or(true, \|&b\| b != self.byte2) {
95	0	continue;
96	0	}
97	0
98	0	// We've done what we can. There might be a match here.
99	0	return Some(aligned1);
100		}
101	0	}
102
103		/// Returns the pair of offsets (into the needle) used to check as a
104		/// predicate before confirming whether a needle exists at a particular
105		/// position.
106		#[inline]
107	0	pub fn pair(&self) -> &Pair {
108	0	&self.pair
109	0	}
110		}
111
112		/// A pair of byte offsets into a needle to use as a predicate.
113		///
114		/// This pair is used as a predicate to quickly filter out positions in a
115		/// haystack in which a needle cannot match. In some cases, this pair can even
116		/// be used in vector algorithms such that the vector algorithm only switches
117		/// over to scalar code once this pair has been found.
118		///
119		/// A pair of offsets can be used in both substring search implementations and
120		/// in prefilters. The former will report matches of a needle in a haystack
121		/// where as the latter will only report possible matches of a needle.
122		///
123		/// The offsets are limited each to a maximum of 255 to keep memory usage low.
124		/// Moreover, it's rarely advantageous to create a predicate using offsets
125		/// greater than 255 anyway.
126		///
127		/// The only guarantee enforced on the pair of offsets is that they are not
128		/// equivalent. It is not necessarily the case that `index1 < index2` for
129		/// example. By convention, `index1` corresponds to the byte in the needle
130		/// that is believed to be most the predictive. Note also that because of the
131		/// requirement that the indices be both valid for the needle used to build
132		/// the pair and not equal, it follows that a pair can only be constructed for
133		/// needles with length at least 2.
134		#[derive(Clone, Copy, Debug)]
135		pub struct Pair {
136		index1: u8,
137		index2: u8,
138		}
139
140		impl Pair {
141		/// Create a new pair of offsets from the given needle.
142		///
143		/// If a pair could not be created (for example, if the needle is too
144		/// short), then `None` is returned.
145		///
146		/// This chooses the pair in the needle that is believed to be as
147		/// predictive of an overall match of the needle as possible.
148		#[inline]
149	0	pub fn new(needle: &[u8]) -> Option<Pair> {
150	0	Pair::with_ranker(needle, DefaultFrequencyRank)
151	0	}
152
153		/// Create a new pair of offsets from the given needle and ranker.
154		///
155		/// This permits the caller to choose a background frequency distribution
156		/// with which bytes are selected. The idea is to select a pair of bytes
157		/// that is believed to strongly predict a match in the haystack. This
158		/// usually means selecting bytes that occur rarely in a haystack.
159		///
160		/// If a pair could not be created (for example, if the needle is too
161		/// short), then `None` is returned.
162		#[inline]
163	0	pub fn with_ranker<R: HeuristicFrequencyRank>(
164	0	needle: &[u8],
165	0	ranker: R,
166	0	) -> Option<Pair> {
167	0	if needle.len() <= 1 {
168	0	return None;
169	0	}
170	0	// Find the rarest two bytes. We make them distinct indices by
171	0	// construction. (The actual byte value may be the same in degenerate
172	0	// cases, but that's OK.)
173	0	let (mut rare1, mut index1) = (needle[0], 0);
174	0	let (mut rare2, mut index2) = (needle[1], 1);
175	0	if ranker.rank(rare2) < ranker.rank(rare1) {
176	0	core::mem::swap(&mut rare1, &mut rare2);
177	0	core::mem::swap(&mut index1, &mut index2);
178	0	}
179	0	let max = usize::from(core::u8::MAX);
180	0	for (i, &b) in needle.iter().enumerate().take(max).skip(2) {
181	0	if ranker.rank(b) < ranker.rank(rare1) {
182	0	rare2 = rare1;
183	0	index2 = index1;
184	0	rare1 = b;
185	0	index1 = u8::try_from(i).unwrap();
186	0	} else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) {
187	0	rare2 = b;
188	0	index2 = u8::try_from(i).unwrap();
189	0	}
190		}
191		// While not strictly required for how a Pair is normally used, we
192		// really don't want these to be equivalent. If they were, it would
193		// reduce the effectiveness of candidate searching using these rare
194		// bytes by increasing the rate of false positives.
195	0	assert_ne!(index1, index2);
196	0	Some(Pair { index1, index2 })
197	0	}
198
199		/// Create a new pair using the offsets given for the needle given.
200		///
201		/// This bypasses any sort of heuristic process for choosing the offsets
202		/// and permits the caller to choose the offsets themselves.
203		///
204		/// Indices are limited to valid `u8` values so that a `Pair` uses less
205		/// memory. It is not possible to create a `Pair` with offsets bigger than
206		/// `u8::MAX`. It's likely that such a thing is not needed, but if it is,
207		/// it's suggested to build your own bespoke algorithm because you're
208		/// likely working on a very niche case. (File an issue if this suggestion
209		/// does not make sense to you.)
210		///
211		/// If a pair could not be created (for example, if the needle is too
212		/// short), then `None` is returned.
213		#[inline]
214	0	pub fn with_indices(
215	0	needle: &[u8],
216	0	index1: u8,
217	0	index2: u8,
218	0	) -> Option<Pair> {
219	0	// While not strictly required for how a Pair is normally used, we
220	0	// really don't want these to be equivalent. If they were, it would
221	0	// reduce the effectiveness of candidate searching using these rare
222	0	// bytes by increasing the rate of false positives.
223	0	if index1 == index2 {
224	0	return None;
225	0	}
226	0	// Similarly, invalid indices means the Pair is invalid too.
227	0	if usize::from(index1) >= needle.len() {
228	0	return None;
229	0	}
230	0	if usize::from(index2) >= needle.len() {
231	0	return None;
232	0	}
233	0	Some(Pair { index1, index2 })
234	0	}
235
236		/// Returns the first offset of the pair.
237		#[inline]
238	0	pub fn index1(&self) -> u8 {
239	0	self.index1
240	0	}
241
242		/// Returns the second offset of the pair.
243		#[inline]
244	0	pub fn index2(&self) -> u8 {
245	0	self.index2
246	0	}
247		}
248
249		/// This trait allows the user to customize the heuristic used to determine the
250		/// relative frequency of a given byte in the dataset being searched.
251		///
252		/// The use of this trait can have a dramatic impact on performance depending
253		/// on the type of data being searched. The details of why are explained in the
254		/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses
255		/// a prefilter to quickly identify candidate matches that are later verified
256		/// more slowly. This prefilter is implemented in terms of trying to find
257		/// `rare` bytes at specific offsets that will occur less frequently in the
258		/// dataset. While the concept of a `rare` byte is similar for most datasets,
259		/// there are some specific datasets (like binary executables) that have
260		/// dramatically different byte distributions. For these datasets customizing
261		/// the byte frequency heuristic can have a massive impact on performance, and
262		/// might even need to be done at runtime.
263		///
264		/// The default implementation of `HeuristicFrequencyRank` reads from the
265		/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This
266		/// is optimal for most inputs, so if you are unsure of the impact of using a
267		/// custom `HeuristicFrequencyRank` you should probably just use the default.
268		///
269		/// # Example
270		///
271		/// ```
272		/// use memchr::{
273		/// arch::all::packedpair::HeuristicFrequencyRank,
274		/// memmem::FinderBuilder,
275		/// };
276		///
277		/// /// A byte-frequency table that is good for scanning binary executables.
278		/// struct Binary;
279		///
280		/// impl HeuristicFrequencyRank for Binary {
281		/// fn rank(&self, byte: u8) -> u8 {
282		/// const TABLE: [u8; 256] = [
283		/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
284		/// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
285		/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
286		/// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
287		/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
288		/// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
289		/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
290		/// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
291		/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
292		/// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
293		/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
294		/// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
295		/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
296		/// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
297		/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
298		/// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
299		/// ];
300		/// TABLE[byte as usize]
301		/// }
302		/// }
303		/// // Create a new finder with the custom heuristic.
304		/// let finder = FinderBuilder::new()
305		/// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
306		/// // Find needle with custom heuristic.
307		/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
308		/// ```
309		pub trait HeuristicFrequencyRank {
310		/// Return the heuristic frequency rank of the given byte. A lower rank
311		/// means the byte is believed to occur less frequently in the haystack.
312		///
313		/// Some uses of this heuristic may treat arbitrary absolute rank values as
314		/// significant. For example, an implementation detail in this crate may
315		/// determine that heuristic prefilters are inappropriate if every byte in
316		/// the needle has a "high" rank.
317		fn rank(&self, byte: u8) -> u8;
318		}
319
320		/// The default byte frequency heuristic that is good for most haystacks.
321		pub(crate) struct DefaultFrequencyRank;
322
323		impl HeuristicFrequencyRank for DefaultFrequencyRank {
324	0	fn rank(&self, byte: u8) -> u8 {
325	0	self::default_rank::RANK[usize::from(byte)]
326	0	}
327		}
328
329		/// This permits passing any implementation of `HeuristicFrequencyRank` as a
330		/// borrowed version of itself.
331		impl<'a, R> HeuristicFrequencyRank for &'a R
332		where
333		R: HeuristicFrequencyRank,
334		{
335	0	fn rank(&self, byte: u8) -> u8 {
336	0	(**self).rank(byte)
337	0	}
338		}
339
340		#[cfg(test)]
341		mod tests {
342		use super::*;
343
344		#[test]
345		fn forward_packedpair() {
346		fn find(
347		haystack: &[u8],
348		needle: &[u8],
349		_index1: u8,
350		_index2: u8,
351		) -> Option<Option<usize>> {
352		// We ignore the index positions requested since it winds up making
353		// this test too slow overall.
354		let f = Finder::new(needle)?;
355		Some(f.find_prefilter(haystack))
356		}
357		crate::tests::packedpair::Runner::new().fwd(find).run()
358		}
359		}