/build/cargo-vendor-dir/memchr-2.7.1/src/arch/all/packedpair/mod.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | Provides an architecture independent implementation of the "packed pair" |
3 | | algorithm. |
4 | | |
5 | | The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main |
6 | | difference is that it (by default) uses a background distribution of byte |
7 | | frequencies to heuristically select the pair of bytes to search for. Note that |
8 | | this module provides an architecture independent version that doesn't do as |
9 | | good of a job keeping the search for candidates inside a SIMD hot path. It |
10 | | however can be good enough in many circumstances. |
11 | | |
12 | | [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last |
13 | | */ |
14 | | |
15 | | use crate::memchr; |
16 | | |
17 | | mod default_rank; |
18 | | |
19 | | /// An architecture independent "packed pair" finder. |
20 | | /// |
21 | | /// This finder picks two bytes that it believes have high predictive power for |
22 | | /// indicating an overall match of a needle. At search time, it reports offsets |
23 | | /// where the needle could match based on whether the pair of bytes it chose |
24 | | /// match. |
25 | | /// |
26 | | /// This is architecture independent because it utilizes `memchr` to find the |
27 | | /// occurrence of one of the bytes in the pair, and then checks whether the |
28 | | /// second byte matches. If it does, in the case of [`Finder::find_prefilter`], |
29 | | /// the location at which the needle could match is returned. |
30 | | /// |
31 | | /// It is generally preferred to use architecture specific routines for a |
32 | | /// "packed pair" prefilter, but this can be a useful fallback when the |
33 | | /// architecture independent routines are unavailable. |
34 | | #[derive(Clone, Copy, Debug)] |
35 | | pub struct Finder { |
36 | | pair: Pair, |
37 | | byte1: u8, |
38 | | byte2: u8, |
39 | | } |
40 | | |
41 | | impl Finder { |
42 | | /// Create a new prefilter that reports possible locations where the given |
43 | | /// needle matches. |
44 | | #[inline] |
45 | 0 | pub fn new(needle: &[u8]) -> Option<Finder> { |
46 | 0 | Finder::with_pair(needle, Pair::new(needle)?) |
47 | 0 | } |
48 | | |
49 | | /// Create a new prefilter using the pair given. |
50 | | /// |
51 | | /// If the prefilter could not be constructed, then `None` is returned. |
52 | | /// |
53 | | /// This constructor permits callers to control precisely which pair of |
54 | | /// bytes is used as a predicate. |
55 | | #[inline] |
56 | 0 | pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { |
57 | 0 | let byte1 = needle[usize::from(pair.index1())]; |
58 | 0 | let byte2 = needle[usize::from(pair.index2())]; |
59 | 0 | // Currently this can never fail so we could just return a Finder, |
60 | 0 | // but it's conceivable this could change. |
61 | 0 | Some(Finder { pair, byte1, byte2 }) |
62 | 0 | } |
63 | | |
64 | | /// Run this finder on the given haystack as a prefilter. |
65 | | /// |
66 | | /// If a candidate match is found, then an offset where the needle *could* |
67 | | /// begin in the haystack is returned. |
68 | | #[inline] |
69 | 0 | pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { |
70 | 0 | let mut i = 0; |
71 | 0 | let index1 = usize::from(self.pair.index1()); |
72 | 0 | let index2 = usize::from(self.pair.index2()); |
73 | | loop { |
74 | | // Use a fast vectorized implementation to skip to the next |
75 | | // occurrence of the rarest byte (heuristically chosen) in the |
76 | | // needle. |
77 | 0 | i += memchr(self.byte1, &haystack[i..])?; |
78 | 0 | let found = i; |
79 | 0 | i += 1; |
80 | | |
81 | | // If we can't align our first byte match with the haystack, then a |
82 | | // match is impossible. |
83 | 0 | let aligned1 = match found.checked_sub(index1) { |
84 | 0 | None => continue, |
85 | 0 | Some(aligned1) => aligned1, |
86 | | }; |
87 | | |
88 | | // Now align the second byte match with the haystack. A mismatch |
89 | | // means that a match is impossible. |
90 | 0 | let aligned2 = match aligned1.checked_add(index2) { |
91 | 0 | None => continue, |
92 | 0 | Some(aligned_index2) => aligned_index2, |
93 | 0 | }; |
94 | 0 | if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) { |
95 | 0 | continue; |
96 | 0 | } |
97 | 0 |
|
98 | 0 | // We've done what we can. There might be a match here. |
99 | 0 | return Some(aligned1); |
100 | | } |
101 | 0 | } |
102 | | |
103 | | /// Returns the pair of offsets (into the needle) used to check as a |
104 | | /// predicate before confirming whether a needle exists at a particular |
105 | | /// position. |
106 | | #[inline] |
107 | 0 | pub fn pair(&self) -> &Pair { |
108 | 0 | &self.pair |
109 | 0 | } |
110 | | } |
111 | | |
112 | | /// A pair of byte offsets into a needle to use as a predicate. |
113 | | /// |
114 | | /// This pair is used as a predicate to quickly filter out positions in a |
115 | | /// haystack in which a needle cannot match. In some cases, this pair can even |
116 | | /// be used in vector algorithms such that the vector algorithm only switches |
117 | | /// over to scalar code once this pair has been found. |
118 | | /// |
119 | | /// A pair of offsets can be used in both substring search implementations and |
120 | | /// in prefilters. The former will report matches of a needle in a haystack |
121 | | /// where as the latter will only report possible matches of a needle. |
122 | | /// |
123 | | /// The offsets are limited each to a maximum of 255 to keep memory usage low. |
124 | | /// Moreover, it's rarely advantageous to create a predicate using offsets |
125 | | /// greater than 255 anyway. |
126 | | /// |
127 | | /// The only guarantee enforced on the pair of offsets is that they are not |
128 | | /// equivalent. It is not necessarily the case that `index1 < index2` for |
129 | | /// example. By convention, `index1` corresponds to the byte in the needle |
130 | | /// that is believed to be most the predictive. Note also that because of the |
131 | | /// requirement that the indices be both valid for the needle used to build |
132 | | /// the pair and not equal, it follows that a pair can only be constructed for |
133 | | /// needles with length at least 2. |
134 | | #[derive(Clone, Copy, Debug)] |
135 | | pub struct Pair { |
136 | | index1: u8, |
137 | | index2: u8, |
138 | | } |
139 | | |
140 | | impl Pair { |
141 | | /// Create a new pair of offsets from the given needle. |
142 | | /// |
143 | | /// If a pair could not be created (for example, if the needle is too |
144 | | /// short), then `None` is returned. |
145 | | /// |
146 | | /// This chooses the pair in the needle that is believed to be as |
147 | | /// predictive of an overall match of the needle as possible. |
148 | | #[inline] |
149 | 0 | pub fn new(needle: &[u8]) -> Option<Pair> { |
150 | 0 | Pair::with_ranker(needle, DefaultFrequencyRank) |
151 | 0 | } |
152 | | |
153 | | /// Create a new pair of offsets from the given needle and ranker. |
154 | | /// |
155 | | /// This permits the caller to choose a background frequency distribution |
156 | | /// with which bytes are selected. The idea is to select a pair of bytes |
157 | | /// that is believed to strongly predict a match in the haystack. This |
158 | | /// usually means selecting bytes that occur rarely in a haystack. |
159 | | /// |
160 | | /// If a pair could not be created (for example, if the needle is too |
161 | | /// short), then `None` is returned. |
162 | | #[inline] |
163 | 0 | pub fn with_ranker<R: HeuristicFrequencyRank>( |
164 | 0 | needle: &[u8], |
165 | 0 | ranker: R, |
166 | 0 | ) -> Option<Pair> { |
167 | 0 | if needle.len() <= 1 { |
168 | 0 | return None; |
169 | 0 | } |
170 | 0 | // Find the rarest two bytes. We make them distinct indices by |
171 | 0 | // construction. (The actual byte value may be the same in degenerate |
172 | 0 | // cases, but that's OK.) |
173 | 0 | let (mut rare1, mut index1) = (needle[0], 0); |
174 | 0 | let (mut rare2, mut index2) = (needle[1], 1); |
175 | 0 | if ranker.rank(rare2) < ranker.rank(rare1) { |
176 | 0 | core::mem::swap(&mut rare1, &mut rare2); |
177 | 0 | core::mem::swap(&mut index1, &mut index2); |
178 | 0 | } |
179 | 0 | let max = usize::from(core::u8::MAX); |
180 | 0 | for (i, &b) in needle.iter().enumerate().take(max).skip(2) { |
181 | 0 | if ranker.rank(b) < ranker.rank(rare1) { |
182 | 0 | rare2 = rare1; |
183 | 0 | index2 = index1; |
184 | 0 | rare1 = b; |
185 | 0 | index1 = u8::try_from(i).unwrap(); |
186 | 0 | } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) { |
187 | 0 | rare2 = b; |
188 | 0 | index2 = u8::try_from(i).unwrap(); |
189 | 0 | } |
190 | | } |
191 | | // While not strictly required for how a Pair is normally used, we |
192 | | // really don't want these to be equivalent. If they were, it would |
193 | | // reduce the effectiveness of candidate searching using these rare |
194 | | // bytes by increasing the rate of false positives. |
195 | 0 | assert_ne!(index1, index2); |
196 | 0 | Some(Pair { index1, index2 }) |
197 | 0 | } |
198 | | |
199 | | /// Create a new pair using the offsets given for the needle given. |
200 | | /// |
201 | | /// This bypasses any sort of heuristic process for choosing the offsets |
202 | | /// and permits the caller to choose the offsets themselves. |
203 | | /// |
204 | | /// Indices are limited to valid `u8` values so that a `Pair` uses less |
205 | | /// memory. It is not possible to create a `Pair` with offsets bigger than |
206 | | /// `u8::MAX`. It's likely that such a thing is not needed, but if it is, |
207 | | /// it's suggested to build your own bespoke algorithm because you're |
208 | | /// likely working on a very niche case. (File an issue if this suggestion |
209 | | /// does not make sense to you.) |
210 | | /// |
211 | | /// If a pair could not be created (for example, if the needle is too |
212 | | /// short), then `None` is returned. |
213 | | #[inline] |
214 | 0 | pub fn with_indices( |
215 | 0 | needle: &[u8], |
216 | 0 | index1: u8, |
217 | 0 | index2: u8, |
218 | 0 | ) -> Option<Pair> { |
219 | 0 | // While not strictly required for how a Pair is normally used, we |
220 | 0 | // really don't want these to be equivalent. If they were, it would |
221 | 0 | // reduce the effectiveness of candidate searching using these rare |
222 | 0 | // bytes by increasing the rate of false positives. |
223 | 0 | if index1 == index2 { |
224 | 0 | return None; |
225 | 0 | } |
226 | 0 | // Similarly, invalid indices means the Pair is invalid too. |
227 | 0 | if usize::from(index1) >= needle.len() { |
228 | 0 | return None; |
229 | 0 | } |
230 | 0 | if usize::from(index2) >= needle.len() { |
231 | 0 | return None; |
232 | 0 | } |
233 | 0 | Some(Pair { index1, index2 }) |
234 | 0 | } |
235 | | |
236 | | /// Returns the first offset of the pair. |
237 | | #[inline] |
238 | 0 | pub fn index1(&self) -> u8 { |
239 | 0 | self.index1 |
240 | 0 | } |
241 | | |
242 | | /// Returns the second offset of the pair. |
243 | | #[inline] |
244 | 0 | pub fn index2(&self) -> u8 { |
245 | 0 | self.index2 |
246 | 0 | } |
247 | | } |
248 | | |
249 | | /// This trait allows the user to customize the heuristic used to determine the |
250 | | /// relative frequency of a given byte in the dataset being searched. |
251 | | /// |
252 | | /// The use of this trait can have a dramatic impact on performance depending |
253 | | /// on the type of data being searched. The details of why are explained in the |
254 | | /// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses |
255 | | /// a prefilter to quickly identify candidate matches that are later verified |
256 | | /// more slowly. This prefilter is implemented in terms of trying to find |
257 | | /// `rare` bytes at specific offsets that will occur less frequently in the |
258 | | /// dataset. While the concept of a `rare` byte is similar for most datasets, |
259 | | /// there are some specific datasets (like binary executables) that have |
260 | | /// dramatically different byte distributions. For these datasets customizing |
261 | | /// the byte frequency heuristic can have a massive impact on performance, and |
262 | | /// might even need to be done at runtime. |
263 | | /// |
264 | | /// The default implementation of `HeuristicFrequencyRank` reads from the |
265 | | /// static frequency table defined in `src/memmem/byte_frequencies.rs`. This |
266 | | /// is optimal for most inputs, so if you are unsure of the impact of using a |
267 | | /// custom `HeuristicFrequencyRank` you should probably just use the default. |
268 | | /// |
269 | | /// # Example |
270 | | /// |
271 | | /// ``` |
272 | | /// use memchr::{ |
273 | | /// arch::all::packedpair::HeuristicFrequencyRank, |
274 | | /// memmem::FinderBuilder, |
275 | | /// }; |
276 | | /// |
277 | | /// /// A byte-frequency table that is good for scanning binary executables. |
278 | | /// struct Binary; |
279 | | /// |
280 | | /// impl HeuristicFrequencyRank for Binary { |
281 | | /// fn rank(&self, byte: u8) -> u8 { |
282 | | /// const TABLE: [u8; 256] = [ |
283 | | /// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, |
284 | | /// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, |
285 | | /// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, |
286 | | /// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, |
287 | | /// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, |
288 | | /// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, |
289 | | /// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, |
290 | | /// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, |
291 | | /// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, |
292 | | /// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, |
293 | | /// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, |
294 | | /// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, |
295 | | /// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, |
296 | | /// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, |
297 | | /// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, |
298 | | /// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, |
299 | | /// ]; |
300 | | /// TABLE[byte as usize] |
301 | | /// } |
302 | | /// } |
303 | | /// // Create a new finder with the custom heuristic. |
304 | | /// let finder = FinderBuilder::new() |
305 | | /// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd"); |
306 | | /// // Find needle with custom heuristic. |
307 | | /// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some()); |
308 | | /// ``` |
309 | | pub trait HeuristicFrequencyRank { |
310 | | /// Return the heuristic frequency rank of the given byte. A lower rank |
311 | | /// means the byte is believed to occur less frequently in the haystack. |
312 | | /// |
313 | | /// Some uses of this heuristic may treat arbitrary absolute rank values as |
314 | | /// significant. For example, an implementation detail in this crate may |
315 | | /// determine that heuristic prefilters are inappropriate if every byte in |
316 | | /// the needle has a "high" rank. |
317 | | fn rank(&self, byte: u8) -> u8; |
318 | | } |
319 | | |
320 | | /// The default byte frequency heuristic that is good for most haystacks. |
321 | | pub(crate) struct DefaultFrequencyRank; |
322 | | |
323 | | impl HeuristicFrequencyRank for DefaultFrequencyRank { |
324 | 0 | fn rank(&self, byte: u8) -> u8 { |
325 | 0 | self::default_rank::RANK[usize::from(byte)] |
326 | 0 | } |
327 | | } |
328 | | |
329 | | /// This permits passing any implementation of `HeuristicFrequencyRank` as a |
330 | | /// borrowed version of itself. |
331 | | impl<'a, R> HeuristicFrequencyRank for &'a R |
332 | | where |
333 | | R: HeuristicFrequencyRank, |
334 | | { |
335 | 0 | fn rank(&self, byte: u8) -> u8 { |
336 | 0 | (**self).rank(byte) |
337 | 0 | } |
338 | | } |
339 | | |
340 | | #[cfg(test)] |
341 | | mod tests { |
342 | | use super::*; |
343 | | |
344 | | #[test] |
345 | | fn forward_packedpair() { |
346 | | fn find( |
347 | | haystack: &[u8], |
348 | | needle: &[u8], |
349 | | _index1: u8, |
350 | | _index2: u8, |
351 | | ) -> Option<Option<usize>> { |
352 | | // We ignore the index positions requested since it winds up making |
353 | | // this test too slow overall. |
354 | | let f = Finder::new(needle)?; |
355 | | Some(f.find_prefilter(haystack)) |
356 | | } |
357 | | crate::tests::packedpair::Runner::new().fwd(find).run() |
358 | | } |
359 | | } |