/build/cargo-vendor-dir/memchr-2.7.1/src/arch/x86_64/avx2/packedpair.rs
Line | Count | Source (jump to first uncovered line) |
1 | | /*! |
2 | | A 256-bit vector implementation of the "packed pair" SIMD algorithm. |
3 | | |
4 | | The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main |
5 | | difference is that it (by default) uses a background distribution of byte |
6 | | frequencies to heuristically select the pair of bytes to search for. |
7 | | |
8 | | [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last |
9 | | */ |
10 | | |
11 | | use core::arch::x86_64::{__m128i, __m256i}; |
12 | | |
13 | | use crate::arch::{all::packedpair::Pair, generic::packedpair}; |
14 | | |
15 | | /// A "packed pair" finder that uses 256-bit vector operations. |
16 | | /// |
17 | | /// This finder picks two bytes that it believes have high predictive power |
18 | | /// for indicating an overall match of a needle. Depending on whether |
19 | | /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets |
20 | | /// where the needle matches or could match. In the prefilter case, candidates |
21 | | /// are reported whenever the [`Pair`] of bytes given matches. |
22 | | #[derive(Clone, Copy, Debug)] |
23 | | pub struct Finder { |
24 | | sse2: packedpair::Finder<__m128i>, |
25 | | avx2: packedpair::Finder<__m256i>, |
26 | | } |
27 | | |
28 | | impl Finder { |
29 | | /// Create a new pair searcher. The searcher returned can either report |
30 | | /// exact matches of `needle` or act as a prefilter and report candidate |
31 | | /// positions of `needle`. |
32 | | /// |
33 | | /// If AVX2 is unavailable in the current environment or if a [`Pair`] |
34 | | /// could not be constructed from the needle given, then `None` is |
35 | | /// returned. |
36 | | #[inline] |
37 | 0 | pub fn new(needle: &[u8]) -> Option<Finder> { |
38 | 0 | Finder::with_pair(needle, Pair::new(needle)?) |
39 | 0 | } |
40 | | |
41 | | /// Create a new "packed pair" finder using the pair of bytes given. |
42 | | /// |
43 | | /// This constructor permits callers to control precisely which pair of |
44 | | /// bytes is used as a predicate. |
45 | | /// |
46 | | /// If AVX2 is unavailable in the current environment, then `None` is |
47 | | /// returned. |
48 | | #[inline] |
49 | 0 | pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { |
50 | 0 | if Finder::is_available() { |
51 | | // SAFETY: we check that sse2/avx2 is available above. We are also |
52 | | // guaranteed to have needle.len() > 1 because we have a valid |
53 | | // Pair. |
54 | 0 | unsafe { Some(Finder::with_pair_impl(needle, pair)) } |
55 | | } else { |
56 | 0 | None |
57 | | } |
58 | 0 | } |
59 | | |
60 | | /// Create a new `Finder` specific to SSE2 vectors and routines. |
61 | | /// |
62 | | /// # Safety |
63 | | /// |
64 | | /// Same as the safety for `packedpair::Finder::new`, and callers must also |
65 | | /// ensure that both SSE2 and AVX2 are available. |
66 | | #[target_feature(enable = "sse2", enable = "avx2")] |
67 | | #[inline] |
68 | 0 | unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { |
69 | 0 | let sse2 = packedpair::Finder::<__m128i>::new(needle, pair); |
70 | 0 | let avx2 = packedpair::Finder::<__m256i>::new(needle, pair); |
71 | 0 | Finder { sse2, avx2 } |
72 | 0 | } |
73 | | |
74 | | /// Returns true when this implementation is available in the current |
75 | | /// environment. |
76 | | /// |
77 | | /// When this is true, it is guaranteed that [`Finder::with_pair`] will |
78 | | /// return a `Some` value. Similarly, when it is false, it is guaranteed |
79 | | /// that `Finder::with_pair` will return a `None` value. Notice that this |
80 | | /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, |
81 | | /// even when `Finder::is_available` is true, it is not guaranteed that a |
82 | | /// valid [`Pair`] can be found from the needle given. |
83 | | /// |
84 | | /// Note also that for the lifetime of a single program, if this returns |
85 | | /// true then it will always return true. |
86 | | #[inline] |
87 | 0 | pub fn is_available() -> bool { |
88 | 0 | #[cfg(not(target_feature = "sse2"))] |
89 | 0 | { |
90 | 0 | false |
91 | 0 | } |
92 | 0 | #[cfg(target_feature = "sse2")] |
93 | 0 | { |
94 | 0 | #[cfg(target_feature = "avx2")] |
95 | 0 | { |
96 | 0 | true |
97 | 0 | } |
98 | 0 | #[cfg(not(target_feature = "avx2"))] |
99 | 0 | { |
100 | 0 | #[cfg(feature = "std")] |
101 | 0 | { |
102 | 0 | std::is_x86_feature_detected!("avx2") |
103 | | } |
104 | | #[cfg(not(feature = "std"))] |
105 | | { |
106 | | false |
107 | | } |
108 | | } |
109 | | } |
110 | 0 | } |
111 | | |
112 | | /// Execute a search using AVX2 vectors and routines. |
113 | | /// |
114 | | /// # Panics |
115 | | /// |
116 | | /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. |
117 | | #[inline] |
118 | 0 | pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> { |
119 | 0 | // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. |
120 | 0 | unsafe { self.find_impl(haystack, needle) } |
121 | 0 | } |
122 | | |
123 | | /// Run this finder on the given haystack as a prefilter. |
124 | | /// |
125 | | /// If a candidate match is found, then an offset where the needle *could* |
126 | | /// begin in the haystack is returned. |
127 | | /// |
128 | | /// # Panics |
129 | | /// |
130 | | /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. |
131 | | #[inline] |
132 | 0 | pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { |
133 | 0 | // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. |
134 | 0 | unsafe { self.find_prefilter_impl(haystack) } |
135 | 0 | } |
136 | | |
137 | | /// Execute a search using AVX2 vectors and routines. |
138 | | /// |
139 | | /// # Panics |
140 | | /// |
141 | | /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. |
142 | | /// |
143 | | /// # Safety |
144 | | /// |
145 | | /// (The target feature safety obligation is automatically fulfilled by |
146 | | /// virtue of being a method on `Finder`, which can only be constructed |
147 | | /// when it is safe to call `sse2` and `avx2` routines.) |
148 | | #[target_feature(enable = "sse2", enable = "avx2")] |
149 | | #[inline] |
150 | 0 | unsafe fn find_impl( |
151 | 0 | &self, |
152 | 0 | haystack: &[u8], |
153 | 0 | needle: &[u8], |
154 | 0 | ) -> Option<usize> { |
155 | 0 | if haystack.len() < self.avx2.min_haystack_len() { |
156 | 0 | self.sse2.find(haystack, needle) |
157 | | } else { |
158 | 0 | self.avx2.find(haystack, needle) |
159 | | } |
160 | 0 | } |
161 | | |
162 | | /// Execute a prefilter search using AVX2 vectors and routines. |
163 | | /// |
164 | | /// # Panics |
165 | | /// |
166 | | /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. |
167 | | /// |
168 | | /// # Safety |
169 | | /// |
170 | | /// (The target feature safety obligation is automatically fulfilled by |
171 | | /// virtue of being a method on `Finder`, which can only be constructed |
172 | | /// when it is safe to call `sse2` and `avx2` routines.) |
173 | | #[target_feature(enable = "sse2", enable = "avx2")] |
174 | | #[inline] |
175 | 0 | unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> { |
176 | 0 | if haystack.len() < self.avx2.min_haystack_len() { |
177 | 0 | self.sse2.find_prefilter(haystack) |
178 | | } else { |
179 | 0 | self.avx2.find_prefilter(haystack) |
180 | | } |
181 | 0 | } |
182 | | |
183 | | /// Returns the pair of offsets (into the needle) used to check as a |
184 | | /// predicate before confirming whether a needle exists at a particular |
185 | | /// position. |
186 | | #[inline] |
187 | 0 | pub fn pair(&self) -> &Pair { |
188 | 0 | self.avx2.pair() |
189 | 0 | } |
190 | | |
191 | | /// Returns the minimum haystack length that this `Finder` can search. |
192 | | /// |
193 | | /// Using a haystack with length smaller than this in a search will result |
194 | | /// in a panic. The reason for this restriction is that this finder is |
195 | | /// meant to be a low-level component that is part of a larger substring |
196 | | /// strategy. In that sense, it avoids trying to handle all cases and |
197 | | /// instead only handles the cases that it can handle very well. |
198 | | #[inline] |
199 | 0 | pub fn min_haystack_len(&self) -> usize { |
200 | 0 | // The caller doesn't need to care about AVX2's min_haystack_len |
201 | 0 | // since this implementation will automatically switch to the SSE2 |
202 | 0 | // implementation if the haystack is too short for AVX2. Therefore, the |
203 | 0 | // caller only needs to care about SSE2's min_haystack_len. |
204 | 0 | // |
205 | 0 | // This does assume that SSE2's min_haystack_len is less than or |
206 | 0 | // equal to AVX2's min_haystack_len. In practice, this is true and |
207 | 0 | // there is no way it could be false based on how this Finder is |
208 | 0 | // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If |
209 | 0 | // they used different pairs, then it's possible (although perhaps |
210 | 0 | // pathological) for SSE2's min_haystack_len to be bigger than AVX2's. |
211 | 0 | self.sse2.min_haystack_len() |
212 | 0 | } |
213 | | } |
214 | | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Adapter for the shared substring test runners.
    ///
    /// The outer `None` means "not applicable": either no `Finder` could be
    /// built for `needle`, or `haystack` is shorter than the finder's minimum
    /// length. Otherwise the inner value is the search result.
    fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
        let finder = Finder::new(needle)?;
        (haystack.len() >= finder.min_haystack_len())
            .then(|| finder.find(haystack, needle))
    }

    define_substring_forward_quickcheck!(find);

    #[test]
    fn forward_substring() {
        crate::tests::substring::Runner::new().fwd(find).run()
    }

    #[test]
    fn forward_packedpair() {
        /// Same as the outer `find`, but with the pair of needle offsets
        /// chosen explicitly by the test runner.
        fn find(
            haystack: &[u8],
            needle: &[u8],
            index1: u8,
            index2: u8,
        ) -> Option<Option<usize>> {
            let pair = Pair::with_indices(needle, index1, index2)?;
            let finder = Finder::with_pair(needle, pair)?;
            (haystack.len() >= finder.min_haystack_len())
                .then(|| finder.find(haystack, needle))
        }
        crate::tests::packedpair::Runner::new().fwd(find).run()
    }

    #[test]
    fn forward_packedpair_prefilter() {
        /// Prefilter variant: reports candidate offsets rather than
        /// confirmed matches.
        fn find(
            haystack: &[u8],
            needle: &[u8],
            index1: u8,
            index2: u8,
        ) -> Option<Option<usize>> {
            // Not applicable when SSE2 is not enabled at compile time.
            if !cfg!(target_feature = "sse2") {
                return None;
            }
            let pair = Pair::with_indices(needle, index1, index2)?;
            let finder = Finder::with_pair(needle, pair)?;
            (haystack.len() >= finder.min_haystack_len())
                .then(|| finder.find_prefilter(haystack))
        }
        crate::tests::packedpair::Runner::new().fwd(find).run()
    }
}