/build/cargo-vendor-dir/regex-automata-0.4.9/src/dfa/remapper.rs
Line | Count | Source |
1 | | use alloc::vec::Vec; |
2 | | |
3 | | use crate::util::primitives::StateID; |
4 | | |
5 | | /// Remappable is a tightly coupled abstraction that facilitates remapping |
6 | | /// state identifiers in DFAs. |
7 | | /// |
8 | | /// The main idea behind remapping state IDs is that DFAs often need to check |
9 | | /// if a certain state is a "special" state of some kind (like a match state) |
10 | | /// during a search. Since this is extremely perf critical code, we want this |
11 | | /// check to be as fast as possible. Partitioning state IDs into, for example, |
12 | | /// into "non-match" and "match" states means one can tell if a state is a |
13 | | /// match state via a simple comparison of the state ID. |
14 | | /// |
15 | | /// The issue is that during the DFA construction process, it's not |
16 | | /// particularly easy to partition the states. Instead, the simplest thing is |
17 | | /// to often just do a pass over all of the states and shuffle them into their |
18 | | /// desired partitionings. To do that, we need a mechanism for swapping states. |
19 | | /// Hence, this abstraction. |
20 | | /// |
21 | | /// Normally, for such little code, I would just duplicate it. But this is a |
22 | | /// key optimization and the implementation is a bit subtle. So the abstraction |
23 | | /// is basically a ham-fisted attempt at DRY. The only place we use this is in |
24 | | /// the dense and one-pass DFAs. |
25 | | /// |
26 | | /// See also src/dfa/special.rs for a more detailed explanation of how dense |
27 | | /// DFAs are partitioned. |
28 | | pub(super) trait Remappable: core::fmt::Debug { |
29 | | /// Return the total number of states. |
30 | | fn state_len(&self) -> usize; |
31 | | /// Return the power-of-2 exponent that yields the stride. The pertinent |
32 | | /// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride. |
33 | | fn stride2(&self) -> usize; |
34 | | /// Swap the states pointed to by the given IDs. The underlying finite |
35 | | /// state machine should be mutated such that all of the transitions in |
36 | | /// `id1` are now in the memory region where the transitions for `id2` |
37 | | /// were, and all of the transitions in `id2` are now in the memory region |
38 | | /// where the transitions for `id1` were. |
39 | | /// |
40 | | /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`. |
41 | | /// |
42 | | /// It is expected that, after calling this, the underlying value will be |
43 | | /// left in an inconsistent state, since any other transitions pointing to, |
44 | | /// e.g., `id1` need to be updated to point to `id2`, since that's where |
45 | | /// `id1` moved to. |
46 | | /// |
47 | | /// In order to "fix" the underlying inconsistent state, a `Remapper` |
48 | | /// should be used to guarantee that `remap` is called at the appropriate |
49 | | /// time. |
50 | | fn swap_states(&mut self, id1: StateID, id2: StateID); |
51 | | /// This must remap every single state ID in the underlying value according |
52 | | /// to the function given. For example, in a DFA, this should remap every |
53 | | /// transition and every starting state ID. |
54 | | fn remap(&mut self, map: impl Fn(StateID) -> StateID); |
55 | | } |
56 | | |
57 | | /// Remapper is an abstraction the manages the remapping of state IDs in a |
58 | | /// finite state machine. This is useful when one wants to shuffle states into |
59 | | /// different positions in the machine. |
60 | | /// |
61 | | /// One of the key complexities this manages is the ability to correctly move |
62 | | /// one state multiple times. |
63 | | /// |
64 | | /// Once shuffling is complete, `remap` must be called, which will rewrite |
65 | | /// all pertinent transitions to updated state IDs. Neglecting to call `remap` |
66 | | /// will almost certainly result in a corrupt machine. |
67 | | #[derive(Debug)] |
68 | | pub(super) struct Remapper { |
69 | | /// A map from the index of a state to its pre-multiplied identifier. |
70 | | /// |
71 | | /// When a state is swapped with another, then their corresponding |
72 | | /// locations in this map are also swapped. Thus, its new position will |
73 | | /// still point to its old pre-multiplied StateID. |
74 | | /// |
75 | | /// While there is a bit more to it, this then allows us to rewrite the |
76 | | /// state IDs in a DFA's transition table in a single pass. This is done |
77 | | /// by iterating over every ID in this map, then iterating over each |
78 | | /// transition for the state at that ID and re-mapping the transition from |
79 | | /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position |
80 | | /// in this map where `old_id` *started*, and set it to where it ended up |
81 | | /// after all swaps have been completed. |
82 | | map: Vec<StateID>, |
83 | | /// A mapper from state index to state ID (and back). |
84 | | idxmap: IndexMapper, |
85 | | } |
86 | | |
87 | | impl Remapper { |
88 | | /// Create a new remapper from the given remappable implementation. The |
89 | | /// remapper can then be used to swap states. The remappable value given |
90 | | /// here must the same one given to `swap` and `remap`. |
91 | 1 | pub(super) fn new(r: &impl Remappable) -> Remapper { |
92 | 1 | let idxmap = IndexMapper { stride2: r.stride2() }; |
93 | 23 | let map = (0..r.state_len()).map(|i| idxmap.to_state_id(i)).collect(); |
94 | 1 | Remapper { map, idxmap } |
95 | 1 | } |
96 | | |
97 | | /// Swap two states. Once this is called, callers must follow through to |
98 | | /// call `remap`, or else it's possible for the underlying remappable |
99 | | /// value to be in a corrupt state. |
100 | 2 | pub(super) fn swap( |
101 | 2 | &mut self, |
102 | 2 | r: &mut impl Remappable, |
103 | 2 | id1: StateID, |
104 | 2 | id2: StateID, |
105 | 2 | ) { |
106 | 2 | if id1 == id2 { |
107 | 0 | return; |
108 | 2 | } |
109 | 2 | r.swap_states(id1, id2); |
110 | 2 | self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2)); |
111 | 2 | } |
112 | | |
113 | | /// Complete the remapping process by rewriting all state IDs in the |
114 | | /// remappable value according to the swaps performed. |
115 | 1 | pub(super) fn remap(mut self, r: &mut impl Remappable) { |
116 | 1 | // Update the map to account for states that have been swapped |
117 | 1 | // multiple times. For example, if (A, C) and (C, G) are swapped, then |
118 | 1 | // transitions previously pointing to A should now point to G. But if |
119 | 1 | // we don't update our map, they will erroneously be set to C. All we |
120 | 1 | // do is follow the swaps in our map until we see our original state |
121 | 1 | // ID. |
122 | 1 | // |
123 | 1 | // The intuition here is to think about how changes are made to the |
124 | 1 | // map: only through pairwise swaps. That means that starting at any |
125 | 1 | // given state, it is always possible to find the loop back to that |
126 | 1 | // state by following the swaps represented in the map (which might be |
127 | 1 | // 0 swaps). |
128 | 1 | // |
129 | 1 | // We are also careful to clone the map before starting in order to |
130 | 1 | // freeze it. We use the frozen map to find our loops, since we need to |
131 | 1 | // update our map as well. Without freezing it, our updates could break |
132 | 1 | // the loops referenced above and produce incorrect results. |
133 | 1 | let oldmap = self.map.clone(); |
134 | 23 | for i in 0..r.state_len()1 { |
135 | 23 | let cur_id = self.idxmap.to_state_id(i); |
136 | 23 | let mut new_id = oldmap[i]; |
137 | 23 | if cur_id == new_id { |
138 | 19 | continue; |
139 | 4 | } |
140 | | loop { |
141 | 4 | let id = oldmap[self.idxmap.to_index(new_id)]; |
142 | 4 | if cur_id == id { |
143 | 4 | self.map[i] = new_id; |
144 | 4 | break; |
145 | 0 | } |
146 | 0 | new_id = id; |
147 | | } |
148 | | } |
149 | 761 | r.remap(1 |next| self.map[self.idxmap.to_index(next)]); |
150 | 1 | } |
151 | | } |
152 | | |
153 | | /// A simple type for mapping between state indices and state IDs. |
154 | | /// |
155 | | /// The reason why this exists is because state IDs are "premultiplied." That |
156 | | /// is, in order to get to the transitions for a particular state, one need |
157 | | /// only use the state ID as-is, instead of having to multiple it by transition |
158 | | /// table's stride. |
159 | | /// |
160 | | /// The downside of this is that it's inconvenient to map between state IDs |
161 | | /// using a dense map, e.g., Vec<StateID>. That's because state IDs look like |
162 | | /// `0`, `0+stride`, `0+2*stride`, `0+3*stride`, etc., instead of `0`, `1`, |
163 | | /// `2`, `3`, etc. |
164 | | /// |
165 | | /// Since our state IDs are premultiplied, we can convert back-and-forth |
166 | | /// between IDs and indices by simply unmultiplying the IDs and multiplying the |
167 | | /// indices. |
168 | | #[derive(Debug)] |
169 | | struct IndexMapper { |
170 | | /// The power of 2 corresponding to the stride of the corresponding |
171 | | /// transition table. 'id >> stride2' de-multiplies an ID while 'index << |
172 | | /// stride2' pre-multiplies an index to an ID. |
173 | | stride2: usize, |
174 | | } |
175 | | |
176 | | impl IndexMapper { |
177 | | /// Convert a state ID to a state index. |
178 | 769 | fn to_index(&self, id: StateID) -> usize { |
179 | 769 | id.as_usize() >> self.stride2 |
180 | 769 | } |
181 | | |
182 | | /// Convert a state index to a state ID. |
183 | 46 | fn to_state_id(&self, index: usize) -> StateID { |
184 | 46 | // CORRECTNESS: If the given index is not valid, then it is not |
185 | 46 | // required for this to panic or return a valid state ID. We'll "just" |
186 | 46 | // wind up with panics or silent logic errors at some other point. |
187 | 46 | StateID::new_unchecked(index << self.stride2) |
188 | 46 | } |
189 | | } |
190 | | |
191 | | #[cfg(feature = "dfa-build")] |
192 | | mod dense { |
193 | | use crate::{dfa::dense::OwnedDFA, util::primitives::StateID}; |
194 | | |
195 | | use super::Remappable; |
196 | | |
197 | | impl Remappable for OwnedDFA { |
198 | | fn state_len(&self) -> usize { |
199 | | OwnedDFA::state_len(self) |
200 | | } |
201 | | |
202 | | fn stride2(&self) -> usize { |
203 | | OwnedDFA::stride2(self) |
204 | | } |
205 | | |
206 | | fn swap_states(&mut self, id1: StateID, id2: StateID) { |
207 | | OwnedDFA::swap_states(self, id1, id2) |
208 | | } |
209 | | |
210 | | fn remap(&mut self, map: impl Fn(StateID) -> StateID) { |
211 | | OwnedDFA::remap(self, map) |
212 | | } |
213 | | } |
214 | | } |
215 | | |
216 | | #[cfg(feature = "dfa-onepass")] |
217 | | mod onepass { |
218 | | use crate::{dfa::onepass::DFA, util::primitives::StateID}; |
219 | | |
220 | | use super::Remappable; |
221 | | |
222 | | impl Remappable for DFA { |
223 | 2 | fn state_len(&self) -> usize { |
224 | 2 | DFA::state_len(self) |
225 | 2 | } |
226 | | |
227 | 1 | fn stride2(&self) -> usize { |
228 | 1 | // We don't do pre-multiplication for the one-pass DFA, so |
229 | 1 | // returning 0 has the effect of making state IDs and state indices |
230 | 1 | // equivalent. |
231 | 1 | 0 |
232 | 1 | } |
233 | | |
234 | 2 | fn swap_states(&mut self, id1: StateID, id2: StateID) { |
235 | 2 | DFA::swap_states(self, id1, id2) |
236 | 2 | } |
237 | | |
238 | 1 | fn remap(&mut self, map: impl Fn(StateID) -> StateID) { |
239 | 1 | DFA::remap(self, map) |
240 | 1 | } |
241 | | } |
242 | | } |