Coverage Report

Created: 2025-06-23 13:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/cargo-vendor-dir/regex-automata-0.4.9/src/dfa/remapper.rs
Line
Count
Source
1
use alloc::vec::Vec;
2
3
use crate::util::primitives::StateID;
4
5
/// Remappable is a tightly coupled abstraction that facilitates remapping
6
/// state identifiers in DFAs.
7
///
8
/// The main idea behind remapping state IDs is that DFAs often need to check
9
/// if a certain state is a "special" state of some kind (like a match state)
10
/// during a search. Since this is extremely perf critical code, we want this
11
/// check to be as fast as possible. Partitioning state IDs into, for example,
12
/// into "non-match" and "match" states means one can tell if a state is a
13
/// match state via a simple comparison of the state ID.
14
///
15
/// The issue is that during the DFA construction process, it's not
16
/// particularly easy to partition the states. Instead, the simplest thing is
17
/// to often just do a pass over all of the states and shuffle them into their
18
/// desired partitionings. To do that, we need a mechanism for swapping states.
19
/// Hence, this abstraction.
20
///
21
/// Normally, for such little code, I would just duplicate it. But this is a
22
/// key optimization and the implementation is a bit subtle. So the abstraction
23
/// is basically a ham-fisted attempt at DRY. The only place we use this is in
24
/// the dense and one-pass DFAs.
25
///
26
/// See also src/dfa/special.rs for a more detailed explanation of how dense
27
/// DFAs are partitioned.
28
pub(super) trait Remappable: core::fmt::Debug {
29
    /// Return the total number of states.
30
    fn state_len(&self) -> usize;
31
    /// Return the power-of-2 exponent that yields the stride. The pertinent
32
    /// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride.
33
    fn stride2(&self) -> usize;
34
    /// Swap the states pointed to by the given IDs. The underlying finite
35
    /// state machine should be mutated such that all of the transitions in
36
    /// `id1` are now in the memory region where the transitions for `id2`
37
    /// were, and all of the transitions in `id2` are now in the memory region
38
    /// where the transitions for `id1` were.
39
    ///
40
    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
41
    ///
42
    /// It is expected that, after calling this, the underlying value will be
43
    /// left in an inconsistent state, since any other transitions pointing to,
44
    /// e.g., `id1` need to be updated to point to `id2`, since that's where
45
    /// `id1` moved to.
46
    ///
47
    /// In order to "fix" the underlying inconsistent state, a `Remapper`
48
    /// should be used to guarantee that `remap` is called at the appropriate
49
    /// time.
50
    fn swap_states(&mut self, id1: StateID, id2: StateID);
51
    /// This must remap every single state ID in the underlying value according
52
    /// to the function given. For example, in a DFA, this should remap every
53
    /// transition and every starting state ID.
54
    fn remap(&mut self, map: impl Fn(StateID) -> StateID);
55
}
56
57
/// Remapper is an abstraction the manages the remapping of state IDs in a
58
/// finite state machine. This is useful when one wants to shuffle states into
59
/// different positions in the machine.
60
///
61
/// One of the key complexities this manages is the ability to correctly move
62
/// one state multiple times.
63
///
64
/// Once shuffling is complete, `remap` must be called, which will rewrite
65
/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
66
/// will almost certainly result in a corrupt machine.
67
#[derive(Debug)]
68
pub(super) struct Remapper {
69
    /// A map from the index of a state to its pre-multiplied identifier.
70
    ///
71
    /// When a state is swapped with another, then their corresponding
72
    /// locations in this map are also swapped. Thus, its new position will
73
    /// still point to its old pre-multiplied StateID.
74
    ///
75
    /// While there is a bit more to it, this then allows us to rewrite the
76
    /// state IDs in a DFA's transition table in a single pass. This is done
77
    /// by iterating over every ID in this map, then iterating over each
78
    /// transition for the state at that ID and re-mapping the transition from
79
    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
80
    /// in this map where `old_id` *started*, and set it to where it ended up
81
    /// after all swaps have been completed.
82
    map: Vec<StateID>,
83
    /// A mapper from state index to state ID (and back).
84
    idxmap: IndexMapper,
85
}
86
87
impl Remapper {
88
    /// Create a new remapper from the given remappable implementation. The
89
    /// remapper can then be used to swap states. The remappable value given
90
    /// here must the same one given to `swap` and `remap`.
91
1
    pub(super) fn new(r: &impl Remappable) -> Remapper {
92
1
        let idxmap = IndexMapper { stride2: r.stride2() };
93
23
        let map = (0..r.state_len()).map(|i| idxmap.to_state_id(i)).collect();
94
1
        Remapper { map, idxmap }
95
1
    }
96
97
    /// Swap two states. Once this is called, callers must follow through to
98
    /// call `remap`, or else it's possible for the underlying remappable
99
    /// value to be in a corrupt state.
100
2
    pub(super) fn swap(
101
2
        &mut self,
102
2
        r: &mut impl Remappable,
103
2
        id1: StateID,
104
2
        id2: StateID,
105
2
    ) {
106
2
        if id1 == id2 {
107
0
            return;
108
2
        }
109
2
        r.swap_states(id1, id2);
110
2
        self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2));
111
2
    }
112
113
    /// Complete the remapping process by rewriting all state IDs in the
114
    /// remappable value according to the swaps performed.
115
1
    pub(super) fn remap(mut self, r: &mut impl Remappable) {
116
1
        // Update the map to account for states that have been swapped
117
1
        // multiple times. For example, if (A, C) and (C, G) are swapped, then
118
1
        // transitions previously pointing to A should now point to G. But if
119
1
        // we don't update our map, they will erroneously be set to C. All we
120
1
        // do is follow the swaps in our map until we see our original state
121
1
        // ID.
122
1
        //
123
1
        // The intuition here is to think about how changes are made to the
124
1
        // map: only through pairwise swaps. That means that starting at any
125
1
        // given state, it is always possible to find the loop back to that
126
1
        // state by following the swaps represented in the map (which might be
127
1
        // 0 swaps).
128
1
        //
129
1
        // We are also careful to clone the map before starting in order to
130
1
        // freeze it. We use the frozen map to find our loops, since we need to
131
1
        // update our map as well. Without freezing it, our updates could break
132
1
        // the loops referenced above and produce incorrect results.
133
1
        let oldmap = self.map.clone();
134
23
        for i in 0..
r.state_len()1
{
135
23
            let cur_id = self.idxmap.to_state_id(i);
136
23
            let mut new_id = oldmap[i];
137
23
            if cur_id == new_id {
138
19
                continue;
139
4
            }
140
            loop {
141
4
                let id = oldmap[self.idxmap.to_index(new_id)];
142
4
                if cur_id == id {
143
4
                    self.map[i] = new_id;
144
4
                    break;
145
0
                }
146
0
                new_id = id;
147
            }
148
        }
149
761
        
r.remap(1
|next| self.map[self.idxmap.to_index(next)]);
150
1
    }
151
}
152
153
/// A simple type for mapping between state indices and state IDs.
154
///
155
/// The reason why this exists is because state IDs are "premultiplied." That
156
/// is, in order to get to the transitions for a particular state, one need
157
/// only use the state ID as-is, instead of having to multiple it by transition
158
/// table's stride.
159
///
160
/// The downside of this is that it's inconvenient to map between state IDs
161
/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
162
/// `0`, `0+stride`, `0+2*stride`, `0+3*stride`, etc., instead of `0`, `1`,
163
/// `2`, `3`, etc.
164
///
165
/// Since our state IDs are premultiplied, we can convert back-and-forth
166
/// between IDs and indices by simply unmultiplying the IDs and multiplying the
167
/// indices.
168
#[derive(Debug)]
169
struct IndexMapper {
170
    /// The power of 2 corresponding to the stride of the corresponding
171
    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
172
    /// stride2' pre-multiplies an index to an ID.
173
    stride2: usize,
174
}
175
176
impl IndexMapper {
177
    /// Convert a state ID to a state index.
178
769
    fn to_index(&self, id: StateID) -> usize {
179
769
        id.as_usize() >> self.stride2
180
769
    }
181
182
    /// Convert a state index to a state ID.
183
46
    fn to_state_id(&self, index: usize) -> StateID {
184
46
        // CORRECTNESS: If the given index is not valid, then it is not
185
46
        // required for this to panic or return a valid state ID. We'll "just"
186
46
        // wind up with panics or silent logic errors at some other point.
187
46
        StateID::new_unchecked(index << self.stride2)
188
46
    }
189
}
190
191
#[cfg(feature = "dfa-build")]
192
mod dense {
193
    use crate::{dfa::dense::OwnedDFA, util::primitives::StateID};
194
195
    use super::Remappable;
196
197
    impl Remappable for OwnedDFA {
198
        fn state_len(&self) -> usize {
199
            OwnedDFA::state_len(self)
200
        }
201
202
        fn stride2(&self) -> usize {
203
            OwnedDFA::stride2(self)
204
        }
205
206
        fn swap_states(&mut self, id1: StateID, id2: StateID) {
207
            OwnedDFA::swap_states(self, id1, id2)
208
        }
209
210
        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
211
            OwnedDFA::remap(self, map)
212
        }
213
    }
214
}
215
216
#[cfg(feature = "dfa-onepass")]
217
mod onepass {
218
    use crate::{dfa::onepass::DFA, util::primitives::StateID};
219
220
    use super::Remappable;
221
222
    impl Remappable for DFA {
223
2
        fn state_len(&self) -> usize {
224
2
            DFA::state_len(self)
225
2
        }
226
227
1
        fn stride2(&self) -> usize {
228
1
            // We don't do pre-multiplication for the one-pass DFA, so
229
1
            // returning 0 has the effect of making state IDs and state indices
230
1
            // equivalent.
231
1
            0
232
1
        }
233
234
2
        fn swap_states(&mut self, id1: StateID, id2: StateID) {
235
2
            DFA::swap_states(self, id1, id2)
236
2
        }
237
238
1
        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
239
1
            DFA::remap(self, map)
240
1
        }
241
    }
242
}