/build/cargo-vendor-dir/regex-automata-0.4.9/src/dfa/remapper.rs

Source
use alloc::vec::Vec;

use crate::util::primitives::StateID;

/// Remappable is a tightly coupled abstraction that facilitates remapping
/// state identifiers in DFAs.
///
/// The main idea behind remapping state IDs is that DFAs often need to check
/// if a certain state is a "special" state of some kind (like a match state)
/// during a search. Since this is extremely perf critical code, we want this
/// check to be as fast as possible. Partitioning state IDs into, for example,
/// into "non-match" and "match" states means one can tell if a state is a
/// match state via a simple comparison of the state ID.
///
/// The issue is that during the DFA construction process, it's not
/// particularly easy to partition the states. Instead, the simplest thing is
/// to often just do a pass over all of the states and shuffle them into their
/// desired partitionings. To do that, we need a mechanism for swapping states.
/// Hence, this abstraction.
///
/// Normally, for such little code, I would just duplicate it. But this is a
/// key optimization and the implementation is a bit subtle. So the abstraction
/// is basically a ham-fisted attempt at DRY. The only place we use this is in
/// the dense and one-pass DFAs.
///
/// See also src/dfa/special.rs for a more detailed explanation of how dense
/// DFAs are partitioned.
pub(super) trait Remappable: core::fmt::Debug {
    /// Return the total number of states.
    fn state_len(&self) -> usize;
    /// Return the power-of-2 exponent that yields the stride. The pertinent
    /// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride.
    fn stride2(&self) -> usize;
    /// Swap the states pointed to by the given IDs. The underlying finite
    /// state machine should be mutated such that all of the transitions in
    /// `id1` are now in the memory region where the transitions for `id2`
    /// were, and all of the transitions in `id2` are now in the memory region
    /// where the transitions for `id1` were.
    ///
    /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
    ///
    /// It is expected that, after calling this, the underlying value will be
    /// left in an inconsistent state, since any other transitions pointing to,
    /// e.g., `id1` need to be updated to point to `id2`, since that's where
    /// `id1` moved to.
    ///
    /// In order to "fix" the underlying inconsistent state, a `Remapper`
    /// should be used to guarantee that `remap` is called at the appropriate
    /// time.
    fn swap_states(&mut self, id1: StateID, id2: StateID);
    /// This must remap every single state ID in the underlying value according
    /// to the function given. For example, in a DFA, this should remap every
    /// transition and every starting state ID.
    fn remap(&mut self, map: impl Fn(StateID) -> StateID);
}

/// Remapper is an abstraction the manages the remapping of state IDs in a
/// finite state machine. This is useful when one wants to shuffle states into
/// different positions in the machine.
///
/// One of the key complexities this manages is the ability to correctly move
/// one state multiple times.
///
/// Once shuffling is complete, `remap` must be called, which will rewrite
/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
/// will almost certainly result in a corrupt machine.
#[derive(Debug)]
pub(super) struct Remapper {
    /// A map from the index of a state to its pre-multiplied identifier.
    ///
    /// When a state is swapped with another, then their corresponding
    /// locations in this map are also swapped. Thus, its new position will
    /// still point to its old pre-multiplied StateID.
    ///
    /// While there is a bit more to it, this then allows us to rewrite the
    /// state IDs in a DFA's transition table in a single pass. This is done
    /// by iterating over every ID in this map, then iterating over each
    /// transition for the state at that ID and re-mapping the transition from
    /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
    /// in this map where `old_id` *started*, and set it to where it ended up
    /// after all swaps have been completed.
    map: Vec<StateID>,
    /// A mapper from state index to state ID (and back).
    idxmap: IndexMapper,
}

impl Remapper {
    /// Create a new remapper from the given remappable implementation. The
    /// remapper can then be used to swap states. The remappable value given
    /// here must the same one given to `swap` and `remap`.
    pub(super) fn new(r: &impl Remappable) -> Remapper {
        let idxmap = IndexMapper { stride2: r.stride2() };
        let map = (0..r.state_len()).map(|i| idxmap.to_state_id(i)).collect();
        Remapper { map, idxmap }
    }

    /// Swap two states. Once this is called, callers must follow through to
    /// call `remap`, or else it's possible for the underlying remappable
    /// value to be in a corrupt state.
    pub(super) fn swap(
        &mut self,
        r: &mut impl Remappable,
        id1: StateID,
        id2: StateID,
    ) {
        if id1 == id2 {
            return;
        }
        r.swap_states(id1, id2);
        self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2));
    }

    /// Complete the remapping process by rewriting all state IDs in the
    /// remappable value according to the swaps performed.
    pub(super) fn remap(mut self, r: &mut impl Remappable) {
        // Update the map to account for states that have been swapped
        // multiple times. For example, if (A, C) and (C, G) are swapped, then
        // transitions previously pointing to A should now point to G. But if
        // we don't update our map, they will erroneously be set to C. All we
        // do is follow the swaps in our map until we see our original state
        // ID.
        //
        // The intuition here is to think about how changes are made to the
        // map: only through pairwise swaps. That means that starting at any
        // given state, it is always possible to find the loop back to that
        // state by following the swaps represented in the map (which might be
        // 0 swaps).
        //
        // We are also careful to clone the map before starting in order to
        // freeze it. We use the frozen map to find our loops, since we need to
        // update our map as well. Without freezing it, our updates could break
        // the loops referenced above and produce incorrect results.
        let oldmap = self.map.clone();
        for i in 0..r.state_len()1 {
            let cur_id = self.idxmap.to_state_id(i);
            let mut new_id = oldmap[i];
            if cur_id == new_id {
                continue;
            }
            loop {
                let id = oldmap[self.idxmap.to_index(new_id)];
                if cur_id == id {
                    self.map[i] = new_id;
                    break;
                }
                new_id = id;
            }
        }
        r.remap(1|next| self.map[self.idxmap.to_index(next)]);
    }
}

/// A simple type for mapping between state indices and state IDs.
///
/// The reason why this exists is because state IDs are "premultiplied." That
/// is, in order to get to the transitions for a particular state, one need
/// only use the state ID as-is, instead of having to multiple it by transition
/// table's stride.
///
/// The downside of this is that it's inconvenient to map between state IDs
/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
/// `0`, `0+stride`, `0+2*stride`, `0+3*stride`, etc., instead of `0`, `1`,
/// `2`, `3`, etc.
///
/// Since our state IDs are premultiplied, we can convert back-and-forth
/// between IDs and indices by simply unmultiplying the IDs and multiplying the
/// indices.
#[derive(Debug)]
struct IndexMapper {
    /// The power of 2 corresponding to the stride of the corresponding
    /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
    /// stride2' pre-multiplies an index to an ID.
    stride2: usize,
}

impl IndexMapper {
    /// Convert a state ID to a state index.
    fn to_index(&self, id: StateID) -> usize {
        id.as_usize() >> self.stride2
    }

    /// Convert a state index to a state ID.
    fn to_state_id(&self, index: usize) -> StateID {
        // CORRECTNESS: If the given index is not valid, then it is not
        // required for this to panic or return a valid state ID. We'll "just"
        // wind up with panics or silent logic errors at some other point.
        StateID::new_unchecked(index << self.stride2)
    }
}

#[cfg(feature = "dfa-build")]
mod dense {
    use crate::{dfa::dense::OwnedDFA, util::primitives::StateID};

    use super::Remappable;

    impl Remappable for OwnedDFA {
        fn state_len(&self) -> usize {
            OwnedDFA::state_len(self)
        }

        fn stride2(&self) -> usize {
            OwnedDFA::stride2(self)
        }

        fn swap_states(&mut self, id1: StateID, id2: StateID) {
            OwnedDFA::swap_states(self, id1, id2)
        }

        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
            OwnedDFA::remap(self, map)
        }
    }
}

#[cfg(feature = "dfa-onepass")]
mod onepass {
    use crate::{dfa::onepass::DFA, util::primitives::StateID};

    use super::Remappable;

    impl Remappable for DFA {
        fn state_len(&self) -> usize {
            DFA::state_len(self)
        }

        fn stride2(&self) -> usize {
            // We don't do pre-multiplication for the one-pass DFA, so
            // returning 0 has the effect of making state IDs and state indices
            // equivalent.
            0
        }

        fn swap_states(&mut self, id1: StateID, id2: StateID) {
            DFA::swap_states(self, id1, id2)
        }

        fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
            DFA::remap(self, map)
        }
    }
}

Coverage Report

Created: 2025-11-20 14:58

Line	Count	Source
1		use alloc::vec::Vec;
2
3		use crate::util::primitives::StateID;
4
5		/// Remappable is a tightly coupled abstraction that facilitates remapping
6		/// state identifiers in DFAs.
7		///
8		/// The main idea behind remapping state IDs is that DFAs often need to check
9		/// if a certain state is a "special" state of some kind (like a match state)
10		/// during a search. Since this is extremely perf critical code, we want this
11		/// check to be as fast as possible. Partitioning state IDs into, for example,
12		/// into "non-match" and "match" states means one can tell if a state is a
13		/// match state via a simple comparison of the state ID.
14		///
15		/// The issue is that during the DFA construction process, it's not
16		/// particularly easy to partition the states. Instead, the simplest thing is
17		/// to often just do a pass over all of the states and shuffle them into their
18		/// desired partitionings. To do that, we need a mechanism for swapping states.
19		/// Hence, this abstraction.
20		///
21		/// Normally, for such little code, I would just duplicate it. But this is a
22		/// key optimization and the implementation is a bit subtle. So the abstraction
23		/// is basically a ham-fisted attempt at DRY. The only place we use this is in
24		/// the dense and one-pass DFAs.
25		///
26		/// See also src/dfa/special.rs for a more detailed explanation of how dense
27		/// DFAs are partitioned.
28		pub(super) trait Remappable: core::fmt::Debug {
29		/// Return the total number of states.
30		fn state_len(&self) -> usize;
31		/// Return the power-of-2 exponent that yields the stride. The pertinent
32		/// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride.
33		fn stride2(&self) -> usize;
34		/// Swap the states pointed to by the given IDs. The underlying finite
35		/// state machine should be mutated such that all of the transitions in
36		/// `id1` are now in the memory region where the transitions for `id2`
37		/// were, and all of the transitions in `id2` are now in the memory region
38		/// where the transitions for `id1` were.
39		///
40		/// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
41		///
42		/// It is expected that, after calling this, the underlying value will be
43		/// left in an inconsistent state, since any other transitions pointing to,
44		/// e.g., `id1` need to be updated to point to `id2`, since that's where
45		/// `id1` moved to.
46		///
47		/// In order to "fix" the underlying inconsistent state, a `Remapper`
48		/// should be used to guarantee that `remap` is called at the appropriate
49		/// time.
50		fn swap_states(&mut self, id1: StateID, id2: StateID);
51		/// This must remap every single state ID in the underlying value according
52		/// to the function given. For example, in a DFA, this should remap every
53		/// transition and every starting state ID.
54		fn remap(&mut self, map: impl Fn(StateID) -> StateID);
55		}
56
57		/// Remapper is an abstraction the manages the remapping of state IDs in a
58		/// finite state machine. This is useful when one wants to shuffle states into
59		/// different positions in the machine.
60		///
61		/// One of the key complexities this manages is the ability to correctly move
62		/// one state multiple times.
63		///
64		/// Once shuffling is complete, `remap` must be called, which will rewrite
65		/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
66		/// will almost certainly result in a corrupt machine.
67		#[derive(Debug)]
68		pub(super) struct Remapper {
69		/// A map from the index of a state to its pre-multiplied identifier.
70		///
71		/// When a state is swapped with another, then their corresponding
72		/// locations in this map are also swapped. Thus, its new position will
73		/// still point to its old pre-multiplied StateID.
74		///
75		/// While there is a bit more to it, this then allows us to rewrite the
76		/// state IDs in a DFA's transition table in a single pass. This is done
77		/// by iterating over every ID in this map, then iterating over each
78		/// transition for the state at that ID and re-mapping the transition from
79		/// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
80		/// in this map where `old_id` started, and set it to where it ended up
81		/// after all swaps have been completed.
82		map: Vec<StateID>,
83		/// A mapper from state index to state ID (and back).
84		idxmap: IndexMapper,
85		}
86
87		impl Remapper {
88		/// Create a new remapper from the given remappable implementation. The
89		/// remapper can then be used to swap states. The remappable value given
90		/// here must the same one given to `swap` and `remap`.
91	1	pub(super) fn new(r: &impl Remappable) -> Remapper {
92	1	let idxmap = IndexMapper { stride2: r.stride2() };
93	20	let map = (0..r.state_len()).map(\|i\| idxmap.to_state_id(i)).collect();
94	1	Remapper { map, idxmap }
95	1	}
96
97		/// Swap two states. Once this is called, callers must follow through to
98		/// call `remap`, or else it's possible for the underlying remappable
99		/// value to be in a corrupt state.
100	1	pub(super) fn swap(
101	1	&mut self,
102	1	r: &mut impl Remappable,
103	1	id1: StateID,
104	1	id2: StateID,
105	1	) {
106	1	if id1 == id2 {
107	0	return;
108	1	}
109	1	r.swap_states(id1, id2);
110	1	self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2));
111	1	}
112
113		/// Complete the remapping process by rewriting all state IDs in the
114		/// remappable value according to the swaps performed.
115	1	pub(super) fn remap(mut self, r: &mut impl Remappable) {
116	1	// Update the map to account for states that have been swapped
117	1	// multiple times. For example, if (A, C) and (C, G) are swapped, then
118	1	// transitions previously pointing to A should now point to G. But if
119	1	// we don't update our map, they will erroneously be set to C. All we
120	1	// do is follow the swaps in our map until we see our original state
121	1	// ID.
122	1	//
123	1	// The intuition here is to think about how changes are made to the
124	1	// map: only through pairwise swaps. That means that starting at any
125	1	// given state, it is always possible to find the loop back to that
126	1	// state by following the swaps represented in the map (which might be
127	1	// 0 swaps).
128	1	//
129	1	// We are also careful to clone the map before starting in order to
130	1	// freeze it. We use the frozen map to find our loops, since we need to
131	1	// update our map as well. Without freezing it, our updates could break
132	1	// the loops referenced above and produce incorrect results.
133	1	let oldmap = self.map.clone();
134	20	for i in 0.. r.state_len()1 {
135	20	let cur_id = self.idxmap.to_state_id(i);
136	20	let mut new_id = oldmap[i];
137	20	if cur_id == new_id {
138	18	continue;
139	2	}
140		loop {
141	2	let id = oldmap[self.idxmap.to_index(new_id)];
142	2	if cur_id == id {
143	2	self.map[i] = new_id;
144	2	break;
145	0	}
146	0	new_id = id;
147		}
148		}
149	382	r.remap(1 \|next\| self.map[self.idxmap.to_index(next)]);
150	1	}
151		}
152
153		/// A simple type for mapping between state indices and state IDs.
154		///
155		/// The reason why this exists is because state IDs are "premultiplied." That
156		/// is, in order to get to the transitions for a particular state, one need
157		/// only use the state ID as-is, instead of having to multiple it by transition
158		/// table's stride.
159		///
160		/// The downside of this is that it's inconvenient to map between state IDs
161		/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
162		/// `0`, `0+stride`, `0+2stride`, `0+3stride`, etc., instead of `0`, `1`,
163		/// `2`, `3`, etc.
164		///
165		/// Since our state IDs are premultiplied, we can convert back-and-forth
166		/// between IDs and indices by simply unmultiplying the IDs and multiplying the
167		/// indices.
168		#[derive(Debug)]
169		struct IndexMapper {
170		/// The power of 2 corresponding to the stride of the corresponding
171		/// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
172		/// stride2' pre-multiplies an index to an ID.
173		stride2: usize,
174		}
175
176		impl IndexMapper {
177		/// Convert a state ID to a state index.
178	386	fn to_index(&self, id: StateID) -> usize {
179	386	id.as_usize() >> self.stride2
180	386	}
181
182		/// Convert a state index to a state ID.
183	40	fn to_state_id(&self, index: usize) -> StateID {
184	40	// CORRECTNESS: If the given index is not valid, then it is not
185	40	// required for this to panic or return a valid state ID. We'll "just"
186	40	// wind up with panics or silent logic errors at some other point.
187	40	StateID::new_unchecked(index << self.stride2)
188	40	}
189		}
190
191		#[cfg(feature = "dfa-build")]
192		mod dense {
193		use crate::{dfa::dense::OwnedDFA, util::primitives::StateID};
194
195		use super::Remappable;
196
197		impl Remappable for OwnedDFA {
198		fn state_len(&self) -> usize {
199		OwnedDFA::state_len(self)
200		}
201
202		fn stride2(&self) -> usize {
203		OwnedDFA::stride2(self)
204		}
205
206		fn swap_states(&mut self, id1: StateID, id2: StateID) {
207		OwnedDFA::swap_states(self, id1, id2)
208		}
209
210		fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
211		OwnedDFA::remap(self, map)
212		}
213		}
214		}
215
216		#[cfg(feature = "dfa-onepass")]
217		mod onepass {
218		use crate::{dfa::onepass::DFA, util::primitives::StateID};
219
220		use super::Remappable;
221
222		impl Remappable for DFA {
223	2	fn state_len(&self) -> usize {
224	2	DFA::state_len(self)
225	2	}
226
227	1	fn stride2(&self) -> usize {
228	1	// We don't do pre-multiplication for the one-pass DFA, so
229	1	// returning 0 has the effect of making state IDs and state indices
230	1	// equivalent.
231	1	0
232	1	}
233
234	1	fn swap_states(&mut self, id1: StateID, id2: StateID) {
235	1	DFA::swap_states(self, id1, id2)
236	1	}
237
238	1	fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
239	1	DFA::remap(self, map)
240	1	}
241		}
242		}