wasm/core/reader/
mod.rs

1use crate::core::reader::span::Span;
2use crate::{Error, Result};
3
4pub mod section_header;
5pub mod types;
6
/// A struct for managing and reading WASM bytecode
///
/// Its purpose is to abstract parsing basic WASM values from the bytecode.
///
/// The reader only borrows the bytecode: cloning it copies the slice reference and the cursor,
/// never the bytes themselves.
#[derive(Clone)]
pub struct WasmReader<'a> {
    /// Entire WASM binary as slice
    pub full_wasm_binary: &'a [u8],

    /// Current program counter, i.e. index of the next byte to be consumed from the WASM binary
    ///
    /// # Correctness Note
    ///
    /// The `pc` points to the next byte to be consumed from the WASM binary. Therefore, after
    /// consuming the last byte, this cursor will advance past the last byte; for a WASM binary
    /// that is 100 bytes long (valid indexes start with 0 and end with 99), the `pc` therefore can
    /// become 100. However, it can not advance further.
    ///
    /// This field is public, so the invariant above is upheld by the methods of [WasmReader], not
    /// by the type itself: code that assigns `pc` directly is responsible for keeping it in range.
    ///
    /// The table below illustrates this with an example for a WASM binary that is 5 bytes long:
    ///
    /// |                     Index |   0  |   1  |   2  |   3  |   4  | 5 | 6 |
    /// |--------------------------:|:----:|:----:|:----:|:----:|:----:|:-:|:-:|
    /// | `full_wasm_binary[index]` | 0xaa | 0xbb | 0xcc | 0xee | 0xff | - | - |
    /// |      Valid `pc` position? |   ✅  |   ✅  |   ✅  |   ✅  |   ✅  | ✅ | ❌ |
    pub pc: usize,
}
32
33impl<'a> WasmReader<'a> {
34    /// Initialize a new [WasmReader] from a WASM byte slice
35    pub const fn new(wasm: &'a [u8]) -> Self {
36        Self {
37            full_wasm_binary: wasm,
38            pc: 0,
39        }
40    }
41
42    /// Advance the cursor to the first byte of the provided [Span] and validates that entire [Span] fits the WASM binary
43    ///
44    /// # Note
45    ///
46    /// This allows setting the [`pc`](WasmReader::pc) to one byte *past* the end of
47    /// [full_wasm_binary](WasmReader::full_wasm_binary), **if** the [Span]'s length is 0. For
48    /// further information, refer to the [field documentation of `pc`](WasmReader::pc).
49    pub fn move_start_to(&mut self, span: Span) -> Result<()> {
50        if span.from + span.len > self.full_wasm_binary.len() {
51            return Err(Error::Eof);
52        }
53
54        self.pc = span.from;
55
56        Ok(())
57    }
58
59    /// Byte slice to the remainder of the WASM binary, beginning from the current [`pc`](Self::pc)
60    pub fn remaining_bytes(&self) -> &[u8] {
61        &self.full_wasm_binary[self.pc..]
62    }
63
64    /// Create a [Span] starting from [`pc`](Self::pc) for the next `len` bytes
65    ///
66    /// Verifies the span to fit the WASM binary, i.e. using this span to index the WASM binary will
67    /// not yield an error.
68    pub fn make_span(&self, len: usize) -> Result<Span> {
69        if self.pc + len > self.full_wasm_binary.len() {
70            return Err(Error::Eof);
71        }
72        Ok(Span::new(self.pc, len))
73    }
74
75    /// Take `N` bytes starting from [`pc`](Self::pc), then advance the [`pc`](Self::pc) by `N`
76    ///
77    /// This yields back an array of the correct length
78    ///
79    /// # Note
80    ///
81    /// This allows setting the [`pc`](WasmReader::pc) to one byte *past* the end of
82    /// [full_wasm_binary](WasmReader::full_wasm_binary), **if** `N` equals the remaining bytes
83    /// slice's length. For further information, refer to the [field documentation of `pc`]
84    /// (WasmReader::pc).
85    pub fn strip_bytes<const N: usize>(&mut self) -> Result<[u8; N]> {
86        if N > self.full_wasm_binary.len() - self.pc {
87            return Err(Error::Eof);
88        }
89
90        let bytes = &self.full_wasm_binary[self.pc..(self.pc + N)];
91        self.pc += N;
92
93        Ok(bytes.try_into().expect("the slice length to be exactly N"))
94    }
95
96    /// Read the current byte without advancing the [`pc`](Self::pc)
97    ///
98    /// May yield an error if the [`pc`](Self::pc) advanced past the end of the WASM binary slice
99    pub fn peek_u8(&self) -> Result<u8> {
100        self.full_wasm_binary
101            .get(self.pc)
102            .copied()
103            .ok_or(Error::Eof)
104    }
105
106    /// Call a closure that may mutate the [WasmReader]
107    ///
108    /// Returns a tuple of the closure's return value and the number of bytes that the [`WasmReader`]
109    /// was advanced by.
110    ///
111    /// # Panics
112    ///
113    /// May panic if the closure moved the [`pc`](Self::pc) backwards, e.g. when
114    /// [move_start_to](Self::move_start_to) is called.
115    pub fn measure_num_read_bytes<T>(
116        &mut self,
117        f: impl FnOnce(&mut WasmReader) -> Result<T>,
118    ) -> Result<(T, usize)> {
119        let before = self.pc;
120        let ret = f(self)?;
121
122        // TODO maybe use checked sub, that is slower but guarantees no surprises
123        debug_assert!(
124            self.pc >= before,
125            "pc was advanced backwards towards the start"
126        );
127
128        let num_read_bytes = self.pc - before;
129        Ok((ret, num_read_bytes))
130    }
131
132    /// Skip `num_bytes`, advancing the [`pc`](Self::pc) accordingly
133    ///
134    /// # Note
135    ///
136    /// This can move the [`pc`](Self::pc) past the last byte of the WASM binary, so that reading
137    /// more than 0 further bytes would panick. However, it can not move the [`pc`](Self::pc) any
138    /// further than that, instead an error is returned. For further information, refer to the
139    /// [field documentation of `pc`] (WasmReader::pc).
140    #[allow(dead_code)]
141    pub fn skip(&mut self, num_bytes: usize) -> Result<()> {
142        if num_bytes > self.full_wasm_binary.len() - self.pc {
143            return Err(Error::Eof);
144        }
145        self.pc += num_bytes;
146        Ok(())
147    }
148
149    /// Consumes [Self], yielding back the internal reference to the WASM binary
150    pub fn into_inner(self) -> &'a [u8] {
151        self.full_wasm_binary
152    }
153
154    /// A wrapper function for reads with transaction-like behavior.
155    ///
156    /// The provided closure will be called with `&mut self` and its result will be returned.
157    /// However if the closure returns `Err(_)`, `self` will be reset as if the closure was never called.
158    #[allow(dead_code)]
159    pub fn handle_transaction<T, E>(
160        &mut self,
161        f: impl FnOnce(&mut WasmReader<'a>) -> core::result::Result<T, E>,
162    ) -> core::result::Result<T, E> {
163        let original = self.clone();
164        f(self).inspect_err(|_| {
165            *self = original;
166        })
167    }
168}
169
/// A value that can be read from a [`WasmReader`]
///
/// Implementors provide a fallible [`read`](WasmReadable::read) and a panicking
/// [`read_unvalidated`](WasmReadable::read_unvalidated) variant.
pub trait WasmReadable: Sized {
    /// Reads a new [`Self`] from given [`WasmReader`].
    ///
    /// Note that if this function returns `Err(_)`, the [`WasmReader`] may still have been advanced,
    /// which may lead to unexpected behaviour.
    /// To avoid this consider using the [`WasmReader::handle_transaction`] method to wrap this function call.
    ///
    /// # Errors
    ///
    /// Returns an error if no [`Self`] could be read; the concrete error is up to the implementor.
    fn read(wasm: &mut WasmReader) -> Result<Self>;

    /// Like [`read`](WasmReadable::read), but may panic
    ///
    /// Allows to read a [`Self`], directly returning it, instead of a [`Result`].
    /// Useful when prior validation already assures that a [`Self`] can be read.
    ///
    /// # Panics
    ///
    /// Panics if reading a [`Self`] fails.
    fn read_unvalidated(wasm: &mut WasmReader) -> Self;
}
188
189pub mod span {
190    use core::ops::Index;
191
192    use crate::core::reader::WasmReader;
193
194    /// An index and offset to describe a (sub-) slice into WASM bytecode
195    ///
196    /// Can be used to index into a [WasmReader], yielding a byte slice. As it does not
197    /// actually own the indexed data, this struct is free of lifetimes. Caution is advised when
198    /// indexing unknown slices, as a [Span] does not validate the length of the indexed slice.
199    #[derive(Copy, Clone, Debug, Hash)]
200    pub struct Span {
201        pub from: usize,
202        pub len: usize,
203    }
204
205    impl Span {
206        /// Create a new [Span], starting from `from` and ranging `len` elements
207        pub const fn new(from: usize, len: usize) -> Self {
208            Self { from, len }
209        }
210
211        /// Returns the length of this [Span]
212        pub const fn len(&self) -> usize {
213            self.len
214        }
215
216        pub const fn from(&self) -> usize {
217            self.from
218        }
219    }
220
221    impl<'a> Index<Span> for WasmReader<'a> {
222        type Output = [u8];
223
224        fn index(&self, index: Span) -> &'a Self::Output {
225            &self.full_wasm_binary[index.from..(index.from + index.len)]
226        }
227    }
228}
229
#[cfg(test)]
mod test {
    use crate::ValType;

    use super::*;
    use alloc::vec;

    /// Valid spans (empty, full, empty-at-end, last byte) are all accepted by `move_start_to`
    #[test]
    fn move_start_to() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        let span = Span::new(0, 0);
        wasm_reader.move_start_to(span).unwrap();
        // this is actually dangerous: the zero-length Span did not validate that there is at
        // least one byte to peek at
        wasm_reader.peek_u8().unwrap();

        let span = Span::new(0, my_bytes.len());
        wasm_reader.move_start_to(span).unwrap();
        wasm_reader.peek_u8().unwrap();
        assert_eq!(wasm_reader[span], my_bytes);

        let span = Span::new(my_bytes.len(), 0);
        wasm_reader.move_start_to(span).unwrap();
        // span had zero length, hence wasm_reader.peek_u8() would be allowed to fail

        let span = Span::new(my_bytes.len() - 1, 1);
        wasm_reader.move_start_to(span).unwrap();

        assert_eq!(wasm_reader.peek_u8().unwrap(), *my_bytes.last().unwrap());
    }

    /// A non-empty span starting one past the last byte is rejected
    #[test]
    fn move_start_to_out_of_bounds_1() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        let span = Span::new(my_bytes.len(), 1);
        assert_eq!(wasm_reader.move_start_to(span), Err(Error::Eof));
    }

    /// A span starting at 0 that is one byte longer than the binary is rejected
    #[test]
    fn move_start_to_out_of_bounds_2() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        let span = Span::new(0, my_bytes.len() + 1);
        assert_eq!(wasm_reader.move_start_to(span), Err(Error::Eof));
    }

    /// `remaining_bytes` shrinks to the last byte after skipping all but one byte
    #[test]
    fn remaining_bytes_1() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        wasm_reader.skip(4).unwrap();
        assert_eq!(wasm_reader.peek_u8().unwrap(), 0x15);

        assert_eq!(wasm_reader.remaining_bytes(), &my_bytes[4..]);
    }

    /// `remaining_bytes` is empty (and does not panic) once every byte was skipped
    #[test]
    fn remaining_bytes_2() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        wasm_reader.skip(5).unwrap();
        assert_eq!(wasm_reader.remaining_bytes(), &my_bytes[5..]);
        assert_eq!(wasm_reader.remaining_bytes(), &[]);
    }

    /// Stripping 4 of 5 bytes from the start leaves exactly the last byte
    #[test]
    fn strip_bytes_1() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        let stripped_bytes = wasm_reader.strip_bytes::<4>().unwrap();
        assert_eq!(&stripped_bytes, &my_bytes[..4]);
        assert_eq!(wasm_reader.remaining_bytes(), &[0x15]);
    }

    /// Stripping exactly the remaining bytes succeeds and leaves nothing behind
    #[test]
    fn strip_bytes_2() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        wasm_reader.skip(1).unwrap();
        let stripped_bytes = wasm_reader.strip_bytes::<4>().unwrap();
        assert_eq!(&stripped_bytes, &my_bytes[1..5]);
        assert_eq!(wasm_reader.remaining_bytes(), &[]);
    }

    /// Stripping more bytes than remain (4 requested, 3 left) yields `Error::Eof`
    #[test]
    fn strip_bytes_3() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        wasm_reader.skip(2).unwrap();
        let stripped_bytes = wasm_reader.strip_bytes::<4>();
        assert_eq!(stripped_bytes, Err(Error::Eof));
    }

    /// Stripping zero bytes succeeds even when the `pc` is one past the last byte
    #[test]
    fn strip_bytes_4() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);

        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        wasm_reader.skip(5).unwrap();
        let stripped_bytes = wasm_reader.strip_bytes::<0>().unwrap();
        assert_eq!(stripped_bytes, [0u8; 0]);
    }

    /// Skipping one byte more than the binary holds yields `Error::Eof`
    #[test]
    fn skip_1() {
        let my_bytes = vec![0x11, 0x12, 0x13, 0x14, 0x15];
        let mut wasm_reader = WasmReader::new(&my_bytes);
        assert_eq!(wasm_reader.remaining_bytes(), my_bytes);
        assert_eq!(wasm_reader.skip(6), Err(Error::Eof));
    }

    /// A successful transaction keeps its progress, a failed one is rolled back
    #[test]
    fn reader_transaction() {
        let bytes = [0x1, 0x2, 0x3, 0x4, 0x5, 0x6];
        let mut reader = WasmReader::new(&bytes);

        assert_eq!(
            reader.handle_transaction(|reader| { reader.strip_bytes::<2>() }),
            Ok([0x1, 0x2]),
        );

        let transaction_result: Result<()> = reader.handle_transaction(|reader| {
            assert_eq!(reader.strip_bytes::<2>(), Ok([0x3, 0x4]));

            // The exact error type does not matter
            Err(Error::InvalidMagic)
        });
        assert_eq!(transaction_result, Err(Error::InvalidMagic));

        // The failed transaction was rolled back, so reading resumes at 0x3 again
        assert_eq!(reader.strip_bytes::<3>(), Ok([0x3, 0x4, 0x5]));
    }

    /// Reader methods and `WasmReadable::read` can be passed to `handle_transaction` by path
    #[test]
    fn reader_transaction_ergonomics() {
        let bytes = [0x1, 0x2, 0x3, 0x4, 0x5, 0x6];
        let mut reader = WasmReader::new(&bytes);

        assert_eq!(reader.handle_transaction(WasmReader::read_u8), Ok(0x1));

        assert_eq!(
            reader.handle_transaction(ValType::read),
            Err(Error::InvalidValType)
        );
    }
}