wasm/validation/
mod.rs

1use core::iter::Map;
2
3use alloc::collections::btree_set::{self, BTreeSet};
4use alloc::vec::Vec;
5
6use crate::core::indices::{
7    DataIdx, ElemIdx, ExtendedIdxVec, FuncIdx, GlobalIdx, IdxVec, IdxVecOverflowError, MemIdx,
8    TableIdx, TypeIdx,
9};
10use crate::core::reader::section_header::{SectionHeader, SectionTy};
11use crate::core::reader::span::Span;
12use crate::core::reader::types::data::DataSegment;
13use crate::core::reader::types::element::ElemType;
14use crate::core::reader::types::export::{Export, ExportDesc};
15use crate::core::reader::types::global::{Global, GlobalType};
16use crate::core::reader::types::import::{Import, ImportDesc};
17use crate::core::reader::types::{ExternType, FuncType, MemType, ResultType, TableType};
18use crate::core::reader::WasmReader;
19use crate::core::sidetable::Sidetable;
20use crate::core::utils::ToUsizeExt;
21use crate::ValidationError;
22
23pub(crate) mod code;
24pub(crate) mod data;
25pub(crate) mod globals;
26pub(crate) mod read_constant_expression;
27pub(crate) mod validation_stack;
28
29/// Information collected from validating a module.
30///
31/// This can be used to instantiate a new module instance in some
32/// [`Store`](crate::Store) either through
33/// [`Store::module_instantiate_unchecked`](crate::Store::module_instantiate_unchecked)
34/// or
35/// [`Linker::module_instantiate_unchecked`](crate::execution::linker::Linker::module_instantiate_unchecked).
36#[derive(Clone, Debug)]
37pub struct ValidationInfo<'bytecode> {
38    pub(crate) wasm: &'bytecode [u8],
39    pub(crate) types: IdxVec<TypeIdx, FuncType>,
40    pub(crate) imports: Vec<Import<'bytecode>>,
41    pub(crate) functions: ExtendedIdxVec<FuncIdx, TypeIdx>,
42    pub(crate) tables: ExtendedIdxVec<TableIdx, TableType>,
43    pub(crate) memories: ExtendedIdxVec<MemIdx, MemType>,
44    pub(crate) globals: ExtendedIdxVec<GlobalIdx, Global>,
45    pub(crate) exports: Vec<Export<'bytecode>>,
46    pub(crate) elements: IdxVec<ElemIdx, ElemType>,
47    pub(crate) data: IdxVec<DataIdx, DataSegment>,
48    /// Each block contains the validated code section and the stp corresponding to
49    /// the beginning of that code section
50    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
51    pub(crate) sidetable: Sidetable,
52    /// The start function which is automatically executed during instantiation
53    pub(crate) start: Option<FuncIdx>,
54    // pub(crate) exports_length: Exported,
55}
56
57fn validate_no_duplicate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
58    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
59    for export in &validation_info.exports {
60        if found_export_names.contains(export.name) {
61            return Err(ValidationError::DuplicateExportName);
62        }
63        found_export_names.insert(export.name);
64    }
65    Ok(())
66}
67
68pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
69    let mut wasm = WasmReader::new(wasm);
70
71    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
72    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
73    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
74    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
75    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
76    // therefore this hack is acceptable.
77    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
78    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
79
80    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
81
82    trace!("Starting validation of bytecode");
83
84    trace!("Validating magic value");
85    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
86        return Err(ValidationError::InvalidMagic);
87    };
88
89    trace!("Validating version number");
90    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
91        return Err(ValidationError::InvalidBinaryFormatVersion);
92    };
93    debug!("Header ok");
94
95    let mut header = None;
96    read_next_header(&mut wasm, &mut header)?;
97
98    let skip_section = |wasm: &mut WasmReader, section_header: &mut Option<SectionHeader>| {
99        handle_section(wasm, section_header, SectionTy::Custom, |wasm, h| {
100            // customsec ::= section_0(custom)
101            // custom ::= name byte*
102            // name ::= b*:vec(byte) => name (if utf8(name) = b*)
103            // vec(B) ::= n:u32 (x:B)^n => x^n
104            let _name = wasm.read_name()?;
105
106            let remaining_bytes = h
107                .contents
108                .from()
109                .checked_add(h.contents.len())
110                .and_then(|res| res.checked_sub(wasm.pc))
111                .ok_or(ValidationError::InvalidCustomSectionLength)?;
112
113            // TODO: maybe do something with these remaining bytes?
114            let mut _bytes = Vec::new();
115            for _ in 0..remaining_bytes {
116                _bytes.push(wasm.read_u8()?)
117            }
118            Ok(())
119        })
120    };
121
122    while (skip_section(&mut wasm, &mut header)?).is_some() {}
123
124    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
125        wasm.read_vec(FuncType::read).map(|types| IdxVec::new(types).expect("that index space creation never fails because the length of the types vector is encoded as a 32-bit integer in the bytecode"))
126    })?
127    .unwrap_or_default();
128
129    while (skip_section(&mut wasm, &mut header)?).is_some() {}
130
131    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
132        wasm.read_vec(|wasm| Import::read_and_validate(wasm, &types))
133    })?
134    .unwrap_or_default();
135
136    while (skip_section(&mut wasm, &mut header)?).is_some() {}
137
138    // The `Function` section only covers module-level (or "local") functions.
139    // Imported functions have their types known in the `import` section. Both
140    // local and imported functions share the same index space.
141    //
142    // Imported functions are given priority and have the first indicies, and
143    // only after that do the local functions get assigned their indices.
144    let local_functions =
145        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
146            wasm.read_vec(|wasm| TypeIdx::read_and_validate(wasm, &types))
147        })?
148        .unwrap_or_default();
149
150    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
151        ImportDesc::Func(type_idx) => Some(*type_idx),
152        _ => None,
153    });
154
155    let functions = ExtendedIdxVec::new(imported_functions.collect(), local_functions)
156        .map_err(|IdxVecOverflowError| ValidationError::TooManyFunctions)?;
157
158    while (skip_section(&mut wasm, &mut header)?).is_some() {}
159
160    let imported_tables = imports.iter().filter_map(|m| match m.desc {
161        ImportDesc::Table(table) => Some(table),
162        _ => None,
163    });
164    let local_tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
165        wasm.read_vec(TableType::read)
166    })?
167    .unwrap_or_default();
168
169    let tables = ExtendedIdxVec::new(imported_tables.collect(), local_tables)
170        .map_err(|IdxVecOverflowError| ValidationError::TooManyTables)?;
171
172    while (skip_section(&mut wasm, &mut header)?).is_some() {}
173
174    let imported_memories = imports.iter().filter_map(|m| match m.desc {
175        ImportDesc::Mem(mem) => Some(mem),
176        _ => None,
177    });
178    // let imported_memories_length = imported_memories.len();
179    let local_memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
180        wasm.read_vec(MemType::read)
181    })?
182    .unwrap_or_default();
183
184    let memories = ExtendedIdxVec::new(imported_memories.collect(), local_memories)
185        .map_err(|IdxVecOverflowError| ValidationError::TooManyMemories)?;
186
187    if memories.inner().len() > 1 {
188        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
189    }
190
191    while (skip_section(&mut wasm, &mut header)?).is_some() {}
192
193    let imported_global_types: Vec<GlobalType> = imports
194        .iter()
195        .filter_map(|m| match m.desc {
196            ImportDesc::Global(global) => Some(global),
197            _ => None,
198        })
199        .collect();
200    let local_globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
201        globals::validate_global_section(
202            wasm,
203            h,
204            &imported_global_types,
205            &mut validation_context_refs,
206            functions.inner(),
207        )
208    })?
209    .unwrap_or_default();
210
211    let imported_globals = imported_global_types.iter().map(|ty| Global {
212        // TODO using a default MAX value for spans that are never executed is
213        // not really safe. Maybe opt for an Option instead.
214        init_expr: Span::new(usize::MAX, 0),
215        ty: *ty,
216    });
217    let globals = ExtendedIdxVec::new(imported_globals.collect(), local_globals)
218        .map_err(|IdxVecOverflowError| ValidationError::TooManyGlobals)?;
219
220    while (skip_section(&mut wasm, &mut header)?).is_some() {}
221
222    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
223        wasm.read_vec(|wasm| {
224            Export::read_and_validate(
225                wasm,
226                functions.inner(),
227                tables.inner(),
228                memories.inner(),
229                globals.inner(),
230            )
231        })
232    })?
233    .unwrap_or_default();
234    validation_context_refs.extend(exports.iter().filter_map(
235        |Export { name: _, desc }| match *desc {
236            ExportDesc::Func(func_idx) => Some(func_idx),
237            _ => None,
238        },
239    ));
240
241    while (skip_section(&mut wasm, &mut header)?).is_some() {}
242
243    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
244        let func_idx = FuncIdx::read_and_validate(wasm, functions.inner())?;
245
246        // start function signature must be [] -> []
247        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
248        // SAFETY: We just validated this function index using the same
249        // `IdxVec`.
250        let type_idx = unsafe { functions.inner().get(func_idx) };
251
252        // SAFETY: There exists only one `IdxVec<TypeIdx, FuncType>` in the
253        // current function. Therefore, this has to be the same one used to
254        // create and validate this `TypeIdx`.
255        let func_type = unsafe { types.get(*type_idx) };
256        if func_type
257            != &(FuncType {
258                params: ResultType {
259                    valtypes: Vec::new(),
260                },
261                returns: ResultType {
262                    valtypes: Vec::new(),
263                },
264            })
265        {
266            Err(ValidationError::InvalidStartFunctionSignature)
267        } else {
268            Ok(func_idx)
269        }
270    })?;
271
272    while (skip_section(&mut wasm, &mut header)?).is_some() {}
273
274    let elements = handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
275        ElemType::read_and_validate(
276            wasm,
277            functions.inner(),
278            &mut validation_context_refs,
279            tables.inner(),
280            &imported_global_types,
281        )
282        .map(|elements| IdxVec::new(elements).expect("that index space creation never fails because the length of the elements vector is encoded as a 32-bit integer in the bytecode"))
283    })?
284    .unwrap_or_default();
285
286    while (skip_section(&mut wasm, &mut header)?).is_some() {}
287
288    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
289    // As per the official documentation:
290    //
291    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
292    let data_count: Option<u32> =
293        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
294            wasm.read_var_u32()
295        })?;
296    if let Some(dc) = data_count {
297        trace!("data count: {dc}");
298    }
299
300    while (skip_section(&mut wasm, &mut header)?).is_some() {}
301
302    let mut sidetable = Sidetable::new();
303    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
304        // SAFETY: It is required that all passed index values are valid in all
305        // passed `IdxVec`s. The current function does not take any index types
306        // as arguments and every `IdxVec<..., ...>` is unique because they use
307        // different generics. Therefore, all index types must be valid in their
308        // relevant `IdxVec`s.
309        unsafe {
310            code::validate_code_section(
311                wasm,
312                h,
313                &types,
314                &functions,
315                globals.inner(),
316                memories.inner(),
317                data_count,
318                tables.inner(),
319                &elements,
320                &validation_context_refs,
321                &mut sidetable,
322            )
323        }
324    })?
325    .unwrap_or_default();
326
327    if func_blocks_stps.len() != functions.len_local_definitions().into_usize() {
328        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
329    }
330
331    while (skip_section(&mut wasm, &mut header)?).is_some() {}
332
333    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
334        // wasm.read_vec(DataSegment::read)
335        data::validate_data_section(wasm, h, &imported_global_types, functions.inner(), memories.inner())
336            .map(|data_segments| IdxVec::new(data_segments).expect("that index space creation never fails because the length of the data segments vector is encoded as a 32-bit integer in the bytecode"))
337    })?
338    .unwrap_or_default();
339
340    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
341    if let Some(data_count) = data_count {
342        if data_count != data_section.len() {
343            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
344        }
345    }
346
347    while (skip_section(&mut wasm, &mut header)?).is_some() {}
348
349    // All sections should have been handled
350    if let Some(header) = header {
351        return Err(ValidationError::SectionOutOfOrder(header.ty));
352    }
353
354    debug!("Validation was successful");
355    let validation_info = ValidationInfo {
356        wasm: wasm.into_inner(),
357        types,
358        imports,
359        functions,
360        tables,
361        memories,
362        globals,
363        exports,
364        func_blocks_stps,
365        sidetable,
366        data: data_section,
367        start,
368        elements,
369    };
370    validate_no_duplicate_exports(&validation_info)?;
371
372    Ok(validation_info)
373}
374
375fn read_next_header(
376    wasm: &mut WasmReader,
377    header: &mut Option<SectionHeader>,
378) -> Result<(), ValidationError> {
379    if header.is_none() && !wasm.remaining_bytes().is_empty() {
380        *header = Some(SectionHeader::read(wasm)?);
381    }
382    Ok(())
383}
384
385#[inline(always)]
386fn handle_section<'wasm, T, F>(
387    wasm: &mut WasmReader<'wasm>,
388    header: &mut Option<SectionHeader>,
389    section_ty: SectionTy,
390    handler: F,
391) -> Result<Option<T>, ValidationError>
392where
393    T: 'wasm,
394    F: FnOnce(&mut WasmReader<'wasm>, SectionHeader) -> Result<T, ValidationError>,
395{
396    match &header {
397        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
398            let h = header.take().unwrap();
399            trace!("Handling section {:?}", h.ty);
400            let ret = handler(wasm, h)?;
401            read_next_header(wasm, header)?;
402            Ok(Some(ret))
403        }
404        _ => Ok(None),
405    }
406}
407
408impl<'wasm> ValidationInfo<'wasm> {
409    /// Returns the imports of this module as an iterator. Each import consist
410    /// of a module name, a name and an extern type.
411    ///
412    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_imports
413    pub fn imports<'a>(
414        &'a self,
415    ) -> Map<
416        core::slice::Iter<'a, Import<'wasm>>,
417        impl FnMut(&'a Import<'wasm>) -> (&'a str, &'a str, ExternType),
418    > {
419        self.imports.iter().map(|import| {
420            // SAFETY: This is sound because the argument is `self` and the
421            // import desc also comes from `self`.
422            let extern_type = unsafe { import.desc.extern_type(self) };
423            (import.module_name, import.name, extern_type)
424        })
425    }
426
427    /// Returns the exports of this module as an iterator. Each export consist
428    /// of a name, and an extern type.
429    ///
430    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_exports
431    pub fn exports<'a>(
432        &'a self,
433    ) -> Map<
434        core::slice::Iter<'a, Export<'wasm>>,
435        impl FnMut(&'a Export<'wasm>) -> (&'a str, ExternType),
436    > {
437        self.exports.iter().map(|export| {
438            // SAFETY: This is sound because the argument is `self` and the
439            // export desc also comes from `self`.
440            let extern_type = unsafe { export.desc.extern_type(self) };
441            (export.name, extern_type)
442        })
443    }
444}