wasm/validation/
mod.rs

1use core::iter::Map;
2
3use alloc::collections::btree_set::{self, BTreeSet};
4use alloc::vec::Vec;
5
6use crate::core::indices::{
7    DataIdx, ElemIdx, ExtendedIdxVec, FuncIdx, GlobalIdx, IdxVec, IdxVecOverflowError, MemIdx,
8    TableIdx, TypeIdx,
9};
10use crate::core::reader::section_header::{SectionHeader, SectionTy};
11use crate::core::reader::span::Span;
12use crate::core::reader::types::data::DataSegment;
13use crate::core::reader::types::element::ElemType;
14use crate::core::reader::types::export::{Export, ExportDesc};
15use crate::core::reader::types::global::{Global, GlobalType};
16use crate::core::reader::types::import::{Import, ImportDesc};
17use crate::core::reader::types::{ExternType, FuncType, MemType, ResultType, TableType};
18use crate::core::reader::WasmReader;
19use crate::core::sidetable::Sidetable;
20use crate::core::utils::ToUsizeExt;
21use crate::custom_section::CustomSection;
22use crate::ValidationError;
23
24pub(crate) mod code;
25pub(crate) mod custom_section;
26pub(crate) mod data;
27pub(crate) mod globals;
28pub(crate) mod read_constant_expression;
29pub(crate) mod validation_stack;
30
/// Information collected from validating a module.
///
/// This can be used to instantiate a new module instance in some
/// [`Store`](crate::Store) through
/// [`Store::module_instantiate`](crate::Store::module_instantiate).
#[derive(Clone, Debug)]
pub struct ValidationInfo<'bytecode> {
    /// The bytecode of the validated module
    pub(crate) wasm: &'bytecode [u8],
    /// The function types read from the type section
    pub(crate) types: IdxVec<TypeIdx, FuncType>,
    /// The imports read from the import section
    pub(crate) imports: Vec<Import<'bytecode>>,
    /// The type indices of all functions, built from the imported functions
    /// and the functions declared in the function section
    pub(crate) functions: ExtendedIdxVec<FuncIdx, TypeIdx>,
    /// The table types, built from the imported tables and the tables declared
    /// in the table section
    pub(crate) tables: ExtendedIdxVec<TableIdx, TableType>,
    /// The memory types, built from the imported memories and the memories
    /// declared in the memory section
    pub(crate) memories: ExtendedIdxVec<MemIdx, MemType>,
    /// The globals, built from the imported globals and the globals declared
    /// in the global section
    pub(crate) globals: ExtendedIdxVec<GlobalIdx, Global>,
    /// The exports read from the export section
    pub(crate) exports: Vec<Export<'bytecode>>,
    /// The element segments read from the element section
    pub(crate) elements: IdxVec<ElemIdx, ElemType>,
    /// The data segments read from the data section
    pub(crate) data: IdxVec<DataIdx, DataSegment>,
    /// Each block contains the validated code section and the stp corresponding to
    /// the beginning of that code section
    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
    /// The sidetable that was filled while validating the code section
    pub(crate) sidetable: Sidetable,
    /// The start function which is automatically executed during instantiation
    pub(crate) start: Option<FuncIdx>,
    /// All custom sections that were found in the bytecode
    pub(crate) custom_sections: Vec<CustomSection<'bytecode>>,
}
57
58fn validate_no_duplicate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
59    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
60    for export in &validation_info.exports {
61        if found_export_names.contains(export.name) {
62            return Err(ValidationError::DuplicateExportName);
63        }
64        found_export_names.insert(export.name);
65    }
66    Ok(())
67}
68
/// Validates a WebAssembly module given in its binary format.
///
/// The magic value and the version number are checked first. Afterwards the
/// sections are processed in a single pass and in this fixed order: type,
/// import, function, table, memory, global, export, start, element, data
/// count, code and data. Custom sections are accepted before, between and
/// after these sections and are collected separately.
///
/// On success the collected information is returned as a [`ValidationInfo`]
/// that borrows from the given bytecode.
///
/// # Errors
///
/// Errors of the individual readers and section validators are propagated.
/// In addition, this function itself returns:
///
/// - [`ValidationError::InvalidMagic`] or
///   [`ValidationError::InvalidBinaryFormatVersion`] for a malformed header,
/// - [`ValidationError::TooManyFunctions`], [`ValidationError::TooManyTables`],
///   [`ValidationError::TooManyMemories`] or
///   [`ValidationError::TooManyGlobals`] if the respective index space cannot
///   be created,
/// - [`ValidationError::UnsupportedMultipleMemoriesProposal`] if there is more
///   than one memory,
/// - [`ValidationError::InvalidStartFunctionSignature`] if the start function
///   does not have the type `[] -> []`,
/// - [`ValidationError::FunctionAndCodeSectionsHaveDifferentLengths`] if the
///   number of code entries differs from the number of local functions,
/// - [`ValidationError::DataCountAndDataSectionsLengthAreDifferent`] if the
///   data count section disagrees with the data section,
/// - [`ValidationError::SectionOutOfOrder`] if a section header is left over
///   after all sections have been handled,
/// - [`ValidationError::DuplicateExportName`] if two exports share a name.
pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
    let mut wasm = WasmReader::new(wasm);

    // Represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
    // A `ref.func` instruction is only valid if it has an immediate that is a member of C.refs.
    // This set holds all the func_idx's occurring in the module, except in its functions or start function.
    // I make an exception here by not including func_idx's occurring within data segments in C.refs as well, so that single pass validation is possible.
    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
    // therefore this hack is acceptable.
    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module

    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();

    trace!("Starting validation of bytecode");

    trace!("Validating magic value");
    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
        return Err(ValidationError::InvalidMagic);
    };

    trace!("Validating version number");
    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
        return Err(ValidationError::InvalidBinaryFormatVersion);
    };
    debug!("Header ok");

    // `header` holds the header of the next section that has not been handled
    // yet. It stays `None` once there are no bytes left to read.
    let mut header = None;
    read_next_header(&mut wasm, &mut header)?;

    let mut custom_sections = Vec::new();
    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
        wasm.read_vec(FuncType::read).map(|types| IdxVec::new(types).expect("that index space creation never fails because the length of the types vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
        wasm.read_vec(|wasm| Import::read_and_validate(wasm, &types))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // The `Function` section only covers module-level (or "local") functions.
    // Imported functions have their types known in the `import` section. Both
    // local and imported functions share the same index space.
    //
    // Imported functions are given priority and have the first indices, and
    // only after that do the local functions get assigned their indices.
    let local_functions =
        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
            wasm.read_vec(|wasm| TypeIdx::read_and_validate(wasm, &types))
        })?
        .unwrap_or_default();

    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
        ImportDesc::Func(type_idx) => Some(*type_idx),
        _ => None,
    });

    let functions = ExtendedIdxVec::new(imported_functions.collect(), local_functions)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyFunctions)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imported_tables = imports.iter().filter_map(|m| match m.desc {
        ImportDesc::Table(table) => Some(table),
        _ => None,
    });
    let local_tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
        wasm.read_vec(TableType::read)
    })?
    .unwrap_or_default();

    let tables = ExtendedIdxVec::new(imported_tables.collect(), local_tables)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyTables)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imported_memories = imports.iter().filter_map(|m| match m.desc {
        ImportDesc::Mem(mem) => Some(mem),
        _ => None,
    });
    let local_memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
        wasm.read_vec(MemType::read)
    })?
    .unwrap_or_default();

    let memories = ExtendedIdxVec::new(imported_memories.collect(), local_memories)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyMemories)?;

    // Imported and local memories are counted together here.
    if memories.inner().len() > 1 {
        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // The imported global types are collected eagerly because they are needed
    // again for the element and data sections further below.
    let imported_global_types: Vec<GlobalType> = imports
        .iter()
        .filter_map(|m| match m.desc {
            ImportDesc::Global(global) => Some(global),
            _ => None,
        })
        .collect();
    let local_globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
        globals::validate_global_section(
            wasm,
            h,
            &imported_global_types,
            &mut validation_context_refs,
            functions.inner(),
        )
    })?
    .unwrap_or_default();

    let imported_globals = imported_global_types.iter().map(|ty| Global {
        // TODO using a default MAX value for spans that are never executed is
        // not really safe. Maybe opt for an Option instead.
        init_expr: Span::new(usize::MAX, 0),
        ty: *ty,
    });
    let globals = ExtendedIdxVec::new(imported_globals.collect(), local_globals)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyGlobals)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
        wasm.read_vec(|wasm| {
            Export::read_and_validate(
                wasm,
                functions.inner(),
                tables.inner(),
                memories.inner(),
                globals.inner(),
            )
        })
    })?
    .unwrap_or_default();
    // Every exported function index becomes a member of C.refs.
    validation_context_refs.extend(exports.iter().filter_map(
        |Export { name: _, desc }| match *desc {
            ExportDesc::Func(func_idx) => Some(func_idx),
            _ => None,
        },
    ));

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
        let func_idx = FuncIdx::read_and_validate(wasm, functions.inner())?;

        // start function signature must be [] -> []
        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
        // SAFETY: We just validated this function index using the same
        // `IdxVec`.
        let type_idx = unsafe { functions.inner().get(func_idx) };

        // SAFETY: There exists only one `IdxVec<TypeIdx, FuncType>` in the
        // current function. Therefore, this has to be the same one used to
        // create and validate this `TypeIdx`.
        let func_type = unsafe { types.get(*type_idx) };
        if func_type
            != &(FuncType {
                params: ResultType {
                    valtypes: Vec::new(),
                },
                returns: ResultType {
                    valtypes: Vec::new(),
                },
            })
        {
            Err(ValidationError::InvalidStartFunctionSignature)
        } else {
            Ok(func_idx)
        }
    })?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let elements = handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
        ElemType::read_and_validate(
            wasm,
            functions.inner(),
            &mut validation_context_refs,
            tables.inner(),
            &imported_global_types,
        )
        .map(|elements| IdxVec::new(elements).expect("that index space creation never fails because the length of the elements vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
    // As per the official documentation:
    //
    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
    let data_count: Option<u32> =
        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
            wasm.read_var_u32()
        })?;

    trace!("data count: {data_count:?}");

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let mut sidetable = Sidetable::new();
    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
        // SAFETY: It is required that all passed index values are valid in all
        // passed `IdxVec`s. The current function does not take any index types
        // as arguments and every `IdxVec<..., ...>` is unique because they use
        // different generics. Therefore, all index types must be valid in their
        // relevant `IdxVec`s.
        unsafe {
            code::validate_code_section(
                wasm,
                h,
                &types,
                &functions,
                globals.inner(),
                memories.inner(),
                data_count,
                tables.inner(),
                &elements,
                &validation_context_refs,
                &mut sidetable,
            )
        }
    })?
    .unwrap_or_default();

    // A missing code section yields an empty vector, so this also rejects
    // modules that declare local functions without providing any code.
    if func_blocks_stps.len() != functions.len_local_definitions().into_usize() {
        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
        data::validate_data_section(wasm, h, &imported_global_types, functions.inner(), memories.inner())
            .map(|data_segments| IdxVec::new(data_segments).expect("that index space creation never fails because the length of the data segments vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
    if let Some(data_count) = data_count {
        if data_count != data_section.len() {
            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
        }
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // All sections should have been handled. A header that is still pending
    // here was not consumed by any of the steps above.
    if let Some(header) = header {
        return Err(ValidationError::SectionOutOfOrder(header.ty));
    }

    debug!("Validation was successful");
    let validation_info = ValidationInfo {
        wasm: wasm.into_inner(),
        types,
        imports,
        functions,
        tables,
        memories,
        globals,
        exports,
        func_blocks_stps,
        sidetable,
        data: data_section,
        start,
        elements,
        custom_sections,
    };
    validate_no_duplicate_exports(&validation_info)?;

    Ok(validation_info)
}
352
353fn read_next_header(
354    wasm: &mut WasmReader,
355    header: &mut Option<SectionHeader>,
356) -> Result<(), ValidationError> {
357    if header.is_none() && !wasm.remaining_bytes().is_empty() {
358        *header = Some(SectionHeader::read(wasm)?);
359    }
360    Ok(())
361}
362
363#[inline(always)]
364fn handle_section<'wasm, T, F>(
365    wasm: &mut WasmReader<'wasm>,
366    header: &mut Option<SectionHeader>,
367    section_ty: SectionTy,
368    handler: F,
369) -> Result<Option<T>, ValidationError>
370where
371    T: 'wasm,
372    F: FnOnce(&mut WasmReader<'wasm>, SectionHeader) -> Result<T, ValidationError>,
373{
374    match &header {
375        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
376            let h = header.take().unwrap();
377            trace!("Handling section {:?}", h.ty);
378            let ret = handler(wasm, h)?;
379            read_next_header(wasm, header)?;
380            Ok(Some(ret))
381        }
382        _ => Ok(None),
383    }
384}
385
386/// Reads the next sections as long as they are custom sections and pushes them
387/// into the `custom_sections` vector.
388fn read_all_custom_sections<'wasm>(
389    wasm: &mut WasmReader<'wasm>,
390    section_header: &mut Option<SectionHeader>,
391    custom_sections: &mut Vec<CustomSection<'wasm>>,
392) -> Result<(), ValidationError> {
393    let mut read_custom_section = || {
394        handle_section(
395            wasm,
396            section_header,
397            SectionTy::Custom,
398            CustomSection::read_and_validate,
399        )
400    };
401
402    while let Some(custom_section) = read_custom_section()? {
403        custom_sections.push(custom_section);
404    }
405
406    Ok(())
407}
408
409impl<'wasm> ValidationInfo<'wasm> {
410    /// Returns the imports of this module as an iterator. Each import consist
411    /// of a module name, a name and an extern type.
412    ///
413    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_imports
414    pub fn imports<'a>(
415        &'a self,
416    ) -> Map<
417        core::slice::Iter<'a, Import<'wasm>>,
418        impl FnMut(&'a Import<'wasm>) -> (&'a str, &'a str, ExternType),
419    > {
420        self.imports.iter().map(|import| {
421            // SAFETY: This is sound because the argument is `self` and the
422            // import desc also comes from `self`.
423            let extern_type = unsafe { import.desc.extern_type(self) };
424            (import.module_name, import.name, extern_type)
425        })
426    }
427
428    /// Returns the exports of this module as an iterator. Each export consist
429    /// of a name, and an extern type.
430    ///
431    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_exports
432    pub fn exports<'a>(
433        &'a self,
434    ) -> Map<
435        core::slice::Iter<'a, Export<'wasm>>,
436        impl FnMut(&'a Export<'wasm>) -> (&'a str, ExternType),
437    > {
438        self.exports.iter().map(|export| {
439            // SAFETY: This is sound because the argument is `self` and the
440            // export desc also comes from `self`.
441            let extern_type = unsafe { export.desc.extern_type(self) };
442            (export.name, extern_type)
443        })
444    }
445
446    /// Returns a list of all custom sections in the bytecode. Every custom
447    /// section consists of its name and the custom section's bytecode
448    /// (excluding the name itself).
449    pub fn custom_sections(&self) -> &[CustomSection<'wasm>] {
450        &self.custom_sections
451    }
452}