wasm/validation/
mod.rs

1use core::iter::Map;
2
3use alloc::collections::btree_set::{self, BTreeSet};
4use alloc::vec::Vec;
5
6use crate::core::indices::{
7    DataIdx, ElemIdx, ExtendedIdxVec, FuncIdx, GlobalIdx, IdxVec, IdxVecOverflowError, MemIdx,
8    TableIdx, TypeIdx,
9};
10use crate::core::reader::section_header::{SectionHeader, SectionTy};
11use crate::core::reader::span::Span;
12use crate::core::reader::types::data::DataSegment;
13use crate::core::reader::types::element::ElemType;
14use crate::core::reader::types::export::{Export, ExportDesc};
15use crate::core::reader::types::global::{Global, GlobalType};
16use crate::core::reader::types::import::{Import, ImportDesc};
17use crate::core::reader::types::{ExternType, FuncType, MemType, ResultType, TableType};
18use crate::core::reader::WasmReader;
19use crate::core::sidetable::Sidetable;
20use crate::core::utils::ToUsizeExt;
21use crate::custom_section::CustomSection;
22use crate::ValidationError;
23
24pub(crate) mod code;
25pub(crate) mod custom_section;
26pub(crate) mod data;
27pub(crate) mod globals;
28pub(crate) mod read_constant_expression;
29pub(crate) mod validation_stack;
30
/// Information collected from validating a module.
///
/// This can be used to instantiate a new module instance in some
/// [`Store`](crate::Store) through
/// [`Store::module_instantiate`](crate::Store::module_instantiate).
///
/// A value of this type is produced by [`validate`]; the `'bytecode` lifetime
/// ties it to the byte slice that was validated.
#[derive(Clone, Debug)]
pub struct ValidationInfo<'bytecode> {
    /// The bytecode as handed back by the reader once validation finished.
    pub(crate) wasm: &'bytecode [u8],
    /// Function types read from the type section (empty if the section is
    /// absent).
    pub(crate) types: IdxVec<TypeIdx, FuncType>,
    /// Entries of the import section, in the order they were read.
    pub(crate) imports: Vec<Import<'bytecode>>,
    /// Type indices of all functions: the imported functions combined with the
    /// functions declared in the function section.
    pub(crate) functions: ExtendedIdxVec<FuncIdx, TypeIdx>,
    /// Table types: the imported tables combined with the table section.
    pub(crate) tables: ExtendedIdxVec<TableIdx, TableType>,
    /// Memory types: the imported memories combined with the memory section.
    pub(crate) memories: ExtendedIdxVec<MemIdx, MemType>,
    /// Globals: the imported globals combined with the global section.
    /// Imported globals carry a placeholder `init_expr` span.
    pub(crate) globals: ExtendedIdxVec<GlobalIdx, Global>,
    /// Entries of the export section. Validation rejects modules in which two
    /// exports share a name.
    pub(crate) exports: Vec<Export<'bytecode>>,
    /// Element segments read from the element section.
    pub(crate) elements: IdxVec<ElemIdx, ElemType>,
    /// Data segments read from the data section.
    pub(crate) data: IdxVec<DataIdx, DataSegment>,
    /// Each block contains the validated code section and the stp corresponding to
    /// the beginning of that code section
    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
    /// The sidetable that was filled in while validating the code section.
    pub(crate) sidetable: Sidetable,
    /// The start function which is automatically executed during instantiation
    pub(crate) start: Option<FuncIdx>,
    /// All custom sections found in the module, in the order they were read.
    pub(crate) custom_sections: Vec<CustomSection<'bytecode>>,
    // pub(crate) exports_length: Exported,
}
57
58fn validate_no_duplicate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
59    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
60    for export in &validation_info.exports {
61        if found_export_names.contains(export.name) {
62            return Err(ValidationError::DuplicateExportName);
63        }
64        found_export_names.insert(export.name);
65    }
66    Ok(())
67}
68
/// Validates a WebAssembly module given in its binary format.
///
/// After checking the magic value and the version number, the sections are
/// processed in a single pass and in a fixed order: type, import, function,
/// table, memory, global, export, start, element, data count, code and data.
/// Custom sections are accepted before, between and after any of these and are
/// collected separately.
///
/// On success, everything gathered along the way is returned as a
/// [`ValidationInfo`] borrowing from `wasm`.
///
/// # Errors
///
/// Returns a [`ValidationError`] if any part of the module is invalid. Besides
/// the errors propagated from the individual section readers, this function
/// itself reports an invalid magic value or version, too many functions,
/// tables, memories or globals, more than one memory, an invalid start
/// function signature, mismatching function/code or data count/data section
/// lengths, a leftover section header (out of order section) and duplicate
/// export names.
pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
    let mut wasm = WasmReader::new(wasm);

    // Represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
    // A `ref.func` instruction is only valid if it has an immediate that is a member of C.refs.
    // This set holds all the func_idx's occurring in the module, except in its functions or start function.
    // I make an exception here by not including func_idx's occurring within data segments in C.refs as well, so that single pass validation is possible.
    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
    // therefore this hack is acceptable.
    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module

    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();

    trace!("Starting validation of bytecode");

    trace!("Validating magic value");
    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
        return Err(ValidationError::InvalidMagic);
    };

    trace!("Validating version number");
    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
        return Err(ValidationError::InvalidBinaryFormatVersion);
    };
    debug!("Header ok");

    // `header` always buffers the header of the next section that has not been
    // handled yet, or `None` once the end of the bytecode has been reached.
    let mut header = None;
    read_next_header(&mut wasm, &mut header)?;

    let mut custom_sections = Vec::new();
    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
        wasm.read_vec(FuncType::read).map(|types| IdxVec::new(types).expect("that index space creation never fails because the length of the types vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
        wasm.read_vec(|wasm| Import::read_and_validate(wasm, &types))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // The `Function` section only covers module-level (or "local") functions.
    // Imported functions have their types known in the `import` section. Both
    // local and imported functions share the same index space.
    //
    // Imported functions are given priority and have the first indices, and
    // only after that do the local functions get assigned their indices.
    let local_functions =
        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
            wasm.read_vec(|wasm| TypeIdx::read_and_validate(wasm, &types))
        })?
        .unwrap_or_default();

    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
        ImportDesc::Func(type_idx) => Some(*type_idx),
        _ => None,
    });

    let functions = ExtendedIdxVec::new(imported_functions.collect(), local_functions)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyFunctions)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imported_tables = imports.iter().filter_map(|m| match m.desc {
        ImportDesc::Table(table) => Some(table),
        _ => None,
    });
    let local_tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
        wasm.read_vec(TableType::read)
    })?
    .unwrap_or_default();

    let tables = ExtendedIdxVec::new(imported_tables.collect(), local_tables)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyTables)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imported_memories = imports.iter().filter_map(|m| match m.desc {
        ImportDesc::Mem(mem) => Some(mem),
        _ => None,
    });
    // let imported_memories_length = imported_memories.len();
    let local_memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
        wasm.read_vec(MemType::read)
    })?
    .unwrap_or_default();

    let memories = ExtendedIdxVec::new(imported_memories.collect(), local_memories)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyMemories)?;

    // Imported and local memories are counted together here.
    if memories.inner().len() > 1 {
        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let imported_global_types: Vec<GlobalType> = imports
        .iter()
        .filter_map(|m| match m.desc {
            ImportDesc::Global(global) => Some(global),
            _ => None,
        })
        .collect();
    let local_globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
        globals::validate_global_section(
            wasm,
            h,
            &imported_global_types,
            &mut validation_context_refs,
            functions.inner(),
        )
    })?
    .unwrap_or_default();

    let imported_globals = imported_global_types.iter().map(|ty| Global {
        // TODO using a default MAX value for spans that are never executed is
        // not really safe. Maybe opt for an Option instead.
        init_expr: Span::new(usize::MAX, 0),
        ty: *ty,
    });
    let globals = ExtendedIdxVec::new(imported_globals.collect(), local_globals)
        .map_err(|IdxVecOverflowError| ValidationError::TooManyGlobals)?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
        wasm.read_vec(|wasm| {
            Export::read_and_validate(
                wasm,
                functions.inner(),
                tables.inner(),
                memories.inner(),
                globals.inner(),
            )
        })
    })?
    .unwrap_or_default();
    // Every exported function becomes a member of C.refs.
    validation_context_refs.extend(exports.iter().filter_map(
        |Export { name: _, desc }| match *desc {
            ExportDesc::Func(func_idx) => Some(func_idx),
            _ => None,
        },
    ));

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
        let func_idx = FuncIdx::read_and_validate(wasm, functions.inner())?;

        // start function signature must be [] -> []
        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
        // SAFETY: We just validated this function index using the same
        // `IdxVec`.
        let type_idx = unsafe { functions.inner().get(func_idx) };

        // SAFETY: There exists only one `IdxVec<TypeIdx, FuncType>` in the
        // current function. Therefore, this has to be the same one used to
        // create and validate this `TypeIdx`.
        let func_type = unsafe { types.get(*type_idx) };
        if func_type
            != &(FuncType {
                params: ResultType {
                    valtypes: Vec::new(),
                },
                returns: ResultType {
                    valtypes: Vec::new(),
                },
            })
        {
            Err(ValidationError::InvalidStartFunctionSignature)
        } else {
            Ok(func_idx)
        }
    })?;

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let elements = handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
        ElemType::read_and_validate(
            wasm,
            functions.inner(),
            &mut validation_context_refs,
            tables.inner(),
            &imported_global_types,
        )
        .map(|elements| IdxVec::new(elements).expect("that index space creation never fails because the length of the elements vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
    // As per the official documentation:
    //
    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
    let data_count: Option<u32> =
        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
            wasm.read_var_u32()
        })?;
    if let Some(dc) = data_count {
        trace!("data count: {dc}");
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let mut sidetable = Sidetable::new();
    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
        // SAFETY: It is required that all passed index values are valid in all
        // passed `IdxVec`s. The current function does not take any index types
        // as arguments and every `IdxVec<..., ...>` is unique because they use
        // different generics. Therefore, all index types must be valid in their
        // relevant `IdxVec`s.
        unsafe {
            code::validate_code_section(
                wasm,
                h,
                &types,
                &functions,
                globals.inner(),
                memories.inner(),
                data_count,
                tables.inner(),
                &elements,
                &validation_context_refs,
                &mut sidetable,
            )
        }
    })?
    .unwrap_or_default();

    // Every function declared in the function section needs exactly one body
    // in the code section. A missing code section counts as zero bodies.
    if func_blocks_stps.len() != functions.len_local_definitions().into_usize() {
        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
        // wasm.read_vec(DataSegment::read)
        data::validate_data_section(wasm, h, &imported_global_types, functions.inner(), memories.inner())
            .map(|data_segments| IdxVec::new(data_segments).expect("that index space creation never fails because the length of the data segments vector is encoded as a 32-bit integer in the bytecode"))
    })?
    .unwrap_or_default();

    // If a data count section is present, it has to agree with the actual
    // number of data segments.
    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
    if let Some(data_count) = data_count {
        if data_count != data_section.len() {
            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
        }
    }

    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;

    // All sections should have been handled. A header that is still buffered
    // at this point belongs to a section that no step above accepted.
    if let Some(header) = header {
        return Err(ValidationError::SectionOutOfOrder(header.ty));
    }

    debug!("Validation was successful");
    let validation_info = ValidationInfo {
        wasm: wasm.into_inner(),
        types,
        imports,
        functions,
        tables,
        memories,
        globals,
        exports,
        func_blocks_stps,
        sidetable,
        data: data_section,
        start,
        elements,
        custom_sections,
    };
    validate_no_duplicate_exports(&validation_info)?;

    Ok(validation_info)
}
353
354fn read_next_header(
355    wasm: &mut WasmReader,
356    header: &mut Option<SectionHeader>,
357) -> Result<(), ValidationError> {
358    if header.is_none() && !wasm.remaining_bytes().is_empty() {
359        *header = Some(SectionHeader::read(wasm)?);
360    }
361    Ok(())
362}
363
364#[inline(always)]
365fn handle_section<'wasm, T, F>(
366    wasm: &mut WasmReader<'wasm>,
367    header: &mut Option<SectionHeader>,
368    section_ty: SectionTy,
369    handler: F,
370) -> Result<Option<T>, ValidationError>
371where
372    T: 'wasm,
373    F: FnOnce(&mut WasmReader<'wasm>, SectionHeader) -> Result<T, ValidationError>,
374{
375    match &header {
376        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
377            let h = header.take().unwrap();
378            trace!("Handling section {:?}", h.ty);
379            let ret = handler(wasm, h)?;
380            read_next_header(wasm, header)?;
381            Ok(Some(ret))
382        }
383        _ => Ok(None),
384    }
385}
386
387/// Reads the next sections as long as they are custom sections and pushes them
388/// into the `custom_sections` vector.
389fn read_all_custom_sections<'wasm>(
390    wasm: &mut WasmReader<'wasm>,
391    section_header: &mut Option<SectionHeader>,
392    custom_sections: &mut Vec<CustomSection<'wasm>>,
393) -> Result<(), ValidationError> {
394    let mut read_custom_section = || {
395        handle_section(
396            wasm,
397            section_header,
398            SectionTy::Custom,
399            CustomSection::read_and_validate,
400        )
401    };
402
403    while let Some(custom_section) = read_custom_section()? {
404        custom_sections.push(custom_section);
405    }
406
407    Ok(())
408}
409
410impl<'wasm> ValidationInfo<'wasm> {
411    /// Returns the imports of this module as an iterator. Each import consist
412    /// of a module name, a name and an extern type.
413    ///
414    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_imports
415    pub fn imports<'a>(
416        &'a self,
417    ) -> Map<
418        core::slice::Iter<'a, Import<'wasm>>,
419        impl FnMut(&'a Import<'wasm>) -> (&'a str, &'a str, ExternType),
420    > {
421        self.imports.iter().map(|import| {
422            // SAFETY: This is sound because the argument is `self` and the
423            // import desc also comes from `self`.
424            let extern_type = unsafe { import.desc.extern_type(self) };
425            (import.module_name, import.name, extern_type)
426        })
427    }
428
429    /// Returns the exports of this module as an iterator. Each export consist
430    /// of a name, and an extern type.
431    ///
432    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_exports
433    pub fn exports<'a>(
434        &'a self,
435    ) -> Map<
436        core::slice::Iter<'a, Export<'wasm>>,
437        impl FnMut(&'a Export<'wasm>) -> (&'a str, ExternType),
438    > {
439        self.exports.iter().map(|export| {
440            // SAFETY: This is sound because the argument is `self` and the
441            // export desc also comes from `self`.
442            let extern_type = unsafe { export.desc.extern_type(self) };
443            (export.name, extern_type)
444        })
445    }
446
447    /// Returns a list of all custom sections in the bytecode. Every custom
448    /// section consists of its name and the custom section's bytecode
449    /// (excluding the name itself).
450    pub fn custom_sections(&self) -> &[CustomSection<'wasm>] {
451        &self.custom_sections
452    }
453}