wasm/validation/
mod.rs

1use core::iter::Map;
2
3use alloc::collections::btree_set::{self, BTreeSet};
4use alloc::vec::Vec;
5
6use crate::core::error::DecodingError;
7use crate::core::indices::{
8    DataIdx, ElemIdx, ExtendedIdxVec, FuncIdx, GlobalIdx, IdxVec, IdxVecOverflowError, MemIdx,
9    TableIdx, TypeIdx,
10};
11use crate::core::reader::section_header::{SectionHeader, SectionTy};
12use crate::core::reader::span::Span;
13use crate::core::reader::types::data::DataSegment;
14use crate::core::reader::types::element::ElemType;
15use crate::core::reader::types::export::{Export, ExportDesc};
16use crate::core::reader::types::global::{Global, GlobalType};
17use crate::core::reader::types::import::{Import, ImportDesc};
18use crate::core::reader::types::{ExternType, FuncType, MemType, ResultType, TableType};
19use crate::core::reader::WasmReader;
20use crate::core::sidetable::Sidetable;
21use crate::core::utils::ToUsizeExt;
22use crate::custom_section::CustomSection;
23use crate::ValidationError;
24
25pub(crate) mod code;
26pub(crate) mod custom_section;
27pub(crate) mod data;
28pub(crate) mod globals;
29pub(crate) mod read_constant_expression;
30pub(crate) mod validation_stack;
31
32/// Information collected from validating a module.
33///
34/// This can be used to instantiate a new module instance in some
35/// [`Store`](crate::Store) thorugh
36/// [`Store::module_instantiate`](crate::Store::module_instantiate)
37#[derive(Clone, Debug)]
38pub struct ValidationInfo<'bytecode> {
39    pub(crate) wasm: &'bytecode [u8],
40    pub(crate) types: IdxVec<TypeIdx, FuncType>,
41    pub(crate) imports: Vec<Import<'bytecode>>,
42    pub(crate) functions: ExtendedIdxVec<FuncIdx, TypeIdx>,
43    pub(crate) tables: ExtendedIdxVec<TableIdx, TableType>,
44    pub(crate) memories: ExtendedIdxVec<MemIdx, MemType>,
45    pub(crate) globals: ExtendedIdxVec<GlobalIdx, Global>,
46    pub(crate) exports: Vec<Export<'bytecode>>,
47    pub(crate) elements: IdxVec<ElemIdx, ElemType>,
48    pub(crate) data: IdxVec<DataIdx, DataSegment>,
49    /// Each block contains the validated code section and the stp corresponding to
50    /// the beginning of that code section
51    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
52    pub(crate) sidetable: Sidetable,
53    /// The start function which is automatically executed during instantiation
54    pub(crate) start: Option<FuncIdx>,
55    pub(crate) custom_sections: Vec<CustomSection<'bytecode>>,
56    // pub(crate) exports_length: Exported,
57}
58
59fn validate_no_duplicate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
60    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
61    for export in &validation_info.exports {
62        if found_export_names.contains(export.name) {
63            return Err(ValidationError::DuplicateExportName);
64        }
65        found_export_names.insert(export.name);
66    }
67    Ok(())
68}
69
70pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
71    let mut wasm = WasmReader::new(wasm);
72
73    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
74    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
75    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
76    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
77    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
78    // therefore this hack is acceptable.
79    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
80    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
81
82    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
83
84    trace!("Starting validation of bytecode");
85
86    trace!("Validating magic value");
87    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
88        return Err(DecodingError::InvalidMagic.into());
89    };
90
91    trace!("Validating version number");
92    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
93        return Err(DecodingError::InvalidBinaryFormatVersion.into());
94    };
95    debug!("Header ok");
96
97    let mut header = None;
98    read_next_header(&mut wasm, &mut header)?;
99
100    let mut custom_sections = Vec::new();
101    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
102
103    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
104        wasm.read_vec(FuncType::read).map(|types| IdxVec::new(types).expect("that index space creation never fails because the length of the types vector is encoded as a 32-bit integer in the bytecode"))
105    })?
106    .unwrap_or_default();
107
108    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
109
110    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
111        wasm.read_vec(|wasm| Import::read_and_validate(wasm, &types))
112    })?
113    .unwrap_or_default();
114
115    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
116
117    // The `Function` section only covers module-level (or "local") functions.
118    // Imported functions have their types known in the `import` section. Both
119    // local and imported functions share the same index space.
120    //
121    // Imported functions are given priority and have the first indicies, and
122    // only after that do the local functions get assigned their indices.
123    let local_functions =
124        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
125            wasm.read_vec(|wasm| TypeIdx::read_and_validate(wasm, &types))
126        })?
127        .unwrap_or_default();
128
129    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
130        ImportDesc::Func(type_idx) => Some(*type_idx),
131        _ => None,
132    });
133
134    let functions = ExtendedIdxVec::new(imported_functions.collect(), local_functions)
135        .map_err(|IdxVecOverflowError| ValidationError::TooManyFunctions)?;
136
137    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
138
139    let imported_tables = imports.iter().filter_map(|m| match m.desc {
140        ImportDesc::Table(table) => Some(table),
141        _ => None,
142    });
143    let local_tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
144        wasm.read_vec(TableType::read)
145    })?
146    .unwrap_or_default();
147
148    let tables = ExtendedIdxVec::new(imported_tables.collect(), local_tables)
149        .map_err(|IdxVecOverflowError| ValidationError::TooManyTables)?;
150
151    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
152
153    let imported_memories = imports.iter().filter_map(|m| match m.desc {
154        ImportDesc::Mem(mem) => Some(mem),
155        _ => None,
156    });
157    // let imported_memories_length = imported_memories.len();
158    let local_memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
159        wasm.read_vec(MemType::read)
160    })?
161    .unwrap_or_default();
162
163    let memories = ExtendedIdxVec::new(imported_memories.collect(), local_memories)
164        .map_err(|IdxVecOverflowError| ValidationError::TooManyMemories)?;
165
166    if memories.inner().len() > 1 {
167        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
168    }
169
170    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
171
172    let imported_global_types: Vec<GlobalType> = imports
173        .iter()
174        .filter_map(|m| match m.desc {
175            ImportDesc::Global(global) => Some(global),
176            _ => None,
177        })
178        .collect();
179    let local_globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
180        globals::validate_global_section(
181            wasm,
182            h,
183            &imported_global_types,
184            &mut validation_context_refs,
185            functions.inner(),
186        )
187    })?
188    .unwrap_or_default();
189
190    let imported_globals = imported_global_types.iter().map(|ty| Global {
191        // TODO using a default MAX value for spans that are never executed is
192        // not really safe. Maybe opt for an Option instead.
193        init_expr: Span::new(usize::MAX, 0),
194        ty: *ty,
195    });
196    let globals = ExtendedIdxVec::new(imported_globals.collect(), local_globals)
197        .map_err(|IdxVecOverflowError| ValidationError::TooManyGlobals)?;
198
199    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
200
201    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
202        wasm.read_vec(|wasm| {
203            Export::read_and_validate(
204                wasm,
205                functions.inner(),
206                tables.inner(),
207                memories.inner(),
208                globals.inner(),
209            )
210        })
211    })?
212    .unwrap_or_default();
213    validation_context_refs.extend(exports.iter().filter_map(
214        |Export { name: _, desc }| match *desc {
215            ExportDesc::Func(func_idx) => Some(func_idx),
216            _ => None,
217        },
218    ));
219
220    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
221
222    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
223        let func_idx = FuncIdx::read_and_validate(wasm, functions.inner())?;
224
225        // start function signature must be [] -> []
226        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
227        // SAFETY: We just validated this function index using the same
228        // `IdxVec`.
229        let type_idx = unsafe { functions.inner().get(func_idx) };
230
231        // SAFETY: There exists only one `IdxVec<TypeIdx, FuncType>` in the
232        // current function. Therefore, this has to be the same one used to
233        // create and validate this `TypeIdx`.
234        let func_type = unsafe { types.get(*type_idx) };
235        if func_type
236            != &(FuncType {
237                params: ResultType {
238                    valtypes: Vec::new(),
239                },
240                returns: ResultType {
241                    valtypes: Vec::new(),
242                },
243            })
244        {
245            Err(ValidationError::InvalidStartFunctionSignature)
246        } else {
247            Ok(func_idx)
248        }
249    })?;
250
251    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
252
253    let elements = handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
254        ElemType::read_and_validate(
255            wasm,
256            functions.inner(),
257            &mut validation_context_refs,
258            tables.inner(),
259            &imported_global_types,
260        )
261        .map(|elements| IdxVec::new(elements).expect("that index space creation never fails because the length of the elements vector is encoded as a 32-bit integer in the bytecode"))
262    })?
263    .unwrap_or_default();
264
265    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
266
267    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
268    // As per the official documentation:
269    //
270    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
271    let data_count: Option<u32> =
272        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
273            wasm.read_var_u32()
274        })?;
275
276    trace!("data count: {data_count:?}");
277
278    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
279
280    let mut sidetable = Sidetable::new();
281    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
282        // SAFETY: It is required that all passed index values are valid in all
283        // passed `IdxVec`s. The current function does not take any index types
284        // as arguments and every `IdxVec<..., ...>` is unique because they use
285        // different generics. Therefore, all index types must be valid in their
286        // relevant `IdxVec`s.
287        unsafe {
288            code::validate_code_section(
289                wasm,
290                h,
291                &types,
292                &functions,
293                globals.inner(),
294                memories.inner(),
295                data_count,
296                tables.inner(),
297                &elements,
298                &validation_context_refs,
299                &mut sidetable,
300            )
301        }
302    })?
303    .unwrap_or_default();
304
305    if func_blocks_stps.len() != functions.len_local_definitions().into_usize() {
306        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
307    }
308
309    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
310
311    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
312        // wasm.read_vec(DataSegment::read)
313        data::validate_data_section(wasm, h, &imported_global_types, functions.inner(), memories.inner())
314            .map(|data_segments| IdxVec::new(data_segments).expect("that index space creation never fails because the length of the data segments vector is encoded as a 32-bit integer in the bytecode"))
315    })?
316    .unwrap_or_default();
317
318    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
319    if let Some(data_count) = data_count {
320        if data_count != data_section.len() {
321            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
322        }
323    }
324
325    read_all_custom_sections(&mut wasm, &mut header, &mut custom_sections)?;
326
327    // All sections should have been handled
328    if let Some(header) = header {
329        return Err(ValidationError::SectionOutOfOrder(header.ty));
330    }
331
332    debug!("Validation was successful");
333    let validation_info = ValidationInfo {
334        wasm: wasm.into_inner(),
335        types,
336        imports,
337        functions,
338        tables,
339        memories,
340        globals,
341        exports,
342        func_blocks_stps,
343        sidetable,
344        data: data_section,
345        start,
346        elements,
347        custom_sections,
348    };
349    validate_no_duplicate_exports(&validation_info)?;
350
351    Ok(validation_info)
352}
353
354fn read_next_header(
355    wasm: &mut WasmReader,
356    header: &mut Option<SectionHeader>,
357) -> Result<(), DecodingError> {
358    if header.is_none() && !wasm.remaining_bytes().is_empty() {
359        *header = Some(SectionHeader::read(wasm)?);
360    }
361    Ok(())
362}
363
364#[inline(always)]
365fn handle_section<'wasm, T, F, E>(
366    wasm: &mut WasmReader<'wasm>,
367    header: &mut Option<SectionHeader>,
368    section_ty: SectionTy,
369    handler: F,
370) -> Result<Option<T>, E>
371where
372    T: 'wasm,
373    F: FnOnce(&mut WasmReader<'wasm>, SectionHeader) -> Result<T, E>,
374    E: From<DecodingError>,
375{
376    match &header {
377        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
378            let h = header.take().unwrap();
379            trace!("Handling section {:?}", h.ty);
380            let ret = handler(wasm, h)?;
381            read_next_header(wasm, header)?;
382            Ok(Some(ret))
383        }
384        _ => Ok(None),
385    }
386}
387
388/// Reads the next sections as long as they are custom sections and pushes them
389/// into the `custom_sections` vector.
390fn read_all_custom_sections<'wasm>(
391    wasm: &mut WasmReader<'wasm>,
392    section_header: &mut Option<SectionHeader>,
393    custom_sections: &mut Vec<CustomSection<'wasm>>,
394) -> Result<(), ValidationError> {
395    let mut read_custom_section = || {
396        handle_section(
397            wasm,
398            section_header,
399            SectionTy::Custom,
400            CustomSection::read_and_validate,
401        )
402    };
403
404    while let Some(custom_section) = read_custom_section()? {
405        custom_sections.push(custom_section);
406    }
407
408    Ok(())
409}
410
411impl<'wasm> ValidationInfo<'wasm> {
412    /// Returns the imports of this module as an iterator. Each import consist
413    /// of a module name, a name and an extern type.
414    ///
415    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_imports
416    pub fn imports<'a>(
417        &'a self,
418    ) -> Map<
419        core::slice::Iter<'a, Import<'wasm>>,
420        impl FnMut(&'a Import<'wasm>) -> (&'a str, &'a str, ExternType),
421    > {
422        self.imports.iter().map(|import| {
423            // SAFETY: This is sound because the argument is `self` and the
424            // import desc also comes from `self`.
425            let extern_type = unsafe { import.desc.extern_type(self) };
426            (import.module_name, import.name, extern_type)
427        })
428    }
429
430    /// Returns the exports of this module as an iterator. Each export consist
431    /// of a name, and an extern type.
432    ///
433    /// See: WebAssembly Specification 2.0 - 7.1.5 - module_exports
434    pub fn exports<'a>(
435        &'a self,
436    ) -> Map<
437        core::slice::Iter<'a, Export<'wasm>>,
438        impl FnMut(&'a Export<'wasm>) -> (&'a str, ExternType),
439    > {
440        self.exports.iter().map(|export| {
441            // SAFETY: This is sound because the argument is `self` and the
442            // export desc also comes from `self`.
443            let extern_type = unsafe { export.desc.extern_type(self) };
444            (export.name, extern_type)
445        })
446    }
447
448    /// Returns a list of all custom sections in the bytecode. Every custom
449    /// section consists of its name and the custom section's bytecode
450    /// (excluding the name itself).
451    pub fn custom_sections(&self) -> &[CustomSection<'wasm>] {
452        &self.custom_sections
453    }
454}