wasm/validation/
mod.rs

1use alloc::collections::btree_set::{self, BTreeSet};
2use alloc::vec::Vec;
3
4use crate::core::indices::{FuncIdx, TypeIdx};
5use crate::core::reader::section_header::{SectionHeader, SectionTy};
6use crate::core::reader::span::Span;
7use crate::core::reader::types::data::DataSegment;
8use crate::core::reader::types::element::ElemType;
9use crate::core::reader::types::export::Export;
10use crate::core::reader::types::global::{Global, GlobalType};
11use crate::core::reader::types::import::{Import, ImportDesc};
12use crate::core::reader::types::{FuncType, MemType, ResultType, TableType};
13use crate::core::reader::{WasmReadable, WasmReader};
14use crate::core::sidetable::Sidetable;
15use crate::{ExportDesc, ValidationError};
16
17pub(crate) mod code;
18pub(crate) mod data;
19pub(crate) mod globals;
20pub(crate) mod read_constant_expression;
21pub(crate) mod validation_stack;
22
/// Per-kind counters for the entries of a module's import section.
///
/// Each field holds how many imports of the corresponding kind were
/// encountered.
#[derive(Clone, Debug)]
pub(crate) struct ImportsLength {
    /// Number of imported functions.
    pub imported_functions: usize,
    /// Number of imported globals.
    pub imported_globals: usize,
    /// Number of imported memories.
    pub imported_memories: usize,
    /// Number of imported tables.
    pub imported_tables: usize,
}
30
/// Information collected from validating a module.
///
/// This can be used to instantiate a new module instance in some
/// [`Store`](crate::Store) either through
/// [`Store::module_instantiate_unchecked`](crate::Store::module_instantiate_unchecked)
/// or
/// [`Linker::module_instantiate_unchecked`](crate::execution::linker::Linker::module_instantiate_unchecked).
#[derive(Clone, Debug)]
pub struct ValidationInfo<'bytecode> {
    /// The bytecode slice handed back by the reader once validation finished
    pub(crate) wasm: &'bytecode [u8],
    /// Function types read from the type section (empty if the section is absent)
    pub(crate) types: Vec<FuncType>,
    /// Entries read from the import section (empty if the section is absent)
    pub(crate) imports: Vec<Import>,
    /// Type index of every function declared in the function section; imported
    /// functions are not part of this list
    pub(crate) functions: Vec<TypeIdx>,
    /// Tables declared in the table section; imported tables are not part of
    /// this list
    pub(crate) tables: Vec<TableType>,
    /// Memories declared in the memory section; imported memories are not part
    /// of this list
    pub(crate) memories: Vec<MemType>,
    /// Globals declared in the global section; imported globals are not part of
    /// this list
    pub(crate) globals: Vec<Global>,
    /// Entries read from the export section (empty if the section is absent)
    #[allow(dead_code)]
    pub(crate) exports: Vec<Export>,
    /// Each block contains the validated code section and the stp corresponding to
    /// the beginning of that code section
    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
    /// The sidetable that was handed to the code section validation
    pub(crate) sidetable: Sidetable,
    /// Data segments read from the data section (empty if the section is absent)
    pub(crate) data: Vec<DataSegment>,
    /// The start function which is automatically executed during instantiation
    pub(crate) start: Option<FuncIdx>,
    /// Element segments read from the element section (empty if the section is
    /// absent)
    pub(crate) elements: Vec<ElemType>,
    /// Number of imports of each kind found in `imports`
    pub(crate) imports_length: ImportsLength,
    // pub(crate) exports_length: Exported,
}
60
61fn validate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
62    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
63    use crate::core::reader::types::export::ExportDesc::*;
64    for export in &validation_info.exports {
65        if found_export_names.contains(export.name.as_str()) {
66            return Err(ValidationError::DuplicateExportName);
67        }
68        found_export_names.insert(export.name.as_str());
69        match export.desc {
70            FuncIdx(func_idx) => {
71                if validation_info.functions.len()
72                    + validation_info.imports_length.imported_functions
73                    <= func_idx
74                {
75                    return Err(ValidationError::InvalidFuncIdx(func_idx));
76                }
77            }
78            TableIdx(table_idx) => {
79                if validation_info.tables.len() + validation_info.imports_length.imported_tables
80                    <= table_idx
81                {
82                    return Err(ValidationError::InvalidTableIdx(table_idx));
83                }
84            }
85            MemIdx(mem_idx) => {
86                if validation_info.memories.len() + validation_info.imports_length.imported_memories
87                    <= mem_idx
88                {
89                    return Err(ValidationError::InvalidMemIndex(mem_idx));
90                }
91            }
92            GlobalIdx(global_idx) => {
93                if validation_info.globals.len() + validation_info.imports_length.imported_globals
94                    <= global_idx
95                {
96                    return Err(ValidationError::InvalidGlobalIdx(global_idx));
97                }
98            }
99        }
100    }
101    Ok(())
102}
103
104fn get_imports_length(imports: &Vec<Import>) -> ImportsLength {
105    let mut imports_length = ImportsLength {
106        imported_functions: 0,
107        imported_globals: 0,
108        imported_memories: 0,
109        imported_tables: 0,
110    };
111
112    for import in imports {
113        match import.desc {
114            ImportDesc::Func(_) => imports_length.imported_functions += 1,
115            ImportDesc::Global(_) => imports_length.imported_globals += 1,
116            ImportDesc::Mem(_) => imports_length.imported_memories += 1,
117            ImportDesc::Table(_) => imports_length.imported_tables += 1,
118        }
119    }
120
121    imports_length
122}
123
124pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
125    let mut wasm = WasmReader::new(wasm);
126
127    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
128    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
129    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
130    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
131    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
132    // therefore this hack is acceptable.
133    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
134    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
135
136    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
137
138    trace!("Starting validation of bytecode");
139
140    trace!("Validating magic value");
141    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
142        return Err(ValidationError::InvalidMagic);
143    };
144
145    trace!("Validating version number");
146    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
147        return Err(ValidationError::InvalidBinaryFormatVersion);
148    };
149    debug!("Header ok");
150
151    let mut header = None;
152    read_next_header(&mut wasm, &mut header)?;
153
154    let skip_section = |wasm: &mut WasmReader, section_header: &mut Option<SectionHeader>| {
155        handle_section(wasm, section_header, SectionTy::Custom, |wasm, h| {
156            // customsec ::= section_0(custom)
157            // custom ::= name byte*
158            // name ::= b*:vec(byte) => name (if utf8(name) = b*)
159            // vec(B) ::= n:u32 (x:B)^n => x^n
160            let _name = wasm.read_name()?;
161
162            let remaining_bytes = h
163                .contents
164                .from()
165                .checked_add(h.contents.len())
166                .and_then(|res| res.checked_sub(wasm.pc))
167                .ok_or(ValidationError::InvalidCustomSectionLength)?;
168
169            // TODO: maybe do something with these remaining bytes?
170            let mut _bytes = Vec::new();
171            for _ in 0..remaining_bytes {
172                _bytes.push(wasm.read_u8()?)
173            }
174            Ok(())
175        })
176    };
177
178    while (skip_section(&mut wasm, &mut header)?).is_some() {}
179
180    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
181        wasm.read_vec(FuncType::read)
182    })?
183    .unwrap_or_default();
184
185    while (skip_section(&mut wasm, &mut header)?).is_some() {}
186
187    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
188        wasm.read_vec(|wasm| {
189            let import = Import::read(wasm)?;
190
191            match import.desc {
192                ImportDesc::Func(type_idx) => {
193                    types
194                        .get(type_idx)
195                        .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
196                }
197                ImportDesc::Table(_table_type) => {}
198                ImportDesc::Mem(_mem_type) => {}
199                ImportDesc::Global(_global_type) => {}
200            }
201
202            Ok(import)
203        })
204    })?
205    .unwrap_or_default();
206    let imports_length = get_imports_length(&imports);
207
208    while (skip_section(&mut wasm, &mut header)?).is_some() {}
209
210    // The `Function` section only covers module-level (or "local") functions.
211    // Imported functions have their types known in the `import` section. Both
212    // local and imported functions share the same index space.
213    //
214    // Imported functions are given priority and have the first indicies, and
215    // only after that do the local functions get assigned their indices.
216    let local_functions =
217        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
218            wasm.read_vec(|wasm| {
219                let type_idx = wasm.read_var_u32()? as usize;
220                types
221                    .get(type_idx)
222                    .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
223                Ok(type_idx)
224            })
225        })?
226        .unwrap_or_default();
227
228    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
229        ImportDesc::Func(type_idx) => Some(*type_idx),
230        _ => None,
231    });
232
233    let all_functions = imported_functions
234        .clone()
235        .chain(local_functions.iter().cloned())
236        .collect::<Vec<TypeIdx>>();
237
238    while (skip_section(&mut wasm, &mut header)?).is_some() {}
239
240    let imported_tables = imports
241        .iter()
242        .filter_map(|m| match m.desc {
243            ImportDesc::Table(table) => Some(table),
244            _ => None,
245        })
246        .collect::<Vec<TableType>>();
247    let tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
248        wasm.read_vec(TableType::read)
249    })?
250    .unwrap_or_default();
251
252    let all_tables = {
253        let mut temp = imported_tables;
254        temp.extend(tables.clone());
255        temp
256    };
257
258    while (skip_section(&mut wasm, &mut header)?).is_some() {}
259
260    let imported_memories = imports
261        .iter()
262        .filter_map(|m| match m.desc {
263            ImportDesc::Mem(mem) => Some(mem),
264            _ => None,
265        })
266        .collect::<Vec<MemType>>();
267    // let imported_memories_length = imported_memories.len();
268    let memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
269        wasm.read_vec(MemType::read)
270    })?
271    .unwrap_or_default();
272
273    let all_memories = {
274        let mut temp = imported_memories;
275        temp.extend(memories.clone());
276        temp
277    };
278    if all_memories.len() > 1 {
279        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
280    }
281
282    while (skip_section(&mut wasm, &mut header)?).is_some() {}
283
284    // we start off with the imported globals
285    let /* mut */ imported_global_types = imports
286        .iter()
287        .filter_map(|m| match m.desc {
288            ImportDesc::Global(global) => Some(global),
289            _ => None,
290        })
291        .collect::<Vec<GlobalType>>();
292    let imported_global_types_len = imported_global_types.len();
293    let globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
294        globals::validate_global_section(
295            wasm,
296            h,
297            &imported_global_types,
298            &mut validation_context_refs,
299            all_functions.len(),
300        )
301    })?
302    .unwrap_or_default();
303    let mut all_globals = Vec::new();
304    for item in imported_global_types.iter().take(imported_global_types_len) {
305        all_globals.push(Global {
306            init_expr: Span::new(usize::MAX, 0),
307            ty: *item,
308        })
309    }
310    for item in &globals {
311        all_globals.push(*item)
312    }
313
314    while (skip_section(&mut wasm, &mut header)?).is_some() {}
315
316    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
317        wasm.read_vec(Export::read)
318    })?
319    .unwrap_or_default();
320    validation_context_refs.extend(exports.iter().filter_map(
321        |Export { name: _, desc }| match *desc {
322            ExportDesc::FuncIdx(func_idx) => Some(func_idx),
323            _ => None,
324        },
325    ));
326
327    while (skip_section(&mut wasm, &mut header)?).is_some() {}
328
329    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
330        let func_idx = wasm.read_var_u32().map(|idx| idx as FuncIdx)?;
331        // start function signature must be [] -> []
332        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
333        let type_idx = *all_functions
334            .get(func_idx)
335            .ok_or(ValidationError::InvalidFuncIdx(func_idx))?;
336        if types[type_idx]
337            != (FuncType {
338                params: ResultType {
339                    valtypes: Vec::new(),
340                },
341                returns: ResultType {
342                    valtypes: Vec::new(),
343                },
344            })
345        {
346            Err(ValidationError::InvalidStartFunctionSignature)
347        } else {
348            Ok(func_idx)
349        }
350    })?;
351
352    while (skip_section(&mut wasm, &mut header)?).is_some() {}
353
354    let elements: Vec<ElemType> =
355        handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
356            ElemType::read_from_wasm(
357                wasm,
358                &all_functions,
359                &mut validation_context_refs,
360                &all_tables,
361                &imported_global_types,
362            )
363        })?
364        .unwrap_or_default();
365
366    while (skip_section(&mut wasm, &mut header)?).is_some() {}
367
368    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
369    // As per the official documentation:
370    //
371    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
372    let data_count: Option<u32> =
373        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
374            wasm.read_var_u32()
375        })?;
376    if data_count.is_some() {
377        trace!("data count: {}", data_count.unwrap());
378    }
379
380    while (skip_section(&mut wasm, &mut header)?).is_some() {}
381
382    let mut sidetable = Sidetable::new();
383    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
384        code::validate_code_section(
385            wasm,
386            h,
387            &types,
388            &all_functions,
389            imported_functions.count(),
390            &all_globals,
391            &all_memories,
392            &data_count,
393            &all_tables,
394            &elements,
395            &validation_context_refs,
396            &mut sidetable,
397        )
398    })?
399    .unwrap_or_default();
400
401    if func_blocks_stps.len() != local_functions.len() {
402        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
403    }
404
405    while (skip_section(&mut wasm, &mut header)?).is_some() {}
406
407    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
408        // wasm.read_vec(DataSegment::read)
409        data::validate_data_section(
410            wasm,
411            h,
412            &imported_global_types,
413            all_memories.len(),
414            all_functions.len(),
415        )
416    })?
417    .unwrap_or_default();
418
419    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
420    if let (Some(data_count), data_len) = (data_count, data_section.len()) {
421        if data_count as usize != data_len {
422            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
423        }
424    }
425
426    while (skip_section(&mut wasm, &mut header)?).is_some() {}
427
428    // All sections should have been handled
429    if let Some(header) = header {
430        return Err(ValidationError::SectionOutOfOrder(header.ty));
431    }
432
433    debug!("Validation was successful");
434    let validation_info = ValidationInfo {
435        wasm: wasm.into_inner(),
436        types,
437        imports,
438        functions: local_functions,
439        tables,
440        memories,
441        globals,
442        exports,
443        func_blocks_stps,
444        sidetable,
445        data: data_section,
446        start,
447        elements,
448        imports_length,
449    };
450    validate_exports(&validation_info)?;
451
452    Ok(validation_info)
453}
454
455fn read_next_header(
456    wasm: &mut WasmReader,
457    header: &mut Option<SectionHeader>,
458) -> Result<(), ValidationError> {
459    if header.is_none() && !wasm.remaining_bytes().is_empty() {
460        *header = Some(SectionHeader::read(wasm)?);
461    }
462    Ok(())
463}
464
465#[inline(always)]
466fn handle_section<T, F: FnOnce(&mut WasmReader, SectionHeader) -> Result<T, ValidationError>>(
467    wasm: &mut WasmReader,
468    header: &mut Option<SectionHeader>,
469    section_ty: SectionTy,
470    handler: F,
471) -> Result<Option<T>, ValidationError> {
472    match &header {
473        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
474            let h = header.take().unwrap();
475            trace!("Handling section {:?}", h.ty);
476            let ret = handler(wasm, h)?;
477            read_next_header(wasm, header)?;
478            Ok(Some(ret))
479        }
480        _ => Ok(None),
481    }
482}