wasm/validation/
mod.rs

1use alloc::collections::btree_set::{self, BTreeSet};
2use alloc::vec::Vec;
3
4use crate::core::indices::{FuncIdx, TypeIdx};
5use crate::core::reader::section_header::{SectionHeader, SectionTy};
6use crate::core::reader::span::Span;
7use crate::core::reader::types::data::DataSegment;
8use crate::core::reader::types::element::ElemType;
9use crate::core::reader::types::export::Export;
10use crate::core::reader::types::global::{Global, GlobalType};
11use crate::core::reader::types::import::{Import, ImportDesc};
12use crate::core::reader::types::{FuncType, MemType, ResultType, TableType};
13use crate::core::reader::{WasmReadable, WasmReader};
14use crate::core::sidetable::Sidetable;
15use crate::{ExportDesc, ValidationError};
16
17pub(crate) mod code;
18pub(crate) mod data;
19pub(crate) mod globals;
20pub(crate) mod read_constant_expression;
21pub(crate) mod validation_stack;
22
/// Number of imports declared by a module, broken down by import kind.
#[derive(Clone, Debug)]
pub(crate) struct ImportsLength {
    /// How many imports have a function descriptor.
    pub imported_functions: usize,
    /// How many imports have a global descriptor.
    pub imported_globals: usize,
    /// How many imports have a memory descriptor.
    pub imported_memories: usize,
    /// How many imports have a table descriptor.
    pub imported_tables: usize,
}
30
31/// Information collected from validating a module.
32///
33/// This can be used to instantiate a new module instance in some
34/// [`Store`](crate::Store) either through
35/// [`Store::module_instantiate`](crate::Store::module_instantiate) or
36/// [`Linker::module_instantiate`](crate::execution::linker::Linker::module_instantiate).
37#[derive(Clone, Debug)]
38pub struct ValidationInfo<'bytecode> {
39    pub(crate) wasm: &'bytecode [u8],
40    pub(crate) types: Vec<FuncType>,
41    pub(crate) imports: Vec<Import>,
42    pub(crate) functions: Vec<TypeIdx>,
43    pub(crate) tables: Vec<TableType>,
44    pub(crate) memories: Vec<MemType>,
45    pub(crate) globals: Vec<Global>,
46    #[allow(dead_code)]
47    pub(crate) exports: Vec<Export>,
48    /// Each block contains the validated code section and the stp corresponding to
49    /// the beginning of that code section
50    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
51    pub(crate) sidetable: Sidetable,
52    pub(crate) data: Vec<DataSegment>,
53    /// The start function which is automatically executed during instantiation
54    pub(crate) start: Option<FuncIdx>,
55    pub(crate) elements: Vec<ElemType>,
56    pub(crate) imports_length: ImportsLength,
57    // pub(crate) exports_length: Exported,
58}
59
60fn validate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
61    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
62    use crate::core::reader::types::export::ExportDesc::*;
63    for export in &validation_info.exports {
64        if found_export_names.contains(export.name.as_str()) {
65            return Err(ValidationError::DuplicateExportName);
66        }
67        found_export_names.insert(export.name.as_str());
68        match export.desc {
69            FuncIdx(func_idx) => {
70                if validation_info.functions.len()
71                    + validation_info.imports_length.imported_functions
72                    <= func_idx
73                {
74                    return Err(ValidationError::InvalidFuncIdx(func_idx));
75                }
76            }
77            TableIdx(table_idx) => {
78                if validation_info.tables.len() + validation_info.imports_length.imported_tables
79                    <= table_idx
80                {
81                    return Err(ValidationError::InvalidTableIdx(table_idx));
82                }
83            }
84            MemIdx(mem_idx) => {
85                if validation_info.memories.len() + validation_info.imports_length.imported_memories
86                    <= mem_idx
87                {
88                    return Err(ValidationError::InvalidMemIndex(mem_idx));
89                }
90            }
91            GlobalIdx(global_idx) => {
92                if validation_info.globals.len() + validation_info.imports_length.imported_globals
93                    <= global_idx
94                {
95                    return Err(ValidationError::InvalidGlobalIdx(global_idx));
96                }
97            }
98        }
99    }
100    Ok(())
101}
102
103fn get_imports_length(imports: &Vec<Import>) -> ImportsLength {
104    let mut imports_length = ImportsLength {
105        imported_functions: 0,
106        imported_globals: 0,
107        imported_memories: 0,
108        imported_tables: 0,
109    };
110
111    for import in imports {
112        match import.desc {
113            ImportDesc::Func(_) => imports_length.imported_functions += 1,
114            ImportDesc::Global(_) => imports_length.imported_globals += 1,
115            ImportDesc::Mem(_) => imports_length.imported_memories += 1,
116            ImportDesc::Table(_) => imports_length.imported_tables += 1,
117        }
118    }
119
120    imports_length
121}
122
123pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
124    let mut wasm = WasmReader::new(wasm);
125
126    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
127    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
128    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
129    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
130    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
131    // therefore this hack is acceptable.
132    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
133    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
134
135    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
136
137    trace!("Starting validation of bytecode");
138
139    trace!("Validating magic value");
140    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
141        return Err(ValidationError::InvalidMagic);
142    };
143
144    trace!("Validating version number");
145    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
146        return Err(ValidationError::InvalidBinaryFormatVersion);
147    };
148    debug!("Header ok");
149
150    let mut header = None;
151    read_next_header(&mut wasm, &mut header)?;
152
153    let skip_section = |wasm: &mut WasmReader, section_header: &mut Option<SectionHeader>| {
154        handle_section(wasm, section_header, SectionTy::Custom, |wasm, h| {
155            // customsec ::= section_0(custom)
156            // custom ::= name byte*
157            // name ::= b*:vec(byte) => name (if utf8(name) = b*)
158            // vec(B) ::= n:u32 (x:B)^n => x^n
159            let _name = wasm.read_name()?;
160
161            let remaining_bytes = h
162                .contents
163                .from()
164                .checked_add(h.contents.len())
165                .and_then(|res| res.checked_sub(wasm.pc))
166                .ok_or(ValidationError::InvalidCustomSectionLength)?;
167
168            // TODO: maybe do something with these remaining bytes?
169            let mut _bytes = Vec::new();
170            for _ in 0..remaining_bytes {
171                _bytes.push(wasm.read_u8()?)
172            }
173            Ok(())
174        })
175    };
176
177    while (skip_section(&mut wasm, &mut header)?).is_some() {}
178
179    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
180        wasm.read_vec(FuncType::read)
181    })?
182    .unwrap_or_default();
183
184    while (skip_section(&mut wasm, &mut header)?).is_some() {}
185
186    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
187        wasm.read_vec(|wasm| {
188            let import = Import::read(wasm)?;
189
190            match import.desc {
191                ImportDesc::Func(type_idx) => {
192                    types
193                        .get(type_idx)
194                        .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
195                }
196                ImportDesc::Table(_table_type) => {}
197                ImportDesc::Mem(_mem_type) => {}
198                ImportDesc::Global(_global_type) => {}
199            }
200
201            Ok(import)
202        })
203    })?
204    .unwrap_or_default();
205    let imports_length = get_imports_length(&imports);
206
207    while (skip_section(&mut wasm, &mut header)?).is_some() {}
208
209    // The `Function` section only covers module-level (or "local") functions.
210    // Imported functions have their types known in the `import` section. Both
211    // local and imported functions share the same index space.
212    //
213    // Imported functions are given priority and have the first indicies, and
214    // only after that do the local functions get assigned their indices.
215    let local_functions =
216        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
217            wasm.read_vec(|wasm| {
218                let type_idx = wasm.read_var_u32()? as usize;
219                types
220                    .get(type_idx)
221                    .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
222                Ok(type_idx)
223            })
224        })?
225        .unwrap_or_default();
226
227    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
228        ImportDesc::Func(type_idx) => Some(*type_idx),
229        _ => None,
230    });
231
232    let all_functions = imported_functions
233        .clone()
234        .chain(local_functions.iter().cloned())
235        .collect::<Vec<TypeIdx>>();
236
237    while (skip_section(&mut wasm, &mut header)?).is_some() {}
238
239    let imported_tables = imports
240        .iter()
241        .filter_map(|m| match m.desc {
242            ImportDesc::Table(table) => Some(table),
243            _ => None,
244        })
245        .collect::<Vec<TableType>>();
246    let tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
247        wasm.read_vec(TableType::read)
248    })?
249    .unwrap_or_default();
250
251    let all_tables = {
252        let mut temp = imported_tables;
253        temp.extend(tables.clone());
254        temp
255    };
256
257    while (skip_section(&mut wasm, &mut header)?).is_some() {}
258
259    let imported_memories = imports
260        .iter()
261        .filter_map(|m| match m.desc {
262            ImportDesc::Mem(mem) => Some(mem),
263            _ => None,
264        })
265        .collect::<Vec<MemType>>();
266    // let imported_memories_length = imported_memories.len();
267    let memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
268        wasm.read_vec(MemType::read)
269    })?
270    .unwrap_or_default();
271
272    let all_memories = {
273        let mut temp = imported_memories;
274        temp.extend(memories.clone());
275        temp
276    };
277    if all_memories.len() > 1 {
278        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
279    }
280
281    while (skip_section(&mut wasm, &mut header)?).is_some() {}
282
283    // we start off with the imported globals
284    let /* mut */ imported_global_types = imports
285        .iter()
286        .filter_map(|m| match m.desc {
287            ImportDesc::Global(global) => Some(global),
288            _ => None,
289        })
290        .collect::<Vec<GlobalType>>();
291    let imported_global_types_len = imported_global_types.len();
292    let globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
293        globals::validate_global_section(
294            wasm,
295            h,
296            &imported_global_types,
297            &mut validation_context_refs,
298            all_functions.len(),
299        )
300    })?
301    .unwrap_or_default();
302    let mut all_globals = Vec::new();
303    for item in imported_global_types.iter().take(imported_global_types_len) {
304        all_globals.push(Global {
305            init_expr: Span::new(usize::MAX, 0),
306            ty: *item,
307        })
308    }
309    for item in &globals {
310        all_globals.push(*item)
311    }
312
313    while (skip_section(&mut wasm, &mut header)?).is_some() {}
314
315    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
316        wasm.read_vec(Export::read)
317    })?
318    .unwrap_or_default();
319    validation_context_refs.extend(exports.iter().filter_map(
320        |Export { name: _, desc }| match *desc {
321            ExportDesc::FuncIdx(func_idx) => Some(func_idx),
322            _ => None,
323        },
324    ));
325
326    while (skip_section(&mut wasm, &mut header)?).is_some() {}
327
328    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
329        let func_idx = wasm.read_var_u32().map(|idx| idx as FuncIdx)?;
330        // start function signature must be [] -> []
331        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
332        let type_idx = *all_functions
333            .get(func_idx)
334            .ok_or(ValidationError::InvalidFuncIdx(func_idx))?;
335        if types[type_idx]
336            != (FuncType {
337                params: ResultType {
338                    valtypes: Vec::new(),
339                },
340                returns: ResultType {
341                    valtypes: Vec::new(),
342                },
343            })
344        {
345            Err(ValidationError::InvalidStartFunctionSignature)
346        } else {
347            Ok(func_idx)
348        }
349    })?;
350
351    while (skip_section(&mut wasm, &mut header)?).is_some() {}
352
353    let elements: Vec<ElemType> =
354        handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
355            ElemType::read_from_wasm(
356                wasm,
357                &all_functions,
358                &mut validation_context_refs,
359                &all_tables,
360                &imported_global_types,
361            )
362        })?
363        .unwrap_or_default();
364
365    while (skip_section(&mut wasm, &mut header)?).is_some() {}
366
367    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
368    // As per the official documentation:
369    //
370    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
371    let data_count: Option<u32> =
372        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
373            wasm.read_var_u32()
374        })?;
375    if data_count.is_some() {
376        trace!("data count: {}", data_count.unwrap());
377    }
378
379    while (skip_section(&mut wasm, &mut header)?).is_some() {}
380
381    let mut sidetable = Sidetable::new();
382    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
383        code::validate_code_section(
384            wasm,
385            h,
386            &types,
387            &all_functions,
388            imported_functions.count(),
389            &all_globals,
390            &all_memories,
391            &data_count,
392            &all_tables,
393            &elements,
394            &validation_context_refs,
395            &mut sidetable,
396        )
397    })?
398    .unwrap_or_default();
399
400    if func_blocks_stps.len() != local_functions.len() {
401        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
402    }
403
404    while (skip_section(&mut wasm, &mut header)?).is_some() {}
405
406    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
407        // wasm.read_vec(DataSegment::read)
408        data::validate_data_section(
409            wasm,
410            h,
411            &imported_global_types,
412            all_memories.len(),
413            all_functions.len(),
414        )
415    })?
416    .unwrap_or_default();
417
418    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
419    if let (Some(data_count), data_len) = (data_count, data_section.len()) {
420        if data_count as usize != data_len {
421            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
422        }
423    }
424
425    while (skip_section(&mut wasm, &mut header)?).is_some() {}
426
427    // All sections should have been handled
428    if let Some(header) = header {
429        return Err(ValidationError::SectionOutOfOrder(header.ty));
430    }
431
432    debug!("Validation was successful");
433    let validation_info = ValidationInfo {
434        wasm: wasm.into_inner(),
435        types,
436        imports,
437        functions: local_functions,
438        tables,
439        memories,
440        globals,
441        exports,
442        func_blocks_stps,
443        sidetable,
444        data: data_section,
445        start,
446        elements,
447        imports_length,
448    };
449    validate_exports(&validation_info)?;
450
451    Ok(validation_info)
452}
453
454fn read_next_header(
455    wasm: &mut WasmReader,
456    header: &mut Option<SectionHeader>,
457) -> Result<(), ValidationError> {
458    if header.is_none() && !wasm.remaining_bytes().is_empty() {
459        *header = Some(SectionHeader::read(wasm)?);
460    }
461    Ok(())
462}
463
464#[inline(always)]
465fn handle_section<T, F: FnOnce(&mut WasmReader, SectionHeader) -> Result<T, ValidationError>>(
466    wasm: &mut WasmReader,
467    header: &mut Option<SectionHeader>,
468    section_ty: SectionTy,
469    handler: F,
470) -> Result<Option<T>, ValidationError> {
471    match &header {
472        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
473            let h = header.take().unwrap();
474            trace!("Handling section {:?}", h.ty);
475            let ret = handler(wasm, h)?;
476            read_next_header(wasm, header)?;
477            Ok(Some(ret))
478        }
479        _ => Ok(None),
480    }
481}