wasm/validation/
mod.rs

1use alloc::collections::btree_set::{self, BTreeSet};
2use alloc::vec::Vec;
3
4use crate::core::indices::{FuncIdx, TypeIdx};
5use crate::core::reader::section_header::{SectionHeader, SectionTy};
6use crate::core::reader::span::Span;
7use crate::core::reader::types::data::DataSegment;
8use crate::core::reader::types::element::ElemType;
9use crate::core::reader::types::export::Export;
10use crate::core::reader::types::global::{Global, GlobalType};
11use crate::core::reader::types::import::{Import, ImportDesc};
12use crate::core::reader::types::{FuncType, MemType, ResultType, TableType};
13use crate::core::reader::{WasmReadable, WasmReader};
14use crate::core::sidetable::Sidetable;
15use crate::{ExportDesc, ValidationError};
16
17pub(crate) mod code;
18pub(crate) mod data;
19pub(crate) mod globals;
20pub(crate) mod read_constant_expression;
21pub(crate) mod validation_stack;
22
/// Number of imports of each kind declared by a module.
///
/// `validate_exports` adds these counts to the number of module-defined items
/// of the same kind to obtain the size of the respective index space.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ImportsLength {
    /// Number of imported functions.
    pub imported_functions: usize,
    /// Number of imported globals.
    pub imported_globals: usize,
    /// Number of imported memories.
    pub imported_memories: usize,
    /// Number of imported tables.
    pub imported_tables: usize,
}
30
31/// Information collected from validating a module.
32/// This can be used to create a [crate::RuntimeInstance].
33#[derive(Clone, Debug)]
34pub struct ValidationInfo<'bytecode> {
35    pub(crate) wasm: &'bytecode [u8],
36    pub(crate) types: Vec<FuncType>,
37    pub(crate) imports: Vec<Import>,
38    pub(crate) functions: Vec<TypeIdx>,
39    pub(crate) tables: Vec<TableType>,
40    pub(crate) memories: Vec<MemType>,
41    pub(crate) globals: Vec<Global>,
42    #[allow(dead_code)]
43    pub(crate) exports: Vec<Export>,
44    /// Each block contains the validated code section and the stp corresponding to
45    /// the beginning of that code section
46    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
47    pub(crate) sidetable: Sidetable,
48    pub(crate) data: Vec<DataSegment>,
49    /// The start function which is automatically executed during instantiation
50    pub(crate) start: Option<FuncIdx>,
51    pub(crate) elements: Vec<ElemType>,
52    pub(crate) imports_length: ImportsLength,
53    // pub(crate) exports_length: Exported,
54}
55
56fn validate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
57    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
58    use crate::core::reader::types::export::ExportDesc::*;
59    for export in &validation_info.exports {
60        if found_export_names.contains(export.name.as_str()) {
61            return Err(ValidationError::DuplicateExportName);
62        }
63        found_export_names.insert(export.name.as_str());
64        match export.desc {
65            FuncIdx(func_idx) => {
66                if validation_info.functions.len()
67                    + validation_info.imports_length.imported_functions
68                    <= func_idx
69                {
70                    return Err(ValidationError::InvalidFuncIdx(func_idx));
71                }
72            }
73            TableIdx(table_idx) => {
74                if validation_info.tables.len() + validation_info.imports_length.imported_tables
75                    <= table_idx
76                {
77                    return Err(ValidationError::InvalidTableIdx(table_idx));
78                }
79            }
80            MemIdx(mem_idx) => {
81                if validation_info.memories.len() + validation_info.imports_length.imported_memories
82                    <= mem_idx
83                {
84                    return Err(ValidationError::InvalidMemIndex(mem_idx));
85                }
86            }
87            GlobalIdx(global_idx) => {
88                if validation_info.globals.len() + validation_info.imports_length.imported_globals
89                    <= global_idx
90                {
91                    return Err(ValidationError::InvalidGlobalIdx(global_idx));
92                }
93            }
94        }
95    }
96    Ok(())
97}
98
99fn get_imports_length(imports: &Vec<Import>) -> ImportsLength {
100    let mut imports_length = ImportsLength {
101        imported_functions: 0,
102        imported_globals: 0,
103        imported_memories: 0,
104        imported_tables: 0,
105    };
106
107    for import in imports {
108        match import.desc {
109            ImportDesc::Func(_) => imports_length.imported_functions += 1,
110            ImportDesc::Global(_) => imports_length.imported_globals += 1,
111            ImportDesc::Mem(_) => imports_length.imported_memories += 1,
112            ImportDesc::Table(_) => imports_length.imported_tables += 1,
113        }
114    }
115
116    imports_length
117}
118
119pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
120    let mut wasm = WasmReader::new(wasm);
121
122    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
123    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
124    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
125    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
126    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
127    // therefore this hack is acceptable.
128    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
129    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
130
131    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
132
133    trace!("Starting validation of bytecode");
134
135    trace!("Validating magic value");
136    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
137        return Err(ValidationError::InvalidMagic);
138    };
139
140    trace!("Validating version number");
141    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
142        return Err(ValidationError::InvalidBinaryFormatVersion);
143    };
144    debug!("Header ok");
145
146    let mut header = None;
147    read_next_header(&mut wasm, &mut header)?;
148
149    let skip_section = |wasm: &mut WasmReader, section_header: &mut Option<SectionHeader>| {
150        handle_section(wasm, section_header, SectionTy::Custom, |wasm, h| {
151            // customsec ::= section_0(custom)
152            // custom ::= name byte*
153            // name ::= b*:vec(byte) => name (if utf8(name) = b*)
154            // vec(B) ::= n:u32 (x:B)^n => x^n
155            let _name = wasm.read_name()?;
156
157            let remaining_bytes = h
158                .contents
159                .from()
160                .checked_add(h.contents.len())
161                .and_then(|res| res.checked_sub(wasm.pc))
162                .ok_or(ValidationError::InvalidCustomSectionLength)?;
163
164            // TODO: maybe do something with these remaining bytes?
165            let mut _bytes = Vec::new();
166            for _ in 0..remaining_bytes {
167                _bytes.push(wasm.read_u8()?)
168            }
169            Ok(())
170        })
171    };
172
173    while (skip_section(&mut wasm, &mut header)?).is_some() {}
174
175    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
176        wasm.read_vec(FuncType::read)
177    })?
178    .unwrap_or_default();
179
180    while (skip_section(&mut wasm, &mut header)?).is_some() {}
181
182    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
183        wasm.read_vec(|wasm| {
184            let import = Import::read(wasm)?;
185
186            match import.desc {
187                ImportDesc::Func(type_idx) => {
188                    types
189                        .get(type_idx)
190                        .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
191                }
192                ImportDesc::Table(_table_type) => {}
193                ImportDesc::Mem(_mem_type) => {}
194                ImportDesc::Global(_global_type) => {}
195            }
196
197            Ok(import)
198        })
199    })?
200    .unwrap_or_default();
201    let imports_length = get_imports_length(&imports);
202
203    while (skip_section(&mut wasm, &mut header)?).is_some() {}
204
205    // The `Function` section only covers module-level (or "local") functions.
206    // Imported functions have their types known in the `import` section. Both
207    // local and imported functions share the same index space.
208    //
209    // Imported functions are given priority and have the first indicies, and
210    // only after that do the local functions get assigned their indices.
211    let local_functions =
212        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
213            wasm.read_vec(|wasm| {
214                let type_idx = wasm.read_var_u32()? as usize;
215                types
216                    .get(type_idx)
217                    .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
218                Ok(type_idx)
219            })
220        })?
221        .unwrap_or_default();
222
223    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
224        ImportDesc::Func(type_idx) => Some(*type_idx),
225        _ => None,
226    });
227
228    let all_functions = imported_functions
229        .clone()
230        .chain(local_functions.iter().cloned())
231        .collect::<Vec<TypeIdx>>();
232
233    while (skip_section(&mut wasm, &mut header)?).is_some() {}
234
235    let imported_tables = imports
236        .iter()
237        .filter_map(|m| match m.desc {
238            ImportDesc::Table(table) => Some(table),
239            _ => None,
240        })
241        .collect::<Vec<TableType>>();
242    let tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
243        wasm.read_vec(TableType::read)
244    })?
245    .unwrap_or_default();
246
247    let all_tables = {
248        let mut temp = imported_tables;
249        temp.extend(tables.clone());
250        temp
251    };
252
253    while (skip_section(&mut wasm, &mut header)?).is_some() {}
254
255    let imported_memories = imports
256        .iter()
257        .filter_map(|m| match m.desc {
258            ImportDesc::Mem(mem) => Some(mem),
259            _ => None,
260        })
261        .collect::<Vec<MemType>>();
262    // let imported_memories_length = imported_memories.len();
263    let memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
264        wasm.read_vec(MemType::read)
265    })?
266    .unwrap_or_default();
267
268    let all_memories = {
269        let mut temp = imported_memories;
270        temp.extend(memories.clone());
271        temp
272    };
273    if all_memories.len() > 1 {
274        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
275    }
276
277    while (skip_section(&mut wasm, &mut header)?).is_some() {}
278
279    // we start off with the imported globals
280    let /* mut */ imported_global_types = imports
281        .iter()
282        .filter_map(|m| match m.desc {
283            ImportDesc::Global(global) => Some(global),
284            _ => None,
285        })
286        .collect::<Vec<GlobalType>>();
287    let imported_global_types_len = imported_global_types.len();
288    let globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
289        globals::validate_global_section(
290            wasm,
291            h,
292            &imported_global_types,
293            &mut validation_context_refs,
294            all_functions.len(),
295        )
296    })?
297    .unwrap_or_default();
298    let mut all_globals = Vec::new();
299    for item in imported_global_types.iter().take(imported_global_types_len) {
300        all_globals.push(Global {
301            init_expr: Span::new(usize::MAX, 0),
302            ty: *item,
303        })
304    }
305    for item in &globals {
306        all_globals.push(*item)
307    }
308
309    while (skip_section(&mut wasm, &mut header)?).is_some() {}
310
311    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
312        wasm.read_vec(Export::read)
313    })?
314    .unwrap_or_default();
315    validation_context_refs.extend(exports.iter().filter_map(
316        |Export { name: _, desc }| match *desc {
317            ExportDesc::FuncIdx(func_idx) => Some(func_idx),
318            _ => None,
319        },
320    ));
321
322    while (skip_section(&mut wasm, &mut header)?).is_some() {}
323
324    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
325        let func_idx = wasm.read_var_u32().map(|idx| idx as FuncIdx)?;
326        // start function signature must be [] -> []
327        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
328        let type_idx = *all_functions
329            .get(func_idx)
330            .ok_or(ValidationError::InvalidFuncIdx(func_idx))?;
331        if types[type_idx]
332            != (FuncType {
333                params: ResultType {
334                    valtypes: Vec::new(),
335                },
336                returns: ResultType {
337                    valtypes: Vec::new(),
338                },
339            })
340        {
341            Err(ValidationError::InvalidStartFunctionSignature)
342        } else {
343            Ok(func_idx)
344        }
345    })?;
346
347    while (skip_section(&mut wasm, &mut header)?).is_some() {}
348
349    let elements: Vec<ElemType> =
350        handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
351            ElemType::read_from_wasm(
352                wasm,
353                &all_functions,
354                &mut validation_context_refs,
355                &all_tables,
356                &imported_global_types,
357            )
358        })?
359        .unwrap_or_default();
360
361    while (skip_section(&mut wasm, &mut header)?).is_some() {}
362
363    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
364    // As per the official documentation:
365    //
366    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
367    let data_count: Option<u32> =
368        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
369            wasm.read_var_u32()
370        })?;
371    if data_count.is_some() {
372        trace!("data count: {}", data_count.unwrap());
373    }
374
375    while (skip_section(&mut wasm, &mut header)?).is_some() {}
376
377    let mut sidetable = Sidetable::new();
378    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
379        code::validate_code_section(
380            wasm,
381            h,
382            &types,
383            &all_functions,
384            imported_functions.count(),
385            &all_globals,
386            &all_memories,
387            &data_count,
388            &all_tables,
389            &elements,
390            &validation_context_refs,
391            &mut sidetable,
392        )
393    })?
394    .unwrap_or_default();
395
396    if func_blocks_stps.len() != local_functions.len() {
397        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
398    }
399
400    while (skip_section(&mut wasm, &mut header)?).is_some() {}
401
402    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
403        // wasm.read_vec(DataSegment::read)
404        data::validate_data_section(
405            wasm,
406            h,
407            &imported_global_types,
408            all_memories.len(),
409            all_functions.len(),
410        )
411    })?
412    .unwrap_or_default();
413
414    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
415    if let (Some(data_count), data_len) = (data_count, data_section.len()) {
416        if data_count as usize != data_len {
417            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
418        }
419    }
420
421    while (skip_section(&mut wasm, &mut header)?).is_some() {}
422
423    // All sections should have been handled
424    if let Some(header) = header {
425        return Err(ValidationError::SectionOutOfOrder(header.ty));
426    }
427
428    debug!("Validation was successful");
429    let validation_info = ValidationInfo {
430        wasm: wasm.into_inner(),
431        types,
432        imports,
433        functions: local_functions,
434        tables,
435        memories,
436        globals,
437        exports,
438        func_blocks_stps,
439        sidetable,
440        data: data_section,
441        start,
442        elements,
443        imports_length,
444    };
445    validate_exports(&validation_info)?;
446
447    Ok(validation_info)
448}
449
450fn read_next_header(
451    wasm: &mut WasmReader,
452    header: &mut Option<SectionHeader>,
453) -> Result<(), ValidationError> {
454    if header.is_none() && !wasm.remaining_bytes().is_empty() {
455        *header = Some(SectionHeader::read(wasm)?);
456    }
457    Ok(())
458}
459
460#[inline(always)]
461fn handle_section<T, F: FnOnce(&mut WasmReader, SectionHeader) -> Result<T, ValidationError>>(
462    wasm: &mut WasmReader,
463    header: &mut Option<SectionHeader>,
464    section_ty: SectionTy,
465    handler: F,
466) -> Result<Option<T>, ValidationError> {
467    match &header {
468        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
469            let h = header.take().unwrap();
470            trace!("Handling section {:?}", h.ty);
471            let ret = handler(wasm, h)?;
472            read_next_header(wasm, header)?;
473            Ok(Some(ret))
474        }
475        _ => Ok(None),
476    }
477}