wasm/validation/
mod.rs

1use alloc::collections::btree_set::{self, BTreeSet};
2use alloc::vec::Vec;
3
4use crate::core::indices::{FuncIdx, TypeIdx};
5use crate::core::reader::section_header::{SectionHeader, SectionTy};
6use crate::core::reader::span::Span;
7use crate::core::reader::types::data::DataSegment;
8use crate::core::reader::types::element::ElemType;
9use crate::core::reader::types::export::Export;
10use crate::core::reader::types::global::{Global, GlobalType};
11use crate::core::reader::types::import::{Import, ImportDesc};
12use crate::core::reader::types::{FuncType, MemType, ResultType, TableType};
13use crate::core::reader::{WasmReadable, WasmReader};
14use crate::core::sidetable::Sidetable;
15use crate::{ExportDesc, ValidationError};
16
17pub(crate) mod code;
18pub(crate) mod data;
19pub(crate) mod globals;
20pub(crate) mod read_constant_expression;
21pub(crate) mod validation_stack;
22
/// Number of imports a module declares, counted separately per kind of import.
///
/// Imported entities come first in their index space, so these counts are the
/// offsets at which the module's own functions, globals, memories and tables
/// begin.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ImportsLength {
    /// Number of imported functions.
    pub imported_functions: usize,
    /// Number of imported globals.
    pub imported_globals: usize,
    /// Number of imported memories.
    pub imported_memories: usize,
    /// Number of imported tables.
    pub imported_tables: usize,
}
30
31/// Information collected from validating a module.
32/// This can be used to create a [crate::RuntimeInstance].
33#[derive(Clone, Debug)]
34pub struct ValidationInfo<'bytecode> {
35    pub(crate) wasm: &'bytecode [u8],
36    pub(crate) types: Vec<FuncType>,
37    pub(crate) imports: Vec<Import>,
38    pub(crate) functions: Vec<TypeIdx>,
39    pub(crate) tables: Vec<TableType>,
40    pub(crate) memories: Vec<MemType>,
41    pub(crate) globals: Vec<Global>,
42    #[allow(dead_code)]
43    pub(crate) exports: Vec<Export>,
44    /// Each block contains the validated code section and the stp corresponding to
45    /// the beginning of that code section
46    pub(crate) func_blocks_stps: Vec<(Span, usize)>,
47    pub(crate) sidetable: Sidetable,
48    pub(crate) data: Vec<DataSegment>,
49    /// The start function which is automatically executed during instantiation
50    pub(crate) start: Option<FuncIdx>,
51    pub(crate) elements: Vec<ElemType>,
52    pub(crate) imports_length: ImportsLength,
53    // pub(crate) exports_length: Exported,
54}
55
56fn validate_exports(validation_info: &ValidationInfo) -> Result<(), ValidationError> {
57    let mut found_export_names: btree_set::BTreeSet<&str> = btree_set::BTreeSet::new();
58    use crate::core::reader::types::export::ExportDesc::*;
59    for export in &validation_info.exports {
60        if found_export_names.contains(export.name.as_str()) {
61            return Err(ValidationError::DuplicateExportName);
62        }
63        found_export_names.insert(export.name.as_str());
64        match export.desc {
65            FuncIdx(func_idx) => {
66                if validation_info.functions.len()
67                    + validation_info.imports_length.imported_functions
68                    <= func_idx
69                {
70                    return Err(ValidationError::UnknownFunction);
71                }
72            }
73            TableIdx(table_idx) => {
74                if validation_info.tables.len() + validation_info.imports_length.imported_tables
75                    <= table_idx
76                {
77                    return Err(ValidationError::UnknownTable);
78                }
79            }
80            MemIdx(mem_idx) => {
81                if validation_info.memories.len() + validation_info.imports_length.imported_memories
82                    <= mem_idx
83                {
84                    return Err(ValidationError::UnknownMemory);
85                }
86            }
87            GlobalIdx(global_idx) => {
88                if validation_info.globals.len() + validation_info.imports_length.imported_globals
89                    <= global_idx
90                {
91                    return Err(ValidationError::UnknownGlobal);
92                }
93            }
94        }
95    }
96    Ok(())
97}
98
99fn get_imports_length(imports: &Vec<Import>) -> ImportsLength {
100    let mut imports_length = ImportsLength {
101        imported_functions: 0,
102        imported_globals: 0,
103        imported_memories: 0,
104        imported_tables: 0,
105    };
106
107    for import in imports {
108        match import.desc {
109            ImportDesc::Func(_) => imports_length.imported_functions += 1,
110            ImportDesc::Global(_) => imports_length.imported_globals += 1,
111            ImportDesc::Mem(_) => imports_length.imported_memories += 1,
112            ImportDesc::Table(_) => imports_length.imported_tables += 1,
113        }
114    }
115
116    imports_length
117}
118
119pub fn validate(wasm: &[u8]) -> Result<ValidationInfo<'_>, ValidationError> {
120    let mut wasm = WasmReader::new(wasm);
121
122    // represents C.refs in https://webassembly.github.io/spec/core/valid/conventions.html#context
123    // A func.ref instruction is onlv valid if it has an immediate that is a member of C.refs.
124    // this list holds all the func_idx's occurring in the module, except in its functions or start function.
125    // I make an exception here by not including func_idx's occuring within data segments in C.refs as well, so that single pass validation is possible.
126    // If there is a func_idx within the data segment, this would ultimately mean that data segment cannot be validated,
127    // therefore this hack is acceptable.
128    // https://webassembly.github.io/spec/core/valid/modules.html#data-segments
129    // https://webassembly.github.io/spec/core/valid/modules.html#valid-module
130
131    let mut validation_context_refs: BTreeSet<FuncIdx> = BTreeSet::new();
132
133    trace!("Starting validation of bytecode");
134
135    trace!("Validating magic value");
136    let [0x00, 0x61, 0x73, 0x6d] = wasm.strip_bytes::<4>()? else {
137        return Err(ValidationError::InvalidMagic);
138    };
139
140    trace!("Validating version number");
141    let [0x01, 0x00, 0x00, 0x00] = wasm.strip_bytes::<4>()? else {
142        return Err(ValidationError::InvalidVersion);
143    };
144    debug!("Header ok");
145
146    let mut header = None;
147    read_next_header(&mut wasm, &mut header)?;
148
149    let skip_section = |wasm: &mut WasmReader, section_header: &mut Option<SectionHeader>| {
150        handle_section(wasm, section_header, SectionTy::Custom, |wasm, h| {
151            use alloc::string::*;
152            // customsec ::= section_0(custom)
153            // custom ::= name byte*
154            // name ::= b*:vec(byte) => name (if utf8(name) = b*)
155            // vec(B) ::= n:u32 (x:B)^n => x^n
156            let _name = wasm.read_name()?;
157
158            let remaining_bytes = match h
159                .contents
160                .from()
161                .checked_add(h.contents.len())
162                .and_then(|res| res.checked_sub(wasm.pc))
163            {
164                None => Err(ValidationError::InvalidSection(
165                    SectionTy::Custom,
166                    "Remaining bytes less than 0 after reading name!".to_string(),
167                )),
168                Some(remaining_bytes) => Ok(remaining_bytes),
169            }?;
170
171            // TODO: maybe do something with these remaining bytes?
172            let mut _bytes = Vec::new();
173            for _ in 0..remaining_bytes {
174                _bytes.push(wasm.read_u8()?)
175            }
176            Ok(())
177        })
178    };
179
180    while (skip_section(&mut wasm, &mut header)?).is_some() {}
181
182    let types = handle_section(&mut wasm, &mut header, SectionTy::Type, |wasm, _| {
183        wasm.read_vec(FuncType::read)
184    })?
185    .unwrap_or_default();
186
187    while (skip_section(&mut wasm, &mut header)?).is_some() {}
188
189    let imports = handle_section(&mut wasm, &mut header, SectionTy::Import, |wasm, _| {
190        wasm.read_vec(|wasm| {
191            let import = Import::read(wasm)?;
192
193            match import.desc {
194                ImportDesc::Func(type_idx) => {
195                    types
196                        .get(type_idx)
197                        .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
198                }
199                ImportDesc::Table(_table_type) => {}
200                ImportDesc::Mem(_mem_type) => {}
201                ImportDesc::Global(_global_type) => {}
202            }
203
204            Ok(import)
205        })
206    })?
207    .unwrap_or_default();
208    let imports_length = get_imports_length(&imports);
209
210    while (skip_section(&mut wasm, &mut header)?).is_some() {}
211
212    // The `Function` section only covers module-level (or "local") functions.
213    // Imported functions have their types known in the `import` section. Both
214    // local and imported functions share the same index space.
215    //
216    // Imported functions are given priority and have the first indicies, and
217    // only after that do the local functions get assigned their indices.
218    let local_functions =
219        handle_section(&mut wasm, &mut header, SectionTy::Function, |wasm, _| {
220            wasm.read_vec(|wasm| {
221                let type_idx = wasm.read_var_u32()? as usize;
222                types
223                    .get(type_idx)
224                    .ok_or(ValidationError::InvalidTypeIdx(type_idx))?;
225                Ok(type_idx)
226            })
227        })?
228        .unwrap_or_default();
229
230    let imported_functions = imports.iter().filter_map(|import| match &import.desc {
231        ImportDesc::Func(type_idx) => Some(*type_idx),
232        _ => None,
233    });
234
235    let all_functions = imported_functions
236        .clone()
237        .chain(local_functions.iter().cloned())
238        .collect::<Vec<TypeIdx>>();
239
240    while (skip_section(&mut wasm, &mut header)?).is_some() {}
241
242    let imported_tables = imports
243        .iter()
244        .filter_map(|m| match m.desc {
245            ImportDesc::Table(table) => Some(table),
246            _ => None,
247        })
248        .collect::<Vec<TableType>>();
249    let tables = handle_section(&mut wasm, &mut header, SectionTy::Table, |wasm, _| {
250        wasm.read_vec(TableType::read)
251    })?
252    .unwrap_or_default();
253
254    let all_tables = {
255        let mut temp = imported_tables;
256        temp.extend(tables.clone());
257        temp
258    };
259
260    while (skip_section(&mut wasm, &mut header)?).is_some() {}
261
262    let imported_memories = imports
263        .iter()
264        .filter_map(|m| match m.desc {
265            ImportDesc::Mem(mem) => Some(mem),
266            _ => None,
267        })
268        .collect::<Vec<MemType>>();
269    // let imported_memories_length = imported_memories.len();
270    let memories = handle_section(&mut wasm, &mut header, SectionTy::Memory, |wasm, _| {
271        wasm.read_vec(MemType::read)
272    })?
273    .unwrap_or_default();
274
275    let all_memories = {
276        let mut temp = imported_memories;
277        temp.extend(memories.clone());
278        temp
279    };
280    if all_memories.len() > 1 {
281        return Err(ValidationError::UnsupportedMultipleMemoriesProposal);
282    }
283
284    while (skip_section(&mut wasm, &mut header)?).is_some() {}
285
286    // we start off with the imported globals
287    let /* mut */ imported_global_types = imports
288        .iter()
289        .filter_map(|m| match m.desc {
290            ImportDesc::Global(global) => Some(global),
291            _ => None,
292        })
293        .collect::<Vec<GlobalType>>();
294    let imported_global_types_len = imported_global_types.len();
295    let globals = handle_section(&mut wasm, &mut header, SectionTy::Global, |wasm, h| {
296        globals::validate_global_section(
297            wasm,
298            h,
299            &imported_global_types,
300            &mut validation_context_refs,
301            all_functions.len(),
302        )
303    })?
304    .unwrap_or_default();
305    let mut all_globals = Vec::new();
306    for item in imported_global_types.iter().take(imported_global_types_len) {
307        all_globals.push(Global {
308            init_expr: Span::new(usize::MAX, 0),
309            ty: *item,
310        })
311    }
312    for item in &globals {
313        all_globals.push(*item)
314    }
315
316    while (skip_section(&mut wasm, &mut header)?).is_some() {}
317
318    let exports = handle_section(&mut wasm, &mut header, SectionTy::Export, |wasm, _| {
319        wasm.read_vec(Export::read)
320    })?
321    .unwrap_or_default();
322    validation_context_refs.extend(exports.iter().filter_map(
323        |Export { name: _, desc }| match *desc {
324            ExportDesc::FuncIdx(func_idx) => Some(func_idx),
325            _ => None,
326        },
327    ));
328
329    while (skip_section(&mut wasm, &mut header)?).is_some() {}
330
331    let start = handle_section(&mut wasm, &mut header, SectionTy::Start, |wasm, _| {
332        let func_idx = wasm.read_var_u32().map(|idx| idx as FuncIdx)?;
333        // start function signature must be [] -> []
334        // https://webassembly.github.io/spec/core/valid/modules.html#start-function
335        let type_idx = *all_functions
336            .get(func_idx)
337            .ok_or(ValidationError::FunctionIsNotDefined(func_idx))?;
338        if types[type_idx]
339            != (FuncType {
340                params: ResultType {
341                    valtypes: Vec::new(),
342                },
343                returns: ResultType {
344                    valtypes: Vec::new(),
345                },
346            })
347        {
348            // TODO fix error type
349            Err(ValidationError::InvalidFuncType)
350        } else {
351            Ok(func_idx)
352        }
353    })?;
354
355    while (skip_section(&mut wasm, &mut header)?).is_some() {}
356
357    let elements: Vec<ElemType> =
358        handle_section(&mut wasm, &mut header, SectionTy::Element, |wasm, _| {
359            ElemType::read_from_wasm(
360                wasm,
361                &all_functions,
362                &mut validation_context_refs,
363                &all_tables,
364                &imported_global_types,
365            )
366        })?
367        .unwrap_or_default();
368
369    while (skip_section(&mut wasm, &mut header)?).is_some() {}
370
371    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
372    // As per the official documentation:
373    //
374    // The data count section is used to simplify single-pass validation. Since the data section occurs after the code section, the `memory.init` and `data.drop` and instructions would not be able to check whether the data segment index is valid until the data section is read. The data count section occurs before the code section, so a single-pass validator can use this count instead of deferring validation.
375    let data_count: Option<u32> =
376        handle_section(&mut wasm, &mut header, SectionTy::DataCount, |wasm, _| {
377            wasm.read_var_u32()
378        })?;
379    if data_count.is_some() {
380        trace!("data count: {}", data_count.unwrap());
381    }
382
383    while (skip_section(&mut wasm, &mut header)?).is_some() {}
384
385    let mut sidetable = Sidetable::new();
386    let func_blocks_stps = handle_section(&mut wasm, &mut header, SectionTy::Code, |wasm, h| {
387        code::validate_code_section(
388            wasm,
389            h,
390            &types,
391            &all_functions,
392            imported_functions.count(),
393            &all_globals,
394            &all_memories,
395            &data_count,
396            &all_tables,
397            &elements,
398            &validation_context_refs,
399            &mut sidetable,
400        )
401    })?
402    .unwrap_or_default();
403
404    if func_blocks_stps.len() != local_functions.len() {
405        return Err(ValidationError::FunctionAndCodeSectionsHaveDifferentLengths);
406    }
407
408    while (skip_section(&mut wasm, &mut header)?).is_some() {}
409
410    let data_section = handle_section(&mut wasm, &mut header, SectionTy::Data, |wasm, h| {
411        // wasm.read_vec(DataSegment::read)
412        data::validate_data_section(
413            wasm,
414            h,
415            &imported_global_types,
416            all_memories.len(),
417            all_functions.len(),
418        )
419    })?
420    .unwrap_or_default();
421
422    // https://webassembly.github.io/spec/core/binary/modules.html#data-count-section
423    if let (Some(data_count), data_len) = (data_count, data_section.len()) {
424        if data_count as usize != data_len {
425            return Err(ValidationError::DataCountAndDataSectionsLengthAreDifferent);
426        }
427    }
428
429    while (skip_section(&mut wasm, &mut header)?).is_some() {}
430
431    // All sections should have been handled
432    if let Some(header) = header {
433        return Err(ValidationError::SectionOutOfOrder(header.ty));
434    }
435
436    debug!("Validation was successful");
437    let validation_info = ValidationInfo {
438        wasm: wasm.into_inner(),
439        types,
440        imports,
441        functions: local_functions,
442        tables,
443        memories,
444        globals,
445        exports,
446        func_blocks_stps,
447        sidetable,
448        data: data_section,
449        start,
450        elements,
451        imports_length,
452    };
453    validate_exports(&validation_info)?;
454
455    Ok(validation_info)
456}
457
458fn read_next_header(
459    wasm: &mut WasmReader,
460    header: &mut Option<SectionHeader>,
461) -> Result<(), ValidationError> {
462    if header.is_none() && !wasm.remaining_bytes().is_empty() {
463        *header = Some(SectionHeader::read(wasm)?);
464    }
465    Ok(())
466}
467
468#[inline(always)]
469fn handle_section<T, F: FnOnce(&mut WasmReader, SectionHeader) -> Result<T, ValidationError>>(
470    wasm: &mut WasmReader,
471    header: &mut Option<SectionHeader>,
472    section_ty: SectionTy,
473    handler: F,
474) -> Result<Option<T>, ValidationError> {
475    match &header {
476        Some(SectionHeader { ty, .. }) if *ty == section_ty => {
477            let h = header.take().unwrap();
478            trace!("Handling section {:?}", h.ty);
479            let ret = handler(wasm, h)?;
480            read_next_header(wasm, header)?;
481            Ok(Some(ret))
482        }
483        _ => Ok(None),
484    }
485}