wasmtime_environ/compile/
module_environ.rs

1use crate::module::{
2    FuncRefIndex, Initializer, MemoryInitialization, MemoryInitializer, MemoryPlan, Module,
3    TablePlan, TableSegment, TableSegmentElements,
4};
5use crate::prelude::*;
6use crate::{
7    DataIndex, DefinedFuncIndex, ElemIndex, EntityIndex, EntityType, FuncIndex, GlobalIndex,
8    InitMemory, MemoryIndex, ModuleTypesBuilder, PrimaryMap, StaticMemoryInitializer, TableIndex,
9    TableInitialValue, Tunables, TypeConvert, TypeIndex, Unsigned, WasmError, WasmHeapType,
10    WasmResult, WasmValType, WasmparserTypeConverter,
11};
12use anyhow::{bail, Result};
13use cranelift_entity::packed_option::ReservedValue;
14use std::borrow::Cow;
15use std::collections::HashMap;
16use std::mem;
17use std::path::PathBuf;
18use std::sync::Arc;
19use wasmparser::{
20    types::Types, CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind,
21    FuncToValidate, FunctionBody, KnownCustom, NameSectionReader, Naming, Parser, Payload, TypeRef,
22    Validator, ValidatorResources,
23};
24use wasmtime_types::{ConstExpr, ConstOp, ModuleInternedTypeIndex, SizeOverflow, WasmHeapTopType};
25
/// Object containing the standalone environment information.
pub struct ModuleEnvironment<'a, 'data> {
    /// The current module being translated
    result: ModuleTranslation<'data>,

    /// Intern'd types for this entire translation, shared by all modules.
    types: &'a mut ModuleTypesBuilder,

    // Various bits and pieces of configuration
    /// Validator driven in lock-step with parsing; every payload is fed to it
    /// before being recorded so that `result` only ever holds validated input.
    validator: &'a mut Validator,
    /// Compilation tunables (e.g. debuginfo settings) consulted during
    /// translation of tables, memories, and function bodies.
    tunables: &'a Tunables,
}
38
/// The result of translating via `ModuleEnvironment`. Function bodies are not
/// yet translated, and data initializers have not yet been copied out of the
/// original buffer.
#[derive(Default)]
pub struct ModuleTranslation<'data> {
    /// Module information.
    pub module: Module,

    /// The input wasm binary.
    ///
    /// This can be useful, for example, when modules are parsed from a
    /// component and the embedder wants access to the raw wasm modules
    /// themselves.
    pub wasm: &'data [u8],

    /// References to the function bodies.
    pub function_body_inputs: PrimaryMap<DefinedFuncIndex, FunctionBodyData<'data>>,

    /// A list of type signatures which are considered exported from this
    /// module, or those that can possibly be called. This list is sorted, and
    /// trampolines for each of these signatures are required.
    pub exported_signatures: Vec<ModuleInternedTypeIndex>,

    /// DWARF debug information, if enabled, parsed from the module.
    pub debuginfo: DebugInfoData<'data>,

    /// Set if debuginfo was found but it was not parsed due to `Tunables`
    /// configuration.
    pub has_unparsed_debuginfo: bool,

    /// List of data segments found in this module which should be concatenated
    /// together for the final compiled artifact.
    ///
    /// These data segments, when concatenated, are indexed by the
    /// `MemoryInitializer` type.
    pub data: Vec<Cow<'data, [u8]>>,

    /// The desired alignment of `data` in the final data section of the object
    /// file that we'll emit.
    ///
    /// Note that this is 1 by default but `MemoryInitialization::Static` might
    /// switch this to a higher alignment to facilitate mmap-ing data from
    /// an object file into a linear memory.
    pub data_align: Option<u64>,

    /// Total size of all data pushed onto `data` so far.
    ///
    /// A `u32` suffices because modules with more than 4 GiB of data are
    /// rejected during translation (see the data-section handling in
    /// `translate_payload`).
    total_data: u32,

    /// List of passive element segments found in this module which will get
    /// concatenated for the final artifact.
    pub passive_data: Vec<&'data [u8]>,

    /// Total size of all passive data pushed into `passive_data` so far.
    /// Like `total_data`, bounded to 4 GiB by translation.
    total_passive_data: u32,

    /// When we're parsing the code section this will be incremented so we know
    /// which function is currently being defined.
    code_index: u32,

    /// The type information of the current module made available at the end of the
    /// validation process.
    ///
    /// `None` until the `End` payload has been processed.
    types: Option<Types>,
}
102
103impl<'data> ModuleTranslation<'data> {
104    /// Returns a reference to the type information of the current module.
105    pub fn get_types(&self) -> &Types {
106        self.types
107            .as_ref()
108            .expect("module type information to be available")
109    }
110}
111
/// Contains function data: byte code and its offset in the module.
pub struct FunctionBodyData<'a> {
    /// The body of the function, containing code and locals.
    pub body: FunctionBody<'a>,
    /// Validator for the function body
    ///
    /// Produced by the module-level `Validator` when the corresponding code
    /// section entry was parsed; consumed later when the body is compiled.
    pub validator: FuncToValidate<ValidatorResources>,
}
119
/// DWARF and name-section debug information parsed out of a wasm module's
/// custom sections (populated by `register_custom_section`/`dwarf_section`).
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct DebugInfoData<'a> {
    /// The `gimli` DWARF sections that `gimli::Dwarf` owns directly
    /// (`.debug_info`, `.debug_abbrev`, `.debug_line`, ...).
    pub dwarf: Dwarf<'a>,
    /// Contents of the wasm `name` custom section, if present.
    pub name_section: NameSection<'a>,
    /// Per-module metadata: code section offset, imported function count, and
    /// per-function params/locals info.
    pub wasm_file: WasmFileInfo,
    // The remaining fields hold DWARF sections kept outside `gimli::Dwarf`;
    // `dwarf_section` fills them in and wires the range/location lists back
    // into `dwarf` afterwards.
    pub debug_loc: gimli::DebugLoc<Reader<'a>>,
    pub debug_loclists: gimli::DebugLocLists<Reader<'a>>,
    pub debug_ranges: gimli::DebugRanges<Reader<'a>>,
    pub debug_rnglists: gimli::DebugRngLists<Reader<'a>>,
    // DWARF package (`.dwp`) index sections.
    pub debug_cu_index: gimli::DebugCuIndex<Reader<'a>>,
    pub debug_tu_index: gimli::DebugTuIndex<Reader<'a>>,
}
133
#[allow(missing_docs)]
/// DWARF data borrowed from the original wasm binary, little-endian encoded.
pub type Dwarf<'input> = gimli::Dwarf<Reader<'input>>;

// All DWARF sections in a wasm binary are read as little-endian byte slices
// borrowed directly from the input module.
type Reader<'input> = gimli::EndianSlice<'input, gimli::LittleEndian>;
138
/// Parsed contents of the wasm `name` custom section.
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct NameSection<'a> {
    /// The module's own name, if the name section provided one.
    pub module_name: Option<&'a str>,
    /// Function names keyed by function index (imports included).
    pub func_names: HashMap<FuncIndex, &'a str>,
    /// Local-variable names: function index -> (local index -> name).
    pub locals_names: HashMap<FuncIndex, HashMap<u32, &'a str>>,
}
146
/// File-level metadata about the wasm module being translated, used when
/// generating native debug info.
#[derive(Debug, Default)]
#[allow(missing_docs)]
pub struct WasmFileInfo {
    /// Path of the original wasm file, when known.
    pub path: Option<PathBuf>,
    /// Byte offset of the code section within the wasm binary (used to
    /// relate DWARF addresses back to function bodies).
    pub code_section_offset: u64,
    /// Number of imported functions; defined functions are indexed after
    /// these.
    pub imported_func_count: u32,
    /// Per-defined-function metadata, pushed in code-section order.
    pub funcs: Vec<FunctionMetadata>,
}
155
/// Type information about a single function, recorded for debuginfo purposes.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct FunctionMetadata {
    /// Types of the function's parameters.
    pub params: Box<[WasmValType]>,
    /// Locals in the wasm run-length encoding: `(count, type)` pairs as read
    /// from the function body's locals declarations.
    pub locals: Box<[(u32, WasmValType)]>,
}
162
163impl<'a, 'data> ModuleEnvironment<'a, 'data> {
164    /// Allocates the environment data structures.
165    pub fn new(
166        tunables: &'a Tunables,
167        validator: &'a mut Validator,
168        types: &'a mut ModuleTypesBuilder,
169    ) -> Self {
170        Self {
171            result: ModuleTranslation::default(),
172            types,
173            tunables,
174            validator,
175        }
176    }
177
178    /// Translate a wasm module using this environment.
179    ///
180    /// This function will translate the `data` provided with `parser`,
181    /// validating everything along the way with this environment's validator.
182    ///
183    /// The result of translation, [`ModuleTranslation`], contains everything
184    /// necessary to compile functions afterwards as well as learn type
185    /// information about the module at runtime.
186    pub fn translate(
187        mut self,
188        parser: Parser,
189        data: &'data [u8],
190    ) -> Result<ModuleTranslation<'data>> {
191        self.result.wasm = data;
192
193        for payload in parser.parse_all(data) {
194            self.translate_payload(payload?)?;
195        }
196
197        Ok(self.result)
198    }
199
    /// Processes a single `payload` produced by the wasm parser.
    ///
    /// Every payload is first fed to `self.validator` so that only validated
    /// contents are recorded into `self.result`; the per-section arms below
    /// then translate the wasmparser structures into Wasmtime's own data
    /// structures. Returns an error if validation fails or if an unsupported
    /// construct (e.g. a component, or interface-types sections) is found.
    fn translate_payload(&mut self, payload: Payload<'data>) -> Result<()> {
        match payload {
            Payload::Version {
                num,
                encoding,
                range,
            } => {
                self.validator.version(num, encoding, &range)?;
                // Only core modules are handled here; components are
                // translated elsewhere.
                match encoding {
                    Encoding::Module => {}
                    Encoding::Component => {
                        bail!("expected a WebAssembly module but was given a WebAssembly component")
                    }
                }
            }

            Payload::End(offset) => {
                // `Validator::end` finalizes validation and hands back the
                // module's full type information, which we stash for later
                // queries via `ModuleTranslation::get_types`.
                self.result.types = Some(self.validator.end(offset)?);

                // With the `escaped_funcs` set of functions finished
                // we can calculate the set of signatures that are exported as
                // the set of exported functions' signatures.
                self.result.exported_signatures = self
                    .result
                    .module
                    .functions
                    .iter()
                    .filter_map(|(_, func)| {
                        if func.is_escaping() {
                            Some(func.signature)
                        } else {
                            None
                        }
                    })
                    .collect();
                // Sorted + deduped so the list can be binary-searched and so
                // each signature gets exactly one trampoline.
                self.result.exported_signatures.sort_unstable();
                self.result.exported_signatures.dedup();
            }

            Payload::TypeSection(types) => {
                self.validator.type_section(&types)?;

                let count = self.validator.types(0).unwrap().core_type_count();
                log::trace!("interning {count} Wasm types");

                let capacity = usize::try_from(count).unwrap();
                self.result.module.types.reserve(capacity);
                self.types.reserve_wasm_signatures(capacity);

                // Iterate over each *rec group* -- not type -- defined in the
                // types section. Rec groups are the unit of canonicalization
                // and therefore the unit at which we need to process at a
                // time. `wasmparser` has already done the hard work of
                // de-duplicating and canonicalizing the rec groups within the
                // module for us, we just need to translate them into our data
                // structures. Note that, if the Wasm defines duplicate rec
                // groups, we need copy the duplicates over (shallowly) as well,
                // so that our types index space doesn't have holes.
                let mut type_index = 0;
                while type_index < count {
                    // Re-fetched each iteration: `intern_rec_group` below
                    // needs `&mut self.types`, so the borrow can't be held
                    // across the loop body.
                    let validator_types = self.validator.types(0).unwrap();

                    // Get the rec group for the current type index, which is
                    // always the first type defined in a rec group.
                    log::trace!("looking up wasmparser type for index {type_index}");
                    let core_type_id = validator_types.core_type_at(type_index).unwrap_sub();
                    log::trace!(
                        "  --> {core_type_id:?} = {:?}",
                        validator_types[core_type_id],
                    );
                    let rec_group_id = validator_types.rec_group_id_of(core_type_id);
                    debug_assert_eq!(
                        validator_types
                            .rec_group_elements(rec_group_id)
                            .position(|id| id == core_type_id),
                        Some(0)
                    );

                    // Intern the rec group and then fill in this module's types
                    // index space.
                    let interned = self.types.intern_rec_group(
                        &self.result.module,
                        validator_types,
                        rec_group_id,
                    )?;
                    let elems = self.types.rec_group_elements(interned);
                    let len = elems.len();
                    self.result.module.types.reserve(len);
                    for ty in elems {
                        self.result.module.types.push(ty);
                    }

                    // Advance `type_index` to the start of the next rec group.
                    type_index += u32::try_from(len).unwrap();
                }
            }

            Payload::ImportSection(imports) => {
                self.validator.import_section(&imports)?;

                let cnt = usize::try_from(imports.count()).unwrap();
                self.result.module.initializers.reserve(cnt);

                // Record each import's entity type and bump the per-kind
                // imported-item counters as we go.
                for entry in imports {
                    let import = entry?;
                    let ty = match import.ty {
                        TypeRef::Func(index) => {
                            let index = TypeIndex::from_u32(index);
                            let interned_index = self.result.module.types[index];
                            self.result.module.num_imported_funcs += 1;
                            self.result.debuginfo.wasm_file.imported_func_count += 1;
                            EntityType::Function(wasmtime_types::EngineOrModuleTypeIndex::Module(
                                interned_index,
                            ))
                        }
                        TypeRef::Memory(ty) => {
                            self.result.module.num_imported_memories += 1;
                            EntityType::Memory(ty.into())
                        }
                        TypeRef::Global(ty) => {
                            self.result.module.num_imported_globals += 1;
                            EntityType::Global(self.convert_global_type(&ty))
                        }
                        TypeRef::Table(ty) => {
                            self.result.module.num_imported_tables += 1;
                            EntityType::Table(self.convert_table_type(&ty)?)
                        }

                        // doesn't get past validation
                        TypeRef::Tag(_) => unreachable!(),
                    };
                    self.declare_import(import.module, import.name, ty);
                }
            }

            Payload::FunctionSection(functions) => {
                self.validator.function_section(&functions)?;

                let cnt = usize::try_from(functions.count()).unwrap();
                self.result.module.functions.reserve_exact(cnt);

                // Each entry is a type index; resolve it to the interned
                // signature and declare the function in the module.
                for entry in functions {
                    let sigindex = entry?;
                    let ty = TypeIndex::from_u32(sigindex);
                    let interned_index = self.result.module.types[ty];
                    self.result.module.push_function(interned_index);
                }
            }

            Payload::TableSection(tables) => {
                self.validator.table_section(&tables)?;
                let cnt = usize::try_from(tables.count()).unwrap();
                self.result.module.table_plans.reserve_exact(cnt);

                for entry in tables {
                    let wasmparser::Table { ty, init } = entry?;
                    let table = self.convert_table_type(&ty)?;
                    let plan = TablePlan::for_table(table, &self.tunables);
                    self.result.module.table_plans.push(plan);
                    // Record the table's initial value; a non-null init
                    // expression may reference functions, which then "escape"
                    // and need funcrefs.
                    let init = match init {
                        wasmparser::TableInit::RefNull => TableInitialValue::Null {
                            precomputed: Vec::new(),
                        },
                        wasmparser::TableInit::Expr(expr) => {
                            let (init, escaped) = ConstExpr::from_wasmparser(expr)?;
                            for f in escaped {
                                self.flag_func_escaped(f);
                            }
                            TableInitialValue::Expr(init)
                        }
                    };
                    self.result
                        .module
                        .table_initialization
                        .initial_values
                        .push(init);
                }
            }

            Payload::MemorySection(memories) => {
                self.validator.memory_section(&memories)?;

                let cnt = usize::try_from(memories.count()).unwrap();
                self.result.module.memory_plans.reserve_exact(cnt);

                for entry in memories {
                    let memory = entry?;
                    let plan = MemoryPlan::for_memory(memory.into(), &self.tunables);
                    self.result.module.memory_plans.push(plan);
                }
            }

            Payload::TagSection(tags) => {
                self.validator.tag_section(&tags)?;

                // This feature isn't enabled at this time, so we should
                // never get here.
                unreachable!();
            }

            Payload::GlobalSection(globals) => {
                self.validator.global_section(&globals)?;

                let cnt = usize::try_from(globals.count()).unwrap();
                self.result.module.globals.reserve_exact(cnt);

                for entry in globals {
                    let wasmparser::Global { ty, init_expr } = entry?;
                    // Like table initializers, global initializers can
                    // reference functions that thereby escape.
                    let (initializer, escaped) = ConstExpr::from_wasmparser(init_expr)?;
                    for f in escaped {
                        self.flag_func_escaped(f);
                    }
                    let ty = self.convert_global_type(&ty);
                    self.result.module.globals.push(ty);
                    self.result.module.global_initializers.push(initializer);
                }
            }

            Payload::ExportSection(exports) => {
                self.validator.export_section(&exports)?;

                let cnt = usize::try_from(exports.count()).unwrap();
                self.result.module.exports.reserve(cnt);

                for entry in exports {
                    let wasmparser::Export { name, kind, index } = entry?;
                    let entity = match kind {
                        ExternalKind::Func => {
                            // Exported functions escape: their funcref may be
                            // observed outside the module.
                            let index = FuncIndex::from_u32(index);
                            self.flag_func_escaped(index);
                            EntityIndex::Function(index)
                        }
                        ExternalKind::Table => EntityIndex::Table(TableIndex::from_u32(index)),
                        ExternalKind::Memory => EntityIndex::Memory(MemoryIndex::from_u32(index)),
                        ExternalKind::Global => EntityIndex::Global(GlobalIndex::from_u32(index)),

                        // this never gets past validation
                        ExternalKind::Tag => unreachable!(),
                    };
                    self.result
                        .module
                        .exports
                        .insert(String::from(name), entity);
                }
            }

            Payload::StartSection { func, range } => {
                self.validator.start_section(func, &range)?;

                let func_index = FuncIndex::from_u32(func);
                self.flag_func_escaped(func_index);
                // Validation guarantees at most one start section.
                debug_assert!(self.result.module.start_func.is_none());
                self.result.module.start_func = Some(func_index);
            }

            Payload::ElementSection(elements) => {
                self.validator.element_section(&elements)?;

                for (index, entry) in elements.into_iter().enumerate() {
                    let wasmparser::Element {
                        kind,
                        items,
                        range: _,
                    } = entry?;

                    // Build up a list of `FuncIndex` corresponding to all the
                    // entries listed in this segment. Note that it's not
                    // possible to create anything other than a `ref.null
                    // extern` for externref segments, so those just get
                    // translated to the reserved value of `FuncIndex`.
                    let elements = match items {
                        ElementItems::Functions(funcs) => {
                            let mut elems =
                                Vec::with_capacity(usize::try_from(funcs.count()).unwrap());
                            for func in funcs {
                                let func = FuncIndex::from_u32(func?);
                                self.flag_func_escaped(func);
                                elems.push(func);
                            }
                            TableSegmentElements::Functions(elems.into())
                        }
                        ElementItems::Expressions(_ty, items) => {
                            let mut exprs =
                                Vec::with_capacity(usize::try_from(items.count()).unwrap());
                            for expr in items {
                                let (expr, escaped) = ConstExpr::from_wasmparser(expr?)?;
                                exprs.push(expr);
                                for func in escaped {
                                    self.flag_func_escaped(func);
                                }
                            }
                            TableSegmentElements::Expressions(exprs.into())
                        }
                    };

                    match kind {
                        // Active segments initialize a specific table at a
                        // constant offset at instantiation time.
                        ElementKind::Active {
                            table_index,
                            offset_expr,
                        } => {
                            let table_index = TableIndex::from_u32(table_index.unwrap_or(0));
                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
                            // Offsets are i32/i64/global.get expressions, so
                            // no function can escape through them.
                            debug_assert!(escaped.is_empty());

                            self.result
                                .module
                                .table_initialization
                                .segments
                                .push(TableSegment {
                                    table_index,
                                    offset,
                                    elements: elements.into(),
                                });
                        }

                        // Passive segments are kept around for `table.init`;
                        // map the wasm-level element index to our storage slot.
                        ElementKind::Passive => {
                            let elem_index = ElemIndex::from_u32(index as u32);
                            let index = self.result.module.passive_elements.len();
                            self.result.module.passive_elements.push(elements.into());
                            self.result
                                .module
                                .passive_elements_map
                                .insert(elem_index, index);
                        }

                        // Declared segments only exist to flag escaping
                        // functions, which was already done above.
                        ElementKind::Declared => {}
                    }
                }
            }

            Payload::CodeSectionStart { count, range, .. } => {
                self.validator.code_section_start(count, &range)?;
                let cnt = usize::try_from(count).unwrap();
                self.result.function_body_inputs.reserve_exact(cnt);
                self.result.debuginfo.wasm_file.code_section_offset = range.start as u64;
            }

            Payload::CodeSectionEntry(body) => {
                let validator = self.validator.code_section_entry(&body)?;
                // `code_index` counts defined functions; the module-level
                // index space puts imported functions first.
                let func_index =
                    self.result.code_index + self.result.module.num_imported_funcs as u32;
                let func_index = FuncIndex::from_u32(func_index);

                if self.tunables.generate_native_debuginfo {
                    let sig_index = self.result.module.functions[func_index].signature;
                    let sig = self.types[sig_index].unwrap_func();
                    let mut locals = Vec::new();
                    // Locals are run-length encoded as (count, type) pairs.
                    for pair in body.get_locals_reader()? {
                        let (cnt, ty) = pair?;
                        let ty = self.convert_valtype(ty);
                        locals.push((cnt, ty));
                    }
                    self.result
                        .debuginfo
                        .wasm_file
                        .funcs
                        .push(FunctionMetadata {
                            locals: locals.into_boxed_slice(),
                            params: sig.params().into(),
                        });
                }
                self.result
                    .function_body_inputs
                    .push(FunctionBodyData { validator, body });
                self.result.code_index += 1;
            }

            Payload::DataSection(data) => {
                self.validator.data_section(&data)?;

                // Initializers are still in the "segmented" form at this point
                // in translation; conversion to a static image, if any,
                // happens later.
                let initializers = match &mut self.result.module.memory_initialization {
                    MemoryInitialization::Segmented(i) => i,
                    _ => unreachable!(),
                };

                let cnt = usize::try_from(data.count()).unwrap();
                initializers.reserve_exact(cnt);
                self.result.data.reserve_exact(cnt);

                for (index, entry) in data.into_iter().enumerate() {
                    let wasmparser::Data {
                        kind,
                        data,
                        range: _,
                    } = entry?;
                    // Reserves this segment's byte range within the
                    // concatenated data, erroring out (rather than wrapping)
                    // if the u32 running total would overflow 4 GiB.
                    let mk_range = |total: &mut u32| -> Result<_, WasmError> {
                        let range = u32::try_from(data.len())
                            .ok()
                            .and_then(|size| {
                                let start = *total;
                                let end = start.checked_add(size)?;
                                Some(start..end)
                            })
                            .ok_or_else(|| {
                                WasmError::Unsupported(format!(
                                    "more than 4 gigabytes of data in wasm module",
                                ))
                            })?;
                        *total += range.end - range.start;
                        Ok(range)
                    };
                    match kind {
                        DataKind::Active {
                            memory_index,
                            offset_expr,
                        } => {
                            let range = mk_range(&mut self.result.total_data)?;
                            let memory_index = MemoryIndex::from_u32(memory_index);
                            let (offset, escaped) = ConstExpr::from_wasmparser(offset_expr)?;
                            // Data offsets can't reference functions.
                            debug_assert!(escaped.is_empty());

                            initializers.push(MemoryInitializer {
                                memory_index,
                                offset,
                                data: range,
                            });
                            self.result.data.push(data.into());
                        }
                        DataKind::Passive => {
                            let data_index = DataIndex::from_u32(index as u32);
                            let range = mk_range(&mut self.result.total_passive_data)?;
                            self.result.passive_data.push(data);
                            self.result
                                .module
                                .passive_data_map
                                .insert(data_index, range);
                        }
                    }
                }
            }

            Payload::DataCountSection { count, range } => {
                self.validator.data_count_section(count, &range)?;

                // Note: the count passed in here is the *total* segment count
                // There is no way to reserve for just the passive segments as
                // they are discovered when iterating the data section entries
                // Given that the total segment count might be much larger than
                // the passive count, do not reserve anything here.
            }

            Payload::CustomSection(s)
                if s.name() == "webidl-bindings" || s.name() == "wasm-interface-types" =>
            {
                bail!(
                    "\
Support for interface types has temporarily been removed from `wasmtime`.

For more information about this temporary change you can read on the issue online:

    https://github.com/bytecodealliance/wasmtime/issues/1271

and for re-adding support for interface types you can see this issue:

    https://github.com/bytecodealliance/wasmtime/issues/677
"
                )
            }

            Payload::CustomSection(s) => {
                self.register_custom_section(&s);
            }

            // It's expected that validation will probably reject other
            // payloads such as `UnknownSection` or those related to the
            // component model. If, however, something gets past validation then
            // that's a bug in Wasmtime as we forgot to implement something.
            other => {
                self.validator.payload(&other)?;
                panic!("unimplemented section in wasm file {other:?}");
            }
        }
        Ok(())
    }
674
675    fn register_custom_section(&mut self, section: &CustomSectionReader<'data>) {
676        match section.as_known() {
677            KnownCustom::Name(name) => {
678                let result = self.name_section(name);
679                if let Err(e) = result {
680                    log::warn!("failed to parse name section {:?}", e);
681                }
682            }
683            _ => {
684                let name = section.name().trim_end_matches(".dwo");
685                if name.starts_with(".debug_") {
686                    self.dwarf_section(name, section);
687                }
688            }
689        }
690    }
691
    /// Stores the contents of a single `.debug_*` custom section into the
    /// translation's `DebugInfoData`.
    ///
    /// If neither native-debuginfo generation nor wasm-debuginfo parsing is
    /// enabled this only flags `has_unparsed_debuginfo`. Otherwise the raw
    /// bytes are wrapped in a little-endian `gimli` reader and routed to the
    /// matching field; unknown `.debug_*` names are logged and skipped.
    fn dwarf_section(&mut self, name: &str, section: &CustomSectionReader<'data>) {
        if !self.tunables.generate_native_debuginfo && !self.tunables.parse_wasm_debuginfo {
            self.result.has_unparsed_debuginfo = true;
            return;
        }
        let info = &mut self.result.debuginfo;
        let dwarf = &mut info.dwarf;
        let endian = gimli::LittleEndian;
        let data = section.data();
        let slice = gimli::EndianSlice::new(data, endian);

        match name {
            // `gimli::Dwarf` fields.
            ".debug_abbrev" => dwarf.debug_abbrev = gimli::DebugAbbrev::new(data, endian),
            ".debug_addr" => dwarf.debug_addr = gimli::DebugAddr::from(slice),
            ".debug_info" => {
                dwarf.debug_info = gimli::DebugInfo::new(data, endian);
            }
            ".debug_line" => dwarf.debug_line = gimli::DebugLine::new(data, endian),
            ".debug_line_str" => dwarf.debug_line_str = gimli::DebugLineStr::from(slice),
            ".debug_str" => dwarf.debug_str = gimli::DebugStr::new(data, endian),
            ".debug_str_offsets" => dwarf.debug_str_offsets = gimli::DebugStrOffsets::from(slice),
            ".debug_str_sup" => {
                // Supplementary string section lives in a separate `Dwarf`
                // hung off of the main one.
                let mut dwarf_sup: Dwarf<'data> = Default::default();
                dwarf_sup.debug_str = gimli::DebugStr::from(slice);
                dwarf.sup = Some(Arc::new(dwarf_sup));
            }
            ".debug_types" => dwarf.debug_types = gimli::DebugTypes::from(slice),

            // Additional fields.
            ".debug_loc" => info.debug_loc = gimli::DebugLoc::from(slice),
            ".debug_loclists" => info.debug_loclists = gimli::DebugLocLists::from(slice),
            ".debug_ranges" => info.debug_ranges = gimli::DebugRanges::new(data, endian),
            ".debug_rnglists" => info.debug_rnglists = gimli::DebugRngLists::new(data, endian),

            // DWARF package fields
            ".debug_cu_index" => info.debug_cu_index = gimli::DebugCuIndex::new(data, endian),
            ".debug_tu_index" => info.debug_tu_index = gimli::DebugTuIndex::new(data, endian),

            // We don't use these at the moment.
            ".debug_aranges" | ".debug_pubnames" | ".debug_pubtypes" => return,
            other => {
                log::warn!("unknown debug section `{}`", other);
                return;
            }
        }

        // Refresh the combined range/location lists after any of their
        // constituent sections changes. (Early `return`s above skip this
        // because none of their sections feed these lists.)
        dwarf.ranges = gimli::RangeLists::new(info.debug_ranges, info.debug_rnglists);
        dwarf.locations = gimli::LocationLists::new(info.debug_loc, info.debug_loclists);
    }
742
743    /// Declares a new import with the `module` and `field` names, importing the
744    /// `ty` specified.
745    ///
746    /// Note that this method is somewhat tricky due to the implementation of
747    /// the module linking proposal. In the module linking proposal two-level
748    /// imports are recast as single-level imports of instances. That recasting
749    /// happens here by recording an import of an instance for the first time
750    /// we see a two-level import.
751    ///
752    /// When the module linking proposal is disabled, however, disregard this
753    /// logic and instead work directly with two-level imports since no
754    /// instances are defined.
755    fn declare_import(&mut self, module: &'data str, field: &'data str, ty: EntityType) {
756        let index = self.push_type(ty);
757        self.result.module.initializers.push(Initializer::Import {
758            name: module.to_owned(),
759            field: field.to_owned(),
760            index,
761        });
762    }
763
764    fn push_type(&mut self, ty: EntityType) -> EntityIndex {
765        match ty {
766            EntityType::Function(ty) => EntityIndex::Function({
767                let func_index = self
768                    .result
769                    .module
770                    .push_function(ty.unwrap_module_type_index());
771                // Imported functions can escape; in fact, they've already done
772                // so to get here.
773                self.flag_func_escaped(func_index);
774                func_index
775            }),
776            EntityType::Table(ty) => {
777                let plan = TablePlan::for_table(ty, &self.tunables);
778                EntityIndex::Table(self.result.module.table_plans.push(plan))
779            }
780            EntityType::Memory(ty) => {
781                let plan = MemoryPlan::for_memory(ty, &self.tunables);
782                EntityIndex::Memory(self.result.module.memory_plans.push(plan))
783            }
784            EntityType::Global(ty) => EntityIndex::Global(self.result.module.globals.push(ty)),
785            EntityType::Tag(_) => unimplemented!(),
786        }
787    }
788
789    fn flag_func_escaped(&mut self, func: FuncIndex) {
790        let ty = &mut self.result.module.functions[func];
791        // If this was already assigned a funcref index no need to re-assign it.
792        if ty.is_escaping() {
793            return;
794        }
795        let index = self.result.module.num_escaped_funcs as u32;
796        ty.func_ref = FuncRefIndex::from_u32(index);
797        self.result.module.num_escaped_funcs += 1;
798    }
799
800    /// Parses the Name section of the wasm module.
801    fn name_section(&mut self, names: NameSectionReader<'data>) -> WasmResult<()> {
802        for subsection in names {
803            match subsection? {
804                wasmparser::Name::Function(names) => {
805                    for name in names {
806                        let Naming { index, name } = name?;
807                        // Skip this naming if it's naming a function that
808                        // doesn't actually exist.
809                        if (index as usize) >= self.result.module.functions.len() {
810                            continue;
811                        }
812
813                        // Store the name unconditionally, regardless of
814                        // whether we're parsing debuginfo, since function
815                        // names are almost always present in the
816                        // final compilation artifact.
817                        let index = FuncIndex::from_u32(index);
818                        self.result
819                            .debuginfo
820                            .name_section
821                            .func_names
822                            .insert(index, name);
823                    }
824                }
825                wasmparser::Name::Module { name, .. } => {
826                    self.result.module.name = Some(name.to_string());
827                    if self.tunables.generate_native_debuginfo {
828                        self.result.debuginfo.name_section.module_name = Some(name);
829                    }
830                }
831                wasmparser::Name::Local(reader) => {
832                    if !self.tunables.generate_native_debuginfo {
833                        continue;
834                    }
835                    for f in reader {
836                        let f = f?;
837                        // Skip this naming if it's naming a function that
838                        // doesn't actually exist.
839                        if (f.index as usize) >= self.result.module.functions.len() {
840                            continue;
841                        }
842                        for name in f.names {
843                            let Naming { index, name } = name?;
844
845                            self.result
846                                .debuginfo
847                                .name_section
848                                .locals_names
849                                .entry(FuncIndex::from_u32(f.index))
850                                .or_insert(HashMap::new())
851                                .insert(index, name);
852                        }
853                    }
854                }
855                wasmparser::Name::Label(_)
856                | wasmparser::Name::Type(_)
857                | wasmparser::Name::Table(_)
858                | wasmparser::Name::Global(_)
859                | wasmparser::Name::Memory(_)
860                | wasmparser::Name::Element(_)
861                | wasmparser::Name::Data(_)
862                | wasmparser::Name::Tag(_)
863                | wasmparser::Name::Field(_)
864                | wasmparser::Name::Unknown { .. } => {}
865            }
866        }
867        Ok(())
868    }
869}
870
871impl TypeConvert for ModuleEnvironment<'_, '_> {
872    fn lookup_heap_type(&self, index: wasmparser::UnpackedIndex) -> WasmHeapType {
873        WasmparserTypeConverter::new(&self.types, &self.result.module).lookup_heap_type(index)
874    }
875
876    fn lookup_type_index(
877        &self,
878        index: wasmparser::UnpackedIndex,
879    ) -> wasmtime_types::EngineOrModuleTypeIndex {
880        WasmparserTypeConverter::new(&self.types, &self.result.module).lookup_type_index(index)
881    }
882}
883
impl ModuleTranslation<'_> {
    /// Attempts to convert segmented memory initialization into static
    /// initialization for the module that this translation represents.
    ///
    /// If this module's memory initialization is not compatible with paged
    /// initialization then this won't change anything. Otherwise if it is
    /// compatible then the `memory_initialization` field will be updated.
    ///
    /// Takes a `page_size` argument in order to ensure that all
    /// initialization is page-aligned for mmap-ability, and
    /// `max_image_size_always_allowed` to control how we decide
    /// whether to use static init.
    ///
    /// We will try to avoid generating very sparse images, which are
    /// possible if e.g. a module has an initializer at offset 0 and a
    /// very high offset (say, 1 GiB). To avoid this, we use a dual
    /// condition: we always allow images less than
    /// `max_image_size_always_allowed`, and the embedder of Wasmtime
    /// can set this if desired to ensure that static init should
    /// always be done if the size of the module or its heaps is
    /// otherwise bounded by the system. We also allow images with
    /// static init data bigger than that, but only if it is "dense",
    /// defined as having at least half (50%) of its pages with some
    /// data.
    ///
    /// We could do something slightly better by building a dense part
    /// and keeping a sparse list of outlier/leftover segments (see
    /// issue #3820). This would also allow mostly-static init of
    /// modules that have some dynamically-placed data segments. But,
    /// for now, this is sufficient to allow a system that "knows what
    /// it's doing" to always get static init.
    pub fn try_static_init(&mut self, page_size: u64, max_image_size_always_allowed: u64) {
        // This method only attempts to transform a `Segmented` memory init
        // into a `Static` one, no other state.
        if !self.module.memory_initialization.is_segmented() {
            return;
        }

        // First a dry run of memory initialization is performed. This
        // collects information about the extent of memory initialized for each
        // memory as well as the size of all data segments being copied in.
        struct Memory {
            // Total bytes of segment data targeting this memory.
            data_size: u64,
            // Lowest address written by any segment (u64::MAX if none).
            min_addr: u64,
            // One-past-the-highest address written by any segment.
            max_addr: u64,
            // The `usize` here is a pointer into `self.data` which is the list
            // of data segments corresponding to what was found in the original
            // wasm module.
            segments: Vec<(usize, StaticMemoryInitializer)>,
        }
        let mut info = PrimaryMap::with_capacity(self.module.memory_plans.len());
        for _ in 0..self.module.memory_plans.len() {
            info.push(Memory {
                data_size: 0,
                min_addr: u64::MAX,
                max_addr: 0,
                segments: Vec::new(),
            });
        }

        // Visitor that "runs" memory initialization at compile time without
        // writing any bytes: it only records, per memory, which segments
        // apply and the union of their initialized extents.
        struct InitMemoryAtCompileTime<'a> {
            module: &'a Module,
            info: &'a mut PrimaryMap<MemoryIndex, Memory>,
            // Running index of the segment being visited, used to key back
            // into `self.data`.
            idx: usize,
        }
        impl InitMemory for InitMemoryAtCompileTime<'_> {
            fn memory_size_in_bytes(
                &mut self,
                memory_index: MemoryIndex,
            ) -> Result<u64, SizeOverflow> {
                self.module.memory_plans[memory_index]
                    .memory
                    .minimum_byte_size()
            }

            // Only constant offsets matching the memory's index width can be
            // resolved at compile time; anything else forces runtime init.
            fn eval_offset(&mut self, memory_index: MemoryIndex, expr: &ConstExpr) -> Option<u64> {
                let mem64 = self.module.memory_plans[memory_index].memory.memory64;
                match expr.ops() {
                    &[ConstOp::I32Const(offset)] if !mem64 => Some(offset.unsigned().into()),
                    &[ConstOp::I64Const(offset)] if mem64 => Some(offset.unsigned()),
                    _ => None,
                }
            }

            fn write(&mut self, memory: MemoryIndex, init: &StaticMemoryInitializer) -> bool {
                // Currently `Static` only applies to locally-defined memories,
                // so if a data segment references an imported memory then
                // transitioning to a `Static` memory initializer is not
                // possible.
                if self.module.defined_memory_index(memory).is_none() {
                    return false;
                };
                let info = &mut self.info[memory];
                let data_len = u64::from(init.data.end - init.data.start);
                if data_len > 0 {
                    info.data_size += data_len;
                    info.min_addr = info.min_addr.min(init.offset);
                    info.max_addr = info.max_addr.max(init.offset + data_len);
                    info.segments.push((self.idx, init.clone()));
                }
                self.idx += 1;
                true
            }
        }
        // Run the dry run; if any segment can't be statically resolved (or
        // targets an imported memory) then keep segmented initialization.
        let ok = self
            .module
            .memory_initialization
            .init_memory(&mut InitMemoryAtCompileTime {
                idx: 0,
                module: &self.module,
                info: &mut info,
            });
        if !ok {
            return;
        }

        // Validate that the memory information collected is indeed valid for
        // static memory initialization.
        for (i, info) in info.iter().filter(|(_, info)| info.data_size > 0) {
            let image_size = info.max_addr - info.min_addr;

            // Simplify things for now by bailing out entirely if any memory has
            // a page size smaller than the host's page size. This fixes a case
            // where currently initializers are created in host-page-size units
            // of length which means that a larger-than-the-entire-memory
            // initializer can be created. This can be handled technically but
            // would require some more changes to help fix the assert elsewhere
            // that this protects against.
            if self.module.memory_plans[i].memory.page_size() < page_size {
                return;
            }

            // If the range of memory being initialized is less than twice the
            // total size of the data itself then it's assumed that static
            // initialization is ok. This means we'll at most double memory
            // consumption during the memory image creation process, which is
            // currently assumed to "probably be ok" but this will likely need
            // tweaks over time.
            if image_size < info.data_size.saturating_mul(2) {
                continue;
            }

            // If the memory initialization image is larger than the size of all
            // data, then we still allow memory initialization if the image will
            // be of a relatively modest size, such as 1MB here.
            if image_size < max_image_size_always_allowed {
                continue;
            }

            // At this point memory initialization is concluded to be too
            // expensive to do at compile time so it's entirely deferred to
            // happen at runtime.
            return;
        }

        // Here's where we've now committed to changing to static memory. The
        // memory initialization image is built here from the page data and then
        // it's converted to a single initializer.
        let data = mem::replace(&mut self.data, Vec::new());
        let mut map = PrimaryMap::with_capacity(info.len());
        let mut module_data_size = 0u32;
        for (memory, info) in info.iter() {
            // Create the in-memory `image` which is the initialized contents of
            // this linear memory.
            let extent = if info.segments.len() > 0 {
                (info.max_addr - info.min_addr) as usize
            } else {
                0
            };
            let mut image = Vec::with_capacity(extent);
            for (idx, init) in info.segments.iter() {
                let data = &data[*idx];
                assert_eq!(data.len(), init.data.len());
                // Segment offset relative to the start of the image.
                let offset = usize::try_from(init.offset - info.min_addr).unwrap();
                if image.len() < offset {
                    // Segment starts past the current end of the image:
                    // zero-fill the gap and append the segment's bytes.
                    image.resize(offset, 0u8);
                    image.extend_from_slice(data);
                } else {
                    // Segment starts at or before the current end of the
                    // image: overwrite in place. `splice` inserts any surplus
                    // bytes, growing the image if the segment runs past the
                    // current end.
                    image.splice(
                        offset..(offset + data.len()).min(image.len()),
                        data.iter().copied(),
                    );
                }
            }
            // The image must exactly cover [min_addr, max_addr) and must not
            // have reallocated past its precomputed capacity.
            assert_eq!(image.len(), extent);
            assert_eq!(image.capacity(), extent);
            let mut offset = if info.segments.len() > 0 {
                info.min_addr
            } else {
                0
            };

            // Chop off trailing zeros from the image as memory is already
            // zero-initialized. Note that `i` is the position of a nonzero
            // entry here, so to not lose it we truncate to `i + 1`.
            if let Some(i) = image.iter().rposition(|i| *i != 0) {
                image.truncate(i + 1);
            }

            // Also chop off leading zeros, if any.
            if let Some(i) = image.iter().position(|i| *i != 0) {
                offset += i as u64;
                image.drain(..i);
            }
            let mut len = u64::try_from(image.len()).unwrap();

            // The goal is to enable mapping this image directly into memory, so
            // the offset into linear memory must be a multiple of the page
            // size. If that's not already the case then the image is padded at
            // the front and back with extra zeros as necessary
            if offset % page_size != 0 {
                let zero_padding = offset % page_size;
                self.data.push(vec![0; zero_padding as usize].into());
                offset -= zero_padding;
                len += zero_padding;
            }
            self.data.push(image.into());
            if len % page_size != 0 {
                let zero_padding = page_size - (len % page_size);
                self.data.push(vec![0; zero_padding as usize].into());
                len += zero_padding;
            }

            // Offset/length should now always be page-aligned.
            assert!(offset % page_size == 0);
            assert!(len % page_size == 0);

            // Create the `StaticMemoryInitializer` which describes this image,
            // only needed if the image is actually present and has a nonzero
            // length. The `offset` has been calculated above, originally
            // sourced from `info.min_addr`. The `data` field is the extent
            // within the final data segment we'll emit to an ELF image, which
            // is the concatenation of `self.data`, so here it's the size of
            // the section-so-far plus the current segment we're appending.
            let len = u32::try_from(len).unwrap();
            let init = if len > 0 {
                Some(StaticMemoryInitializer {
                    offset,
                    data: module_data_size..module_data_size + len,
                })
            } else {
                None
            };
            let idx = map.push(init);
            assert_eq!(idx, memory);
            module_data_size += len;
        }
        self.data_align = Some(page_size);
        self.module.memory_initialization = MemoryInitialization::Static { map };
    }

    /// Attempts to convert the module's table initializers to
    /// FuncTable form where possible. This enables lazy table
    /// initialization later by providing a one-to-one map of initial
    /// table values, without having to parse all segments.
    pub fn try_func_table_init(&mut self) {
        // This should be large enough to support very large Wasm
        // modules with huge funcref tables, but small enough to avoid
        // OOMs or DoS on truly sparse tables.
        const MAX_FUNC_TABLE_SIZE: u32 = 1024 * 1024;

        // First convert any element-initialized tables to images of just that
        // single function if the minimum size of the table allows doing so.
        for ((_, init), (_, plan)) in self
            .module
            .table_initialization
            .initial_values
            .iter_mut()
            .zip(
                self.module
                    .table_plans
                    .iter()
                    .skip(self.module.num_imported_tables),
            )
        {
            let table_size = plan.table.minimum;
            if table_size > MAX_FUNC_TABLE_SIZE {
                continue;
            }
            // A `ref.func f` initial value is equivalent to a precomputed
            // list filled entirely with `f`.
            if let TableInitialValue::Expr(expr) = init {
                if let [ConstOp::RefFunc(f)] = expr.ops() {
                    *init = TableInitialValue::Null {
                        precomputed: vec![*f; table_size as usize],
                    };
                }
            }
        }

        let mut segments = mem::take(&mut self.module.table_initialization.segments)
            .into_iter()
            .peekable();

        // The goal of this loop is to interpret a table segment and apply it
        // "statically" to a local table. This will iterate over segments and
        // apply them one-by-one to each table.
        //
        // If any segment can't be applied, however, then this loop exits and
        // all remaining segments are placed back into the segment list. This is
        // because segments are supposed to be initialized one-at-a-time which
        // means that intermediate state is visible with respect to traps. If
        // anything isn't statically known to not trap it's pessimistically
        // assumed to trap meaning all further segment initializers must be
        // applied manually at instantiation time.
        //
        // Note: `peek` is used so a segment is only consumed from the
        // iterator once it has actually been applied; on `break` the peeked
        // segment remains in `segments` and is collected back below.
        while let Some(segment) = segments.peek() {
            let defined_index = match self.module.defined_table_index(segment.table_index) {
                Some(index) => index,
                // Skip imported tables: we can't provide a preconstructed
                // table for them, because their values depend on the
                // imported table overlaid with whatever segments we have.
                None => break,
            };

            // If the base of this segment is dynamic, then we can't
            // include it in the statically-built array of initial
            // contents.
            let offset = match segment.offset.ops() {
                &[ConstOp::I32Const(offset)] => offset.unsigned(),
                _ => break,
            };

            // Get the end of this segment. If out-of-bounds, or too
            // large for our dense table representation, then skip the
            // segment.
            let top = match offset.checked_add(segment.elements.len()) {
                Some(top) => top,
                None => break,
            };
            let table_size = self.module.table_plans[segment.table_index].table.minimum;
            if top > table_size || top > MAX_FUNC_TABLE_SIZE {
                break;
            }

            match self.module.table_plans[segment.table_index]
                .table
                .wasm_ty
                .heap_type
                .top()
            {
                WasmHeapTopType::Func => {}
                // If this is not a funcref table, then we can't support a
                // pre-computed table of function indices. Technically this
                // initializer won't trap so we could continue processing
                // segments, but that's left as a future optimization if
                // necessary.
                WasmHeapTopType::Any | WasmHeapTopType::Extern => break,
            }

            // Function indices can be optimized here, but fully general
            // expressions are deferred to get evaluated at runtime.
            let function_elements = match &segment.elements {
                TableSegmentElements::Functions(indices) => indices,
                TableSegmentElements::Expressions(_) => break,
            };

            let precomputed =
                match &mut self.module.table_initialization.initial_values[defined_index] {
                    TableInitialValue::Null { precomputed } => precomputed,

                    // If this table is still listed as an initial value here
                    // then that means the initial size of the table doesn't
                    // support a precomputed function list, so skip this.
                    // Technically this won't trap so it's possible to process
                    // further initializers, but that's left as a future
                    // optimization.
                    TableInitialValue::Expr(_) => break,
                };

            // At this point we're committing to pre-initializing the table
            // with the `segment` that's being iterated over. This segment is
            // applied to the `precomputed` list for the table by ensuring
            // it's large enough to hold the segment and then copying the
            // segment into the precomputed list.
            if precomputed.len() < top as usize {
                precomputed.resize(top as usize, FuncIndex::reserved_value());
            }
            let dst = &mut precomputed[offset as usize..top as usize];
            dst.copy_from_slice(&function_elements);

            // advance the iterator to see the next segment
            let _ = segments.next();
        }
        // Whatever couldn't be applied statically stays as runtime segments.
        self.module.table_initialization.segments = segments.collect();
    }
}