wasmparser/
parser.rs

1use crate::binary_reader::WASM_MAGIC_NUMBER;
2use crate::prelude::*;
3use crate::CoreTypeSectionReader;
4#[cfg(feature = "features")]
5use crate::WasmFeatures;
6use crate::{
7    limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
8    ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
9    ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
10    ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
11    GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
12    SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
13};
14use core::fmt;
15use core::iter;
16use core::ops::Range;
17
// Version number expected in the header of a core WebAssembly module. It is
// written little-endian into bytes 4 and 5 of the 8-byte header that
// `Parser::is_core_wasm` matches against.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Version number expected in the header of a WebAssembly component (bytes 4
// and 5 of the header matched by `Parser::is_component`).
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// "Kind" discriminators stored in the upper 16 bits of the 32-bit header
// version word (bytes 6 and 7 of the header, little-endian). The header
// handling in `Parser::parse_reader` maps them onto `Encoding::Module` and
// `Encoding::Component` respectively and rejects any other value.
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;
32
/// The binary formats that a [`Parser`] is able to recognize.
///
/// Which one applies is decided from the header of the binary and reported
/// back through [`Payload::Version`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
41
/// An incremental parser of a binary WebAssembly module or component.
///
/// This type is intended to be used to incrementally parse a WebAssembly module
/// or component as bytes become available for the module. This can also be used
/// to parse modules or components that are already entirely resident within memory.
///
/// The primary function for a parser is the [`Parser::parse`] function which
/// will incrementally consume input. You can also use the [`Parser::parse_all`]
/// function to parse a module or component that is entirely resident in memory.
#[derive(Debug, Clone)]
pub struct Parser {
    // What the parser expects to find next in the input; `parse_reader`
    // dispatches on this.
    state: State,
    // Logical position in the overall byte stream of the next byte to parse.
    // Seeded by `Parser::new` and advanced by the number of bytes consumed on
    // every successful `parse` call.
    offset: u64,
    // Number of bytes this parser is still allowed to consume. `parse`
    // truncates its input to this many bytes (treating the cut as EOF) and
    // decrements it by the amount consumed. `u64::MAX` for parsers made with
    // `Parser::new`.
    max_size: u64,
    // Encoding detected from the header. Defaults to `Encoding::Module` until
    // the header has actually been read.
    encoding: Encoding,
    // Feature set handed to the `BinaryReader` created by each `parse` call.
    #[cfg(feature = "features")]
    features: WasmFeatures,
}
60
/// Internal position of a [`Parser`] within the binary; selects what
/// `parse_reader` tries to read next.
#[derive(Clone, Debug)]
enum State {
    /// Nothing has been read yet; the header (magic, version, kind) comes next.
    Header,
    /// The parser sits between sections and expects a section id followed by
    /// its length, or the end of the input.
    SectionStart,
    /// The parser is inside a code section, entered when a code section header
    /// has been read.
    FunctionBody {
        /// Function count read from the code section header. Presumably
        /// decremented as bodies are yielded; that code is outside this view.
        remaining: u32,
        /// Size in bytes of the code section contents that follow the
        /// function count.
        len: u32,
    },
}
67
/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
///
/// The lifetime `'a` is that of the `data` buffer passed to [`Parser::parse`];
/// a parsed payload borrows from that buffer.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    ///
    /// [`Parser::parse`] never returns this when called with `eof` set to
    /// `true`; in that situation the underlying error is returned instead.
    NeedMoreData(u64),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
    },
}
90
/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration lists every kind of item that a [`Parser`] can produce
/// from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
    },

    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing piece-by-piece of a code section.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
    },
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),

    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
    },
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    ComponentExportSection(ComponentExportSectionReader<'a>),

    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),

    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
    },

    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),
}
308
// Section identifiers for core modules. `Parser::parse_reader` matches the
// id byte read from the input against these when the encoding is
// `Encoding::Module`. Id 0 (custom) is accepted for both encodings; the
// visible part of `parse_reader` matches it with the literal `0`.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section identifiers for components. The numeric values overlap with the
// core module ids above, so an id byte is only meaningful together with the
// encoding detected from the header.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
335
336impl Parser {
337    /// Creates a new parser.
338    ///
339    /// Reports errors and ranges relative to `offset` provided, where `offset`
340    /// is some logical offset within the input stream that we're parsing.
341    pub fn new(offset: u64) -> Parser {
342        Parser {
343            state: State::Header,
344            offset,
345            max_size: u64::MAX,
346            // Assume the encoding is a module until we know otherwise
347            encoding: Encoding::Module,
348            #[cfg(feature = "features")]
349            features: WasmFeatures::all(),
350        }
351    }
352
353    /// Tests whether `bytes` looks like a core WebAssembly module.
354    ///
355    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
356    /// starts with the standard core WebAssembly header.
357    pub fn is_core_wasm(bytes: &[u8]) -> bool {
358        const HEADER: [u8; 8] = [
359            WASM_MAGIC_NUMBER[0],
360            WASM_MAGIC_NUMBER[1],
361            WASM_MAGIC_NUMBER[2],
362            WASM_MAGIC_NUMBER[3],
363            WASM_MODULE_VERSION.to_le_bytes()[0],
364            WASM_MODULE_VERSION.to_le_bytes()[1],
365            KIND_MODULE.to_le_bytes()[0],
366            KIND_MODULE.to_le_bytes()[1],
367        ];
368        bytes.starts_with(&HEADER)
369    }
370
371    /// Tests whether `bytes` looks like a WebAssembly component.
372    ///
373    /// This will inspect the first 8 bytes of `bytes` and return `true` if it
374    /// starts with the standard WebAssembly component header.
375    pub fn is_component(bytes: &[u8]) -> bool {
376        const HEADER: [u8; 8] = [
377            WASM_MAGIC_NUMBER[0],
378            WASM_MAGIC_NUMBER[1],
379            WASM_MAGIC_NUMBER[2],
380            WASM_MAGIC_NUMBER[3],
381            WASM_COMPONENT_VERSION.to_le_bytes()[0],
382            WASM_COMPONENT_VERSION.to_le_bytes()[1],
383            KIND_COMPONENT.to_le_bytes()[0],
384            KIND_COMPONENT.to_le_bytes()[1],
385        ];
386        bytes.starts_with(&HEADER)
387    }
388
    /// Returns the currently active set of wasm features that this parser is
    /// using while parsing.
    ///
    /// The set is returned by value; use [`Parser::set_features`] to change
    /// what the parser uses.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    ///
    /// Only available when the `features` Cargo feature is enabled.
    #[cfg(feature = "features")]
    pub fn features(&self) -> WasmFeatures {
        self.features
    }
399
    /// Sets the wasm features active while parsing to the `features` specified.
    ///
    /// The stored set is handed to the [`BinaryReader`] that each call to
    /// [`Parser::parse`] creates, so a change takes effect from the next
    /// `parse` call onwards.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    ///
    /// Only available when the `features` Cargo feature is enabled.
    #[cfg(feature = "features")]
    pub fn set_features(&mut self, features: WasmFeatures) {
        self.features = features;
    }
409
410    /// Attempts to parse a chunk of data.
411    ///
412    /// This method will attempt to parse the next incremental portion of a
413    /// WebAssembly binary. Data available for the module or component is
414    /// provided as `data`, and the data can be incomplete if more data has yet
415    /// to arrive. The `eof` flag indicates whether more data will ever be received.
416    ///
417    /// There are two ways parsing can succeed with this method:
418    ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
    ///   in `data` to parse a payload. The caller needs to wait for more data to
    ///   be available in this situation before calling this method again. It is
    ///   guaranteed that this is only returned if `eof` is `false`.
423    ///
    /// * `Chunk::Parsed` - this indicates that a chunk of the input was
    ///   successfully parsed. The parsed payload is available in this variant,
    ///   which also indicates how many bytes of `data` were consumed. It's
    ///   expected that the caller will not provide these bytes back to the
    ///   [`Parser`] again.
429    ///
430    /// Note that all `Chunk` return values are connected, with a lifetime, to
431    /// the input buffer. Each parsed chunk borrows the input buffer and is a
432    /// view into it for successfully parsed chunks.
433    ///
434    /// It is expected that you'll call this method until `Payload::End` is
435    /// reached, at which point you're guaranteed that the parse has completed.
436    /// Note that complete parsing, for the top-level module or component,
437    /// implies that `data` is empty and `eof` is `true`.
438    ///
439    /// # Errors
440    ///
441    /// Parse errors are returned as an `Err`. Errors can happen when the
442    /// structure of the data is unexpected or if sections are too large for
443    /// example. Note that errors are not returned for malformed *contents* of
444    /// sections here. Sections are generally not individually parsed and each
445    /// returned [`Payload`] needs to be iterated over further to detect all
446    /// errors.
447    ///
448    /// # Examples
449    ///
450    /// An example of reading a wasm file from a stream (`std::io::Read`) and
451    /// incrementally parsing it.
452    ///
453    /// ```
454    /// use std::io::Read;
455    /// use anyhow::Result;
456    /// use wasmparser::{Parser, Chunk, Payload::*};
457    ///
458    /// fn parse(mut reader: impl Read) -> Result<()> {
459    ///     let mut buf = Vec::new();
460    ///     let mut cur = Parser::new(0);
461    ///     let mut eof = false;
462    ///     let mut stack = Vec::new();
463    ///
464    ///     loop {
465    ///         let (payload, consumed) = match cur.parse(&buf, eof)? {
466    ///             Chunk::NeedMoreData(hint) => {
467    ///                 assert!(!eof); // otherwise an error would be returned
468    ///
469    ///                 // Use the hint to preallocate more space, then read
470    ///                 // some more data into our buffer.
471    ///                 //
472    ///                 // Note that the buffer management here is not ideal,
473    ///                 // but it's compact enough to fit in an example!
474    ///                 let len = buf.len();
475    ///                 buf.extend((0..hint).map(|_| 0u8));
476    ///                 let n = reader.read(&mut buf[len..])?;
477    ///                 buf.truncate(len + n);
478    ///                 eof = n == 0;
479    ///                 continue;
480    ///             }
481    ///
482    ///             Chunk::Parsed { consumed, payload } => (payload, consumed),
483    ///         };
484    ///
485    ///         match payload {
486    ///             // Sections for WebAssembly modules
487    ///             Version { .. } => { /* ... */ }
488    ///             TypeSection(_) => { /* ... */ }
489    ///             ImportSection(_) => { /* ... */ }
490    ///             FunctionSection(_) => { /* ... */ }
491    ///             TableSection(_) => { /* ... */ }
492    ///             MemorySection(_) => { /* ... */ }
493    ///             TagSection(_) => { /* ... */ }
494    ///             GlobalSection(_) => { /* ... */ }
495    ///             ExportSection(_) => { /* ... */ }
496    ///             StartSection { .. } => { /* ... */ }
497    ///             ElementSection(_) => { /* ... */ }
498    ///             DataCountSection { .. } => { /* ... */ }
499    ///             DataSection(_) => { /* ... */ }
500    ///
501    ///             // Here we know how many functions we'll be receiving as
502    ///             // `CodeSectionEntry`, so we can prepare for that, and
503    ///             // afterwards we can parse and handle each function
504    ///             // individually.
505    ///             CodeSectionStart { .. } => { /* ... */ }
506    ///             CodeSectionEntry(body) => {
507    ///                 // here we can iterate over `body` to parse the function
508    ///                 // and its locals
509    ///             }
510    ///
511    ///             // Sections for WebAssembly components
512    ///             InstanceSection(_) => { /* ... */ }
513    ///             CoreTypeSection(_) => { /* ... */ }
514    ///             ComponentInstanceSection(_) => { /* ... */ }
515    ///             ComponentAliasSection(_) => { /* ... */ }
516    ///             ComponentTypeSection(_) => { /* ... */ }
517    ///             ComponentCanonicalSection(_) => { /* ... */ }
518    ///             ComponentStartSection { .. } => { /* ... */ }
519    ///             ComponentImportSection(_) => { /* ... */ }
520    ///             ComponentExportSection(_) => { /* ... */ }
521    ///
522    ///             ModuleSection { parser, .. }
523    ///             | ComponentSection { parser, .. } => {
524    ///                 stack.push(cur.clone());
525    ///                 cur = parser.clone();
526    ///             }
527    ///
528    ///             CustomSection(_) => { /* ... */ }
529    ///
530    ///             // most likely you'd return an error here
531    ///             UnknownSection { id, .. } => { /* ... */ }
532    ///
533    ///             // Once we've reached the end of a parser we either resume
534    ///             // at the parent parser or we break out of the loop because
535    ///             // we're done.
536    ///             End(_) => {
537    ///                 if let Some(parent_parser) = stack.pop() {
538    ///                     cur = parent_parser;
539    ///                 } else {
540    ///                     break;
541    ///                 }
542    ///             }
543    ///         }
544    ///
545    ///         // once we're done processing the payload we can forget the
546    ///         // original.
547    ///         buf.drain(..consumed);
548    ///     }
549    ///
550    ///     Ok(())
551    /// }
552    ///
553    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
554    /// ```
555    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
556        let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
557            (&data[..(self.max_size as usize)], true)
558        } else {
559            (data, eof)
560        };
561        // TODO: thread through `offset: u64` to `BinaryReader`, remove
562        // the cast here.
563        let starting_offset = self.offset as usize;
564        let mut reader = BinaryReader::new(data, starting_offset);
565        #[cfg(feature = "features")]
566        {
567            reader.set_features(self.features);
568        }
569        match self.parse_reader(&mut reader, eof) {
570            Ok(payload) => {
571                // Be sure to update our offset with how far we got in the
572                // reader
573                let consumed = reader.original_position() - starting_offset;
574                self.offset += usize_to_u64(consumed);
575                self.max_size -= usize_to_u64(consumed);
576                Ok(Chunk::Parsed {
577                    consumed: consumed,
578                    payload,
579                })
580            }
581            Err(e) => {
582                // If we're at EOF then there's no way we can recover from any
583                // error, so continue to propagate it.
584                if eof {
585                    return Err(e);
586                }
587
588                // If our error doesn't look like it can be resolved with more
589                // data being pulled down, then propagate it, otherwise switch
590                // the error to "feed me please"
591                match e.inner.needed_hint {
592                    Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
593                    None => Err(e),
594                }
595            }
596        }
597    }
598
599    fn parse_reader<'a>(
600        &mut self,
601        reader: &mut BinaryReader<'a>,
602        eof: bool,
603    ) -> Result<Payload<'a>> {
604        use Payload::*;
605
606        match self.state {
607            State::Header => {
608                let start = reader.original_position();
609                let header_version = reader.read_header_version()?;
610                self.encoding = match (header_version >> 16) as u16 {
611                    KIND_MODULE => Encoding::Module,
612                    KIND_COMPONENT => Encoding::Component,
613                    _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
614                };
615                let num = header_version as u16;
616                self.state = State::SectionStart;
617                Ok(Version {
618                    num,
619                    encoding: self.encoding,
620                    range: start..reader.original_position(),
621                })
622            }
623            State::SectionStart => {
624                // If we're at eof and there are no bytes in our buffer, then
625                // that means we reached the end of the data since it's
626                // just a bunch of sections concatenated after the header.
627                if eof && reader.bytes_remaining() == 0 {
628                    return Ok(Payload::End(reader.original_position()));
629                }
630
631                let id_pos = reader.original_position();
632                let id = reader.read_u8()?;
633                if id & 0x80 != 0 {
634                    return Err(BinaryReaderError::new("malformed section id", id_pos));
635                }
636                let len_pos = reader.original_position();
637                let mut len = reader.read_var_u32()?;
638
639                // Test to make sure that this section actually fits within
640                // `Parser::max_size`. This doesn't matter for top-level modules
641                // but it is required for nested modules/components to correctly ensure
642                // that all sections live entirely within their section of the
643                // file.
644                let consumed = reader.original_position() - id_pos;
645                let section_overflow = self
646                    .max_size
647                    .checked_sub(usize_to_u64(consumed))
648                    .and_then(|s| s.checked_sub(len.into()))
649                    .is_none();
650                if section_overflow {
651                    return Err(BinaryReaderError::new("section too large", len_pos));
652                }
653
654                match (self.encoding, id) {
655                    // Sections for both modules and components.
656                    (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
657
658                    // Module sections
659                    (Encoding::Module, TYPE_SECTION) => {
660                        section(reader, len, TypeSectionReader::new, TypeSection)
661                    }
662                    (Encoding::Module, IMPORT_SECTION) => {
663                        section(reader, len, ImportSectionReader::new, ImportSection)
664                    }
665                    (Encoding::Module, FUNCTION_SECTION) => {
666                        section(reader, len, FunctionSectionReader::new, FunctionSection)
667                    }
668                    (Encoding::Module, TABLE_SECTION) => {
669                        section(reader, len, TableSectionReader::new, TableSection)
670                    }
671                    (Encoding::Module, MEMORY_SECTION) => {
672                        section(reader, len, MemorySectionReader::new, MemorySection)
673                    }
674                    (Encoding::Module, GLOBAL_SECTION) => {
675                        section(reader, len, GlobalSectionReader::new, GlobalSection)
676                    }
677                    (Encoding::Module, EXPORT_SECTION) => {
678                        section(reader, len, ExportSectionReader::new, ExportSection)
679                    }
680                    (Encoding::Module, START_SECTION) => {
681                        let (func, range) = single_item(reader, len, "start")?;
682                        Ok(StartSection { func, range })
683                    }
684                    (Encoding::Module, ELEMENT_SECTION) => {
685                        section(reader, len, ElementSectionReader::new, ElementSection)
686                    }
687                    (Encoding::Module, CODE_SECTION) => {
688                        let start = reader.original_position();
689                        let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
690                        let range = start..reader.original_position() + len as usize;
691                        self.state = State::FunctionBody {
692                            remaining: count,
693                            len,
694                        };
695                        Ok(CodeSectionStart {
696                            count,
697                            range,
698                            size: len,
699                        })
700                    }
701                    (Encoding::Module, DATA_SECTION) => {
702                        section(reader, len, DataSectionReader::new, DataSection)
703                    }
704                    (Encoding::Module, DATA_COUNT_SECTION) => {
705                        let (count, range) = single_item(reader, len, "data count")?;
706                        Ok(DataCountSection { count, range })
707                    }
708                    (Encoding::Module, TAG_SECTION) => {
709                        section(reader, len, TagSectionReader::new, TagSection)
710                    }
711
712                    // Component sections
713                    (Encoding::Component, COMPONENT_MODULE_SECTION)
714                    | (Encoding::Component, COMPONENT_SECTION) => {
715                        if len as usize > MAX_WASM_MODULE_SIZE {
716                            bail!(
717                                len_pos,
718                                "{} section is too large",
719                                if id == 1 { "module" } else { "component " }
720                            );
721                        }
722
723                        let range = reader.original_position()
724                            ..reader.original_position() + usize::try_from(len).unwrap();
725                        self.max_size -= u64::from(len);
726                        self.offset += u64::from(len);
727                        let mut parser = Parser::new(usize_to_u64(reader.original_position()));
728                        #[cfg(feature = "features")]
729                        {
730                            parser.features = self.features;
731                        }
732                        parser.max_size = u64::from(len);
733
734                        Ok(match id {
735                            1 => ModuleSection {
736                                parser,
737                                unchecked_range: range,
738                            },
739                            4 => ComponentSection {
740                                parser,
741                                unchecked_range: range,
742                            },
743                            _ => unreachable!(),
744                        })
745                    }
746                    (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
747                        section(reader, len, InstanceSectionReader::new, InstanceSection)
748                    }
749                    (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
750                        section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
751                    }
752                    (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
753                        reader,
754                        len,
755                        ComponentInstanceSectionReader::new,
756                        ComponentInstanceSection,
757                    ),
758                    (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
759                        section(reader, len, SectionLimited::new, ComponentAliasSection)
760                    }
761                    (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
762                        reader,
763                        len,
764                        ComponentTypeSectionReader::new,
765                        ComponentTypeSection,
766                    ),
767                    (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
768                        reader,
769                        len,
770                        ComponentCanonicalSectionReader::new,
771                        ComponentCanonicalSection,
772                    ),
773                    (Encoding::Component, COMPONENT_START_SECTION) => {
774                        let (start, range) = single_item(reader, len, "component start")?;
775                        Ok(ComponentStartSection { start, range })
776                    }
777                    (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
778                        reader,
779                        len,
780                        ComponentImportSectionReader::new,
781                        ComponentImportSection,
782                    ),
783                    (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
784                        reader,
785                        len,
786                        ComponentExportSectionReader::new,
787                        ComponentExportSection,
788                    ),
789                    (_, id) => {
790                        let offset = reader.original_position();
791                        let contents = reader.read_bytes(len as usize)?;
792                        let range = offset..offset + len as usize;
793                        Ok(UnknownSection {
794                            id,
795                            contents,
796                            range,
797                        })
798                    }
799                }
800            }
801
802            // Once we hit 0 remaining incrementally parsed items, with 0
803            // remaining bytes in each section, we're done and can switch back
804            // to parsing sections.
805            State::FunctionBody {
806                remaining: 0,
807                len: 0,
808            } => {
809                self.state = State::SectionStart;
810                self.parse_reader(reader, eof)
811            }
812
813            // ... otherwise trailing bytes with no remaining entries in these
814            // sections indicates an error.
815            State::FunctionBody { remaining: 0, len } => {
816                debug_assert!(len > 0);
817                let offset = reader.original_position();
818                Err(BinaryReaderError::new(
819                    "trailing bytes at end of section",
820                    offset,
821                ))
822            }
823
824            // Functions are relatively easy to parse when we know there's at
825            // least one remaining and at least one byte available to read
826            // things.
827            //
            // We use the remaining length to try to read a u32 size of the
829            // function, and using that size we require the entire function be
830            // resident in memory. This means that we're reading whole chunks of
831            // functions at a time.
832            //
833            // Limiting via `Parser::max_size` (nested parsing) happens above in
834            // `fn parse`, and limiting by our section size happens via
835            // `delimited`. Actual parsing of the function body is delegated to
836            // the caller to iterate over the `FunctionBody` structure.
837            State::FunctionBody { remaining, mut len } => {
838                let body = delimited(reader, &mut len, |r| {
839                    Ok(FunctionBody::new(r.read_reader()?))
840                })?;
841                self.state = State::FunctionBody {
842                    remaining: remaining - 1,
843                    len,
844                };
845                Ok(CodeSectionEntry(body))
846            }
847        }
848    }
849
850    /// Convenience function that can be used to parse a module or component
851    /// that is entirely resident in memory.
852    ///
853    /// This function will parse the `data` provided as a WebAssembly module
854    /// or component.
855    ///
856    /// Note that when this function yields sections that provide parsers,
857    /// no further action is required for those sections as payloads from
858    /// those parsers will be automatically returned.
859    ///
860    /// # Examples
861    ///
862    /// An example of reading a wasm file from a stream (`std::io::Read`) into
863    /// a buffer and then parsing it.
864    ///
865    /// ```
866    /// use std::io::Read;
867    /// use anyhow::Result;
868    /// use wasmparser::{Parser, Chunk, Payload::*};
869    ///
870    /// fn parse(mut reader: impl Read) -> Result<()> {
871    ///     let mut buf = Vec::new();
872    ///     reader.read_to_end(&mut buf)?;
873    ///     let parser = Parser::new(0);
874    ///
875    ///     for payload in parser.parse_all(&buf) {
876    ///         match payload? {
877    ///             // Sections for WebAssembly modules
878    ///             Version { .. } => { /* ... */ }
879    ///             TypeSection(_) => { /* ... */ }
880    ///             ImportSection(_) => { /* ... */ }
881    ///             FunctionSection(_) => { /* ... */ }
882    ///             TableSection(_) => { /* ... */ }
883    ///             MemorySection(_) => { /* ... */ }
884    ///             TagSection(_) => { /* ... */ }
885    ///             GlobalSection(_) => { /* ... */ }
886    ///             ExportSection(_) => { /* ... */ }
887    ///             StartSection { .. } => { /* ... */ }
888    ///             ElementSection(_) => { /* ... */ }
889    ///             DataCountSection { .. } => { /* ... */ }
890    ///             DataSection(_) => { /* ... */ }
891    ///
892    ///             // Here we know how many functions we'll be receiving as
893    ///             // `CodeSectionEntry`, so we can prepare for that, and
894    ///             // afterwards we can parse and handle each function
895    ///             // individually.
896    ///             CodeSectionStart { .. } => { /* ... */ }
897    ///             CodeSectionEntry(body) => {
898    ///                 // here we can iterate over `body` to parse the function
899    ///                 // and its locals
900    ///             }
901    ///
902    ///             // Sections for WebAssembly components
903    ///             ModuleSection { .. } => { /* ... */ }
904    ///             InstanceSection(_) => { /* ... */ }
905    ///             CoreTypeSection(_) => { /* ... */ }
906    ///             ComponentSection { .. } => { /* ... */ }
907    ///             ComponentInstanceSection(_) => { /* ... */ }
908    ///             ComponentAliasSection(_) => { /* ... */ }
909    ///             ComponentTypeSection(_) => { /* ... */ }
910    ///             ComponentCanonicalSection(_) => { /* ... */ }
911    ///             ComponentStartSection { .. } => { /* ... */ }
912    ///             ComponentImportSection(_) => { /* ... */ }
913    ///             ComponentExportSection(_) => { /* ... */ }
914    ///
915    ///             CustomSection(_) => { /* ... */ }
916    ///
917    ///             // most likely you'd return an error here
918    ///             UnknownSection { id, .. } => { /* ... */ }
919    ///
920    ///             // Once we've reached the end of a parser we either resume
921    ///             // at the parent parser or the payload iterator is at its
922    ///             // end and we're done.
923    ///             End(_) => {}
924    ///         }
925    ///     }
926    ///
927    ///     Ok(())
928    /// }
929    ///
930    /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
931    /// ```
932    pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
933        let mut stack = Vec::new();
934        let mut cur = self;
935        let mut done = false;
936        iter::from_fn(move || {
937            if done {
938                return None;
939            }
940            let payload = match cur.parse(data, true) {
941                // Propagate all errors
942                Err(e) => {
943                    done = true;
944                    return Some(Err(e));
945                }
946
947                // This isn't possible because `eof` is always true.
948                Ok(Chunk::NeedMoreData(_)) => unreachable!(),
949
950                Ok(Chunk::Parsed { payload, consumed }) => {
951                    data = &data[consumed..];
952                    payload
953                }
954            };
955
956            match &payload {
957                Payload::ModuleSection { parser, .. }
958                | Payload::ComponentSection { parser, .. } => {
959                    stack.push(cur.clone());
960                    cur = parser.clone();
961                }
962                Payload::End(_) => match stack.pop() {
963                    Some(p) => cur = p,
964                    None => done = true,
965                },
966
967                _ => {}
968            }
969
970            Some(Ok(payload))
971        })
972    }
973
974    /// Skip parsing the code section entirely.
975    ///
976    /// This function can be used to indicate, after receiving
977    /// `CodeSectionStart`, that the section will not be parsed.
978    ///
979    /// The caller will be responsible for skipping `size` bytes (found in the
980    /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
981    /// after the `size` bytes have been skipped.
982    ///
983    /// # Panics
984    ///
985    /// This function will panic if the parser is not in a state where it's
986    /// parsing the code section.
987    ///
988    /// # Examples
989    ///
990    /// ```
991    /// use wasmparser::{Result, Parser, Chunk, Payload::*};
992    /// use core::ops::Range;
993    ///
994    /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
995    ///     let mut parser = Parser::new(0);
996    ///     loop {
997    ///         let payload = match parser.parse(wasm, true)? {
998    ///             Chunk::Parsed { consumed, payload } => {
999    ///                 wasm = &wasm[consumed..];
1000    ///                 payload
1001    ///             }
1002    ///             // this state isn't possible with `eof = true`
1003    ///             Chunk::NeedMoreData(_) => unreachable!(),
1004    ///         };
1005    ///         match payload {
1006    ///             TypeSection(s) => print_range("type section", &s.range()),
1007    ///             ImportSection(s) => print_range("import section", &s.range()),
1008    ///             // .. other sections
1009    ///
1010    ///             // Print the range of the code section we see, but don't
1011    ///             // actually iterate over each individual function.
1012    ///             CodeSectionStart { range, size, .. } => {
1013    ///                 print_range("code section", &range);
1014    ///                 parser.skip_section();
1015    ///                 wasm = &wasm[size as usize..];
1016    ///             }
1017    ///             End(_) => break,
1018    ///             _ => {}
1019    ///         }
1020    ///     }
1021    ///     Ok(())
1022    /// }
1023    ///
1024    /// fn print_range(section: &str, range: &Range<usize>) {
1025    ///     println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1026    /// }
1027    /// ```
1028    pub fn skip_section(&mut self) {
1029        let skip = match self.state {
1030            State::FunctionBody { remaining: _, len } => len,
1031            _ => panic!("wrong state to call `skip_section`"),
1032        };
1033        self.offset += u64::from(skip);
1034        self.max_size -= u64::from(skip);
1035        self.state = State::SectionStart;
1036    }
1037}
1038
/// Widens a `usize` to a `u64`.
///
/// # Panics
///
/// Panics if `a` cannot be represented as a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1042
1043/// Parses an entire section resident in memory into a `Payload`.
1044///
1045/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1046/// to construct the section to return.
1047fn section<'a, T>(
1048    reader: &mut BinaryReader<'a>,
1049    len: u32,
1050    ctor: fn(BinaryReader<'a>) -> Result<T>,
1051    variant: fn(T) -> Payload<'a>,
1052) -> Result<Payload<'a>> {
1053    let reader = reader.skip(|r| {
1054        r.read_bytes(len as usize)?;
1055        Ok(())
1056    })?;
1057    // clear the hint for "need this many more bytes" here because we already
1058    // read all the bytes, so it's not possible to read more bytes if this
1059    // fails.
1060    let reader = ctor(reader).map_err(clear_hint)?;
1061    Ok(variant(reader))
1062}
1063
1064/// Reads a section that is represented by a single uleb-encoded `u32`.
1065fn single_item<'a, T>(
1066    reader: &mut BinaryReader<'a>,
1067    len: u32,
1068    desc: &str,
1069) -> Result<(T, Range<usize>)>
1070where
1071    T: FromReader<'a>,
1072{
1073    let range = reader.original_position()..reader.original_position() + len as usize;
1074    let mut content = reader.skip(|r| {
1075        r.read_bytes(len as usize)?;
1076        Ok(())
1077    })?;
1078    // We can't recover from "unexpected eof" here because our entire section is
1079    // already resident in memory, so clear the hint for how many more bytes are
1080    // expected.
1081    let ret = content.read().map_err(clear_hint)?;
1082    if !content.eof() {
1083        bail!(
1084            content.original_position(),
1085            "unexpected content in the {desc} section",
1086        );
1087    }
1088    Ok((ret, range))
1089}
1090
1091/// Attempts to parse using `f`.
1092///
1093/// This will update `*len` with the number of bytes consumed, and it will cause
1094/// a failure to be returned instead of the number of bytes consumed exceeds
1095/// what `*len` currently is.
1096fn delimited<'a, T>(
1097    reader: &mut BinaryReader<'a>,
1098    len: &mut u32,
1099    f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1100) -> Result<T> {
1101    let start = reader.original_position();
1102    let ret = f(reader)?;
1103    *len = match (reader.original_position() - start)
1104        .try_into()
1105        .ok()
1106        .and_then(|i| len.checked_sub(i))
1107    {
1108        Some(i) => i,
1109        None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1110    };
1111    Ok(ret)
1112}
1113
1114impl Default for Parser {
1115    fn default() -> Parser {
1116        Parser::new(0)
1117    }
1118}
1119
1120impl Payload<'_> {
1121    /// If this `Payload` represents a section in the original wasm module then
1122    /// the section's id and range within the original wasm binary are returned.
1123    ///
1124    /// Not all payloads refer to entire sections, such as the `Version` and
1125    /// `CodeSectionEntry` variants. These variants will return `None` from this
1126    /// function.
1127    ///
1128    /// Otherwise this function will return `Some` where the first element is
1129    /// the byte identifier for the section and the second element is the range
1130    /// of the contents of the section within the original wasm binary.
1131    ///
1132    /// The purpose of this method is to enable tools to easily iterate over
1133    /// entire sections if necessary and handle sections uniformly, for example
1134    /// dropping custom sections while preserving all other sections.
1135    pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1136        use Payload::*;
1137
1138        match self {
1139            Version { .. } => None,
1140            TypeSection(s) => Some((TYPE_SECTION, s.range())),
1141            ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1142            FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1143            TableSection(s) => Some((TABLE_SECTION, s.range())),
1144            MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1145            TagSection(s) => Some((TAG_SECTION, s.range())),
1146            GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1147            ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1148            ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1149            DataSection(s) => Some((DATA_SECTION, s.range())),
1150            StartSection { range, .. } => Some((START_SECTION, range.clone())),
1151            DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1152            CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1153            CodeSectionEntry(_) => None,
1154
1155            ModuleSection {
1156                unchecked_range: range,
1157                ..
1158            } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1159            InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1160            CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1161            ComponentSection {
1162                unchecked_range: range,
1163                ..
1164            } => Some((COMPONENT_SECTION, range.clone())),
1165            ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1166            ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1167            ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1168            ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1169            ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1170            ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1171            ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1172
1173            CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1174
1175            UnknownSection { id, range, .. } => Some((*id, range.clone())),
1176
1177            End(_) => None,
1178        }
1179    }
1180}
1181
1182impl fmt::Debug for Payload<'_> {
1183    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1184        use Payload::*;
1185        match self {
1186            Version {
1187                num,
1188                encoding,
1189                range,
1190            } => f
1191                .debug_struct("Version")
1192                .field("num", num)
1193                .field("encoding", encoding)
1194                .field("range", range)
1195                .finish(),
1196
1197            // Module sections
1198            TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1199            ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1200            FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1201            TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1202            MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1203            TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1204            GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1205            ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1206            ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1207            DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1208            StartSection { func, range } => f
1209                .debug_struct("StartSection")
1210                .field("func", func)
1211                .field("range", range)
1212                .finish(),
1213            DataCountSection { count, range } => f
1214                .debug_struct("DataCountSection")
1215                .field("count", count)
1216                .field("range", range)
1217                .finish(),
1218            CodeSectionStart { count, range, size } => f
1219                .debug_struct("CodeSectionStart")
1220                .field("count", count)
1221                .field("range", range)
1222                .field("size", size)
1223                .finish(),
1224            CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1225
1226            // Component sections
1227            ModuleSection {
1228                parser: _,
1229                unchecked_range: range,
1230            } => f
1231                .debug_struct("ModuleSection")
1232                .field("range", range)
1233                .finish(),
1234            InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1235            CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1236            ComponentSection {
1237                parser: _,
1238                unchecked_range: range,
1239            } => f
1240                .debug_struct("ComponentSection")
1241                .field("range", range)
1242                .finish(),
1243            ComponentInstanceSection(_) => f
1244                .debug_tuple("ComponentInstanceSection")
1245                .field(&"...")
1246                .finish(),
1247            ComponentAliasSection(_) => f
1248                .debug_tuple("ComponentAliasSection")
1249                .field(&"...")
1250                .finish(),
1251            ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1252            ComponentCanonicalSection(_) => f
1253                .debug_tuple("ComponentCanonicalSection")
1254                .field(&"...")
1255                .finish(),
1256            ComponentStartSection { .. } => f
1257                .debug_tuple("ComponentStartSection")
1258                .field(&"...")
1259                .finish(),
1260            ComponentImportSection(_) => f
1261                .debug_tuple("ComponentImportSection")
1262                .field(&"...")
1263                .finish(),
1264            ComponentExportSection(_) => f
1265                .debug_tuple("ComponentExportSection")
1266                .field(&"...")
1267                .finish(),
1268
1269            CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1270
1271            UnknownSection { id, range, .. } => f
1272                .debug_struct("UnknownSection")
1273                .field("id", id)
1274                .field("range", range)
1275                .finish(),
1276
1277            End(offset) => f.debug_tuple("End").field(offset).finish(),
1278        }
1279    }
1280}
1281
1282fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1283    err.inner.needed_hint = None;
1284    err
1285}
1286
1287#[cfg(test)]
1288mod tests {
1289    use super::*;
1290
1291    macro_rules! assert_matches {
1292        ($a:expr, $b:pat $(,)?) => {
1293            match $a {
1294                $b => {}
1295                a => panic!("`{:?}` doesn't match `{}`", a, stringify!($b)),
1296            }
1297        };
1298    }
1299
1300    #[test]
1301    fn header() {
1302        assert!(Parser::default().parse(&[], true).is_err());
1303        assert_matches!(
1304            Parser::default().parse(&[], false),
1305            Ok(Chunk::NeedMoreData(4)),
1306        );
1307        assert_matches!(
1308            Parser::default().parse(b"\0", false),
1309            Ok(Chunk::NeedMoreData(3)),
1310        );
1311        assert_matches!(
1312            Parser::default().parse(b"\0asm", false),
1313            Ok(Chunk::NeedMoreData(4)),
1314        );
1315        assert_matches!(
1316            Parser::default().parse(b"\0asm\x01\0\0\0", false),
1317            Ok(Chunk::Parsed {
1318                consumed: 8,
1319                payload: Payload::Version { num: 1, .. },
1320            }),
1321        );
1322    }
1323
1324    #[test]
1325    fn header_iter() {
1326        for _ in Parser::default().parse_all(&[]) {}
1327        for _ in Parser::default().parse_all(b"\0") {}
1328        for _ in Parser::default().parse_all(b"\0asm") {}
1329        for _ in Parser::default().parse_all(b"\0asm\x01\x01\x01\x01") {}
1330    }
1331
1332    fn parser_after_header() -> Parser {
1333        let mut p = Parser::default();
1334        assert_matches!(
1335            p.parse(b"\0asm\x01\0\0\0", false),
1336            Ok(Chunk::Parsed {
1337                consumed: 8,
1338                payload: Payload::Version {
1339                    num: WASM_MODULE_VERSION,
1340                    encoding: Encoding::Module,
1341                    ..
1342                },
1343            }),
1344        );
1345        p
1346    }
1347
1348    fn parser_after_component_header() -> Parser {
1349        let mut p = Parser::default();
1350        assert_matches!(
1351            p.parse(b"\0asm\x0d\0\x01\0", false),
1352            Ok(Chunk::Parsed {
1353                consumed: 8,
1354                payload: Payload::Version {
1355                    num: WASM_COMPONENT_VERSION,
1356                    encoding: Encoding::Component,
1357                    ..
1358                },
1359            }),
1360        );
1361        p
1362    }
1363
1364    #[test]
1365    fn start_section() {
1366        assert_matches!(
1367            parser_after_header().parse(&[], false),
1368            Ok(Chunk::NeedMoreData(1)),
1369        );
1370        assert!(parser_after_header().parse(&[8], true).is_err());
1371        assert!(parser_after_header().parse(&[8, 1], true).is_err());
1372        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1373        assert_matches!(
1374            parser_after_header().parse(&[8], false),
1375            Ok(Chunk::NeedMoreData(1)),
1376        );
1377        assert_matches!(
1378            parser_after_header().parse(&[8, 1], false),
1379            Ok(Chunk::NeedMoreData(1)),
1380        );
1381        assert_matches!(
1382            parser_after_header().parse(&[8, 2], false),
1383            Ok(Chunk::NeedMoreData(2)),
1384        );
1385        assert_matches!(
1386            parser_after_header().parse(&[8, 1, 1], false),
1387            Ok(Chunk::Parsed {
1388                consumed: 3,
1389                payload: Payload::StartSection { func: 1, .. },
1390            }),
1391        );
1392        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
1393        assert!(parser_after_header().parse(&[8, 0], false).is_err());
1394    }
1395
1396    #[test]
1397    fn end_works() {
1398        assert_matches!(
1399            parser_after_header().parse(&[], true),
1400            Ok(Chunk::Parsed {
1401                consumed: 0,
1402                payload: Payload::End(8),
1403            }),
1404        );
1405    }
1406
1407    #[test]
1408    fn type_section() {
1409        assert!(parser_after_header().parse(&[1], true).is_err());
1410        assert!(parser_after_header().parse(&[1, 0], false).is_err());
1411        assert!(parser_after_header().parse(&[8, 2], true).is_err());
1412        assert_matches!(
1413            parser_after_header().parse(&[1], false),
1414            Ok(Chunk::NeedMoreData(1)),
1415        );
1416        assert_matches!(
1417            parser_after_header().parse(&[1, 1], false),
1418            Ok(Chunk::NeedMoreData(1)),
1419        );
1420        assert_matches!(
1421            parser_after_header().parse(&[1, 1, 1], false),
1422            Ok(Chunk::Parsed {
1423                consumed: 3,
1424                payload: Payload::TypeSection(_),
1425            }),
1426        );
1427        assert_matches!(
1428            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
1429            Ok(Chunk::Parsed {
1430                consumed: 3,
1431                payload: Payload::TypeSection(_),
1432            }),
1433        );
1434    }
1435
1436    #[test]
1437    fn custom_section() {
1438        assert!(parser_after_header().parse(&[0], true).is_err());
1439        assert!(parser_after_header().parse(&[0, 0], false).is_err());
1440        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
1441        assert_matches!(
1442            parser_after_header().parse(&[0, 2, 1], false),
1443            Ok(Chunk::NeedMoreData(1)),
1444        );
1445        assert_custom(
1446            parser_after_header().parse(&[0, 1, 0], false).unwrap(),
1447            3,
1448            "",
1449            11,
1450            b"",
1451            Range { start: 10, end: 11 },
1452        );
1453        assert_custom(
1454            parser_after_header()
1455                .parse(&[0, 2, 1, b'a'], false)
1456                .unwrap(),
1457            4,
1458            "a",
1459            12,
1460            b"",
1461            Range { start: 10, end: 12 },
1462        );
1463        assert_custom(
1464            parser_after_header()
1465                .parse(&[0, 2, 0, b'a'], false)
1466                .unwrap(),
1467            4,
1468            "",
1469            11,
1470            b"a",
1471            Range { start: 10, end: 12 },
1472        );
1473    }
1474
1475    fn assert_custom(
1476        chunk: Chunk<'_>,
1477        expected_consumed: usize,
1478        expected_name: &str,
1479        expected_data_offset: usize,
1480        expected_data: &[u8],
1481        expected_range: Range<usize>,
1482    ) {
1483        let (consumed, s) = match chunk {
1484            Chunk::Parsed {
1485                consumed,
1486                payload: Payload::CustomSection(s),
1487            } => (consumed, s),
1488            _ => panic!("not a custom section payload"),
1489        };
1490        assert_eq!(consumed, expected_consumed);
1491        assert_eq!(s.name(), expected_name);
1492        assert_eq!(s.data_offset(), expected_data_offset);
1493        assert_eq!(s.data(), expected_data);
1494        assert_eq!(s.range(), expected_range);
1495    }
1496
1497    #[test]
1498    fn function_section() {
1499        assert!(parser_after_header().parse(&[10], true).is_err());
1500        assert!(parser_after_header().parse(&[10, 0], true).is_err());
1501        assert!(parser_after_header().parse(&[10, 1], true).is_err());
1502        assert_matches!(
1503            parser_after_header().parse(&[10], false),
1504            Ok(Chunk::NeedMoreData(1))
1505        );
1506        assert_matches!(
1507            parser_after_header().parse(&[10, 1], false),
1508            Ok(Chunk::NeedMoreData(1))
1509        );
1510        let mut p = parser_after_header();
1511        assert_matches!(
1512            p.parse(&[10, 1, 0], false),
1513            Ok(Chunk::Parsed {
1514                consumed: 3,
1515                payload: Payload::CodeSectionStart { count: 0, .. },
1516            }),
1517        );
1518        assert_matches!(
1519            p.parse(&[], true),
1520            Ok(Chunk::Parsed {
1521                consumed: 0,
1522                payload: Payload::End(11),
1523            }),
1524        );
1525        let mut p = parser_after_header();
1526        assert_matches!(
1527            p.parse(&[10, 2, 1, 0], false),
1528            Ok(Chunk::Parsed {
1529                consumed: 3,
1530                payload: Payload::CodeSectionStart { count: 1, .. },
1531            }),
1532        );
1533        assert_matches!(
1534            p.parse(&[0], false),
1535            Ok(Chunk::Parsed {
1536                consumed: 1,
1537                payload: Payload::CodeSectionEntry(_),
1538            }),
1539        );
1540        assert_matches!(
1541            p.parse(&[], true),
1542            Ok(Chunk::Parsed {
1543                consumed: 0,
1544                payload: Payload::End(12),
1545            }),
1546        );
1547
1548        // 1 byte section with 1 function can't read the function body because
1549        // the section is too small
1550        let mut p = parser_after_header();
1551        assert_matches!(
1552            p.parse(&[10, 1, 1], false),
1553            Ok(Chunk::Parsed {
1554                consumed: 3,
1555                payload: Payload::CodeSectionStart { count: 1, .. },
1556            }),
1557        );
1558        assert_eq!(
1559            p.parse(&[0], false).unwrap_err().message(),
1560            "unexpected end-of-file"
1561        );
1562
1563        // section with 2 functions but section is cut off
1564        let mut p = parser_after_header();
1565        assert_matches!(
1566            p.parse(&[10, 2, 2], false),
1567            Ok(Chunk::Parsed {
1568                consumed: 3,
1569                payload: Payload::CodeSectionStart { count: 2, .. },
1570            }),
1571        );
1572        assert_matches!(
1573            p.parse(&[0], false),
1574            Ok(Chunk::Parsed {
1575                consumed: 1,
1576                payload: Payload::CodeSectionEntry(_),
1577            }),
1578        );
1579        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1580        assert_eq!(
1581            p.parse(&[0], false).unwrap_err().message(),
1582            "unexpected end-of-file",
1583        );
1584
1585        // trailing data is bad
1586        let mut p = parser_after_header();
1587        assert_matches!(
1588            p.parse(&[10, 3, 1], false),
1589            Ok(Chunk::Parsed {
1590                consumed: 3,
1591                payload: Payload::CodeSectionStart { count: 1, .. },
1592            }),
1593        );
1594        assert_matches!(
1595            p.parse(&[0], false),
1596            Ok(Chunk::Parsed {
1597                consumed: 1,
1598                payload: Payload::CodeSectionEntry(_),
1599            }),
1600        );
1601        assert_eq!(
1602            p.parse(&[0], false).unwrap_err().message(),
1603            "trailing bytes at end of section",
1604        );
1605    }
1606
1607    #[test]
1608    fn single_module() {
1609        let mut p = parser_after_component_header();
1610        assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));
1611
1612        // A module that's 8 bytes in length
1613        let mut sub = match p.parse(&[1, 8], false) {
1614            Ok(Chunk::Parsed {
1615                consumed: 2,
1616                payload: Payload::ModuleSection { parser, .. },
1617            }) => parser,
1618            other => panic!("bad parse {:?}", other),
1619        };
1620
1621        // Parse the header of the submodule with the sub-parser.
1622        assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
1623        assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
1624        assert_matches!(
1625            sub.parse(b"\0asm\x01\0\0\0", false),
1626            Ok(Chunk::Parsed {
1627                consumed: 8,
1628                payload: Payload::Version {
1629                    num: 1,
1630                    encoding: Encoding::Module,
1631                    ..
1632                },
1633            }),
1634        );
1635
1636        // The sub-parser should be byte-limited so the next byte shouldn't get
1637        // consumed, it's intended for the parent parser.
1638        assert_matches!(
1639            sub.parse(&[10], false),
1640            Ok(Chunk::Parsed {
1641                consumed: 0,
1642                payload: Payload::End(18),
1643            }),
1644        );
1645
1646        // The parent parser should now be back to resuming, and we simulate it
1647        // being done with bytes to ensure that it's safely at the end,
1648        // completing the module code section.
1649        assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
1650        assert_matches!(
1651            p.parse(&[], true),
1652            Ok(Chunk::Parsed {
1653                consumed: 0,
1654                payload: Payload::End(18),
1655            }),
1656        );
1657    }
1658
1659    #[test]
1660    fn nested_section_too_big() {
1661        let mut p = parser_after_component_header();
1662
1663        // A module that's 10 bytes in length
1664        let mut sub = match p.parse(&[1, 10], false) {
1665            Ok(Chunk::Parsed {
1666                consumed: 2,
1667                payload: Payload::ModuleSection { parser, .. },
1668            }) => parser,
1669            other => panic!("bad parse {:?}", other),
1670        };
1671
1672        // use 8 bytes to parse the header, leaving 2 remaining bytes in our
1673        // module.
1674        assert_matches!(
1675            sub.parse(b"\0asm\x01\0\0\0", false),
1676            Ok(Chunk::Parsed {
1677                consumed: 8,
1678                payload: Payload::Version { num: 1, .. },
1679            }),
1680        );
1681
1682        // We can't parse a section which declares its bigger than the outer
1683        // module. This is a custom section, one byte big, with one content byte. The
1684        // content byte, however, lives outside of the parent's module code
1685        // section.
1686        assert_eq!(
1687            sub.parse(&[0, 1, 0], false).unwrap_err().message(),
1688            "section too large",
1689        );
1690    }
1691}
wasmparser/parser.rs

wasmparser/
parser.rs