wasmparser/parser.rs
1use crate::binary_reader::WASM_MAGIC_NUMBER;
2use crate::prelude::*;
3use crate::CoreTypeSectionReader;
4#[cfg(feature = "features")]
5use crate::WasmFeatures;
6use crate::{
7 limits::MAX_WASM_MODULE_SIZE, BinaryReader, BinaryReaderError, ComponentCanonicalSectionReader,
8 ComponentExportSectionReader, ComponentImportSectionReader, ComponentInstanceSectionReader,
9 ComponentStartFunction, ComponentTypeSectionReader, CustomSectionReader, DataSectionReader,
10 ElementSectionReader, ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader,
11 GlobalSectionReader, ImportSectionReader, InstanceSectionReader, MemorySectionReader, Result,
12 SectionLimited, TableSectionReader, TagSectionReader, TypeSectionReader,
13};
14use core::fmt;
15use core::iter;
16use core::ops::Range;
17
/// Version number expected in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// Version number expected in the header of a WebAssembly component.
//
// Note that this started at `0xa` and we're incrementing up from there. When
// the component model is stabilized this will become 0x1. The changes here are:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

// The "kind" half of the header: the parser reads it from the upper 16 bits of
// the 32-bit header version word to tell modules and components apart.
const KIND_MODULE: u16 = 0x00;
const KIND_COMPONENT: u16 = 0x01;
32
/// The binary formats that a [`Parser`] is able to decode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
41
42/// An incremental parser of a binary WebAssembly module or component.
43///
44/// This type is intended to be used to incrementally parse a WebAssembly module
45/// or component as bytes become available for the module. This can also be used
46/// to parse modules or components that are already entirely resident within memory.
47///
48/// This primary function for a parser is the [`Parser::parse`] function which
49/// will incrementally consume input. You can also use the [`Parser::parse_all`]
50/// function to parse a module or component that is entirely resident in memory.
51#[derive(Debug, Clone)]
52pub struct Parser {
53 state: State,
54 offset: u64,
55 max_size: u64,
56 encoding: Encoding,
57 #[cfg(feature = "features")]
58 features: WasmFeatures,
59}
60
/// Tracks what the parser expects to find next in the input.
#[derive(Clone, Debug)]
enum State {
    /// Nothing has been parsed yet; the binary header comes next.
    Header,
    /// Positioned at a section boundary (or at the end of the input).
    SectionStart,
    /// In the middle of the code section.
    FunctionBody {
        /// Number of function bodies that still have to be yielded.
        remaining: u32,
        /// Number of bytes of the code section not yet consumed.
        len: u32,
    },
}
67
/// A successful return payload from [`Parser::parse`].
///
/// On success one of two possible values can be returned, either that more data
/// is needed to continue parsing or a chunk of the input was parsed, indicating
/// how much of it was parsed.
#[derive(Debug)]
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// [`Parser::parse`]. The `u64` value here is a hint as to how many more
    /// bytes are needed to continue parsing.
    NeedMoreData(u64),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to [`Parser::parse`] were
        /// consumed to produce `payload`. The caller is expected to drop these
        /// bytes from its buffer rather than pass them to the parser again.
        consumed: usize,
        /// The value that we actually parsed.
        payload: Payload<'a>,
    },
}
90
/// Values that can be parsed from a WebAssembly module or component.
///
/// This enumeration is all possible chunks of pieces that can be parsed by a
/// [`Parser`] from a binary WebAssembly module or component. Note that for many
/// sections the entire section is parsed all at once, whereas other sections,
/// like the code section, are parsed incrementally. This is a distinction where some
/// sections, like the type section, are required to be fully resident in memory
/// (fully downloaded) before proceeding. Other sections, like the code section,
/// can be processed in a streaming fashion where each function is extracted
/// individually so it can possibly be shipped to another thread while you wait
/// for more functions to get downloaded.
///
/// Note that payloads, when returned, do not indicate that the module or component
/// is valid. For example when you receive a `Payload::TypeSection` the type
/// section itself has not yet actually been parsed. The reader returned will be
/// able to parse it, but you'll have to actually iterate the reader to do the
/// full parse. Each payload returned is intended to be a *window* into the
/// original `data` passed to [`Parser::parse`] which can be further processed
/// if necessary.
pub enum Payload<'a> {
    /// Indicates the header of a WebAssembly module or component.
    Version {
        /// The version number found in the header.
        num: u16,
        /// The encoding format being parsed.
        encoding: Encoding,
        /// The range of bytes that were parsed to consume the header of the
        /// module or component. Note that this range is relative to the start
        /// of the byte stream.
        range: Range<usize>,
    },

    /// A module type section was received and the provided reader can be
    /// used to parse the contents of the type section.
    TypeSection(TypeSectionReader<'a>),
    /// A module import section was received and the provided reader can be
    /// used to parse the contents of the import section.
    ImportSection(ImportSectionReader<'a>),
    /// A module function section was received and the provided reader can be
    /// used to parse the contents of the function section.
    FunctionSection(FunctionSectionReader<'a>),
    /// A module table section was received and the provided reader can be
    /// used to parse the contents of the table section.
    TableSection(TableSectionReader<'a>),
    /// A module memory section was received and the provided reader can be
    /// used to parse the contents of the memory section.
    MemorySection(MemorySectionReader<'a>),
    /// A module tag section was received, and the provided reader can be
    /// used to parse the contents of the tag section.
    TagSection(TagSectionReader<'a>),
    /// A module global section was received and the provided reader can be
    /// used to parse the contents of the global section.
    GlobalSection(GlobalSectionReader<'a>),
    /// A module export section was received, and the provided reader can be
    /// used to parse the contents of the export section.
    ExportSection(ExportSectionReader<'a>),
    /// A module start section was received.
    StartSection {
        /// The start function index
        func: u32,
        /// The range of bytes that specify the `func` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module element section was received and the provided reader can be
    /// used to parse the contents of the element section.
    ElementSection(ElementSectionReader<'a>),
    /// A module data count section was received.
    DataCountSection {
        /// The number of data segments.
        count: u32,
        /// The range of bytes that specify the `count` field, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
    },
    /// A module data section was received and the provided reader can be
    /// used to parse the contents of the data section.
    DataSection(DataSectionReader<'a>),
    /// Indicator of the start of the code section of a WebAssembly module.
    ///
    /// This entry is returned whenever the code section starts. The `count`
    /// field indicates how many entries are in this code section. After
    /// receiving this start marker you're guaranteed that the next `count`
    /// items will be either `CodeSectionEntry` or an error will be returned.
    ///
    /// This, unlike other sections, is intended to be used for streaming the
    /// contents of the code section. The code section is not required to be
    /// fully resident in memory when we parse it. Instead a [`Parser`] is
    /// capable of parsing a code section piece by piece.
    CodeSectionStart {
        /// The number of functions in this section.
        count: u32,
        /// The range of bytes that represent this section, specified in
        /// offsets relative to the start of the byte stream.
        range: Range<usize>,
        /// The size, in bytes, of the remaining contents of this section.
        ///
        /// This can be used in combination with [`Parser::skip_section`]
        /// where the caller will know how many bytes to skip before feeding
        /// bytes into `Parser` again.
        size: u32,
    },
    /// An entry of the code section, a function, was parsed from a WebAssembly
    /// module.
    ///
    /// This entry indicates that a function was successfully received from the
    /// code section, and the payload here is the window into the original input
    /// where the function resides. Note that the function itself has not been
    /// parsed, it's only been outlined. You'll need to process the
    /// `FunctionBody` provided to test whether it parses and/or is valid.
    CodeSectionEntry(FunctionBody<'a>),

    /// A core module section was received and the provided parser can be
    /// used to parse the nested module.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ModuleSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested module into the parent [`Parser`].
    ModuleSection {
        /// The parser for the nested module.
        parser: Parser,
        /// The range of bytes that represent the nested module in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A core instance section was received and the provided parser can be
    /// used to parse the contents of the core instance section.
    ///
    /// Currently this section is only parsed in a component.
    InstanceSection(InstanceSectionReader<'a>),
    /// A core type section was received and the provided parser can be
    /// used to parse the contents of the core type section.
    ///
    /// Currently this section is only parsed in a component.
    CoreTypeSection(CoreTypeSectionReader<'a>),
    /// A component section from a WebAssembly component was received and the
    /// provided parser can be used to parse the nested component.
    ///
    /// This variant is special in that it returns a sub-`Parser`. Upon
    /// receiving a `ComponentSection` it is expected that the returned
    /// `Parser` will be used instead of the parent `Parser` until the parse has
    /// finished. You'll need to feed data into the `Parser` returned until it
    /// returns `Payload::End`. After that you'll switch back to the parent
    /// parser to resume parsing the rest of the current component.
    ///
    /// Note that binaries will not be parsed correctly if you feed the data for
    /// a nested component into the parent [`Parser`].
    ComponentSection {
        /// The parser for the nested component.
        parser: Parser,
        /// The range of bytes that represent the nested component in the
        /// original byte stream.
        ///
        /// Note that, to better support streaming parsing and validation, the
        /// validator does *not* check that this range is in bounds.
        unchecked_range: Range<usize>,
    },
    /// A component instance section was received and the provided reader can be
    /// used to parse the contents of the component instance section.
    ComponentInstanceSection(ComponentInstanceSectionReader<'a>),
    /// A component alias section was received and the provided reader can be
    /// used to parse the contents of the component alias section.
    ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>),
    /// A component type section was received and the provided reader can be
    /// used to parse the contents of the component type section.
    ComponentTypeSection(ComponentTypeSectionReader<'a>),
    /// A component canonical section was received and the provided reader can be
    /// used to parse the contents of the component canonical section.
    ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>),
    /// A component start section was received.
    ComponentStartSection {
        /// The start function description.
        start: ComponentStartFunction,
        /// The range of bytes that specify the `start` field.
        range: Range<usize>,
    },
    /// A component import section was received and the provided reader can be
    /// used to parse the contents of the component import section.
    ComponentImportSection(ComponentImportSectionReader<'a>),
    /// A component export section was received, and the provided reader can be
    /// used to parse the contents of the component export section.
    ComponentExportSection(ComponentExportSectionReader<'a>),

    /// A module or component custom section was received.
    CustomSection(CustomSectionReader<'a>),

    /// An unknown section was found.
    ///
    /// This variant is returned for all unknown sections encountered. This
    /// likely wants to be interpreted as an error by consumers of the parser,
    /// but this can also be used to parse sections currently unsupported by
    /// the parser.
    UnknownSection {
        /// The 8-bit identifier for this section.
        id: u8,
        /// The contents of this section.
        contents: &'a [u8],
        /// The range of bytes, relative to the start of the original data
        /// stream, that the contents of this section reside in.
        range: Range<usize>,
    },

    /// The end of the WebAssembly module or component was reached.
    ///
    /// The value is the offset in the input byte stream where the end
    /// was reached.
    End(usize),
}
308
// Section ids used by core WebAssembly modules. The custom section id (0) is
// accepted by the parser for both modules and components.
const CUSTOM_SECTION: u8 = 0;
const TYPE_SECTION: u8 = 1;
const IMPORT_SECTION: u8 = 2;
const FUNCTION_SECTION: u8 = 3;
const TABLE_SECTION: u8 = 4;
const MEMORY_SECTION: u8 = 5;
const GLOBAL_SECTION: u8 = 6;
const EXPORT_SECTION: u8 = 7;
const START_SECTION: u8 = 8;
const ELEMENT_SECTION: u8 = 9;
const CODE_SECTION: u8 = 10;
const DATA_SECTION: u8 = 11;
const DATA_COUNT_SECTION: u8 = 12;
const TAG_SECTION: u8 = 13;

// Section ids used by components. These numerically overlap with the module
// ids above, so an id is only meaningful together with the binary's encoding.
const COMPONENT_MODULE_SECTION: u8 = 1;
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
const COMPONENT_SECTION: u8 = 4;
const COMPONENT_INSTANCE_SECTION: u8 = 5;
const COMPONENT_ALIAS_SECTION: u8 = 6;
const COMPONENT_TYPE_SECTION: u8 = 7;
const COMPONENT_CANONICAL_SECTION: u8 = 8;
const COMPONENT_START_SECTION: u8 = 9;
const COMPONENT_IMPORT_SECTION: u8 = 10;
const COMPONENT_EXPORT_SECTION: u8 = 11;
335
336impl Parser {
337 /// Creates a new parser.
338 ///
339 /// Reports errors and ranges relative to `offset` provided, where `offset`
340 /// is some logical offset within the input stream that we're parsing.
341 pub fn new(offset: u64) -> Parser {
342 Parser {
343 state: State::Header,
344 offset,
345 max_size: u64::MAX,
346 // Assume the encoding is a module until we know otherwise
347 encoding: Encoding::Module,
348 #[cfg(feature = "features")]
349 features: WasmFeatures::all(),
350 }
351 }
352
353 /// Tests whether `bytes` looks like a core WebAssembly module.
354 ///
355 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
356 /// starts with the standard core WebAssembly header.
357 pub fn is_core_wasm(bytes: &[u8]) -> bool {
358 const HEADER: [u8; 8] = [
359 WASM_MAGIC_NUMBER[0],
360 WASM_MAGIC_NUMBER[1],
361 WASM_MAGIC_NUMBER[2],
362 WASM_MAGIC_NUMBER[3],
363 WASM_MODULE_VERSION.to_le_bytes()[0],
364 WASM_MODULE_VERSION.to_le_bytes()[1],
365 KIND_MODULE.to_le_bytes()[0],
366 KIND_MODULE.to_le_bytes()[1],
367 ];
368 bytes.starts_with(&HEADER)
369 }
370
371 /// Tests whether `bytes` looks like a WebAssembly component.
372 ///
373 /// This will inspect the first 8 bytes of `bytes` and return `true` if it
374 /// starts with the standard WebAssembly component header.
375 pub fn is_component(bytes: &[u8]) -> bool {
376 const HEADER: [u8; 8] = [
377 WASM_MAGIC_NUMBER[0],
378 WASM_MAGIC_NUMBER[1],
379 WASM_MAGIC_NUMBER[2],
380 WASM_MAGIC_NUMBER[3],
381 WASM_COMPONENT_VERSION.to_le_bytes()[0],
382 WASM_COMPONENT_VERSION.to_le_bytes()[1],
383 KIND_COMPONENT.to_le_bytes()[0],
384 KIND_COMPONENT.to_le_bytes()[1],
385 ];
386 bytes.starts_with(&HEADER)
387 }
388
    /// Returns the currently active set of wasm features that this parser is
    /// using while parsing.
    ///
    /// The set is returned by value; use [`Parser::set_features`] to change
    /// what the parser uses.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    #[cfg(feature = "features")]
    pub fn features(&self) -> WasmFeatures {
        self.features
    }
399
    /// Sets the wasm features active while parsing to the `features` specified.
    ///
    /// The new set is handed to the reader created by each subsequent call to
    /// [`Parser::parse`], and is copied into the nested parsers returned for
    /// module and component sections.
    ///
    /// The default set of features is [`WasmFeatures::all()`] for new parsers.
    ///
    /// For more information see [`BinaryReader::new`].
    #[cfg(feature = "features")]
    pub fn set_features(&mut self, features: WasmFeatures) {
        self.features = features;
    }
409
410 /// Attempts to parse a chunk of data.
411 ///
412 /// This method will attempt to parse the next incremental portion of a
413 /// WebAssembly binary. Data available for the module or component is
414 /// provided as `data`, and the data can be incomplete if more data has yet
415 /// to arrive. The `eof` flag indicates whether more data will ever be received.
416 ///
417 /// There are two ways parsing can succeed with this method:
418 ///
    /// * `Chunk::NeedMoreData` - this indicates that there are not enough bytes
420 /// in `data` to parse a payload. The caller needs to wait for more data to
421 /// be available in this situation before calling this method again. It is
422 /// guaranteed that this is only returned if `eof` is `false`.
423 ///
424 /// * `Chunk::Parsed` - this indicates that a chunk of the input was
425 /// successfully parsed. The payload is available in this variant of what
    /// was parsed, and this also indicates how many bytes of `data` were
427 /// consumed. It's expected that the caller will not provide these bytes
428 /// back to the [`Parser`] again.
429 ///
430 /// Note that all `Chunk` return values are connected, with a lifetime, to
431 /// the input buffer. Each parsed chunk borrows the input buffer and is a
432 /// view into it for successfully parsed chunks.
433 ///
434 /// It is expected that you'll call this method until `Payload::End` is
435 /// reached, at which point you're guaranteed that the parse has completed.
436 /// Note that complete parsing, for the top-level module or component,
437 /// implies that `data` is empty and `eof` is `true`.
438 ///
439 /// # Errors
440 ///
441 /// Parse errors are returned as an `Err`. Errors can happen when the
442 /// structure of the data is unexpected or if sections are too large for
443 /// example. Note that errors are not returned for malformed *contents* of
444 /// sections here. Sections are generally not individually parsed and each
445 /// returned [`Payload`] needs to be iterated over further to detect all
446 /// errors.
447 ///
448 /// # Examples
449 ///
450 /// An example of reading a wasm file from a stream (`std::io::Read`) and
451 /// incrementally parsing it.
452 ///
453 /// ```
454 /// use std::io::Read;
455 /// use anyhow::Result;
456 /// use wasmparser::{Parser, Chunk, Payload::*};
457 ///
458 /// fn parse(mut reader: impl Read) -> Result<()> {
459 /// let mut buf = Vec::new();
460 /// let mut cur = Parser::new(0);
461 /// let mut eof = false;
462 /// let mut stack = Vec::new();
463 ///
464 /// loop {
465 /// let (payload, consumed) = match cur.parse(&buf, eof)? {
466 /// Chunk::NeedMoreData(hint) => {
467 /// assert!(!eof); // otherwise an error would be returned
468 ///
469 /// // Use the hint to preallocate more space, then read
470 /// // some more data into our buffer.
471 /// //
472 /// // Note that the buffer management here is not ideal,
473 /// // but it's compact enough to fit in an example!
474 /// let len = buf.len();
475 /// buf.extend((0..hint).map(|_| 0u8));
476 /// let n = reader.read(&mut buf[len..])?;
477 /// buf.truncate(len + n);
478 /// eof = n == 0;
479 /// continue;
480 /// }
481 ///
482 /// Chunk::Parsed { consumed, payload } => (payload, consumed),
483 /// };
484 ///
485 /// match payload {
486 /// // Sections for WebAssembly modules
487 /// Version { .. } => { /* ... */ }
488 /// TypeSection(_) => { /* ... */ }
489 /// ImportSection(_) => { /* ... */ }
490 /// FunctionSection(_) => { /* ... */ }
491 /// TableSection(_) => { /* ... */ }
492 /// MemorySection(_) => { /* ... */ }
493 /// TagSection(_) => { /* ... */ }
494 /// GlobalSection(_) => { /* ... */ }
495 /// ExportSection(_) => { /* ... */ }
496 /// StartSection { .. } => { /* ... */ }
497 /// ElementSection(_) => { /* ... */ }
498 /// DataCountSection { .. } => { /* ... */ }
499 /// DataSection(_) => { /* ... */ }
500 ///
501 /// // Here we know how many functions we'll be receiving as
502 /// // `CodeSectionEntry`, so we can prepare for that, and
503 /// // afterwards we can parse and handle each function
504 /// // individually.
505 /// CodeSectionStart { .. } => { /* ... */ }
506 /// CodeSectionEntry(body) => {
507 /// // here we can iterate over `body` to parse the function
508 /// // and its locals
509 /// }
510 ///
511 /// // Sections for WebAssembly components
512 /// InstanceSection(_) => { /* ... */ }
513 /// CoreTypeSection(_) => { /* ... */ }
514 /// ComponentInstanceSection(_) => { /* ... */ }
515 /// ComponentAliasSection(_) => { /* ... */ }
516 /// ComponentTypeSection(_) => { /* ... */ }
517 /// ComponentCanonicalSection(_) => { /* ... */ }
518 /// ComponentStartSection { .. } => { /* ... */ }
519 /// ComponentImportSection(_) => { /* ... */ }
520 /// ComponentExportSection(_) => { /* ... */ }
521 ///
522 /// ModuleSection { parser, .. }
523 /// | ComponentSection { parser, .. } => {
524 /// stack.push(cur.clone());
525 /// cur = parser.clone();
526 /// }
527 ///
528 /// CustomSection(_) => { /* ... */ }
529 ///
530 /// // most likely you'd return an error here
531 /// UnknownSection { id, .. } => { /* ... */ }
532 ///
533 /// // Once we've reached the end of a parser we either resume
534 /// // at the parent parser or we break out of the loop because
535 /// // we're done.
536 /// End(_) => {
537 /// if let Some(parent_parser) = stack.pop() {
538 /// cur = parent_parser;
539 /// } else {
540 /// break;
541 /// }
542 /// }
543 /// }
544 ///
545 /// // once we're done processing the payload we can forget the
546 /// // original.
547 /// buf.drain(..consumed);
548 /// }
549 ///
550 /// Ok(())
551 /// }
552 ///
553 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
554 /// ```
555 pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
556 let (data, eof) = if usize_to_u64(data.len()) > self.max_size {
557 (&data[..(self.max_size as usize)], true)
558 } else {
559 (data, eof)
560 };
561 // TODO: thread through `offset: u64` to `BinaryReader`, remove
562 // the cast here.
563 let starting_offset = self.offset as usize;
564 let mut reader = BinaryReader::new(data, starting_offset);
565 #[cfg(feature = "features")]
566 {
567 reader.set_features(self.features);
568 }
569 match self.parse_reader(&mut reader, eof) {
570 Ok(payload) => {
571 // Be sure to update our offset with how far we got in the
572 // reader
573 let consumed = reader.original_position() - starting_offset;
574 self.offset += usize_to_u64(consumed);
575 self.max_size -= usize_to_u64(consumed);
576 Ok(Chunk::Parsed {
577 consumed: consumed,
578 payload,
579 })
580 }
581 Err(e) => {
582 // If we're at EOF then there's no way we can recover from any
583 // error, so continue to propagate it.
584 if eof {
585 return Err(e);
586 }
587
588 // If our error doesn't look like it can be resolved with more
589 // data being pulled down, then propagate it, otherwise switch
590 // the error to "feed me please"
591 match e.inner.needed_hint {
592 Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))),
593 None => Err(e),
594 }
595 }
596 }
597 }
598
599 fn parse_reader<'a>(
600 &mut self,
601 reader: &mut BinaryReader<'a>,
602 eof: bool,
603 ) -> Result<Payload<'a>> {
604 use Payload::*;
605
606 match self.state {
607 State::Header => {
608 let start = reader.original_position();
609 let header_version = reader.read_header_version()?;
610 self.encoding = match (header_version >> 16) as u16 {
611 KIND_MODULE => Encoding::Module,
612 KIND_COMPONENT => Encoding::Component,
613 _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"),
614 };
615 let num = header_version as u16;
616 self.state = State::SectionStart;
617 Ok(Version {
618 num,
619 encoding: self.encoding,
620 range: start..reader.original_position(),
621 })
622 }
623 State::SectionStart => {
624 // If we're at eof and there are no bytes in our buffer, then
625 // that means we reached the end of the data since it's
626 // just a bunch of sections concatenated after the header.
627 if eof && reader.bytes_remaining() == 0 {
628 return Ok(Payload::End(reader.original_position()));
629 }
630
631 let id_pos = reader.original_position();
632 let id = reader.read_u8()?;
633 if id & 0x80 != 0 {
634 return Err(BinaryReaderError::new("malformed section id", id_pos));
635 }
636 let len_pos = reader.original_position();
637 let mut len = reader.read_var_u32()?;
638
639 // Test to make sure that this section actually fits within
640 // `Parser::max_size`. This doesn't matter for top-level modules
641 // but it is required for nested modules/components to correctly ensure
642 // that all sections live entirely within their section of the
643 // file.
644 let consumed = reader.original_position() - id_pos;
645 let section_overflow = self
646 .max_size
647 .checked_sub(usize_to_u64(consumed))
648 .and_then(|s| s.checked_sub(len.into()))
649 .is_none();
650 if section_overflow {
651 return Err(BinaryReaderError::new("section too large", len_pos));
652 }
653
654 match (self.encoding, id) {
655 // Sections for both modules and components.
656 (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection),
657
658 // Module sections
659 (Encoding::Module, TYPE_SECTION) => {
660 section(reader, len, TypeSectionReader::new, TypeSection)
661 }
662 (Encoding::Module, IMPORT_SECTION) => {
663 section(reader, len, ImportSectionReader::new, ImportSection)
664 }
665 (Encoding::Module, FUNCTION_SECTION) => {
666 section(reader, len, FunctionSectionReader::new, FunctionSection)
667 }
668 (Encoding::Module, TABLE_SECTION) => {
669 section(reader, len, TableSectionReader::new, TableSection)
670 }
671 (Encoding::Module, MEMORY_SECTION) => {
672 section(reader, len, MemorySectionReader::new, MemorySection)
673 }
674 (Encoding::Module, GLOBAL_SECTION) => {
675 section(reader, len, GlobalSectionReader::new, GlobalSection)
676 }
677 (Encoding::Module, EXPORT_SECTION) => {
678 section(reader, len, ExportSectionReader::new, ExportSection)
679 }
680 (Encoding::Module, START_SECTION) => {
681 let (func, range) = single_item(reader, len, "start")?;
682 Ok(StartSection { func, range })
683 }
684 (Encoding::Module, ELEMENT_SECTION) => {
685 section(reader, len, ElementSectionReader::new, ElementSection)
686 }
687 (Encoding::Module, CODE_SECTION) => {
688 let start = reader.original_position();
689 let count = delimited(reader, &mut len, |r| r.read_var_u32())?;
690 let range = start..reader.original_position() + len as usize;
691 self.state = State::FunctionBody {
692 remaining: count,
693 len,
694 };
695 Ok(CodeSectionStart {
696 count,
697 range,
698 size: len,
699 })
700 }
701 (Encoding::Module, DATA_SECTION) => {
702 section(reader, len, DataSectionReader::new, DataSection)
703 }
704 (Encoding::Module, DATA_COUNT_SECTION) => {
705 let (count, range) = single_item(reader, len, "data count")?;
706 Ok(DataCountSection { count, range })
707 }
708 (Encoding::Module, TAG_SECTION) => {
709 section(reader, len, TagSectionReader::new, TagSection)
710 }
711
712 // Component sections
713 (Encoding::Component, COMPONENT_MODULE_SECTION)
714 | (Encoding::Component, COMPONENT_SECTION) => {
715 if len as usize > MAX_WASM_MODULE_SIZE {
716 bail!(
717 len_pos,
718 "{} section is too large",
719 if id == 1 { "module" } else { "component " }
720 );
721 }
722
723 let range = reader.original_position()
724 ..reader.original_position() + usize::try_from(len).unwrap();
725 self.max_size -= u64::from(len);
726 self.offset += u64::from(len);
727 let mut parser = Parser::new(usize_to_u64(reader.original_position()));
728 #[cfg(feature = "features")]
729 {
730 parser.features = self.features;
731 }
732 parser.max_size = u64::from(len);
733
734 Ok(match id {
735 1 => ModuleSection {
736 parser,
737 unchecked_range: range,
738 },
739 4 => ComponentSection {
740 parser,
741 unchecked_range: range,
742 },
743 _ => unreachable!(),
744 })
745 }
746 (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => {
747 section(reader, len, InstanceSectionReader::new, InstanceSection)
748 }
749 (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => {
750 section(reader, len, CoreTypeSectionReader::new, CoreTypeSection)
751 }
752 (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section(
753 reader,
754 len,
755 ComponentInstanceSectionReader::new,
756 ComponentInstanceSection,
757 ),
758 (Encoding::Component, COMPONENT_ALIAS_SECTION) => {
759 section(reader, len, SectionLimited::new, ComponentAliasSection)
760 }
761 (Encoding::Component, COMPONENT_TYPE_SECTION) => section(
762 reader,
763 len,
764 ComponentTypeSectionReader::new,
765 ComponentTypeSection,
766 ),
767 (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section(
768 reader,
769 len,
770 ComponentCanonicalSectionReader::new,
771 ComponentCanonicalSection,
772 ),
773 (Encoding::Component, COMPONENT_START_SECTION) => {
774 let (start, range) = single_item(reader, len, "component start")?;
775 Ok(ComponentStartSection { start, range })
776 }
777 (Encoding::Component, COMPONENT_IMPORT_SECTION) => section(
778 reader,
779 len,
780 ComponentImportSectionReader::new,
781 ComponentImportSection,
782 ),
783 (Encoding::Component, COMPONENT_EXPORT_SECTION) => section(
784 reader,
785 len,
786 ComponentExportSectionReader::new,
787 ComponentExportSection,
788 ),
789 (_, id) => {
790 let offset = reader.original_position();
791 let contents = reader.read_bytes(len as usize)?;
792 let range = offset..offset + len as usize;
793 Ok(UnknownSection {
794 id,
795 contents,
796 range,
797 })
798 }
799 }
800 }
801
802 // Once we hit 0 remaining incrementally parsed items, with 0
803 // remaining bytes in each section, we're done and can switch back
804 // to parsing sections.
805 State::FunctionBody {
806 remaining: 0,
807 len: 0,
808 } => {
809 self.state = State::SectionStart;
810 self.parse_reader(reader, eof)
811 }
812
813 // ... otherwise trailing bytes with no remaining entries in these
814 // sections indicates an error.
815 State::FunctionBody { remaining: 0, len } => {
816 debug_assert!(len > 0);
817 let offset = reader.original_position();
818 Err(BinaryReaderError::new(
819 "trailing bytes at end of section",
820 offset,
821 ))
822 }
823
824 // Functions are relatively easy to parse when we know there's at
825 // least one remaining and at least one byte available to read
826 // things.
827 //
828 // We use the remaining length try to read a u32 size of the
829 // function, and using that size we require the entire function be
830 // resident in memory. This means that we're reading whole chunks of
831 // functions at a time.
832 //
833 // Limiting via `Parser::max_size` (nested parsing) happens above in
834 // `fn parse`, and limiting by our section size happens via
835 // `delimited`. Actual parsing of the function body is delegated to
836 // the caller to iterate over the `FunctionBody` structure.
837 State::FunctionBody { remaining, mut len } => {
838 let body = delimited(reader, &mut len, |r| {
839 Ok(FunctionBody::new(r.read_reader()?))
840 })?;
841 self.state = State::FunctionBody {
842 remaining: remaining - 1,
843 len,
844 };
845 Ok(CodeSectionEntry(body))
846 }
847 }
848 }
849
850 /// Convenience function that can be used to parse a module or component
851 /// that is entirely resident in memory.
852 ///
853 /// This function will parse the `data` provided as a WebAssembly module
854 /// or component.
855 ///
856 /// Note that when this function yields sections that provide parsers,
857 /// no further action is required for those sections as payloads from
858 /// those parsers will be automatically returned.
859 ///
860 /// # Examples
861 ///
862 /// An example of reading a wasm file from a stream (`std::io::Read`) into
863 /// a buffer and then parsing it.
864 ///
865 /// ```
866 /// use std::io::Read;
867 /// use anyhow::Result;
868 /// use wasmparser::{Parser, Chunk, Payload::*};
869 ///
870 /// fn parse(mut reader: impl Read) -> Result<()> {
871 /// let mut buf = Vec::new();
872 /// reader.read_to_end(&mut buf)?;
873 /// let parser = Parser::new(0);
874 ///
875 /// for payload in parser.parse_all(&buf) {
876 /// match payload? {
877 /// // Sections for WebAssembly modules
878 /// Version { .. } => { /* ... */ }
879 /// TypeSection(_) => { /* ... */ }
880 /// ImportSection(_) => { /* ... */ }
881 /// FunctionSection(_) => { /* ... */ }
882 /// TableSection(_) => { /* ... */ }
883 /// MemorySection(_) => { /* ... */ }
884 /// TagSection(_) => { /* ... */ }
885 /// GlobalSection(_) => { /* ... */ }
886 /// ExportSection(_) => { /* ... */ }
887 /// StartSection { .. } => { /* ... */ }
888 /// ElementSection(_) => { /* ... */ }
889 /// DataCountSection { .. } => { /* ... */ }
890 /// DataSection(_) => { /* ... */ }
891 ///
892 /// // Here we know how many functions we'll be receiving as
893 /// // `CodeSectionEntry`, so we can prepare for that, and
894 /// // afterwards we can parse and handle each function
895 /// // individually.
896 /// CodeSectionStart { .. } => { /* ... */ }
897 /// CodeSectionEntry(body) => {
898 /// // here we can iterate over `body` to parse the function
899 /// // and its locals
900 /// }
901 ///
902 /// // Sections for WebAssembly components
903 /// ModuleSection { .. } => { /* ... */ }
904 /// InstanceSection(_) => { /* ... */ }
905 /// CoreTypeSection(_) => { /* ... */ }
906 /// ComponentSection { .. } => { /* ... */ }
907 /// ComponentInstanceSection(_) => { /* ... */ }
908 /// ComponentAliasSection(_) => { /* ... */ }
909 /// ComponentTypeSection(_) => { /* ... */ }
910 /// ComponentCanonicalSection(_) => { /* ... */ }
911 /// ComponentStartSection { .. } => { /* ... */ }
912 /// ComponentImportSection(_) => { /* ... */ }
913 /// ComponentExportSection(_) => { /* ... */ }
914 ///
915 /// CustomSection(_) => { /* ... */ }
916 ///
917 /// // most likely you'd return an error here
918 /// UnknownSection { id, .. } => { /* ... */ }
919 ///
920 /// // Once we've reached the end of a parser we either resume
921 /// // at the parent parser or the payload iterator is at its
922 /// // end and we're done.
923 /// End(_) => {}
924 /// }
925 /// }
926 ///
927 /// Ok(())
928 /// }
929 ///
930 /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap();
931 /// ```
932 pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> {
933 let mut stack = Vec::new();
934 let mut cur = self;
935 let mut done = false;
936 iter::from_fn(move || {
937 if done {
938 return None;
939 }
940 let payload = match cur.parse(data, true) {
941 // Propagate all errors
942 Err(e) => {
943 done = true;
944 return Some(Err(e));
945 }
946
947 // This isn't possible because `eof` is always true.
948 Ok(Chunk::NeedMoreData(_)) => unreachable!(),
949
950 Ok(Chunk::Parsed { payload, consumed }) => {
951 data = &data[consumed..];
952 payload
953 }
954 };
955
956 match &payload {
957 Payload::ModuleSection { parser, .. }
958 | Payload::ComponentSection { parser, .. } => {
959 stack.push(cur.clone());
960 cur = parser.clone();
961 }
962 Payload::End(_) => match stack.pop() {
963 Some(p) => cur = p,
964 None => done = true,
965 },
966
967 _ => {}
968 }
969
970 Some(Ok(payload))
971 })
972 }
973
974 /// Skip parsing the code section entirely.
975 ///
976 /// This function can be used to indicate, after receiving
977 /// `CodeSectionStart`, that the section will not be parsed.
978 ///
979 /// The caller will be responsible for skipping `size` bytes (found in the
980 /// `CodeSectionStart` payload). Bytes should only be fed into `parse`
981 /// after the `size` bytes have been skipped.
982 ///
983 /// # Panics
984 ///
985 /// This function will panic if the parser is not in a state where it's
986 /// parsing the code section.
987 ///
988 /// # Examples
989 ///
990 /// ```
991 /// use wasmparser::{Result, Parser, Chunk, Payload::*};
992 /// use core::ops::Range;
993 ///
994 /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> {
995 /// let mut parser = Parser::new(0);
996 /// loop {
997 /// let payload = match parser.parse(wasm, true)? {
998 /// Chunk::Parsed { consumed, payload } => {
999 /// wasm = &wasm[consumed..];
1000 /// payload
1001 /// }
1002 /// // this state isn't possible with `eof = true`
1003 /// Chunk::NeedMoreData(_) => unreachable!(),
1004 /// };
1005 /// match payload {
1006 /// TypeSection(s) => print_range("type section", &s.range()),
1007 /// ImportSection(s) => print_range("import section", &s.range()),
1008 /// // .. other sections
1009 ///
1010 /// // Print the range of the code section we see, but don't
1011 /// // actually iterate over each individual function.
1012 /// CodeSectionStart { range, size, .. } => {
1013 /// print_range("code section", &range);
1014 /// parser.skip_section();
1015 /// wasm = &wasm[size as usize..];
1016 /// }
1017 /// End(_) => break,
1018 /// _ => {}
1019 /// }
1020 /// }
1021 /// Ok(())
1022 /// }
1023 ///
1024 /// fn print_range(section: &str, range: &Range<usize>) {
1025 /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end);
1026 /// }
1027 /// ```
1028 pub fn skip_section(&mut self) {
1029 let skip = match self.state {
1030 State::FunctionBody { remaining: _, len } => len,
1031 _ => panic!("wrong state to call `skip_section`"),
1032 };
1033 self.offset += u64::from(skip);
1034 self.max_size -= u64::from(skip);
1035 self.state = State::SectionStart;
1036 }
1037}
1038
/// Converts a `usize` into a `u64`, panicking if the value does not fit.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
1042
1043/// Parses an entire section resident in memory into a `Payload`.
1044///
1045/// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant`
1046/// to construct the section to return.
1047fn section<'a, T>(
1048 reader: &mut BinaryReader<'a>,
1049 len: u32,
1050 ctor: fn(BinaryReader<'a>) -> Result<T>,
1051 variant: fn(T) -> Payload<'a>,
1052) -> Result<Payload<'a>> {
1053 let reader = reader.skip(|r| {
1054 r.read_bytes(len as usize)?;
1055 Ok(())
1056 })?;
1057 // clear the hint for "need this many more bytes" here because we already
1058 // read all the bytes, so it's not possible to read more bytes if this
1059 // fails.
1060 let reader = ctor(reader).map_err(clear_hint)?;
1061 Ok(variant(reader))
1062}
1063
1064/// Reads a section that is represented by a single uleb-encoded `u32`.
1065fn single_item<'a, T>(
1066 reader: &mut BinaryReader<'a>,
1067 len: u32,
1068 desc: &str,
1069) -> Result<(T, Range<usize>)>
1070where
1071 T: FromReader<'a>,
1072{
1073 let range = reader.original_position()..reader.original_position() + len as usize;
1074 let mut content = reader.skip(|r| {
1075 r.read_bytes(len as usize)?;
1076 Ok(())
1077 })?;
1078 // We can't recover from "unexpected eof" here because our entire section is
1079 // already resident in memory, so clear the hint for how many more bytes are
1080 // expected.
1081 let ret = content.read().map_err(clear_hint)?;
1082 if !content.eof() {
1083 bail!(
1084 content.original_position(),
1085 "unexpected content in the {desc} section",
1086 );
1087 }
1088 Ok((ret, range))
1089}
1090
1091/// Attempts to parse using `f`.
1092///
1093/// This will update `*len` with the number of bytes consumed, and it will cause
1094/// a failure to be returned instead of the number of bytes consumed exceeds
1095/// what `*len` currently is.
1096fn delimited<'a, T>(
1097 reader: &mut BinaryReader<'a>,
1098 len: &mut u32,
1099 f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>,
1100) -> Result<T> {
1101 let start = reader.original_position();
1102 let ret = f(reader)?;
1103 *len = match (reader.original_position() - start)
1104 .try_into()
1105 .ok()
1106 .and_then(|i| len.checked_sub(i))
1107 {
1108 Some(i) => i,
1109 None => return Err(BinaryReaderError::new("unexpected end-of-file", start)),
1110 };
1111 Ok(ret)
1112}
1113
1114impl Default for Parser {
1115 fn default() -> Parser {
1116 Parser::new(0)
1117 }
1118}
1119
1120impl Payload<'_> {
1121 /// If this `Payload` represents a section in the original wasm module then
1122 /// the section's id and range within the original wasm binary are returned.
1123 ///
1124 /// Not all payloads refer to entire sections, such as the `Version` and
1125 /// `CodeSectionEntry` variants. These variants will return `None` from this
1126 /// function.
1127 ///
1128 /// Otherwise this function will return `Some` where the first element is
1129 /// the byte identifier for the section and the second element is the range
1130 /// of the contents of the section within the original wasm binary.
1131 ///
1132 /// The purpose of this method is to enable tools to easily iterate over
1133 /// entire sections if necessary and handle sections uniformly, for example
1134 /// dropping custom sections while preserving all other sections.
1135 pub fn as_section(&self) -> Option<(u8, Range<usize>)> {
1136 use Payload::*;
1137
1138 match self {
1139 Version { .. } => None,
1140 TypeSection(s) => Some((TYPE_SECTION, s.range())),
1141 ImportSection(s) => Some((IMPORT_SECTION, s.range())),
1142 FunctionSection(s) => Some((FUNCTION_SECTION, s.range())),
1143 TableSection(s) => Some((TABLE_SECTION, s.range())),
1144 MemorySection(s) => Some((MEMORY_SECTION, s.range())),
1145 TagSection(s) => Some((TAG_SECTION, s.range())),
1146 GlobalSection(s) => Some((GLOBAL_SECTION, s.range())),
1147 ExportSection(s) => Some((EXPORT_SECTION, s.range())),
1148 ElementSection(s) => Some((ELEMENT_SECTION, s.range())),
1149 DataSection(s) => Some((DATA_SECTION, s.range())),
1150 StartSection { range, .. } => Some((START_SECTION, range.clone())),
1151 DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())),
1152 CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())),
1153 CodeSectionEntry(_) => None,
1154
1155 ModuleSection {
1156 unchecked_range: range,
1157 ..
1158 } => Some((COMPONENT_MODULE_SECTION, range.clone())),
1159 InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())),
1160 CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())),
1161 ComponentSection {
1162 unchecked_range: range,
1163 ..
1164 } => Some((COMPONENT_SECTION, range.clone())),
1165 ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())),
1166 ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())),
1167 ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())),
1168 ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())),
1169 ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())),
1170 ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())),
1171 ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())),
1172
1173 CustomSection(c) => Some((CUSTOM_SECTION, c.range())),
1174
1175 UnknownSection { id, range, .. } => Some((*id, range.clone())),
1176
1177 End(_) => None,
1178 }
1179 }
1180}
1181
1182impl fmt::Debug for Payload<'_> {
1183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1184 use Payload::*;
1185 match self {
1186 Version {
1187 num,
1188 encoding,
1189 range,
1190 } => f
1191 .debug_struct("Version")
1192 .field("num", num)
1193 .field("encoding", encoding)
1194 .field("range", range)
1195 .finish(),
1196
1197 // Module sections
1198 TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(),
1199 ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(),
1200 FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(),
1201 TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(),
1202 MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(),
1203 TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(),
1204 GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(),
1205 ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(),
1206 ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(),
1207 DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(),
1208 StartSection { func, range } => f
1209 .debug_struct("StartSection")
1210 .field("func", func)
1211 .field("range", range)
1212 .finish(),
1213 DataCountSection { count, range } => f
1214 .debug_struct("DataCountSection")
1215 .field("count", count)
1216 .field("range", range)
1217 .finish(),
1218 CodeSectionStart { count, range, size } => f
1219 .debug_struct("CodeSectionStart")
1220 .field("count", count)
1221 .field("range", range)
1222 .field("size", size)
1223 .finish(),
1224 CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(),
1225
1226 // Component sections
1227 ModuleSection {
1228 parser: _,
1229 unchecked_range: range,
1230 } => f
1231 .debug_struct("ModuleSection")
1232 .field("range", range)
1233 .finish(),
1234 InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(),
1235 CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(),
1236 ComponentSection {
1237 parser: _,
1238 unchecked_range: range,
1239 } => f
1240 .debug_struct("ComponentSection")
1241 .field("range", range)
1242 .finish(),
1243 ComponentInstanceSection(_) => f
1244 .debug_tuple("ComponentInstanceSection")
1245 .field(&"...")
1246 .finish(),
1247 ComponentAliasSection(_) => f
1248 .debug_tuple("ComponentAliasSection")
1249 .field(&"...")
1250 .finish(),
1251 ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(),
1252 ComponentCanonicalSection(_) => f
1253 .debug_tuple("ComponentCanonicalSection")
1254 .field(&"...")
1255 .finish(),
1256 ComponentStartSection { .. } => f
1257 .debug_tuple("ComponentStartSection")
1258 .field(&"...")
1259 .finish(),
1260 ComponentImportSection(_) => f
1261 .debug_tuple("ComponentImportSection")
1262 .field(&"...")
1263 .finish(),
1264 ComponentExportSection(_) => f
1265 .debug_tuple("ComponentExportSection")
1266 .field(&"...")
1267 .finish(),
1268
1269 CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(),
1270
1271 UnknownSection { id, range, .. } => f
1272 .debug_struct("UnknownSection")
1273 .field("id", id)
1274 .field("range", range)
1275 .finish(),
1276
1277 End(offset) => f.debug_tuple("End").field(offset).finish(),
1278 }
1279 }
1280}
1281
1282fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError {
1283 err.inner.needed_hint = None;
1284 err
1285}
1286
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that an expression matches a pattern, panicking with the
    /// value's `Debug` output and the pattern text otherwise.
    macro_rules! assert_matches {
        ($value:expr, $pattern:pat $(,)?) => {
            match $value {
                $pattern => {}
                other => panic!("`{:?}` doesn't match `{}`", other, stringify!($pattern)),
            }
        };
    }

    /// Eight-byte header of a core module (magic, version 1, module kind).
    const MODULE_HEADER: &[u8] = b"\0asm\x01\0\0\0";

    /// Eight-byte header of a component (magic, version 0xd, component kind).
    const COMPONENT_HEADER: &[u8] = b"\0asm\x0d\0\x01\0";

    /// Returns a parser that has already consumed a core module header.
    fn parser_after_header() -> Parser {
        let mut parser = Parser::default();
        assert_matches!(
            parser.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_MODULE_VERSION,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );
        parser
    }

    /// Returns a parser that has already consumed a component header.
    fn parser_after_component_header() -> Parser {
        let mut parser = Parser::default();
        assert_matches!(
            parser.parse(COMPONENT_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: WASM_COMPONENT_VERSION,
                    encoding: Encoding::Component,
                    ..
                },
            }),
        );
        parser
    }

    /// Feeds the two-byte header of a nested module section to `parent` and
    /// returns the sub-parser handed back for the nested module.
    fn nested_module_parser(parent: &mut Parser, section_header: &[u8]) -> Parser {
        match parent.parse(section_header, false) {
            Ok(Chunk::Parsed {
                consumed: 2,
                payload: Payload::ModuleSection { parser, .. },
            }) => parser,
            other => panic!("bad parse {:?}", other),
        }
    }

    /// Checks every observable property of a parsed custom section.
    fn assert_custom(
        chunk: Chunk<'_>,
        expected_consumed: usize,
        expected_name: &str,
        expected_data_offset: usize,
        expected_data: &[u8],
        expected_range: Range<usize>,
    ) {
        let (consumed, section) = match chunk {
            Chunk::Parsed {
                consumed,
                payload: Payload::CustomSection(section),
            } => (consumed, section),
            _ => panic!("not a custom section payload"),
        };
        assert_eq!(consumed, expected_consumed);
        assert_eq!(section.name(), expected_name);
        assert_eq!(section.data_offset(), expected_data_offset);
        assert_eq!(section.data(), expected_data);
        assert_eq!(section.range(), expected_range);
    }

    #[test]
    fn header() {
        // With `eof` set an empty input is an error rather than a request
        // for more data.
        assert!(Parser::default().parse(&[], true).is_err());

        assert_matches!(
            Parser::default().parse(&[], false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(b"\0", false),
            Ok(Chunk::NeedMoreData(3)),
        );
        assert_matches!(
            Parser::default().parse(b"\0asm", false),
            Ok(Chunk::NeedMoreData(4)),
        );
        assert_matches!(
            Parser::default().parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );
    }

    #[test]
    fn header_iter() {
        // Draining the iterator must not panic, whatever it yields.
        let inputs: [&[u8]; 4] = [&[], b"\0", b"\0asm", b"\0asm\x01\x01\x01\x01"];
        for &input in &inputs {
            Parser::default().parse_all(input).for_each(drop);
        }
    }

    #[test]
    fn start_section() {
        assert_matches!(
            parser_after_header().parse(&[], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        // Truncated input is an error once `eof` is set ...
        let truncated: [&[u8]; 3] = [&[8], &[8, 1], &[8, 2]];
        for &input in &truncated {
            assert!(parser_after_header().parse(input, true).is_err());
        }

        // ... and a request for more data otherwise.
        assert_matches!(
            parser_after_header().parse(&[8], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[8, 2], false),
            Ok(Chunk::NeedMoreData(2)),
        );

        assert_matches!(
            parser_after_header().parse(&[8, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::StartSection { func: 1, .. },
            }),
        );

        // A declared length of 2 or of 0 is rejected.
        assert!(parser_after_header().parse(&[8, 2, 1, 1], false).is_err());
        assert!(parser_after_header().parse(&[8, 0], false).is_err());
    }

    #[test]
    fn end_works() {
        let mut parser = parser_after_header();
        assert_matches!(
            parser.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(8),
            }),
        );
    }

    #[test]
    fn type_section() {
        assert!(parser_after_header().parse(&[1], true).is_err());
        assert!(parser_after_header().parse(&[1, 0], false).is_err());
        assert!(parser_after_header().parse(&[8, 2], true).is_err());

        assert_matches!(
            parser_after_header().parse(&[1], false),
            Ok(Chunk::NeedMoreData(1)),
        );
        assert_matches!(
            parser_after_header().parse(&[1, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        assert_matches!(
            parser_after_header().parse(&[1, 1, 1], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
        // Bytes past the declared section length are not consumed.
        assert_matches!(
            parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false),
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::TypeSection(_),
            }),
        );
    }

    #[test]
    fn custom_section() {
        assert!(parser_after_header().parse(&[0], true).is_err());
        assert!(parser_after_header().parse(&[0, 0], false).is_err());
        assert!(parser_after_header().parse(&[0, 1, 1], false).is_err());
        assert_matches!(
            parser_after_header().parse(&[0, 2, 1], false),
            Ok(Chunk::NeedMoreData(1)),
        );

        // Empty name, no data.
        let chunk = parser_after_header().parse(&[0, 1, 0], false).unwrap();
        assert_custom(chunk, 3, "", 11, b"", 10..11);

        // Name "a", no data.
        let chunk = parser_after_header()
            .parse(&[0, 2, 1, b'a'], false)
            .unwrap();
        assert_custom(chunk, 4, "a", 12, b"", 10..12);

        // Empty name, data "a".
        let chunk = parser_after_header()
            .parse(&[0, 2, 0, b'a'], false)
            .unwrap();
        assert_custom(chunk, 4, "", 11, b"a", 10..12);
    }

    #[test]
    fn function_section() {
        let truncated: [&[u8]; 3] = [&[10], &[10, 0], &[10, 1]];
        for &input in &truncated {
            assert!(parser_after_header().parse(input, true).is_err());
        }
        assert_matches!(
            parser_after_header().parse(&[10], false),
            Ok(Chunk::NeedMoreData(1))
        );
        assert_matches!(
            parser_after_header().parse(&[10, 1], false),
            Ok(Chunk::NeedMoreData(1))
        );

        // Code section without any function bodies.
        {
            let mut parser = parser_after_header();
            assert_matches!(
                parser.parse(&[10, 1, 0], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 0, .. },
                }),
            );
            assert_matches!(
                parser.parse(&[], true),
                Ok(Chunk::Parsed {
                    consumed: 0,
                    payload: Payload::End(11),
                }),
            );
        }

        // Code section with a single, empty function body.
        {
            let mut parser = parser_after_header();
            assert_matches!(
                parser.parse(&[10, 2, 1, 0], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            assert_matches!(
                parser.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            assert_matches!(
                parser.parse(&[], true),
                Ok(Chunk::Parsed {
                    consumed: 0,
                    payload: Payload::End(12),
                }),
            );
        }

        // A 1 byte section declaring 1 function has no room left for the
        // function body.
        {
            let mut parser = parser_after_header();
            assert_matches!(
                parser.parse(&[10, 1, 1], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            let err = parser.parse(&[0], false).unwrap_err();
            assert_eq!(err.message(), "unexpected end-of-file");
        }

        // A section declaring 2 functions that is cut off after the first.
        {
            let mut parser = parser_after_header();
            assert_matches!(
                parser.parse(&[10, 2, 2], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 2, .. },
                }),
            );
            assert_matches!(
                parser.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            assert_matches!(parser.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
            let err = parser.parse(&[0], false).unwrap_err();
            assert_eq!(err.message(), "unexpected end-of-file");
        }

        // Bytes left in the section after its last function are an error.
        {
            let mut parser = parser_after_header();
            assert_matches!(
                parser.parse(&[10, 3, 1], false),
                Ok(Chunk::Parsed {
                    consumed: 3,
                    payload: Payload::CodeSectionStart { count: 1, .. },
                }),
            );
            assert_matches!(
                parser.parse(&[0], false),
                Ok(Chunk::Parsed {
                    consumed: 1,
                    payload: Payload::CodeSectionEntry(_),
                }),
            );
            let err = parser.parse(&[0], false).unwrap_err();
            assert_eq!(err.message(), "trailing bytes at end of section");
        }
    }

    #[test]
    fn single_module() {
        let mut parent = parser_after_component_header();
        assert_matches!(parent.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

        // A nested module that is 8 bytes long.
        let mut nested = nested_module_parser(&mut parent, &[1, 8]);

        // The nested module's header is parsed by the sub-parser.
        assert_matches!(nested.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(nested.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
        assert_matches!(
            nested.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version {
                    num: 1,
                    encoding: Encoding::Module,
                    ..
                },
            }),
        );

        // The sub-parser is limited to the nested module's bytes, so the
        // next byte must be left alone: it belongs to the parent parser.
        assert_matches!(
            nested.parse(&[10], false),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );

        // The parent resumes after the nested module; signalling the end of
        // input shows that it finishes cleanly at that point.
        assert_matches!(parent.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
        assert_matches!(
            parent.parse(&[], true),
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(18),
            }),
        );
    }

    #[test]
    fn nested_section_too_big() {
        let mut parent = parser_after_component_header();

        // A nested module that is 10 bytes long.
        let mut nested = nested_module_parser(&mut parent, &[1, 10]);

        // The header takes 8 of those bytes, leaving 2 for the rest of the
        // nested module.
        assert_matches!(
            nested.parse(MODULE_HEADER, false),
            Ok(Chunk::Parsed {
                consumed: 8,
                payload: Payload::Version { num: 1, .. },
            }),
        );

        // A section that declares itself bigger than what is left of the
        // enclosing module must be rejected. This custom section has a
        // one-byte body, but together with its two header bytes that body
        // byte would lie outside the nested module.
        let err = nested.parse(&[0, 1, 0], false).unwrap_err();
        assert_eq!(err.message(), "section too large");
    }
}