cranelift_codegen/ir/
memflags.rs

1//! Memory operation flags.
2
3use super::TrapCode;
4use core::fmt;
5use core::str::FromStr;
6
7#[cfg(feature = "enable-serde")]
8use serde_derive::{Deserialize, Serialize};
9
/// Endianness of a memory access.
///
/// Flags carry at most one explicit endianness; when neither is set, the
/// access uses the target's native endianness (see `MemFlags::endianness`).
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Endianness {
    /// Little-endian
    Little,
    /// Big-endian
    Big,
}
18
/// Which disjoint region of aliasing memory is accessed in this memory
/// operation.
///
/// The discriminants are the exact two-bit values stored in the
/// `MemFlags` alias-region field; `0b00` is reserved to mean "no region"
/// (`None`), so it has no variant here.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[repr(u8)]
#[allow(missing_docs)]
#[rustfmt::skip]
pub enum AliasRegion {
    // None = 0b00;
    Heap    = 0b01,
    Table   = 0b10,
    Vmctx   = 0b11,
}
31
32impl AliasRegion {
33    const fn from_bits(bits: u8) -> Option<Self> {
34        match bits {
35            0b00 => None,
36            0b01 => Some(Self::Heap),
37            0b10 => Some(Self::Table),
38            0b11 => Some(Self::Vmctx),
39            _ => panic!("invalid alias region bits"),
40        }
41    }
42
43    const fn to_bits(region: Option<Self>) -> u8 {
44        match region {
45            None => 0b00,
46            Some(r) => r as u8,
47        }
48    }
49}
50
/// Flags for memory operations like load/store.
///
/// Each of these flags introduce a limited form of undefined behavior. The flags each enable
/// certain optimizations that need to make additional assumptions. Generally, the semantics of
/// a program do not change when a flag is removed, but adding a flag may change them.
///
/// In addition, the flags determine the endianness of the memory access.  By default,
/// any memory access uses the native endianness determined by the target ISA.  This can
/// be overridden for individual accesses by explicitly specifying little- or big-endian
/// semantics via the flags.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct MemFlags {
    // Initialized to all zeros to have all flags have their default value.
    // This is interpreted through various methods below. Currently the bits of
    // this are defined as:
    //
    // * 0 - aligned flag
    // * 1 - readonly flag
    // * 2 - little endian flag
    // * 3 - big endian flag
    // * 4 - checked flag
    // * 5/6 - alias region
    // * 7/8/9/10 - trap code
    // * 11/12/13/14/15 - unallocated
    //
    // Current properties upheld are:
    //
    // * only one of little/big endian is set
    // * only one alias region can be set - once set it cannot be changed
    bits: u16,
}
83
// Bit assignments below must agree with the layout documented on
// `MemFlags::bits` above; the masks and offsets are the single source of
// truth for the multi-bit fields.

/// Guaranteed to use "natural alignment" for the given type. This
/// may enable better instruction selection.
const BIT_ALIGNED: u16 = 1 << 0;

/// A load that reads data in memory that does not change for the
/// duration of the function's execution. This may enable
/// additional optimizations to be performed.
const BIT_READONLY: u16 = 1 << 1;

/// Load multi-byte values from memory in a little-endian format.
const BIT_LITTLE_ENDIAN: u16 = 1 << 2;

/// Load multi-byte values from memory in a big-endian format.
const BIT_BIG_ENDIAN: u16 = 1 << 3;

/// Check this load or store for safety when using the
/// proof-carrying-code framework. The address must have a
/// `PointsTo` fact attached with a sufficiently large valid range
/// for the accessed size.
const BIT_CHECKED: u16 = 1 << 4;

/// Used for alias analysis, indicates which disjoint part of the abstract state
/// is being accessed.
const MASK_ALIAS_REGION: u16 = 0b11 << ALIAS_REGION_OFFSET;
const ALIAS_REGION_OFFSET: u16 = 5;

/// Trap code, if any, for this memory operation.
const MASK_TRAP_CODE: u16 = 0b1111 << TRAP_CODE_OFFSET;
const TRAP_CODE_OFFSET: u16 = 7;
113
impl MemFlags {
    /// Create a new empty set of flags.
    ///
    /// All-zero bits means: may trap with `HeapOutOfBounds`, unaligned-ok,
    /// not readonly, native endianness, unchecked, no alias region.
    pub const fn new() -> Self {
        Self { bits: 0 }
    }

    /// Create a set of flags representing an access from a "trusted" address, meaning it's
    /// known to be aligned and non-trapping.
    pub const fn trusted() -> Self {
        Self::new().with_notrap().with_aligned()
    }

    /// Read a flag bit.
    const fn read_bit(self, bit: u16) -> bool {
        self.bits & bit != 0
    }

    /// Return a new `MemFlags` with this flag bit set.
    const fn with_bit(mut self, bit: u16) -> Self {
        self.bits |= bit;
        self
    }

    /// Reads the alias region that this memory operation works with.
    pub const fn alias_region(self) -> Option<AliasRegion> {
        // Mask and shift down to the two-bit field before decoding.
        AliasRegion::from_bits(((self.bits & MASK_ALIAS_REGION) >> ALIAS_REGION_OFFSET) as u8)
    }

    /// Sets the alias region that this works on to the specified `region`.
    pub const fn with_alias_region(mut self, region: Option<AliasRegion>) -> Self {
        let bits = AliasRegion::to_bits(region);
        // Clear the old field, then install the new encoding.
        self.bits &= !MASK_ALIAS_REGION;
        self.bits |= (bits as u16) << ALIAS_REGION_OFFSET;
        self
    }

    /// Sets the alias region that this works on to the specified `region`.
    pub fn set_alias_region(&mut self, region: Option<AliasRegion>) {
        *self = self.with_alias_region(region);
    }

    /// Set a flag bit by name.
    ///
    /// Returns true if the flag was found and set, false for an unknown flag
    /// name.
    ///
    /// # Errors
    ///
    /// Returns an error message if the `name` is known but couldn't be applied
    /// due to it being a semantic error.
    pub fn set_by_name(&mut self, name: &str) -> Result<bool, &'static str> {
        *self = match name {
            "notrap" => self.with_trap_code(None),
            "aligned" => self.with_aligned(),
            "readonly" => self.with_readonly(),
            // The endianness bits are mutually exclusive, so reject an
            // explicit request for the opposite of what is already set.
            "little" => {
                if self.read_bit(BIT_BIG_ENDIAN) {
                    return Err("cannot set both big and little endian bits");
                }
                self.with_endianness(Endianness::Little)
            }
            "big" => {
                if self.read_bit(BIT_LITTLE_ENDIAN) {
                    return Err("cannot set both big and little endian bits");
                }
                self.with_endianness(Endianness::Big)
            }
            // An alias region, once chosen, cannot be changed; reject any
            // second region name regardless of which one it is.
            "heap" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Heap))
            }
            "table" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Table))
            }
            "vmctx" => {
                if self.alias_region().is_some() {
                    return Err("cannot set more than one alias region");
                }
                self.with_alias_region(Some(AliasRegion::Vmctx))
            }
            "checked" => self.with_checked(),

            // Any other name is tried as a trap-code string; names that
            // don't parse as a trap code are reported as unknown
            // (`Ok(false)`), and user trap codes are rejected outright.
            other => match TrapCode::from_str(other) {
                Ok(TrapCode::User(_)) => return Err("cannot set user trap code on mem flags"),
                Ok(code) => self.with_trap_code(Some(code)),
                Err(()) => return Ok(false),
            },
        };
        Ok(true)
    }

    /// Return endianness of the memory access.  This will return the endianness
    /// explicitly specified by the flags if any, and will default to the native
    /// endianness otherwise.  The native endianness has to be provided by the
    /// caller since it is not explicitly encoded in CLIF IR -- this allows a
    /// front end to create IR without having to know the target endianness.
    pub const fn endianness(self, native_endianness: Endianness) -> Endianness {
        if self.read_bit(BIT_LITTLE_ENDIAN) {
            Endianness::Little
        } else if self.read_bit(BIT_BIG_ENDIAN) {
            Endianness::Big
        } else {
            native_endianness
        }
    }

    /// Set endianness of the memory access.
    pub fn set_endianness(&mut self, endianness: Endianness) {
        *self = self.with_endianness(endianness);
    }

    /// Set endianness of the memory access, returning new flags.
    ///
    /// Note that this only sets a bit; it does not clear the opposite
    /// endianness bit, so setting little on big-endian flags (or vice
    /// versa) trips the assertion below rather than overriding.
    pub const fn with_endianness(self, endianness: Endianness) -> Self {
        let res = match endianness {
            Endianness::Little => self.with_bit(BIT_LITTLE_ENDIAN),
            Endianness::Big => self.with_bit(BIT_BIG_ENDIAN),
        };
        // Uphold the invariant that at most one endianness bit is set.
        assert!(!(res.read_bit(BIT_LITTLE_ENDIAN) && res.read_bit(BIT_BIG_ENDIAN)));
        res
    }

    /// Test if this memory operation cannot trap.
    ///
    /// By default `MemFlags` will assume that any load/store can trap and is
    /// associated with a `TrapCode::HeapOutOfBounds` code. If the trap code is
    /// configured to `None` though then this method will return `true` and
    /// indicates that the memory operation will not trap.
    ///
    /// If this returns `true` then the memory is *accessible*, which means
    /// that accesses will not trap. This makes it possible to delete an unused
    /// load or a dead store instruction.
    pub const fn notrap(self) -> bool {
        self.trap_code().is_none()
    }

    /// Sets the trap code for this `MemFlags` to `None`.
    pub fn set_notrap(&mut self) {
        *self = self.with_notrap();
    }

    /// Sets the trap code for this `MemFlags` to `None`, returning the new
    /// flags.
    pub const fn with_notrap(self) -> Self {
        self.with_trap_code(None)
    }

    /// Test if the `aligned` flag is set.
    ///
    /// By default, Cranelift memory instructions work with any unaligned effective address. If the
    /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
    /// effective address is misaligned.
    pub const fn aligned(self) -> bool {
        self.read_bit(BIT_ALIGNED)
    }

    /// Set the `aligned` flag.
    pub fn set_aligned(&mut self) {
        *self = self.with_aligned();
    }

    /// Set the `aligned` flag, returning new flags.
    pub const fn with_aligned(self) -> Self {
        self.with_bit(BIT_ALIGNED)
    }

    /// Test if the `readonly` flag is set.
    ///
    /// Loads with this flag have no memory dependencies.
    /// This results in undefined behavior if the dereferenced memory is mutated at any time
    /// between when the function is called and when it is exited.
    pub const fn readonly(self) -> bool {
        self.read_bit(BIT_READONLY)
    }

    /// Set the `readonly` flag.
    pub fn set_readonly(&mut self) {
        *self = self.with_readonly();
    }

    /// Set the `readonly` flag, returning new flags.
    pub const fn with_readonly(self) -> Self {
        self.with_bit(BIT_READONLY)
    }

    /// Test if the `checked` bit is set.
    ///
    /// Loads and stores with this flag are verified to access
    /// pointers only with a validated `PointsTo` fact attached, and
    /// with that fact validated, when using the proof-carrying-code
    /// framework. If initial facts on program inputs are correct
    /// (i.e., correctly denote the shape and types of data structures
    /// in memory), and if PCC validates the compiled output, then all
    /// `checked`-marked memory accesses are guaranteed (up to the
    /// checker's correctness) to access valid memory. This can be
    /// used to ensure memory safety and sandboxing.
    pub const fn checked(self) -> bool {
        self.read_bit(BIT_CHECKED)
    }

    /// Set the `checked` bit.
    pub fn set_checked(&mut self) {
        *self = self.with_checked();
    }

    /// Set the `checked` bit, returning new flags.
    pub const fn with_checked(self) -> Self {
        self.with_bit(BIT_CHECKED)
    }

    /// Get the trap code to report if this memory access traps.
    ///
    /// A `None` trap code indicates that this memory access does not trap.
    pub const fn trap_code(self) -> Option<TrapCode> {
        // NB: keep this encoding in sync with `with_trap_code` below.
        //
        // Also note that the default, all zeros, is `HeapOutOfBounds`. It is
        // intentionally not `None` so memory operations are all considered
        // effect-ful by default.
        match (self.bits & MASK_TRAP_CODE) >> TRAP_CODE_OFFSET {
            0b0000 => Some(TrapCode::HeapOutOfBounds),
            0b0001 => Some(TrapCode::StackOverflow),
            0b0010 => Some(TrapCode::HeapMisaligned),
            0b0011 => Some(TrapCode::TableOutOfBounds),
            0b0100 => Some(TrapCode::IndirectCallToNull),
            0b0101 => Some(TrapCode::BadSignature),
            0b0110 => Some(TrapCode::IntegerOverflow),
            0b0111 => Some(TrapCode::IntegerDivisionByZero),
            0b1000 => Some(TrapCode::BadConversionToInteger),
            0b1001 => Some(TrapCode::UnreachableCodeReached),
            0b1010 => Some(TrapCode::Interrupt),
            0b1011 => Some(TrapCode::NullReference),
            0b1100 => Some(TrapCode::NullI31Ref),
            // 0b1101 => {} not allocated
            // 0b1110 => {} not allocated
            0b1111 => None,
            // The field is masked to four bits above, so all values are
            // covered by the arms and comments; anything else is impossible.
            _ => unreachable!(),
        }
    }

    /// Configures these flags with the specified trap code `code`.
    ///
    /// Note that `TrapCode::User(_)` cannot be set in `MemFlags`. A trap code
    /// indicates that this memory operation cannot be optimized away and it
    /// must "stay where it is" in the programs. Traps are considered side
    /// effects, for example, and have meaning through the trap code that is
    /// communicated and which instruction trapped.
    pub const fn with_trap_code(mut self, code: Option<TrapCode>) -> Self {
        // NB: keep this encoding in sync with `trap_code` above.
        let bits = match code {
            Some(TrapCode::HeapOutOfBounds) => 0b0000,
            Some(TrapCode::StackOverflow) => 0b0001,
            Some(TrapCode::HeapMisaligned) => 0b0010,
            Some(TrapCode::TableOutOfBounds) => 0b0011,
            Some(TrapCode::IndirectCallToNull) => 0b0100,
            Some(TrapCode::BadSignature) => 0b0101,
            Some(TrapCode::IntegerOverflow) => 0b0110,
            Some(TrapCode::IntegerDivisionByZero) => 0b0111,
            Some(TrapCode::BadConversionToInteger) => 0b1000,
            Some(TrapCode::UnreachableCodeReached) => 0b1001,
            Some(TrapCode::Interrupt) => 0b1010,
            Some(TrapCode::NullReference) => 0b1011,
            Some(TrapCode::NullI31Ref) => 0b1100,
            None => 0b1111,

            Some(TrapCode::User(_)) => panic!("cannot set user trap code in mem flags"),
        };
        // Clear the old field, then install the new encoding.
        self.bits &= !MASK_TRAP_CODE;
        self.bits |= bits << TRAP_CODE_OFFSET;
        self
    }
}
389
390impl fmt::Display for MemFlags {
391    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
392        match self.trap_code() {
393            None => write!(f, " notrap")?,
394            // This is the default trap code, so don't print anything extra
395            // for this.
396            Some(TrapCode::HeapOutOfBounds) => {}
397            Some(t) => write!(f, " {t}")?,
398        }
399        if self.aligned() {
400            write!(f, " aligned")?;
401        }
402        if self.readonly() {
403            write!(f, " readonly")?;
404        }
405        if self.read_bit(BIT_BIG_ENDIAN) {
406            write!(f, " big")?;
407        }
408        if self.read_bit(BIT_LITTLE_ENDIAN) {
409            write!(f, " little")?;
410        }
411        if self.checked() {
412            write!(f, " checked")?;
413        }
414        match self.alias_region() {
415            None => {}
416            Some(AliasRegion::Heap) => write!(f, " heap")?,
417            Some(AliasRegion::Table) => write!(f, " table")?,
418            Some(AliasRegion::Vmctx) => write!(f, " vmctx")?,
419        }
420        Ok(())
421    }
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn roundtrip_traps() {
        // Every non-user trap code must survive an encode/decode round trip.
        for trap in TrapCode::non_user_traps().iter().copied() {
            let encoded = MemFlags::new().with_trap_code(Some(trap));
            assert_eq!(encoded.trap_code(), Some(trap));
        }
        // `None` (notrap) must round-trip as well.
        assert_eq!(MemFlags::new().with_trap_code(None).trap_code(), None);
    }

    #[test]
    fn cannot_set_big_and_little() {
        let mut flags = MemFlags::new().with_endianness(Endianness::Big);
        assert!(flags.set_by_name("little").is_err());

        let mut flags = MemFlags::new().with_endianness(Endianness::Little);
        assert!(flags.set_by_name("big").is_err());
    }

    #[test]
    fn only_one_region() {
        // Once a region is set, naming any other region must be rejected.
        let cases = [
            (AliasRegion::Heap, ["table", "vmctx"]),
            (AliasRegion::Table, ["heap", "vmctx"]),
            (AliasRegion::Vmctx, ["heap", "table"]),
        ];
        for (region, conflicting_names) in cases {
            let mut flags = MemFlags::new().with_alias_region(Some(region));
            for name in conflicting_names {
                assert!(flags.set_by_name(name).is_err());
            }
        }
    }
}
461}