cranelift_codegen/machinst/abi.rs
//! Implementation of a vanilla ABI, shared between several machines. The
//! implementation here assumes that arguments will be passed in registers
//! first, then additional args on the stack; that the stack grows downward,
//! contains a standard frame (return address and frame pointer), and the
//! compiler is otherwise free to allocate space below that with its choice of
//! layout; and that the machine has some notion of caller- and callee-save
//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this
//! mold and thus both of these backends use this shared implementation.
//!
//! See the documentation in specific machine backends for the "instantiation"
//! of this generic ABI, i.e., which registers are caller/callee-save, arguments
//! and return values, and any other special requirements.
//!
//! For now the implementation here assumes a 64-bit machine, but we intend to
//! make this 32/64-bit-generic shortly.
//!
//! # Vanilla ABI
//!
//! First, arguments and return values are passed in registers up to a certain
//! fixed count, after which they overflow onto the stack. Multiple return
//! values either fit in registers, or are returned in a separate return-value
//! area on the stack, given by a hidden extra parameter.
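//!
//! As an illustrative sketch (the register budget and the hidden-parameter
//! mechanism are backend-specific), a function with more results than
//! available return registers:
//!
//! ```plain
//! fn f() -> (i64, i64, i64, i64, i64)
//! ```
//!
//! may be lowered so that the caller passes a hidden pointer to a stack
//! return-value area and the callee stores the overflowing results through
//! that pointer.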
//!
//! Note that the exact stack layout is up to us. We settled on the
//! below design based on several requirements. In particular, we need
//! to be able to generate instructions (or instruction sequences) to
//! access arguments, stack slots, and spill slots before we know how
//! many spill slots or clobber-saves there will be, because of our
//! pass structure. We also prefer positive offsets to negative
//! offsets because of an asymmetry in some machines' addressing modes
//! (e.g., on AArch64, positive offsets have a larger possible range
//! without a long-form sequence to synthesize an arbitrary
//! offset). We also need clobber-save registers to be "near" the
//! frame pointer: Windows unwind information requires it to be within
//! 240 bytes of RBP. Finally, it is not allowed to access memory
//! below the current SP value.
//!
//! We assume that a prologue first pushes the frame pointer (and
//! return address above that, if the machine does not do that in
//! hardware). We set FP to point to this two-word frame record. We
//! store all other frame slots below this two-word frame record, as
//! well as enough space for arguments to the largest possible
//! function call. The stack pointer then remains at this position
//! for the duration of the function, allowing us to address all
//! frame storage at positive offsets from SP.
//!
//! Note that if we ever support dynamic stack-space allocation (for
//! `alloca`), we will need a way to reference spill slots and stack
//! slots relative to a dynamic SP, because we will no longer be able
//! to know a static offset from SP to the slots at any particular
//! program point. Probably the best solution at that point will be to
//! revert to using the frame pointer as the reference for all slots,
//! to allow generating spill/reload and stackslot accesses before we
//! know how large the clobber-saves will be.
//!
//! # Stack Layout
//!
//! The stack looks like:
//!
//! ```plain
//!   (high address)
//!                              |          ...              |
//!                              | caller frames             |
//!                              |          ...              |
//!                              +===========================+
//!                              |          ...              |
//!                              | stack args                |
//! Canonical Frame Address -->  | (accessed via FP)         |
//!                              +---------------------------+
//! SP at function entry ----->  | return address            |
//!                              +---------------------------+
//! FP after prologue -------->  | FP (pushed by prologue)   |
//!                              +---------------------------+           -----
//!                              |          ...              |             |
//!                              | clobbered callee-saves    |             |
//! unwind-frame base -------->  | (pushed by prologue)      |             |
//!                              +---------------------------+   -----     |
//!                              |          ...              |     |       |
//!                              | spill slots               |     |       |
//!                              | (accessed via SP)         |   fixed   active
//!                              |          ...              |   frame    size
//!                              | stack slots               |  storage    |
//!                              | (accessed via SP)         |   size      |
//!                              | (alloc'd by prologue)     |     |       |
//!                              +---------------------------+   -----     |
//!                              | [alignment as needed]     |             |
//!                              |          ...              |             |
//!                              | args for largest call     |             |
//! SP ----------------------->  | (alloc'd by prologue)     |             |
//!                              +===========================+           -----
//!
//!   (low address)
//! ```
//!
//! # Multi-value Returns
//!
//! We support multi-value returns by using multiple return-value
//! registers. In some cases this is an extension of the base system
//! ABI. See each platform's `abi.rs` implementation for details.

use crate::entity::SecondaryMap;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, Signature};
use crate::isa::TargetIsa;
use crate::settings::ProbestackStrategy;
use crate::CodegenError;
use crate::{ir, isa};
use crate::{machinst::*, trace};
use regalloc2::{MachineEnv, PReg, PRegSet};
use rustc_hash::FxHashMap;
use smallvec::smallvec;
use std::collections::HashMap;
use std::marker::PhantomData;
use std::mem;

/// A small vector of instructions (with some reasonable size); appropriate for
/// a small fixed sequence implementing one operation.
pub type SmallInstVec<I> = SmallVec<[I; 4]>;

/// A type used by backends to track argument-binding info in the "args"
/// pseudoinst. The pseudoinst holds a vec of `ArgPair` structs.
#[derive(Clone, Debug)]
pub struct ArgPair {
    /// The vreg that is defined by this args pseudoinst.
    pub vreg: Writable<Reg>,
    /// The preg that the arg arrives in; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A type used by backends to track return register binding info in the "ret"
/// pseudoinst. The pseudoinst holds a vec of `RetPair` structs.
#[derive(Clone, Debug)]
pub struct RetPair {
    /// The vreg that is returned by this pseudoinst.
    pub vreg: Reg,
    /// The preg that the value is returned through; this constrains the vreg's
    /// placement at the pseudoinst.
    pub preg: Reg,
}

/// A location for (part of) an argument or return value. These "storage slots"
/// are specified for each register-sized part of an argument.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ABIArgSlot {
    /// In a real register.
    Reg {
        /// Register that holds this arg.
        reg: RealReg,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
    /// Arguments only: on stack, at given offset from SP at entry.
    Stack {
        /// Offset of this arg relative to the base of stack args.
        offset: i64,
        /// Value type of this arg.
        ty: ir::Type,
        /// Should this arg be zero- or sign-extended?
        extension: ir::ArgumentExtension,
    },
}

impl ABIArgSlot {
    /// The type of the value that will be stored in this slot.
    pub fn get_type(&self) -> ir::Type {
        match self {
            ABIArgSlot::Reg { ty, .. } => *ty,
            ABIArgSlot::Stack { ty, .. } => *ty,
        }
    }
}

/// A vector of `ABIArgSlot`s. Inline capacity for one element because basically
/// 100% of values use one slot. Only `i128`s need multiple slots, and they are
/// super rare (and never happen with Wasm).
pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 1]>;

/// An ABIArg is composed of one or more parts. This allows for a CLIF-level
/// Value to be passed with its parts in more than one location at the ABI
/// level. For example, a 128-bit integer may be passed in two 64-bit registers,
/// or even a 64-bit register and a 64-bit stack slot, on a 64-bit machine. The
/// number of "parts" should correspond to the number of registers used to store
/// this type according to the machine backend.
///
/// As an invariant, the `purpose` for every part must match. As a further
/// invariant, a `StructArg` part cannot appear with any other part.
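///
/// For example (an illustrative sketch; actual register assignment is
/// backend-specific), an `i128` argument on a 64-bit machine might be
/// described as:
///
/// ```plain
/// ABIArg::Slots {
///     slots: [
///         ABIArgSlot::Reg { reg: x0, ty: I64, extension: ArgumentExtension::None },
///         ABIArgSlot::Reg { reg: x1, ty: I64, extension: ArgumentExtension::None },
///     ],
///     purpose: ArgumentPurpose::Normal,
/// }
/// ```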
#[derive(Clone, Debug)]
pub enum ABIArg {
    /// Storage slots (registers or stack locations) for each part of the
    /// argument value. The number of slots must equal the number of register
    /// parts used to store a value of this type.
    Slots {
        /// Slots, one per register part.
        slots: ABIArgSlotVec,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Structure argument. We reserve stack space for it, but the CLIF-level
    /// semantics are a little weird: the value passed to the call instruction,
    /// and received in the corresponding block param, is a *pointer*. On the
    /// caller side, we memcpy the data from the passed-in pointer to the stack
    /// area; on the callee side, we compute a pointer to this stack area and
    /// provide that as the argument's value.
    StructArg {
        /// Register or stack slot holding a pointer to the buffer as passed
        /// by the caller to the callee. If None, the ABI defines the buffer
        /// to reside at a well-known location (i.e. at `offset` below).
        pointer: Option<ABIArgSlot>,
        /// Offset of this arg relative to base of stack args.
        offset: i64,
        /// Size of this arg on the stack.
        size: u64,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
    /// Implicit argument. Similar to a StructArg, except that we have the
    /// target type, not a pointer type, at the CLIF-level. This argument is
    /// still being passed via reference implicitly.
    ImplicitPtrArg {
        /// Register or stack slot holding a pointer to the buffer.
        pointer: ABIArgSlot,
        /// Offset of the argument buffer.
        offset: i64,
        /// Type of the implicit argument.
        ty: Type,
        /// Purpose of this arg.
        purpose: ir::ArgumentPurpose,
    },
}

impl ABIArg {
    /// Create an ABIArg from one register.
    pub fn reg(
        reg: RealReg,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Reg { reg, ty, extension }],
            purpose,
        }
    }

    /// Create an ABIArg from one stack slot.
    pub fn stack(
        offset: i64,
        ty: ir::Type,
        extension: ir::ArgumentExtension,
        purpose: ir::ArgumentPurpose,
    ) -> ABIArg {
        ABIArg::Slots {
            slots: smallvec![ABIArgSlot::Stack {
                offset,
                ty,
                extension,
            }],
            purpose,
        }
    }
}

/// Are we computing information about arguments or return values? Much of the
/// handling is factored out into common routines; this enum allows us to
/// distinguish which case we're handling.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArgsOrRets {
    /// Arguments.
    Args,
    /// Return values.
    Rets,
}

/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
pub enum StackAMode {
    /// Offset into the current frame's argument area.
    IncomingArg(i64, u32),
    /// Offset within the stack slots in the current frame.
    Slot(i64),
    /// Offset into the callee frame's argument area.
    OutgoingArg(i64),
}

/// Trait implemented by machine-specific backend to represent ISA flags.
pub trait IsaFlags: Clone {
    /// Get a flag indicating whether forward-edge CFI is enabled.
    fn is_forward_edge_cfi_enabled(&self) -> bool {
        false
    }
}

/// Used as an out-parameter to accumulate a sequence of `ABIArg`s in
/// `ABIMachineSpec::compute_arg_locs`. Wraps the shared allocation for all
/// `ABIArg`s in `SigSet` and exposes just the args for the current
/// `compute_arg_locs` call.
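///
/// A minimal usage sketch, as seen from within a backend's
/// `compute_arg_locs` implementation (the register name and other details
/// here are placeholders):
///
/// ```plain
/// fn compute_arg_locs(..., mut args: ArgsAccumulator) -> ... {
///     // Formal parameters first, in order...
///     args.push(ABIArg::reg(x0, I64, ext, purpose));
///     // ...then any synthetic args, e.g. a return-area pointer, last.
///     args.push_non_formal(ret_area_ptr_arg);
/// }
/// ```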
pub struct ArgsAccumulator<'a> {
    sig_set_abi_args: &'a mut Vec<ABIArg>,
    start: usize,
    non_formal_flag: bool,
}

impl<'a> ArgsAccumulator<'a> {
    fn new(sig_set_abi_args: &'a mut Vec<ABIArg>) -> Self {
        let start = sig_set_abi_args.len();
        ArgsAccumulator {
            sig_set_abi_args,
            start,
            non_formal_flag: false,
        }
    }

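    /// Push an `ABIArg` for a formal parameter. Must not be called after any
    /// non-formal arg has been pushed.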
    #[inline]
    pub fn push(&mut self, arg: ABIArg) {
        debug_assert!(!self.non_formal_flag);
        self.sig_set_abi_args.push(arg)
    }

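    /// Push an `ABIArg` for a synthetic, non-formal argument (e.g. a
    /// return-area pointer); these must come after all formal args.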
    #[inline]
    pub fn push_non_formal(&mut self, arg: ABIArg) {
        self.non_formal_flag = true;
        self.sig_set_abi_args.push(arg)
    }

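    /// The args pushed so far by this `compute_arg_locs` call.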
    #[inline]
    pub fn args(&self) -> &[ABIArg] {
        &self.sig_set_abi_args[self.start..]
    }

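    /// Mutable view of the args pushed so far by this `compute_arg_locs` call.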
    #[inline]
    pub fn args_mut(&mut self) -> &mut [ABIArg] {
        &mut self.sig_set_abi_args[self.start..]
    }
}

/// Trait implemented by machine-specific backend to provide information about
/// register assignments and to allow generating the specific instructions for
/// stack loads/saves, prologues/epilogues, etc.
pub trait ABIMachineSpec {
    /// The instruction type.
    type I: VCodeInst;

    /// The ISA flags type.
    type F: IsaFlags;

    /// Returns the number of bits in a word, i.e., 32 or 64 for a 32- or
    /// 64-bit architecture.
    fn word_bits() -> u32;

    /// Returns the number of bytes in a word.
    fn word_bytes() -> u32 {
        Self::word_bits() / 8
    }

    /// Returns the word-size integer type.
    fn word_type() -> Type {
        match Self::word_bits() {
            32 => I32,
            64 => I64,
            _ => unreachable!(),
        }
    }

    /// Returns the word register class.
    fn word_reg_class() -> RegClass {
        RegClass::Int
    }

    /// Returns the required stack alignment in bytes.
    fn stack_align(call_conv: isa::CallConv) -> u32;

    /// Process a list of parameters or return values and allocate them to registers
    /// and stack slots.
    ///
    /// The argument locations should be pushed onto the given `ArgsAccumulator`
    /// in order. Any extra arguments added (such as return area pointers)
    /// should come at the end of the list so that the first N lowered
    /// parameters align with the N clif parameters.
    ///
    /// Returns the stack space used (rounded up as alignment requires), and,
    /// if `add_ret_area_ptr` was passed, the index of the extra synthetic arg
    /// that was added.
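    ///
    /// For example (hypothetical values), a result of `(32, Some(4))` would
    /// mean that 32 bytes of stack space are used and that the synthetic
    /// return-area pointer was added at index 4 of the lowered argument list.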
    fn compute_arg_locs(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        params: &[ir::AbiParam],
        args_or_rets: ArgsOrRets,
        add_ret_area_ptr: bool,
        args: ArgsAccumulator,
    ) -> CodegenResult<(u32, Option<usize>)>;

    /// Generate a load from the stack.
    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;

    /// Generate a store to the stack.
    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;

    /// Generate an integer-extend operation.
    fn gen_extend(
        to_reg: Writable<Reg>,
        from_reg: Reg,
        is_signed: bool,
        from_bits: u8,
        to_bits: u8,
    ) -> Self::I;

    /// Generate an "args" pseudo-instruction to capture input args in
    /// registers.
    fn gen_args(args: Vec<ArgPair>) -> Self::I;

    /// Generate a "rets" pseudo-instruction that moves vregs to return
    /// registers.
    fn gen_rets(rets: Vec<RetPair>) -> Self::I;

    /// Generate an add-with-immediate. Note that even if this uses a scratch
    /// register, it must satisfy two requirements:
    ///
    /// - The add-imm sequence must only clobber caller-save registers that are
    ///   not used for arguments, because it will be placed in the prologue
    ///   before the clobbered callee-save registers are saved.
    ///
    /// - The add-imm sequence must work correctly when `from_reg` and/or
    ///   `into_reg` are the register returned by `get_stacklimit_reg()`.
    fn gen_add_imm(
        call_conv: isa::CallConv,
        into_reg: Writable<Reg>,
        from_reg: Reg,
        imm: u32,
    ) -> SmallInstVec<Self::I>;

    /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
    /// the stack pointer is less than the given limit register (assuming the
    /// stack grows downward).
    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Self::I>;

    /// Generate an instruction to compute an address of a stack slot (FP- or
    /// SP-based offset).
    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>) -> Self::I;

    /// Get a fixed register to use to compute a stack limit. This is needed for
    /// certain sequences generated after the register allocator has already
    /// run. This must satisfy two requirements:
    ///
    /// - It must be a caller-save register that is not used for arguments,
    ///   because it will be clobbered in the prologue before the clobbered
    ///   callee-save registers are saved.
    ///
    /// - It must be safe to pass as an argument and/or destination to
    ///   `gen_add_imm()`. This is relevant when an addition with a large
    ///   immediate needs its own temporary; it cannot use the same fixed
    ///   temporary as this one.
    fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg;

    /// Generate a load from the given [base+offset] address.
    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;

    /// Generate a store to the given [base+offset] address.
    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;

    /// Adjust the stack pointer up or down.
    fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec<Self::I>;

    /// Compute a FrameLayout structure containing a sorted list of all clobbered
    /// registers that are callee-saved according to the ABI, as well as the sizes
    /// of all parts of the stack frame. The result is used to emit the prologue
    /// and epilogue routines.
    fn compute_frame_layout(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        sig: &Signature,
        regs: &[Writable<RealReg>],
        is_leaf: bool,
        incoming_args_size: u32,
        tail_args_size: u32,
        fixed_frame_storage_size: u32,
        outgoing_args_size: u32,
    ) -> FrameLayout;

    /// Generate the usual frame-setup sequence for this architecture: e.g.,
    /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
    /// AArch64.
    fn gen_prologue_frame_setup(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate the usual frame-restore sequence for this architecture.
    fn gen_epilogue_frame_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a return instruction.
    fn gen_return(
        call_conv: isa::CallConv,
        isa_flags: &Self::F,
        frame_layout: &FrameLayout,
    ) -> SmallInstVec<Self::I>;

    /// Generate a probestack call.
    fn gen_probestack(insts: &mut SmallInstVec<Self::I>, frame_size: u32);

    /// Generate an inline stack probe.
    fn gen_inline_probestack(
        insts: &mut SmallInstVec<Self::I>,
        call_conv: isa::CallConv,
        frame_size: u32,
        guard_size: u32,
    );

    /// Generate a clobber-save sequence. The implementation here should return
    /// a sequence of instructions that "push" or otherwise save to the stack all
    /// registers written/modified by the function body that are callee-saved.
    /// The sequence of instructions should adjust the stack pointer downward,
    /// and should align as necessary according to ABI requirements.
    fn gen_clobber_save(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a clobber-restore sequence. This sequence should perform the
    /// opposite of the clobber-save sequence generated above, assuming that SP
    /// going into the sequence is at the same point that it was left when the
    /// clobber-save sequence finished.
    fn gen_clobber_restore(
        call_conv: isa::CallConv,
        flags: &settings::Flags,
        frame_layout: &FrameLayout,
    ) -> SmallVec<[Self::I; 16]>;

    /// Generate a call instruction/sequence. This method is provided one
    /// temporary register to use to synthesize the called address, if needed.
    fn gen_call(dest: &CallDest, tmp: Writable<Reg>, info: CallInfo<()>) -> SmallVec<[Self::I; 2]>;

    /// Generate a memcpy invocation. Used to set up struct
    /// args. Takes `src`, `dst` as read-only inputs and passes a temporary
    /// allocator.
    fn gen_memcpy<F: FnMut(Type) -> Writable<Reg>>(
        call_conv: isa::CallConv,
        dst: Reg,
        src: Reg,
        size: usize,
        alloc_tmp: F,
    ) -> SmallVec<[Self::I; 8]>;

    /// Get the number of spillslots required for the given register-class.
    fn get_number_of_spillslots_for_value(
        rc: RegClass,
        target_vector_bytes: u32,
        isa_flags: &Self::F,
    ) -> u32;

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    fn get_machine_env(flags: &settings::Flags, call_conv: isa::CallConv) -> &MachineEnv;

    /// Get all caller-save registers, that is, registers that we expect
    /// not to be saved across a call to a callee with the given ABI.
    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet;

    /// Get the needed extension mode, given the mode attached to the argument
    /// in the signature and the calling convention. The input (the attribute in
    /// the signature) specifies what extension type should be done *if* the ABI
    /// requires extension to the full register; this method's return value
    /// indicates whether the extension actually *will* be done.
    fn get_ext_mode(
        call_conv: isa::CallConv,
        specified: ir::ArgumentExtension,
    ) -> ir::ArgumentExtension;
}

/// Out-of-line data for calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct CallInfo<T> {
    /// Receiver of this call.
    pub dest: T,
    /// Register uses of this call.
    pub uses: CallArgList,
    /// Register defs of this call.
    pub defs: CallRetList,
    /// Registers clobbered by this call, as per its calling convention.
    pub clobbers: PRegSet,
    /// The calling convention of the callee.
    pub callee_conv: isa::CallConv,
    /// The calling convention of the caller.
    pub caller_conv: isa::CallConv,
    /// The number of bytes that the callee will pop from the stack for the
    /// caller, if any. (Used for popping stack arguments with the `tail`
    /// calling convention.)
    pub callee_pop_size: u32,
}

impl<T> CallInfo<T> {
    /// Creates an empty set of info with no clobbers/uses/etc. with the
    /// specified ABI.
    pub fn empty(dest: T, call_conv: isa::CallConv) -> CallInfo<T> {
        CallInfo {
            dest,
            uses: smallvec![],
            defs: smallvec![],
            clobbers: PRegSet::empty(),
            caller_conv: call_conv,
            callee_conv: call_conv,
            callee_pop_size: 0,
        }
    }

    /// Change the `T` payload on this info to `U`.
    pub fn map<U>(self, f: impl FnOnce(T) -> U) -> CallInfo<U> {
        CallInfo {
            dest: f(self.dest),
            uses: self.uses,
            defs: self.defs,
            clobbers: self.clobbers,
            caller_conv: self.caller_conv,
            callee_conv: self.callee_conv,
            callee_pop_size: self.callee_pop_size,
        }
    }
}

/// The id of an ABI signature within the `SigSet`.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Sig(u32);
cranelift_entity::entity_impl!(Sig);

impl Sig {
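    /// The previous `Sig` in the `SigSet`, if any; used to find where this
    /// signature's return-value slots begin in the shared `abi_args` vec.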
    fn prev(self) -> Option<Sig> {
        self.0.checked_sub(1).map(Sig)
    }
}

/// ABI information shared between body (callee) and caller.
#[derive(Clone, Debug)]
pub struct SigData {
    /// Currently both return values and arguments are stored in a contiguous
    /// vector in `SigSet::abi_args`:
    ///
    /// ```plain
    ///              +----------------------------------------------+
    ///              | return values                                |
    ///              | ...                                          |
    /// rets_end --> +----------------------------------------------+
    ///              | arguments                                    |
    ///              | ...                                          |
    /// args_end --> +----------------------------------------------+
    /// ```
    ///
    /// Note we only store two offsets, as rets_end == args_start and
    /// rets_start == prev.args_end.
    ///
    /// Argument location ending offset (regs or stack slots). Stack offsets are
    /// relative to SP on entry to function.
    ///
    /// This is an index into the `SigSet::abi_args`.
    args_end: u32,

    /// Return-value location ending offset. Stack offsets are relative to the
    /// return-area pointer.
    ///
    /// This is an index into the `SigSet::abi_args`.
    rets_end: u32,

    /// Space on stack used to store arguments. We're storing the size in u32 to
    /// reduce the size of the struct.
    sized_stack_arg_space: u32,

    /// Space on stack used to store return values. We're storing the size in
    /// u32 to reduce the size of the struct.
    sized_stack_ret_space: u32,

    /// Index in `args` of the stack-return-value-area argument.
    stack_ret_arg: Option<u16>,

    /// Calling convention used.
    call_conv: isa::CallConv,
}

impl SigData {
    /// Get total stack space required for arguments.
    pub fn sized_stack_arg_space(&self) -> i64 {
        self.sized_stack_arg_space.into()
    }

    /// Get total stack space required for return values.
    pub fn sized_stack_ret_space(&self) -> i64 {
        self.sized_stack_ret_space.into()
    }

    /// Get the calling convention used.
    pub fn call_conv(&self) -> isa::CallConv {
        self.call_conv
    }

    /// The index of the stack-return-value-area argument, if any.
    pub fn stack_ret_arg(&self) -> Option<u16> {
        self.stack_ret_arg
    }
}

/// A (mostly) deduplicated set of ABI signatures.
///
/// We say "mostly" because we do not dedupe between signatures interned via
/// `ir::SigRef` (direct and indirect calls; the vast majority of signatures in
/// this set) vs via `ir::Signature` (the callee itself and libcalls). Doing
/// this final bit of deduplication would require filling out the
/// `ir_signature_to_abi_sig`, which is a bunch of allocations (not just the
/// hash map itself but params and returns vecs in each signature) that we want
/// to avoid.
///
/// In general, prefer using the `ir::SigRef`-taking methods to the
/// `ir::Signature`-taking methods when you can get away with it, as they don't
/// require cloning non-copy types that will trigger heap allocations.
///
/// This type can be indexed by `Sig` to access its associated `SigData`.
pub struct SigSet {
    /// Interned `ir::Signature`s that we already have an ABI signature for.
    ir_signature_to_abi_sig: FxHashMap<ir::Signature, Sig>,

    /// Interned `ir::SigRef`s that we already have an ABI signature for.
    ir_sig_ref_to_abi_sig: SecondaryMap<ir::SigRef, Option<Sig>>,

    /// A single, shared allocation for all `ABIArg`s used by all
    /// `SigData`s. Each `SigData` references its args/rets via indices into
    /// this allocation.
    abi_args: Vec<ABIArg>,

    /// The actual ABI signatures, keyed by `Sig`.
    sigs: PrimaryMap<Sig, SigData>,
}

impl SigSet {
    /// Construct a new `SigSet`, interning all of the signatures used by the
    /// given function.
    pub fn new<M>(func: &ir::Function, flags: &settings::Flags) -> CodegenResult<Self>
    where
        M: ABIMachineSpec,
    {
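        // Heuristic pre-allocation: assume on the order of six `ABIArg`s per
        // signature to avoid repeated reallocation of the shared args vec.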
        let arg_estimate = func.dfg.signatures.len() * 6;

        let mut sigs = SigSet {
            ir_signature_to_abi_sig: FxHashMap::default(),
            ir_sig_ref_to_abi_sig: SecondaryMap::with_capacity(func.dfg.signatures.len()),
            abi_args: Vec::with_capacity(arg_estimate),
            sigs: PrimaryMap::with_capacity(1 + func.dfg.signatures.len()),
        };

        sigs.make_abi_sig_from_ir_signature::<M>(func.signature.clone(), flags)?;
        for sig_ref in func.dfg.signatures.keys() {
            sigs.make_abi_sig_from_ir_sig_ref::<M>(sig_ref, &func.dfg, flags)?;
        }

        Ok(sigs)
    }

    /// Have we already interned an ABI signature for the given `ir::Signature`?
    pub fn have_abi_sig_for_signature(&self, signature: &ir::Signature) -> bool {
        self.ir_signature_to_abi_sig.contains_key(signature)
    }

    /// Construct and intern an ABI signature for the given `ir::Signature`.
    pub fn make_abi_sig_from_ir_signature<M>(
        &mut self,
        signature: ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        // Because the `HashMap` entry API requires taking ownership of the
        // lookup key -- and we want to avoid unnecessary clones of
        // `ir::Signature`s, even at the cost of duplicate lookups -- we can't
        // have a single, get-or-create-style method for interning
        // `ir::Signature`s into ABI signatures. So at least (debug) assert that
        // we aren't creating duplicate ABI signatures for the same
        // `ir::Signature`.
        debug_assert!(!self.have_abi_sig_for_signature(&signature));

        let sig_data = self.from_func_sig::<M>(&signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_signature_to_abi_sig.insert(signature, sig);
        Ok(sig)
    }

    fn make_abi_sig_from_ir_sig_ref<M>(
        &mut self,
        sig_ref: ir::SigRef,
        dfg: &ir::DataFlowGraph,
        flags: &settings::Flags,
    ) -> CodegenResult<Sig>
    where
        M: ABIMachineSpec,
    {
        if let Some(sig) = self.ir_sig_ref_to_abi_sig[sig_ref] {
            return Ok(sig);
        }
        let signature = &dfg.signatures[sig_ref];
        let sig_data = self.from_func_sig::<M>(signature, flags)?;
        let sig = self.sigs.push(sig_data);
        self.ir_sig_ref_to_abi_sig[sig_ref] = Some(sig);
        Ok(sig)
    }

    /// Get the already-interned ABI signature id for the given `ir::SigRef`.
    pub fn abi_sig_for_sig_ref(&self, sig_ref: ir::SigRef) -> Sig {
        self.ir_sig_ref_to_abi_sig
            .get(sig_ref)
            // Should have a secondary map entry...
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `abi_sig_for_sig_ref`")
            // ...and that entry should be initialized.
            .expect("must call `make_abi_sig_from_ir_sig_ref` before `abi_sig_for_sig_ref`")
    }

    /// Get the already-interned ABI signature id for the given `ir::Signature`.
    pub fn abi_sig_for_signature(&self, signature: &ir::Signature) -> Sig {
        self.ir_signature_to_abi_sig
            .get(signature)
            .copied()
            .expect("must call `make_abi_sig_from_ir_signature` before `abi_sig_for_signature`")
    }

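    /// Compute an ABI signature from an `ir::Signature`, interning its
    /// args/rets into this `SigSet`'s shared `abi_args` storage.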
    pub fn from_func_sig<M: ABIMachineSpec>(
        &mut self,
        sig: &ir::Signature,
        flags: &settings::Flags,
    ) -> CodegenResult<SigData> {
        use std::borrow::Cow;

        let returns = if let Some(sret) = missing_struct_return(sig) {
            Cow::from_iter(std::iter::once(&sret).chain(&sig.returns).copied())
        } else {
            Cow::from(sig.returns.as_slice())
        };

        // Compute args and retvals from signature. Handle retvals first,
        // because we may need to add a return-area arg to the args.

        // NOTE: We rely on the order of insertion (rets, then args) to compute
        // the offsets in `SigSet::args()` and `SigSet::rets()`. Therefore, we
        // cannot reorder the two `compute_arg_locs` calls below.
        let (sized_stack_ret_space, _) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &returns,
            ArgsOrRets::Rets,
            /* extra ret-area ptr = */ false,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let rets_end = u32::try_from(self.abi_args.len()).unwrap();

        let need_stack_return_area = sized_stack_ret_space > 0;
        let (sized_stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
            sig.call_conv,
            flags,
            &sig.params,
            ArgsOrRets::Args,
            need_stack_return_area,
            ArgsAccumulator::new(&mut self.abi_args),
        )?;
        let args_end = u32::try_from(self.abi_args.len()).unwrap();

        trace!(
            "ABISig: sig {:?} => args end = {} rets end = {}
             arg stack = {} ret stack = {} stack_ret_arg = {:?}",
            sig,
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
        );

        let stack_ret_arg = stack_ret_arg.map(|s| u16::try_from(s).unwrap());
        Ok(SigData {
            args_end,
            rets_end,
            sized_stack_arg_space,
            sized_stack_ret_space,
            stack_ret_arg,
            call_conv: sig.call_conv,
        })
    }

    /// Get this signature's ABI arguments.
    pub fn args(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see the comments in `SigSet::from_func_sig` for how we store
        // the offsets.
        let start = usize::try_from(sig_data.rets_end).unwrap();
        let end = usize::try_from(sig_data.args_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass the implicit pointer
    /// to the return-value area on the stack, if required.
    pub fn get_ret_arg(&self, sig: Sig) -> Option<ABIArg> {
        let sig_data = &self.sigs[sig];
        if let Some(i) = sig_data.stack_ret_arg {
            Some(self.args(sig)[usize::from(i)].clone())
        } else {
            None
        }
    }

    /// Get information specifying how to pass one argument.
    pub fn get_arg(&self, sig: Sig, idx: usize) -> ABIArg {
        self.args(sig)[idx].clone()
    }

    /// Get this signature's ABI returns.
    pub fn rets(&self, sig: Sig) -> &[ABIArg] {
        let sig_data = &self.sigs[sig];
        // Please see the comments in `SigSet::from_func_sig` for how we store
        // the offsets.
        let start = usize::try_from(sig.prev().map_or(0, |prev| self.sigs[prev].args_end)).unwrap();
        let end = usize::try_from(sig_data.rets_end).unwrap();
        &self.abi_args[start..end]
    }

    /// Get information specifying how to pass one return value.
    pub fn get_ret(&self, sig: Sig, idx: usize) -> ABIArg {
        self.rets(sig)[idx].clone()
    }

    /// Get the number of arguments expected.
    pub fn num_args(&self, sig: Sig) -> usize {
        let len = self.args(sig).len();
        if self.sigs[sig].stack_ret_arg.is_some() {
            len - 1
        } else {
            len
        }
    }

    /// Get the number of return values expected.
    pub fn num_rets(&self, sig: Sig) -> usize {
        self.rets(sig).len()
    }
}

// NB: we do _not_ implement `IndexMut` because these signatures are
// deduplicated and shared!
impl std::ops::Index<Sig> for SigSet {
    type Output = SigData;

    fn index(&self, sig: Sig) -> &Self::Output {
        &self.sigs[sig]
    }
}

/// Structure describing the layout of a function's stack frame.
#[derive(Clone, Debug, Default)]
pub struct FrameLayout {
    /// N.B. The areas whose sizes are given in this structure fully
    /// cover the current function's stack frame, from high to low
    /// stack addresses in the sequence below. Each size contains
    /// any alignment padding that may be required by the ABI.

    /// Size of incoming arguments on the stack. This is not technically
    /// part of this function's frame, but code in the function will still
    /// need to access it. Depending on the ABI, we may need to set up a
    /// frame pointer to do so; we also may need to pop this area from the
    /// stack upon return.
    pub incoming_args_size: u32,

    /// The size of the incoming argument area, taking into account any
    /// potential increase in size required for tail calls present in the
    /// function. In the case that no tail calls are present, this value
    /// will be the same as [`Self::incoming_args_size`].
    pub tail_args_size: u32,

    /// Size of the "setup area", typically holding the return address
    /// and/or the saved frame pointer. This may be written either during
    /// the call itself (e.g. a pushed return address) or by code emitted
    /// from `gen_prologue_frame_setup`. In any case, after that code has
    /// completed execution, the stack pointer is expected to point to the
    /// bottom of this area. The same holds at the start of code emitted
    /// by `gen_epilogue_frame_restore`.
    pub setup_area_size: u32,

    /// Size of the area used to save callee-saved clobbered registers.
    /// This area is accessed by code emitted from `gen_clobber_save` and
    /// `gen_clobber_restore`.
    pub clobber_size: u32,

    /// Storage allocated for the fixed part of the stack frame.
    /// This contains stack slots and spill slots.
    pub fixed_frame_storage_size: u32,

    /// Stack size to be reserved for outgoing arguments, if used by
    /// the current ABI, or 0 otherwise. After `gen_clobber_save` and
    /// before `gen_clobber_restore`, the stack pointer points to the
    /// bottom of this area.
    pub outgoing_args_size: u32,

    /// Sorted list of callee-saved registers that are clobbered
    /// according to the ABI. These registers will be saved and
    /// restored by `gen_clobber_save` and `gen_clobber_restore`.
    pub clobbered_callee_saves: Vec<Writable<RealReg>>,
}
1009
1010impl FrameLayout {
1011 /// Split the clobbered callee-save registers into integer-class and
1012 /// float-class groups.
1013 ///
1014 /// This method does not currently support vector-class callee-save
1015 /// registers because no current backend has them.
1016 pub fn clobbered_callee_saves_by_class(&self) -> (&[Writable<RealReg>], &[Writable<RealReg>]) {
1017 let (ints, floats) = self.clobbered_callee_saves.split_at(
1018 self.clobbered_callee_saves
1019 .partition_point(|r| r.to_reg().class() == RegClass::Int),
1020 );
1021 debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float));
1022 (ints, floats)
1023 }
1024
1025 /// The size of FP to SP while the frame is active (not during prologue
1026 /// setup or epilogue tear down).
1027 pub fn active_size(&self) -> u32 {
1028 self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size
1029 }
1030
1031 /// Get the offset from the SP to the sized stack slots area.
1032 pub fn sp_to_sized_stack_slots(&self) -> u32 {
1033 self.outgoing_args_size
1034 }
1035}

/// ABI object for a function body.
pub struct Callee<M: ABIMachineSpec> {
    /// CLIF-level signature, possibly normalized.
    ir_sig: ir::Signature,
    /// Signature: arg and retval regs.
    sig: Sig,
    /// Defined dynamic types.
    dynamic_type_sizes: HashMap<Type, u32>,
    /// Offsets to each dynamic stackslot.
    dynamic_stackslots: PrimaryMap<DynamicStackSlot, u32>,
    /// Offsets to each sized stackslot.
    sized_stackslots: PrimaryMap<StackSlot, u32>,
    /// Total stack size of all stackslots.
    stackslots_size: u32,
    /// Stack size to be reserved for outgoing arguments.
    outgoing_args_size: u32,
    /// Initially the number of bytes originating in the caller's frame where
    /// stack arguments will live. After lowering, this number may be larger
    /// than the size expected by the function being compiled, as tail calls
    /// potentially require more space for stack arguments.
    tail_args_size: u32,
    /// Register-argument defs, to be provided to the `args`
    /// pseudo-inst, and pregs to constrain them to.
    reg_args: Vec<ArgPair>,
    /// Finalized frame layout for this function.
    frame_layout: Option<FrameLayout>,
    /// The register holding the return-area pointer, if needed.
    ret_area_ptr: Option<Reg>,
    /// Calling convention this function expects.
    call_conv: isa::CallConv,
    /// The settings controlling this function's compilation.
    flags: settings::Flags,
    /// The ISA-specific flag values controlling this function's compilation.
    isa_flags: M::F,
    /// Whether or not this function is a "leaf", meaning it calls no other
    /// functions.
    is_leaf: bool,
    /// If this function has a stack limit specified, then `Reg` is where the
    /// stack limit will be located after the instructions specified have been
    /// executed.
    ///
    /// Note that this is intended for insertion into the prologue, if
    /// present. Also note that because the instructions here execute in the
    /// prologue, this happens after legalization/register allocation/etc., so
    /// we need to be extremely careful with each instruction. The instructions
    /// are manually register-allocated and carefully only use caller-saved
    /// registers and keep nothing live after this sequence of instructions.
    stack_limit: Option<(Reg, SmallInstVec<M::I>)>,

    _mach: PhantomData<M>,
}

fn get_special_purpose_param_register(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    purpose: ir::ArgumentPurpose,
) -> Option<Reg> {
    let idx = f.signature.special_param_index(purpose)?;
    match &sigs.args(sig)[idx] {
        &ABIArg::Slots { ref slots, .. } => match &slots[0] {
            &ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
            _ => None,
        },
        _ => None,
    }
}

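/// Round `val` up to the alignment implied by `mask`, which must be
/// `alignment - 1` for a power-of-two alignment; returns `None` on overflow.
/// For example, `checked_round_up(13, 7)` yields `Some(16)`.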
fn checked_round_up(val: u32, mask: u32) -> Option<u32> {
    Some(val.checked_add(mask)? & !mask)
}

impl<M: ABIMachineSpec> Callee<M> {
    /// Create a new body ABI instance.
    pub fn new(
        f: &ir::Function,
        isa: &dyn TargetIsa,
        isa_flags: &M::F,
        sigs: &SigSet,
    ) -> CodegenResult<Self> {
        trace!("ABI: func signature {:?}", f.signature);

        let flags = isa.flags().clone();
        let sig = sigs.abi_sig_for_signature(&f.signature);

        let call_conv = f.signature.call_conv;
        // Only these calling conventions are supported.
        debug_assert!(
            call_conv == isa::CallConv::SystemV
                || call_conv == isa::CallConv::Tail
                || call_conv == isa::CallConv::Fast
                || call_conv == isa::CallConv::Cold
                || call_conv.extends_windows_fastcall()
                || call_conv == isa::CallConv::WasmtimeSystemV
                || call_conv == isa::CallConv::AppleAarch64
                || call_conv == isa::CallConv::Winch,
            "Unsupported calling convention: {call_conv:?}"
        );

        // Compute sized stackslot locations and total stackslot size.
        let mut sized_stack_offset: u32 = 0;
        let mut sized_stackslots = PrimaryMap::new();
        for (stackslot, data) in f.sized_stack_slots.iter() {
            let off = sized_stack_offset;
            sized_stack_offset = sized_stack_offset
                .checked_add(data.size)
                .ok_or(CodegenError::ImplLimitExceeded)?;
            // Always at least machine-word-align slots, but also
            // satisfy the user's requested alignment.
            debug_assert!(data.align_shift < 32);
            let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
            let mask = align - 1;
            sized_stack_offset = checked_round_up(sized_stack_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;
            debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
            sized_stackslots.push(off);
        }

        // Compute dynamic stackslot locations and total stackslot size.
        let mut dynamic_stackslots = PrimaryMap::new();
        let mut dynamic_stack_offset: u32 = sized_stack_offset;
        for (stackslot, data) in f.dynamic_stack_slots.iter() {
            debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len());
            let off = dynamic_stack_offset;
            let ty = f.get_concrete_dynamic_ty(data.dyn_ty).ok_or_else(|| {
                CodegenError::Unsupported(format!("invalid dynamic vector type: {}", data.dyn_ty))
            })?;
            dynamic_stack_offset = dynamic_stack_offset
                .checked_add(isa.dynamic_vector_bytes(ty))
                .ok_or(CodegenError::ImplLimitExceeded)?;
            let mask = M::word_bytes() - 1;
            dynamic_stack_offset = checked_round_up(dynamic_stack_offset, mask)
                .ok_or(CodegenError::ImplLimitExceeded)?;
            dynamic_stackslots.push(off);
        }
        let stackslots_size = dynamic_stack_offset;

        let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len());
        for (dyn_ty, _data) in f.dfg.dynamic_types.iter() {
            let ty = f
                .get_concrete_dynamic_ty(dyn_ty)
                .unwrap_or_else(|| panic!("invalid dynamic vector type: {dyn_ty}"));
            let size = isa.dynamic_vector_bytes(ty);
            dynamic_type_sizes.insert(ty, size);
        }

        // Figure out what instructions, if any, will be needed to check the
        // stack limit. This can either be specified as a special-purpose
        // argument or as a global value which often calculates the stack limit
        // from the arguments.
        let stack_limit = f
            .stack_limit
            .map(|gv| gen_stack_limit::<M>(f, sigs, sig, gv));

        let tail_args_size = sigs[sig].sized_stack_arg_space;

        Ok(Self {
            ir_sig: ensure_struct_return_ptr_is_returned(&f.signature),
            sig,
            dynamic_stackslots,
            dynamic_type_sizes,
            sized_stackslots,
            stackslots_size,
            outgoing_args_size: 0,
            tail_args_size,
            reg_args: vec![],
            frame_layout: None,
            ret_area_ptr: None,
            call_conv,
            flags,
            isa_flags: isa_flags.clone(),
            is_leaf: f.is_leaf(),
            stack_limit,
            _mach: PhantomData,
        })
    }

    /// Inserts instructions necessary for checking the stack limit into the
    /// prologue.
    ///
    /// This function will generate instructions necessary to perform a stack
    /// check at the header of a function. The stack check is intended to trap
    /// if the stack pointer goes below a particular threshold, preventing stack
    /// overflow in wasm or other code. The `stack_limit` argument here is the
    /// register which holds the threshold below which we're supposed to trap.
    /// The function being compiled is known to allocate `stack_size` bytes and
    /// we'll push instructions onto `insts`.
    ///
    /// Note that the instructions generated here are special because this is
    /// happening so late in the pipeline (e.g. after register allocation). This
    /// means that we need to do manual register allocation here and also be
    /// careful to not clobber any callee-saved or argument registers. For now
    /// this routine makes do with the `spilltmp_reg` as one temporary
    /// register and a second caller-saved register, `tmp2`. This should be
    /// fine for us, since no spills should happen in this sequence of
    /// instructions, so our registers won't get accidentally clobbered.
    ///
    /// No values can be live after the prologue, but in this case that's ok
    /// because we just need to perform a stack check before progressing with
    /// the rest of the function.
    fn insert_stack_check(
        &self,
        stack_limit: Reg,
        stack_size: u32,
        insts: &mut SmallInstVec<M::I>,
    ) {
        // With no explicit stack allocated we can just emit the simple check of
        // the stack registers against the stack limit register, and trap if
        // it's out of bounds.
        if stack_size == 0 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
            return;
        }

        // Note that the 32k stack size here is pretty special. See the
        // documentation in x86/abi.rs for why this is here. The general idea is
        // that we're protecting against overflow in the addition that happens
        // below.
        if stack_size >= 32 * 1024 {
            insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
        }

        // Add the `stack_size` to `stack_limit`, placing the result in
        // `scratch`.
        //
        // Note though that `stack_limit`'s register may be the same as
        // `scratch`. If our stack size doesn't fit into an immediate this
        // means we need a second scratch register for loading the stack size
        // into a register.
        let scratch = Writable::from_reg(M::get_stacklimit_reg(self.call_conv));
        insts.extend(M::gen_add_imm(self.call_conv, scratch, stack_limit, stack_size).into_iter());
        insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
    }
}

/// Generates the instructions necessary for the `gv` to be materialized into a
/// register.
///
/// This function will return a register that will contain the result of
/// evaluating `gv`. It will also return any instructions necessary to calculate
/// the value of the register.
///
/// Note that global values are typically lowered to instructions via the
/// standard legalization pass. Unfortunately though prologue generation happens
/// so late in the pipeline that we can't use these legalization passes to
/// generate the instructions for `gv`. As a result we duplicate some lowering
/// of `gv` here and support only some global values. This is similar to what
/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
/// in the future too!
///
/// Also note that this function will make use of `writable_spilltmp_reg()` as a
/// temporary register to store values in if necessary. Currently, after we
/// write to this register there are guaranteed to be no spilled values between
/// the write and its use, because we're not participating in register
/// allocation anyway!
fn gen_stack_limit<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
) -> (Reg, SmallInstVec<M::I>) {
    let mut insts = smallvec![];
    let reg = generate_gv::<M>(f, sigs, sig, gv, &mut insts);
    (reg, insts)
}

fn generate_gv<M: ABIMachineSpec>(
    f: &ir::Function,
    sigs: &SigSet,
    sig: Sig,
    gv: ir::GlobalValue,
    insts: &mut SmallInstVec<M::I>,
) -> Reg {
    match f.global_values[gv] {
        // Return the direct register the vmcontext is in.
        ir::GlobalValueData::VMContext => {
            get_special_purpose_param_register(f, sigs, sig, ir::ArgumentPurpose::VMContext)
                .expect("no vmcontext parameter found")
        }
        // Load our base value into a register, then load from that register
        // into a temporary register.
        ir::GlobalValueData::Load {
            base,
            offset,
            global_type: _,
            flags: _,
        } => {
            let base = generate_gv::<M>(f, sigs, sig, base, insts);
            let into_reg = Writable::from_reg(M::get_stacklimit_reg(f.stencil.signature.call_conv));
            insts.push(M::gen_load_base_offset(
                into_reg,
                base,
                offset.into(),
                M::word_type(),
            ));
            into_reg.to_reg()
        }
        ref other => panic!("global value for stack limit not supported: {other}"),
    }
}

/// If the signature needs to be legalized, then return the struct-return
/// parameter that should be prepended to its returns. Otherwise, return `None`.
fn missing_struct_return(sig: &ir::Signature) -> Option<ir::AbiParam> {
    let struct_ret_index = sig.special_param_index(ArgumentPurpose::StructReturn)?;
    if !sig.uses_special_return(ArgumentPurpose::StructReturn) {
        return Some(sig.params[struct_ret_index]);
    }

    None
}

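/// Legalize a signature, if needed, so that the struct-return pointer
/// parameter also appears among its returns.
///
/// For example (an illustrative sketch), a signature like
/// `fn(sret ptr, i32) -> ()` becomes `fn(sret ptr, i32) -> (sret ptr)`, so
/// that the callee also returns the struct-return pointer it was given.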
fn ensure_struct_return_ptr_is_returned(sig: &ir::Signature) -> ir::Signature {
    let mut sig = sig.clone();
    if let Some(sret) = missing_struct_return(&sig) {
        sig.returns.insert(0, sret);
    }
    sig
}

/// ### Pre-Regalloc Functions
///
/// These methods of `Callee` may only be called before regalloc.
impl<M: ABIMachineSpec> Callee<M> {
    /// Access the (possibly legalized) signature.
    pub fn signature(&self) -> &ir::Signature {
        debug_assert!(
            missing_struct_return(&self.ir_sig).is_none(),
            "`Callee::ir_sig` is always legalized"
        );
        &self.ir_sig
    }

    /// Initialize. This is called after the Callee is constructed because it
    /// may allocate a temp vreg, which can only be allocated once the lowering
    /// context exists.
    pub fn init_retval_area(
        &mut self,
        sigs: &SigSet,
        vregs: &mut VRegAllocator<M::I>,
    ) -> CodegenResult<()> {
        if sigs[self.sig].stack_ret_arg.is_some() {
            let ret_area_ptr = vregs.alloc(M::word_type())?;
            self.ret_area_ptr = Some(ret_area_ptr.only_reg().unwrap());
        }
        Ok(())
    }

    /// Get the return area pointer register, if any.
    pub fn ret_area_ptr(&self) -> Option<Reg> {
        self.ret_area_ptr
    }

    /// Accumulate outgoing arguments.
    ///
    /// This ensures that at least `size` bytes are allocated in the prologue to
    /// be available for use in function calls to hold arguments and/or return
    /// values. If this function is called multiple times, the maximum of all
    /// `size` values will be available.
    pub fn accumulate_outgoing_args_size(&mut self, size: u32) {
        if size > self.outgoing_args_size {
            self.outgoing_args_size = size;
        }
    }

    /// Accumulate the incoming argument area size requirements for a tail call,
    /// as it could be larger than the incoming arguments of the function
    /// currently being compiled.
    pub fn accumulate_tail_args_size(&mut self, size: u32) {
        if size > self.tail_args_size {
            self.tail_args_size = size;
        }
    }

    /// Whether forward-edge CFI is enabled for this function.
    pub fn is_forward_edge_cfi_enabled(&self) -> bool {
        self.isa_flags.is_forward_edge_cfi_enabled()
    }

    /// Get the calling convention implemented by this ABI object.
    pub fn call_conv(&self, sigs: &SigSet) -> isa::CallConv {
        sigs[self.sig].call_conv
    }

    /// Get the ABI-dependent MachineEnv for managing register allocation.
    pub fn machine_env(&self, sigs: &SigSet) -> &MachineEnv {
        M::get_machine_env(&self.flags, self.call_conv(sigs))
    }

    /// The offsets of all sized stack slots (not spill slots) for debuginfo purposes.
    pub fn sized_stackslot_offsets(&self) -> &PrimaryMap<StackSlot, u32> {
        &self.sized_stackslots
    }

    /// The offsets of all dynamic stack slots (not spill slots) for debuginfo purposes.
    pub fn dynamic_stackslot_offsets(&self) -> &PrimaryMap<DynamicStackSlot, u32> {
        &self.dynamic_stackslots
    }

    /// Generate the instructions which copy an argument to its destination
    /// register(s).
    pub fn gen_copy_arg_to_regs(
        &mut self,
        sigs: &SigSet,
        idx: usize,
        into_regs: ValueRegs<Writable<Reg>>,
        vregs: &mut VRegAllocator<M::I>,
    ) -> SmallInstVec<M::I> {
        let mut insts = smallvec![];
        let mut copy_arg_slot_to_reg = |slot: &ABIArgSlot, into_reg: &Writable<Reg>| {
            match slot {
                &ABIArgSlot::Reg { reg, .. } => {
                    // Add a preg -> def pair to the eventual `args`
                    // instruction. Extension mode doesn't matter
                    // (we're copying out, not in; we ignore high bits
                    // by convention).
                    let arg = ArgPair {
                        vreg: *into_reg,
                        preg: reg.into(),
                    };
                    self.reg_args.push(arg);
                }
                &ABIArgSlot::Stack {
                    offset,
                    ty,
                    extension,
                    ..
                } => {
                    // However, we have to respect the extension mode for stack
                    // slots, or else we grab the wrong bytes on big-endian.
                    let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
                    let ty =
                        if ext != ArgumentExtension::None && M::word_bits() > ty_bits(ty) as u32 {
                            M::word_type()
                        } else {
                            ty
                        };
                    insts.push(M::gen_load_stack(
                        StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                        *into_reg,
                        ty,
                    ));
                }
            }
        };

        match &sigs.args(self.sig)[idx] {
            &ABIArg::Slots { ref slots, .. } => {
                assert_eq!(into_regs.len(), slots.len());
                for (slot, into_reg) in slots.iter().zip(into_regs.regs().iter()) {
                    copy_arg_slot_to_reg(&slot, &into_reg);
                }
            }
            &ABIArg::StructArg {
                pointer, offset, ..
            } => {
                let into_reg = into_regs.only_reg().unwrap();
                if let Some(slot) = pointer {
                    // Buffer address is passed in a register or stack slot.
                    copy_arg_slot_to_reg(&slot, &into_reg);
                } else {
                    // Buffer address is implicitly defined by the ABI.
                    insts.push(M::gen_get_stack_addr(
                        StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                        into_reg,
                    ));
                }
            }
            &ABIArg::ImplicitPtrArg { pointer, ty, .. } => {
                let into_reg = into_regs.only_reg().unwrap();
                // We need to dereference the pointer.
                let base = match &pointer {
                    &ABIArgSlot::Reg { reg, ty, .. } => {
                        let tmp = vregs.alloc_with_deferred_error(ty).only_reg().unwrap();
                        self.reg_args.push(ArgPair {
                            vreg: Writable::from_reg(tmp),
                            preg: reg.into(),
                        });
                        tmp
                    }
                    &ABIArgSlot::Stack { offset, ty, .. } => {
                        let addr_reg = writable_value_regs(vregs.alloc_with_deferred_error(ty))
                            .only_reg()
                            .unwrap();
                        insts.push(M::gen_load_stack(
                            StackAMode::IncomingArg(offset, sigs[self.sig].sized_stack_arg_space),
                            addr_reg,
                            ty,
                        ));
                        addr_reg.to_reg()
                    }
                };
                insts.push(M::gen_load_base_offset(into_reg, base, 0, ty));
            }
        }
        insts
    }
1531
1532 /// Generate instructions which copy source registers to a return value slot.
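///
/// A sketch of use when lowering a return (the `callee`, `emit`, and
/// `rets` bindings are assumed from the surrounding lowering code):
///
/// ```ignore
/// let (pairs, insts) = callee.gen_copy_regs_to_retval(&sigs, i, regs, &mut vregs);
/// for inst in insts {
///     emit(inst); // extend/store instructions, if any
/// }
/// rets.extend(pairs); // later passed to `gen_rets` below
/// ```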
1533 pub fn gen_copy_regs_to_retval(
1534 &self,
1535 sigs: &SigSet,
1536 idx: usize,
1537 from_regs: ValueRegs<Reg>,
1538 vregs: &mut VRegAllocator<M::I>,
1539 ) -> (SmallVec<[RetPair; 2]>, SmallInstVec<M::I>) {
1540 let mut reg_pairs = smallvec![];
1541 let mut ret = smallvec![];
1542 let word_bits = M::word_bits() as u8;
1543 match &sigs.rets(self.sig)[idx] {
1544 &ABIArg::Slots { ref slots, .. } => {
1545 assert_eq!(from_regs.len(), slots.len());
1546 for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
1547 match slot {
1548 &ABIArgSlot::Reg {
1549 reg, ty, extension, ..
1550 } => {
1551 let from_bits = ty_bits(ty) as u8;
1552 let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
1553 let vreg = match (ext, from_bits) {
1554 (ir::ArgumentExtension::Uext, n)
1555 | (ir::ArgumentExtension::Sext, n)
1556 if n < word_bits =>
1557 {
1558 let signed = ext == ir::ArgumentExtension::Sext;
1559 let dst =
1560 writable_value_regs(vregs.alloc_with_deferred_error(ty))
1561 .only_reg()
1562 .unwrap();
1563 ret.push(M::gen_extend(
1564 dst, from_reg, signed, from_bits,
1565 /* to_bits = */ word_bits,
1566 ));
1567 dst.to_reg()
1568 }
1569 _ => {
1570 // No move needed, regalloc2 will emit it using the constraint
1571 // added by the RetPair.
1572 from_reg
1573 }
1574 };
1575 reg_pairs.push(RetPair {
1576 vreg,
1577 preg: Reg::from(reg),
1578 });
1579 }
1580 &ABIArgSlot::Stack {
1581 offset,
1582 ty,
1583 extension,
1584 ..
1585 } => {
1586 let mut ty = ty;
1587 let from_bits = ty_bits(ty) as u8;
1588 // A machine ABI implementation should ensure that stack frames
1589 // have "reasonable" size. All current ABIs for machinst
1590 // backends (aarch64 and x64) enforce a 128MB limit.
1591 let off = i32::try_from(offset).expect(
1592 "Argument stack offset greater than 2GB; should hit impl limit first",
1593 );
1594 let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
1595 // If extension is needed, extend into a fresh temporary and store that.
1596 match (ext, from_bits) {
1597 (ir::ArgumentExtension::Uext, n)
1598 | (ir::ArgumentExtension::Sext, n)
1599 if n < word_bits =>
1600 {
1601 assert_eq!(M::word_reg_class(), from_reg.class());
1602 let signed = ext == ir::ArgumentExtension::Sext;
1603 let dst =
1604 writable_value_regs(vregs.alloc_with_deferred_error(ty))
1605 .only_reg()
1606 .unwrap();
1607 ret.push(M::gen_extend(
1608 dst, from_reg, signed, from_bits,
1609 /* to_bits = */ word_bits,
1610 ));
1611 // Store the extended version.
1612 ty = M::word_type();
1613 }
1614 _ => {}
1615 };
1616 ret.push(M::gen_store_base_offset(
1617 self.ret_area_ptr.unwrap(),
1618 off,
1619 from_reg,
1620 ty,
1621 ));
1622 }
1623 }
1624 }
1625 }
1626 ABIArg::StructArg { .. } => {
1627 panic!("StructArg in return position is unsupported");
1628 }
1629 ABIArg::ImplicitPtrArg { .. } => {
1630 panic!("ImplicitPtrArg in return position is unsupported");
1631 }
1632 }
1633 (reg_pairs, ret)
1634 }
1635
1636 /// Generate any setup instruction needed to save values to the
1637 /// return-value area. This is usually used when there are multiple return
1638 /// values or an otherwise large return value that must be passed on the
1639 /// stack; typically the ABI specifies an extra hidden argument that is a
1640 /// pointer to that memory.
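///
/// A sketch of the expected call during lowering setup (bindings
/// assumed from the surrounding context):
///
/// ```ignore
/// if let Some(inst) = callee.gen_retval_area_setup(&sigs, &mut vregs) {
///     emit(inst); // copies the hidden ret-area pointer into its vreg
/// }
/// ```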
1641 pub fn gen_retval_area_setup(
1642 &mut self,
1643 sigs: &SigSet,
1644 vregs: &mut VRegAllocator<M::I>,
1645 ) -> Option<M::I> {
1646 if let Some(i) = sigs[self.sig].stack_ret_arg {
1647 let ret_area_ptr = Writable::from_reg(self.ret_area_ptr.unwrap());
1648 let insts =
1649 self.gen_copy_arg_to_regs(sigs, i.into(), ValueRegs::one(ret_area_ptr), vregs);
1650 insts.into_iter().next().map(|inst| {
1651 trace!(
1652 "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
1653 inst,
1654 ret_area_ptr.to_reg()
1655 );
1656 inst
1657 })
1658 } else {
1659 trace!("gen_retval_area_setup: not needed");
1660 None
1661 }
1662 }
1663
1664 /// Generate a return instruction.
1665 pub fn gen_rets(&self, rets: Vec<RetPair>) -> M::I {
1666 M::gen_rets(rets)
1667 }
1668
1669 /// Produce an instruction that computes a sized stackslot address.
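///
/// The address computed is `slot_base + sized_stackslots[slot] + offset`;
/// for example, a slot whose area offset is 16, taken with `offset == 4`,
/// yields `StackAMode::Slot(20)`.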
1670 pub fn sized_stackslot_addr(
1671 &self,
1672 slot: StackSlot,
1673 offset: u32,
1674 into_reg: Writable<Reg>,
1675 ) -> M::I {
1676 // Offset from beginning of stackslot area.
1677 let stack_off = self.sized_stackslots[slot] as i64;
1678 let sp_off: i64 = stack_off + (offset as i64);
1679 M::gen_get_stack_addr(StackAMode::Slot(sp_off), into_reg)
1680 }
1681
1682 /// Produce an instruction that computes a dynamic stackslot address.
1683 pub fn dynamic_stackslot_addr(&self, slot: DynamicStackSlot, into_reg: Writable<Reg>) -> M::I {
1684 let stack_off = self.dynamic_stackslots[slot] as i64;
1685 M::gen_get_stack_addr(StackAMode::Slot(stack_off), into_reg)
1686 }
1687
1688 /// Get an `args` pseudo-inst, if any, that should appear at the
1689 /// very top of the function body prior to regalloc.
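///
/// A sketch of intended use when finalizing the lowered body (the
/// `vcode` binding and its `push_front` method are hypothetical):
///
/// ```ignore
/// if let Some(args_inst) = callee.take_args() {
///     vcode.push_front(args_inst); // must precede all other insts
/// }
/// ```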
1690 pub fn take_args(&mut self) -> Option<M::I> {
1691 if !self.reg_args.is_empty() {
1692 // Very first instruction is an `args` pseudo-inst that
1693 // establishes live-ranges for in-register arguments and
1694 // constrains them at the start of the function to the
1695 // locations defined by the ABI.
1696 Some(M::gen_args(std::mem::take(&mut self.reg_args)))
1697 } else {
1698 None
1699 }
1700 }
1701}
1702
1703/// ### Post-Regalloc Functions
1704///
1705/// These methods of `Callee` may only be called after
1706/// regalloc.
1707impl<M: ABIMachineSpec> Callee<M> {
1708 /// Compute the final frame layout, post-regalloc.
1709 ///
1710 /// This must be called before gen_prologue or gen_epilogue.
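///
/// A sketch of the expected post-regalloc sequence (the regalloc-result
/// bindings are illustrative):
///
/// ```ignore
/// callee.compute_frame_layout(&sigs, num_spillslots, clobbered_regs);
/// let prologue_insts = callee.gen_prologue();
/// let epilogue_insts = callee.gen_epilogue();
/// ```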
1711 pub fn compute_frame_layout(
1712 &mut self,
1713 sigs: &SigSet,
1714 spillslots: usize,
1715 clobbered: Vec<Writable<RealReg>>,
1716 ) {
1717 let bytes = M::word_bytes();
1718 let total_stacksize = self.stackslots_size + bytes * spillslots as u32;
1719 let mask = M::stack_align(self.call_conv) - 1;
1720 let total_stacksize = (total_stacksize + mask) & !mask; // Round up to the ABI-required stack alignment.
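// For example, with a 16-byte alignment requirement, `mask == 0xf` and a
// 20-byte raw frame rounds up to 32 bytes: (20 + 15) & !15 == 32.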
1721 self.frame_layout = Some(M::compute_frame_layout(
1722 self.call_conv,
1723 &self.flags,
1724 self.signature(),
1725 &clobbered,
1726 self.is_leaf,
1727 self.stack_args_size(sigs),
1728 self.tail_args_size,
1729 total_stacksize,
1730 self.outgoing_args_size,
1731 ));
1732 }
1733
1734 /// Generate a prologue, post-regalloc.
1735 ///
1736 /// This should include any stack frame or other setup necessary to use the
1737 /// other methods (argument copies, return-value stores, and spillslot accesses).
1738 pub fn gen_prologue(&self) -> SmallInstVec<M::I> {
1739 let frame_layout = self.frame_layout();
1740 let mut insts = smallvec![];
1741
1742 // Set up frame.
1743 insts.extend(M::gen_prologue_frame_setup(
1744 self.call_conv,
1745 &self.flags,
1746 &self.isa_flags,
1747 &frame_layout,
1748 ));
1749
1750 // The stack limit check needs to cover all the stack adjustments we
1751 // might make, up to the next stack limit check in any function we
1752 // call. Since this happens after frame setup, the current function's
1753 // setup area needs to be accounted for in the caller's stack limit
1754 // check, but we need to account for any setup area that our callees
1755 // might need. Note that s390x may also use the outgoing args area for
1756 // backtrace support even in leaf functions, so that should be accounted
1757 // for unconditionally.
1758 let total_stacksize = (frame_layout.tail_args_size - frame_layout.incoming_args_size)
1759 + frame_layout.clobber_size
1760 + frame_layout.fixed_frame_storage_size
1761 + frame_layout.outgoing_args_size
1762 + if self.is_leaf {
1763 0
1764 } else {
1765 frame_layout.setup_area_size
1766 };
1767
1768 // Leaf functions with zero stack don't need a stack check even if one is
1769 // specified; otherwise, always insert the stack check.
1770 if total_stacksize > 0 || !self.is_leaf {
1771 if let Some((reg, stack_limit_load)) = &self.stack_limit {
1772 insts.extend(stack_limit_load.clone());
1773 self.insert_stack_check(*reg, total_stacksize, &mut insts);
1774 }
1775
1776 if self.flags.enable_probestack() {
1777 let guard_size = 1 << self.flags.probestack_size_log2();
1778 if total_stacksize >= guard_size {
1779 match self.flags.probestack_strategy() {
1780 ProbestackStrategy::Inline => M::gen_inline_probestack(
1781 &mut insts,
1782 self.call_conv,
1783 total_stacksize,
1784 guard_size,
1785 ),
1786 ProbestackStrategy::Outline => {
1787 M::gen_probestack(&mut insts, total_stacksize)
1788 }
1789 }
1790 }
1791 }
1792 }
1793
1794 // Save clobbered registers.
1795 insts.extend(M::gen_clobber_save(
1796 self.call_conv,
1797 &self.flags,
1798 &frame_layout,
1799 ));
1800
1801 insts
1802 }
1803
1804 /// Generate an epilogue, post-regalloc.
1805 ///
1806 /// Note that this must generate the actual return instruction (rather than
1807 /// emitting this in the lowering logic), because the epilogue code comes
1808 /// before the return and the two are likely closely related.
1809 pub fn gen_epilogue(&self) -> SmallInstVec<M::I> {
1810 let frame_layout = self.frame_layout();
1811 let mut insts = smallvec![];
1812
1813 // Restore clobbered registers.
1814 insts.extend(M::gen_clobber_restore(
1815 self.call_conv,
1816 &self.flags,
1817 &frame_layout,
1818 ));
1819
1820 // Tear down frame.
1821 insts.extend(M::gen_epilogue_frame_restore(
1822 self.call_conv,
1823 &self.flags,
1824 &self.isa_flags,
1825 &frame_layout,
1826 ));
1827
1828 // And return.
1829 insts.extend(M::gen_return(
1830 self.call_conv,
1831 &self.isa_flags,
1832 &frame_layout,
1833 ));
1834
1835 trace!("Epilogue: {:?}", insts);
1836 insts
1837 }
1838
1839 /// Return a reference to the computed frame layout information. This
1840 /// function will panic if it's called before [`Self::compute_frame_layout`].
1841 pub fn frame_layout(&self) -> &FrameLayout {
1842 self.frame_layout
1843 .as_ref()
1844 .expect("frame layout not computed before prologue generation")
1845 }
1846
1847 /// Returns the full frame size for the given function, after prologue
1848 /// emission has run. This comprises the spill slots and stack-storage
1849 /// slots as well as storage for clobbered callee-save registers, but
1850 /// not arguments pushed at callsites within this function,
1851 /// or other ephemeral pushes.
1852 pub fn frame_size(&self) -> u32 {
1853 let frame_layout = self.frame_layout();
1854 frame_layout.clobber_size + frame_layout.fixed_frame_storage_size
1855 }
1856
1857 /// Returns offset from the slot base in the current frame to the caller's SP.
1858 pub fn slot_base_to_caller_sp_offset(&self) -> u32 {
1859 let frame_layout = self.frame_layout();
1860 frame_layout.clobber_size
1861 + frame_layout.fixed_frame_storage_size
1862 + frame_layout.setup_area_size
1863 }
1864
1865 /// Returns the size of arguments expected on the stack.
1866 pub fn stack_args_size(&self, sigs: &SigSet) -> u32 {
1867 sigs[self.sig].sized_stack_arg_space
1868 }
1869
1870 /// Get the spill-slot count (number of word-sized slots) needed for values of the given register class.
1871 pub fn get_spillslot_size(&self, rc: RegClass) -> u32 {
1872 let max = if self.dynamic_type_sizes.is_empty() {
1873 16
1874 } else {
1875 *self
1876 .dynamic_type_sizes
1877 .iter()
1878 .max_by(|x, y| x.1.cmp(&y.1))
1879 .map(|(_k, v)| v)
1880 .unwrap()
1881 };
1882 M::get_number_of_spillslots_for_value(rc, max, &self.isa_flags)
1883 }
1884
1885 /// Get the spill slot offset relative to the fixed allocation area start.
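///
/// For example, on a 64-bit target with a 32-byte sized-stackslot area,
/// spill slot 2 sits at `32 + 2 * 8 == 48` bytes from the slot base.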
1886 pub fn get_spillslot_offset(&self, slot: SpillSlot) -> i64 {
1887 // Offset from beginning of spillslot area.
1888 let islot = slot.index() as i64;
1889 let spill_off = islot * M::word_bytes() as i64;
1890 let sp_off = self.stackslots_size as i64 + spill_off;
1891
1892 sp_off
1893 }
1894
1895 /// Generate a spill.
1896 pub fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> M::I {
1897 let ty = M::I::canonical_type_for_rc(from_reg.class());
1898 debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);
1899
1900 let sp_off = self.get_spillslot_offset(to_slot);
1901 trace!("gen_spill: {from_reg:?} into slot {to_slot:?} at offset {sp_off}");
1902
1903 let to = StackAMode::Slot(sp_off);
1904 <M>::gen_store_stack(to, Reg::from(from_reg), ty)
1905 }
1906
1907 /// Generate a reload (fill).
1908 pub fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> M::I {
1909 let ty = M::I::canonical_type_for_rc(to_reg.to_reg().class());
1910 debug_assert_eq!(<M>::I::rc_for_type(ty).unwrap().1, &[ty]);
1911
1912 let sp_off = self.get_spillslot_offset(from_slot);
1913 trace!("gen_reload: {to_reg:?} from slot {from_slot:?} at offset {sp_off}");
1914
1915 let from = StackAMode::Slot(sp_off);
1916 <M>::gen_load_stack(from, to_reg.map(Reg::from), ty)
1917 }
1918}
1919
1920/// An input argument to a call instruction: the vreg that is used,
1921/// and the preg it is constrained to (per the ABI).
1922#[derive(Clone, Debug)]
1923pub struct CallArgPair {
1924 /// The virtual register to use for the argument.
1925 pub vreg: Reg,
1926 /// The real register into which the arg goes.
1927 pub preg: Reg,
1928}
1929
1930/// An output return value from a call instruction: the vreg that is
1931/// defined, and the preg it is constrained to (per the ABI).
1932#[derive(Clone, Debug)]
1933pub struct CallRetPair {
1934 /// The virtual register to define from this return value.
1935 pub vreg: Writable<Reg>,
1936 /// The real register from which the return value is read.
1937 pub preg: Reg,
1938}
1939
1940pub type CallArgList = SmallVec<[CallArgPair; 8]>;
1941pub type CallRetList = SmallVec<[CallRetPair; 8]>;
1942
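/// Whether the call being lowered is a tail call.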
1943pub enum IsTailCall {
1944 Yes,
1945 No,
1946}
1947
1948/// ABI object for a callsite.
1949pub struct CallSite<M: ABIMachineSpec> {
1950 /// The called function's signature.
1951 sig: Sig,
1952 /// All register uses for the callsite, i.e., function args, with
1953 /// VReg and the physical register it is constrained to.
1954 uses: CallArgList,
1955 /// All defs for the callsite, i.e., return values.
1956 defs: CallRetList,
1957 /// Call destination.
1958 dest: CallDest,
1959 is_tail_call: IsTailCall,
1960 /// Caller's calling convention.
1961 caller_conv: isa::CallConv,
1962 /// The settings controlling this compilation.
1963 flags: settings::Flags,
1964
1965 _mach: PhantomData<M>,
1966}
1967
1968/// Destination for a call.
1969#[derive(Debug, Clone)]
1970pub enum CallDest {
1971 /// Call to an ExtName (named function symbol).
1972 ExtName(ir::ExternalName, RelocDistance),
1973 /// Indirect call to a function pointer in a register.
1974 Reg(Reg),
1975}
1976
1977impl<M: ABIMachineSpec> CallSite<M> {
1978 /// Create a callsite ABI object for a call directly to the specified function.
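///
/// A sketch of constructing a direct-call site (the `ctx` and argument
/// bindings come from the surrounding lowering code and are assumptions
/// here):
///
/// ```ignore
/// let mut site = CallSite::<M>::from_func(
///     ctx.sigs(), sig_ref, &extname, IsTailCall::No,
///     RelocDistance::Far, caller_conv, flags.clone(),
/// );
/// ```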
1979 pub fn from_func(
1980 sigs: &SigSet,
1981 sig_ref: ir::SigRef,
1982 extname: &ir::ExternalName,
1983 is_tail_call: IsTailCall,
1984 dist: RelocDistance,
1985 caller_conv: isa::CallConv,
1986 flags: settings::Flags,
1987 ) -> CallSite<M> {
1988 let sig = sigs.abi_sig_for_sig_ref(sig_ref);
1989 CallSite {
1990 sig,
1991 uses: smallvec![],
1992 defs: smallvec![],
1993 dest: CallDest::ExtName(extname.clone(), dist),
1994 is_tail_call,
1995 caller_conv,
1996 flags,
1997 _mach: PhantomData,
1998 }
1999 }
2000
2001 /// Create a callsite ABI object for a call directly to the specified
2002 /// libcall.
2003 pub fn from_libcall(
2004 sigs: &SigSet,
2005 sig: &ir::Signature,
2006 extname: &ir::ExternalName,
2007 dist: RelocDistance,
2008 caller_conv: isa::CallConv,
2009 flags: settings::Flags,
2010 ) -> CallSite<M> {
2011 let sig = sigs.abi_sig_for_signature(sig);
2012 CallSite {
2013 sig,
2014 uses: smallvec![],
2015 defs: smallvec![],
2016 dest: CallDest::ExtName(extname.clone(), dist),
2017 is_tail_call: IsTailCall::No,
2018 caller_conv,
2019 flags,
2020 _mach: PhantomData,
2021 }
2022 }
2023
2024 /// Create a callsite ABI object for a call to a function pointer with the
2025 /// given signature.
2026 pub fn from_ptr(
2027 sigs: &SigSet,
2028 sig_ref: ir::SigRef,
2029 ptr: Reg,
2030 is_tail_call: IsTailCall,
2031 caller_conv: isa::CallConv,
2032 flags: settings::Flags,
2033 ) -> CallSite<M> {
2034 let sig = sigs.abi_sig_for_sig_ref(sig_ref);
2035 CallSite {
2036 sig,
2037 uses: smallvec![],
2038 defs: smallvec![],
2039 dest: CallDest::Reg(ptr),
2040 is_tail_call,
2041 caller_conv,
2042 flags,
2043 _mach: PhantomData,
2044 }
2045 }
2046
2047 pub(crate) fn dest(&self) -> &CallDest {
2048 &self.dest
2049 }
2050
2051 pub(crate) fn take_uses(self) -> CallArgList {
2052 self.uses
2053 }
2054
2055 pub(crate) fn sig<'a>(&self, sigs: &'a SigSet) -> &'a SigData {
2056 &sigs[self.sig]
2057 }
2058
2059 pub(crate) fn is_tail_call(&self) -> bool {
2060 matches!(self.is_tail_call, IsTailCall::Yes)
2061 }
2062}
2063
2064impl<M: ABIMachineSpec> CallSite<M> {
2065 /// Get the number of arguments expected.
2066 pub fn num_args(&self, sigs: &SigSet) -> usize {
2067 sigs.num_args(self.sig)
2068 }
2069
2070 /// Get the number of return values expected.
2071 pub fn num_rets(&self, sigs: &SigSet) -> usize {
2072 sigs.num_rets(self.sig)
2073 }
2074
2075 /// Emit a copy of a large argument into its associated stack buffer, if
2076 /// any. We must be careful to perform all these copies (as necessary)
2077 /// before setting up the argument registers, since we may have to invoke
2078 /// memcpy(), which could clobber any registers already set up. The
2079 /// back-end should call this routine for all arguments before calling
2080 /// `gen_arg` for all arguments.
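///
/// (`emit_args` below performs these two passes in the required order.)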
2081 pub fn emit_copy_regs_to_buffer(
2082 &self,
2083 ctx: &mut Lower<M::I>,
2084 idx: usize,
2085 from_regs: ValueRegs<Reg>,
2086 ) {
2087 match &ctx.sigs().args(self.sig)[idx] {
2088 &ABIArg::Slots { .. } | &ABIArg::ImplicitPtrArg { .. } => {}
2089 &ABIArg::StructArg { offset, size, .. } => {
2090 let src_ptr = from_regs.only_reg().unwrap();
2091 let dst_ptr = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
2092 ctx.emit(M::gen_get_stack_addr(
2093 StackAMode::OutgoingArg(offset),
2094 dst_ptr,
2095 ));
2096 // Emit a memcpy from `src_ptr` to `dst_ptr` of `size` bytes.
2097 // N.B.: because we process StructArg params *first*, this is
2098 // safe w.r.t. clobbers: we have not yet filled in any other
2099 // arg regs.
2100 let memcpy_call_conv =
2101 isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv);
2102 for insn in M::gen_memcpy(
2103 memcpy_call_conv,
2104 dst_ptr.to_reg(),
2105 src_ptr,
2106 size as usize,
2107 |ty| ctx.alloc_tmp(ty).only_reg().unwrap(),
2108 )
2109 .into_iter()
2110 {
2111 ctx.emit(insn);
2112 }
2113 }
2114 }
2115 }
2116
2117 /// Add a constraint for an argument value from a source register.
2118 /// For large arguments with associated stack buffer, this may
2119 /// load the address of the buffer into the argument register, if
2120 /// required by the ABI.
2121 pub fn gen_arg(&mut self, ctx: &mut Lower<M::I>, idx: usize, from_regs: ValueRegs<Reg>) {
2122 let stack_arg_space = ctx.sigs()[self.sig].sized_stack_arg_space;
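// Tail-call arguments overwrite this function's own incoming-argument
// area, while ordinary calls write to the outgoing-argument area at the
// bottom of the frame.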
2123 let stack_arg = if self.is_tail_call() {
2124 StackAMode::IncomingArg
2125 } else {
2126 |offset, _| StackAMode::OutgoingArg(offset)
2127 };
2128 let word_rc = M::word_reg_class();
2129 let word_bits = M::word_bits() as usize;
2130
2131 match ctx.sigs().args(self.sig)[idx].clone() {
2132 ABIArg::Slots { ref slots, .. } => {
2133 assert_eq!(from_regs.len(), slots.len());
2134 for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
2135 match slot {
2136 &ABIArgSlot::Reg {
2137 reg, ty, extension, ..
2138 } => {
2139 let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension);
2140 let vreg =
2141 if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
2142 assert_eq!(word_rc, reg.class());
2143 let signed = match ext {
2144 ir::ArgumentExtension::Uext => false,
2145 ir::ArgumentExtension::Sext => true,
2146 _ => unreachable!(),
2147 };
2148 let extend_result =
2149 ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
2150 ctx.emit(M::gen_extend(
2151 extend_result,
2152 *from_reg,
2153 signed,
2154 ty_bits(ty) as u8,
2155 word_bits as u8,
2156 ));
2157 extend_result.to_reg()
2158 } else {
2159 *from_reg
2160 };
2161
2162 let preg = reg.into();
2163 self.uses.push(CallArgPair { vreg, preg });
2164 }
2165 &ABIArgSlot::Stack {
2166 offset,
2167 ty,
2168 extension,
2169 ..
2170 } => {
2171 let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension);
2172 let (data, ty) =
2173 if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
2174 assert_eq!(word_rc, from_reg.class());
2175 let signed = match ext {
2176 ir::ArgumentExtension::Uext => false,
2177 ir::ArgumentExtension::Sext => true,
2178 _ => unreachable!(),
2179 };
2180 let extend_result =
2181 ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
2182 ctx.emit(M::gen_extend(
2183 extend_result,
2184 *from_reg,
2185 signed,
2186 ty_bits(ty) as u8,
2187 word_bits as u8,
2188 ));
2189 // Store the extended version.
2190 (extend_result.to_reg(), M::word_type())
2191 } else {
2192 (*from_reg, ty)
2193 };
2194 ctx.emit(M::gen_store_stack(
2195 stack_arg(offset, stack_arg_space),
2196 data,
2197 ty,
2198 ));
2199 }
2200 }
2201 }
2202 }
2203 ABIArg::StructArg { pointer, .. } => {
2204 assert!(pointer.is_none()); // Only supported via ISLE.
2205 }
2206 ABIArg::ImplicitPtrArg {
2207 offset,
2208 pointer,
2209 ty,
2210 purpose: _,
2211 } => {
2212 assert_eq!(from_regs.len(), 1);
2213 let vreg = from_regs.regs()[0];
2214 let amode = StackAMode::OutgoingArg(offset);
2215 let tmp = ctx.alloc_tmp(M::word_type()).only_reg().unwrap();
2216 ctx.emit(M::gen_get_stack_addr(amode, tmp));
2217 let tmp = tmp.to_reg();
2218 ctx.emit(M::gen_store_base_offset(tmp, 0, vreg, ty));
2219 match pointer {
2220 ABIArgSlot::Reg { reg, .. } => self.uses.push(CallArgPair {
2221 vreg: tmp,
2222 preg: reg.into(),
2223 }),
2224 ABIArgSlot::Stack { offset, .. } => ctx.emit(M::gen_store_stack(
2225 stack_arg(offset, stack_arg_space),
2226 tmp,
2227 M::word_type(),
2228 )),
2229 }
2230 }
2231 }
2232 }
2233
2234 /// Call `gen_arg` for each non-hidden argument and emit all instructions
2235 /// generated.
2236 pub fn emit_args(&mut self, ctx: &mut Lower<M::I>, (inputs, off): isle::ValueSlice) {
2237 let num_args = self.num_args(ctx.sigs());
2238 assert_eq!(inputs.len(&ctx.dfg().value_lists) - off, num_args);
2239
2240 let mut arg_value_regs: SmallVec<[_; 16]> = smallvec![];
2241 for i in 0..num_args {
2242 let input = inputs.get(off + i, &ctx.dfg().value_lists).unwrap();
2243 arg_value_regs.push(ctx.put_value_in_regs(input));
2244 }
2245 for (i, arg_regs) in arg_value_regs.iter().enumerate() {
2246 self.emit_copy_regs_to_buffer(ctx, i, *arg_regs);
2247 }
2248 for (i, value_regs) in arg_value_regs.iter().enumerate() {
2249 self.gen_arg(ctx, i, *value_regs);
2250 }
2251 }
2252
2253 /// Emit the code to forward a stack-return pointer argument through a tail
2254 /// call.
2255 pub fn emit_stack_ret_arg_for_tail_call(&mut self, ctx: &mut Lower<M::I>) {
2256 if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg() {
2257 let ret_area_ptr = ctx.abi().ret_area_ptr.expect(
2258 "if the tail callee has a return pointer, then the tail caller \
2259 must as well",
2260 );
2261 self.gen_arg(ctx, i.into(), ValueRegs::one(ret_area_ptr));
2262 }
2263 }
2264
2265 /// Define a return value after the call returns.
2266 pub fn gen_retval(
2267 &mut self,
2268 ctx: &mut Lower<M::I>,
2269 idx: usize,
2270 ) -> (SmallInstVec<M::I>, ValueRegs<Reg>) {
2271 let mut insts = smallvec![];
2272 let mut into_regs: SmallVec<[Reg; 2]> = smallvec![];
2273 let ret = ctx.sigs().rets(self.sig)[idx].clone();
2274 match ret {
2275 ABIArg::Slots { ref slots, .. } => {
2276 for slot in slots {
2277 match slot {
2278 // Extension mode doesn't matter because we're copying out, not in,
2279 // and we ignore high bits in our own registers by convention.
2280 &ABIArgSlot::Reg { reg, ty, .. } => {
2281 let into_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
2282 self.defs.push(CallRetPair {
2283 vreg: into_reg,
2284 preg: reg.into(),
2285 });
2286 into_regs.push(into_reg.to_reg());
2287 }
2288 &ABIArgSlot::Stack { offset, ty, .. } => {
2289 let into_reg = ctx.alloc_tmp(ty).only_reg().unwrap();
2290 let sig_data = &ctx.sigs()[self.sig];
2291 // The outgoing argument area must always be restored after a call,
2292 // ensuring that the return values will be in a consistent place after
2293 // any call.
2294 let ret_area_base = sig_data.sized_stack_arg_space();
2295 insts.push(M::gen_load_stack(
2296 StackAMode::OutgoingArg(offset + ret_area_base),
2297 into_reg,
2298 ty,
2299 ));
2300 into_regs.push(into_reg.to_reg());
2301 }
2302 }
2303 }
2304 }
2305 ABIArg::StructArg { .. } => {
2306 panic!("StructArg not supported in return position");
2307 }
2308 ABIArg::ImplicitPtrArg { .. } => {
2309 panic!("ImplicitPtrArg not supported in return position");
2310 }
2311 }
2312
2313 let value_regs = match *into_regs {
2314 [a] => ValueRegs::one(a),
2315 [a, b] => ValueRegs::two(a, b),
2316 _ => panic!("Expected to see one or two slots only from {ret:?}"),
2317 };
2318 (insts, value_regs)
2319 }
2320
2321 /// Emit the call itself.
2322 ///
2323 /// The returned instruction should have proper use- and def-sets according
2324 /// to the argument registers, return-value registers, and clobbered
2325 /// registers for this function signature in this ABI.
2326 ///
2327 /// (Arg registers are uses, and retval registers are defs. Clobbered
2328 /// registers are also logically defs, but should never be read; their
2329 /// values are "defined" (to the regalloc) but "undefined" in every other
2330 /// sense.)
2331 ///
2332 /// This function should only be called once, as it is allowed to re-use
2333 /// parts of the `CallSite` object in emitting instructions.
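///
/// A sketch of the overall call-lowering order (bindings assumed from
/// the lowering context); note that `gen_retval` must run before
/// `emit_call` so that the return-value defs it registers are attached
/// to the call instruction:
///
/// ```ignore
/// site.emit_args(ctx, args);
/// let (retval_insts, retval_regs) = site.gen_retval(ctx, 0);
/// site.emit_call(ctx);
/// for inst in retval_insts {
///     ctx.emit(inst); // loads of stack-returned values, if any
/// }
/// ```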
2334 pub fn emit_call(&mut self, ctx: &mut Lower<M::I>) {
2335 let word_type = M::word_type();
2336 if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg {
2337 let rd = ctx.alloc_tmp(word_type).only_reg().unwrap();
2338 let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space();
2339 ctx.emit(M::gen_get_stack_addr(
2340 StackAMode::OutgoingArg(ret_area_base),
2341 rd,
2342 ));
2343 self.gen_arg(ctx, i.into(), ValueRegs::one(rd.to_reg()));
2344 }
2345
2346 let uses = mem::take(&mut self.uses);
2347 let defs = mem::take(&mut self.defs);
2348 let clobbers = {
2349 // Get clobbers: all caller-saves. These may include return value
2350 // regs, which we will remove from the clobber set below.
2351 let mut clobbers = <M>::get_regs_clobbered_by_call(ctx.sigs()[self.sig].call_conv);
2352
2353 // Remove retval regs from clobbers.
2354 for def in &defs {
2355 clobbers.remove(PReg::from(def.preg.to_real_reg().unwrap()));
2356 }
2357
2358 clobbers
2359 };
2360
2361 let sig = &ctx.sigs()[self.sig];
2362 let callee_pop_size = if sig.call_conv() == isa::CallConv::Tail {
2363 // The tail calling convention has callees pop stack arguments.
2364 sig.sized_stack_arg_space
2365 } else {
2366 0
2367 };
2368
2369 let call_conv = sig.call_conv;
2370 let ret_space = sig.sized_stack_ret_space;
2371 let arg_space = sig.sized_stack_arg_space;
2372
2373 ctx.abi_mut()
2374 .accumulate_outgoing_args_size(ret_space + arg_space);
2375
2376 let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
2377
2378 // Any adjustment to SP to account for required outgoing arguments/stack return values must
2379 // be done inside of the call pseudo-op, to ensure that SP is always in a consistent
2380 // state for all other instructions. For example, if a tail-call abi function is called
2381 // here, the reclamation of the outgoing argument area must be done inside of the call
2382 // pseudo-op's emission to ensure that SP is consistent at all other points in the lowered
2383 // function. (Except the prologue and epilogue, but those are fairly special parts of the
2384 // function that establish the SP invariants that are relied on elsewhere and are generated
2385 // after the register allocator has run and thus cannot have register allocator-inserted
2386 // references to SP offsets.)
2387 for inst in M::gen_call(
2388 &self.dest,
2389 tmp,
2390 CallInfo {
2391 dest: (),
2392 uses,
2393 defs,
2394 clobbers,
2395 callee_conv: call_conv,
2396 caller_conv: self.caller_conv,
2397 callee_pop_size,
2398 },
2399 )
2400 .into_iter()
2401 {
2402 ctx.emit(inst);
2403 }
2404 }
2405}
2406
2407#[cfg(test)]
2408mod tests {
2409 use super::SigData;
2410
2411 #[test]
2412 fn sig_data_size() {
2413 // The size of `SigData` is performance sensitive, so make sure
2414 // we don't regress it unintentionally.
2415 assert_eq!(std::mem::size_of::<SigData>(), 24);
2416 }
2417}