cranelift_codegen/isa/x64/encoding/rex.rs
1//! Encodes instructions in the standard x86 encoding mode. This is called IA-32E mode in the Intel
2//! manuals but corresponds to the addition of the REX-prefix format (hence the name of this module)
3//! that allowed encoding instructions in both compatibility mode (32-bit instructions running on a
4//! 64-bit OS) and in 64-bit mode (using the full 64-bit address space).
5//!
6//! For all of the routines that take both a memory-or-reg operand (sometimes called "E" in the
7//! Intel documentation, see the Intel Developer's manual, vol. 2, section A.2) and a reg-only
8//! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following
9//! means "hardware register encoding number".
10
11use crate::machinst::{Reg, RegClass};
12use crate::{
13 isa::x64::inst::{
14 args::{Amode, OperandSize},
15 regs, Inst, LabelUse,
16 },
17 machinst::MachBuffer,
18};
19
/// Returns `true` when `x`, reinterpreted as a signed 32-bit value and widened to 64 bits, is
/// fully determined by sign-extending its low 8 bits (i.e. the signed value is in
/// `-128..=127`).
pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool {
    let wide = i64::from(x as i32);
    // Truncating to `i8` keeps only the low byte; widening again sign-extends it.
    wide == i64::from(wide as i8)
}
24
/// Returns `true` when `x`, reinterpreted as a signed 32-bit value, is fully determined by
/// sign-extending its low 8 bits (i.e. the signed value is in `-128..=127`).
pub(crate) fn low8_will_sign_extend_to_32(x: u32) -> bool {
    let signed = x as i32;
    // Truncating to `i8` keeps only the low byte; widening again sign-extends it.
    signed == i32::from(signed as i8)
}
29
/// Pack a ModR/M byte from its three fields.
///
/// The result is laid out as `mod` in bits 7:6, `reg` (the G operand) in bits 5:3 and `r/m`
/// (the E operand) in bits 2:0. In debug builds each field is asserted to already fit its
/// width; in all builds the fields are masked before being combined.
#[inline(always)]
pub fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
    debug_assert!(m0d < 4);
    debug_assert!(enc_reg_g < 8);
    debug_assert!(rm_e < 8);
    let mod_bits = (m0d & 0b11) << 6;
    let reg_bits = (enc_reg_g & 0b111) << 3;
    let rm_bits = rm_e & 0b111;
    mod_bits | reg_bits | rm_bits
}
38
/// Pack a SIB byte from its three fields.
///
/// The result is laid out as the scale (`shift`) in bits 7:6, the index register encoding in
/// bits 5:3 and the base register encoding in bits 2:0. In debug builds each field is asserted
/// to already fit its width; in all builds the fields are masked before being combined.
#[inline(always)]
pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
    debug_assert!(shift < 4);
    debug_assert!(enc_index < 8);
    debug_assert!(enc_base < 8);
    let scale_bits = (shift & 0b11) << 6;
    let index_bits = (enc_index & 0b111) << 3;
    let base_bits = enc_base & 0b111;
    scale_bits | index_bits | base_bits
}
46
47/// Get the encoding number of a GPR.
48#[inline(always)]
49pub(crate) fn int_reg_enc(reg: impl Into<Reg>) -> u8 {
50 let reg = reg.into();
51 debug_assert!(reg.is_real(), "reg = {reg:?}");
52 debug_assert_eq!(reg.class(), RegClass::Int);
53 reg.to_real_reg().unwrap().hw_enc()
54}
55
56/// Get the encoding number of any register.
57#[inline(always)]
58pub(crate) fn reg_enc(reg: impl Into<Reg>) -> u8 {
59 let reg = reg.into();
60 debug_assert!(reg.is_real());
61 reg.to_real_reg().unwrap().hw_enc()
62}
63
64/// A small bit field to record a REX prefix specification:
65/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
66/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
67#[repr(transparent)]
68#[derive(Clone, Copy)]
69pub struct RexFlags(u8);
70
71impl RexFlags {
72 /// By default, set the W field, and don't always emit.
73 #[inline(always)]
74 pub fn set_w() -> Self {
75 Self(0)
76 }
77
78 /// Creates a new RexPrefix for which the REX.W bit will be cleared.
79 #[inline(always)]
80 pub fn clear_w() -> Self {
81 Self(1)
82 }
83
84 /// Require that the REX prefix is emitted.
85 #[inline(always)]
86 pub fn always_emit(&mut self) -> &mut Self {
87 self.0 = self.0 | 2;
88 self
89 }
90
91 /// Emit the rex prefix if the referenced register would require it for 8-bit operations.
92 #[inline(always)]
93 pub fn always_emit_if_8bit_needed(&mut self, reg: Reg) -> &mut Self {
94 let enc_reg = int_reg_enc(reg);
95 if enc_reg >= 4 && enc_reg <= 7 {
96 self.always_emit();
97 }
98 self
99 }
100
101 /// True if 64-bit operands are used.
102 #[inline(always)]
103 pub fn must_clear_w(&self) -> bool {
104 (self.0 & 1) != 0
105 }
106
107 /// True if the REX prefix must always be emitted.
108 #[inline(always)]
109 pub fn must_always_emit(&self) -> bool {
110 (self.0 & 2) != 0
111 }
112
113 /// Emit a unary instruction.
114 #[inline(always)]
115 pub fn emit_one_op(&self, sink: &mut MachBuffer<Inst>, enc_e: u8) {
116 // Register Operand coded in Opcode Byte
117 // REX.R and REX.X unused
118 // REX.B == 1 accesses r8-r15
119 let w = if self.must_clear_w() { 0 } else { 1 };
120 let r = 0;
121 let x = 0;
122 let b = (enc_e >> 3) & 1;
123 let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
124 if rex != 0x40 || self.must_always_emit() {
125 sink.put1(rex);
126 }
127 }
128
129 /// Emit a binary instruction.
130 #[inline(always)]
131 pub fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
132 let w = if self.must_clear_w() { 0 } else { 1 };
133 let r = (enc_g >> 3) & 1;
134 let x = 0;
135 let b = (enc_e >> 3) & 1;
136 let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
137 if rex != 0x40 || self.must_always_emit() {
138 sink.put1(rex);
139 }
140 }
141
142 /// Emit a ternary instruction.
143 #[inline(always)]
144 pub fn emit_three_op(
145 &self,
146 sink: &mut MachBuffer<Inst>,
147 enc_g: u8,
148 enc_index: u8,
149 enc_base: u8,
150 ) {
151 let w = if self.must_clear_w() { 0 } else { 1 };
152 let r = (enc_g >> 3) & 1;
153 let x = (enc_index >> 3) & 1;
154 let b = (enc_base >> 3) & 1;
155 let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
156 if rex != 0x40 || self.must_always_emit() {
157 sink.put1(rex);
158 }
159 }
160}
161
162/// Generate the proper Rex flags for the given operand size.
163impl From<OperandSize> for RexFlags {
164 fn from(size: OperandSize) -> Self {
165 match size {
166 OperandSize::Size64 => RexFlags::set_w(),
167 _ => RexFlags::clear_w(),
168 }
169 }
170}
171/// Generate Rex flags for an OperandSize/register tuple.
172impl From<(OperandSize, Reg)> for RexFlags {
173 fn from((size, reg): (OperandSize, Reg)) -> Self {
174 let mut rex = RexFlags::from(size);
175 if size == OperandSize::Size8 {
176 rex.always_emit_if_8bit_needed(reg);
177 }
178 rex
179 }
180}
181
/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction
/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details.
///
/// The default is [`OpcodeMap::None`].
#[derive(Default, PartialEq)]
pub enum OpcodeMap {
    /// No opcode-map escape.
    #[default]
    None,
    /// The `0F` opcode map.
    _0F,
    /// The `0F 38` opcode map.
    _0F38,
    /// The `0F 3A` opcode map.
    _0F3A,
}

impl OpcodeMap {
    /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding
    /// formats pack this information as bits in a prefix (e.g. VEX / EVEX).
    ///
    /// Returns the two-bit code for this map: `None` = `0b00`, `_0F` = `0b01`, `_0F38` =
    /// `0b10`, `_0F3A` = `0b11`.
    pub(crate) fn bits(&self) -> u8 {
        match self {
            OpcodeMap::None => 0b00,
            OpcodeMap::_0F => 0b01,
            OpcodeMap::_0F38 => 0b10,
            OpcodeMap::_0F3A => 0b11,
        }
    }
}
211
212/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
213/// covers only the small set of possibilities that we actually need.
214#[derive(PartialEq)]
215pub enum LegacyPrefixes {
216 /// No prefix bytes.
217 None,
218 /// Operand Size Override -- here, denoting "16-bit operation".
219 _66,
220 /// The Lock prefix.
221 _F0,
222 /// Operand size override and Lock.
223 _66F0,
224 /// REPNE, but no specific meaning here -- is just an opcode extension.
225 _F2,
226 /// REP/REPE, but no specific meaning here -- is just an opcode extension.
227 _F3,
228 /// Operand size override and same effect as F3.
229 _66F3,
230}
231
232impl LegacyPrefixes {
233 /// Emit the legacy prefix as bytes (e.g. in REX instructions).
234 #[inline(always)]
235 pub(crate) fn emit(&self, sink: &mut MachBuffer<Inst>) {
236 match self {
237 Self::_66 => sink.put1(0x66),
238 Self::_F0 => sink.put1(0xF0),
239 Self::_66F0 => {
240 // I don't think the order matters, but in any case, this is the same order that
241 // the GNU assembler uses.
242 sink.put1(0x66);
243 sink.put1(0xF0);
244 }
245 Self::_F2 => sink.put1(0xF2),
246 Self::_F3 => sink.put1(0xF3),
247 Self::_66F3 => {
248 sink.put1(0x66);
249 sink.put1(0xF3);
250 }
251 Self::None => (),
252 }
253 }
254
255 /// Emit the legacy prefix as bits (e.g. for EVEX instructions).
256 #[inline(always)]
257 pub(crate) fn bits(&self) -> u8 {
258 match self {
259 Self::None => 0b00,
260 Self::_66 => 0b01,
261 Self::_F3 => 0b10,
262 Self::_F2 => 0b11,
263 _ => panic!(
264 "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2"
265 ),
266 }
267 }
268}
269
270impl Default for LegacyPrefixes {
271 fn default() -> Self {
272 Self::None
273 }
274}
275
276/// This is the core 'emit' function for instructions that reference memory.
277///
278/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
279/// create and emit:
280/// - first the legacy prefixes, if any
281/// - then the REX prefix, if needed
282/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
283/// - then the MOD/RM byte,
284/// - then optionally, a SIB byte,
285/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
286///
287/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
288/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
289/// instructions will require their own emitter functions.
290///
291/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
292///
293/// The opcodes are written bigendianly for the convenience of callers. For example, if the opcode
294/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
295/// 0xF3_0F_27 and `num_opcodes` == 3.
296///
297/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
298/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
299/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
300/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
301/// indicate a 64-bit operation.
302pub(crate) fn emit_std_enc_mem(
303 sink: &mut MachBuffer<Inst>,
304 prefixes: LegacyPrefixes,
305 opcodes: u32,
306 mut num_opcodes: usize,
307 enc_g: u8,
308 mem_e: &Amode,
309 rex: RexFlags,
310 bytes_at_end: u8,
311) {
312 // General comment for this function: the registers in `mem_e` must be
313 // 64-bit integer registers, because they are part of an address
314 // expression. But `enc_g` can be derived from a register of any class.
315
316 if let Some(trap_code) = mem_e.get_flags().trap_code() {
317 sink.add_trap(trap_code);
318 }
319
320 prefixes.emit(sink);
321
322 // After prefixes, first emit the REX byte depending on the kind of
323 // addressing mode that's being used.
324 match *mem_e {
325 Amode::ImmReg { base, .. } => {
326 let enc_e = int_reg_enc(base);
327 rex.emit_two_op(sink, enc_g, enc_e);
328 }
329
330 Amode::ImmRegRegShift {
331 base: reg_base,
332 index: reg_index,
333 ..
334 } => {
335 let enc_base = int_reg_enc(*reg_base);
336 let enc_index = int_reg_enc(*reg_index);
337 rex.emit_three_op(sink, enc_g, enc_index, enc_base);
338 }
339
340 Amode::RipRelative { .. } => {
341 // note REX.B = 0.
342 rex.emit_two_op(sink, enc_g, 0);
343 }
344 }
345
346 // Now the opcode(s). These include any other prefixes the caller
347 // hands to us.
348 while num_opcodes > 0 {
349 num_opcodes -= 1;
350 sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
351 }
352
353 // And finally encode the mod/rm bytes and all further information.
354 emit_modrm_sib_disp(sink, enc_g, mem_e, bytes_at_end, None)
355}
356
357pub(crate) fn emit_modrm_sib_disp(
358 sink: &mut MachBuffer<Inst>,
359 enc_g: u8,
360 mem_e: &Amode,
361 bytes_at_end: u8,
362 evex_scaling: Option<i8>,
363) {
364 match *mem_e {
365 Amode::ImmReg { simm32, base, .. } => {
366 let enc_e = int_reg_enc(base);
367 let mut imm = Imm::new(simm32, evex_scaling);
368
369 // Most base registers allow for a single ModRM byte plus an
370 // optional immediate. If rsp is the base register, however, then a
371 // SIB byte must be used.
372 let enc_e_low3 = enc_e & 7;
373 if enc_e_low3 != regs::ENC_RSP {
374 // If the base register is rbp and there's no offset then force
375 // a 1-byte zero offset since otherwise the encoding would be
376 // invalid.
377 if enc_e_low3 == regs::ENC_RBP {
378 imm.force_immediate();
379 }
380 sink.put1(encode_modrm(imm.m0d(), enc_g & 7, enc_e & 7));
381 imm.emit(sink);
382 } else {
383 // Displacement from RSP is encoded with a SIB byte where
384 // the index and base are both encoded as RSP's encoding of
385 // 0b100. This special encoding means that the index register
386 // isn't used and the base is 0b100 with or without a
387 // REX-encoded 4th bit (e.g. rsp or r12)
388 sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
389 sink.put1(0b00_100_100);
390 imm.emit(sink);
391 }
392 }
393
394 Amode::ImmRegRegShift {
395 simm32,
396 base: reg_base,
397 index: reg_index,
398 shift,
399 ..
400 } => {
401 let enc_base = int_reg_enc(*reg_base);
402 let enc_index = int_reg_enc(*reg_index);
403
404 // Encoding of ModRM/SIB bytes don't allow the index register to
405 // ever be rsp. Note, though, that the encoding of r12, whose three
406 // lower bits match the encoding of rsp, is explicitly allowed with
407 // REX bytes so only rsp is disallowed.
408 assert!(enc_index != regs::ENC_RSP);
409
410 // If the offset is zero then there is no immediate. Note, though,
411 // that if the base register's lower three bits are `101` then an
412 // offset must be present. This is a special case in the encoding of
413 // the SIB byte and requires an explicit displacement with rbp/r13.
414 let mut imm = Imm::new(simm32, evex_scaling);
415 if enc_base & 7 == regs::ENC_RBP {
416 imm.force_immediate();
417 }
418
419 // With the above determined encode the ModRM byte, then the SIB
420 // byte, then any immediate as necessary.
421 sink.put1(encode_modrm(imm.m0d(), enc_g & 7, 0b100));
422 sink.put1(encode_sib(shift, enc_index & 7, enc_base & 7));
423 imm.emit(sink);
424 }
425
426 Amode::RipRelative { ref target } => {
427 // RIP-relative is mod=00, rm=101.
428 sink.put1(encode_modrm(0b00, enc_g & 7, 0b101));
429
430 let offset = sink.cur_offset();
431 sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
432 // N.B.: some instructions (XmmRmRImm format for example)
433 // have bytes *after* the RIP-relative offset. The
434 // addressed location is relative to the end of the
435 // instruction, but the relocation is nominally relative
436 // to the end of the u32 field. So, to compensate for
437 // this, we emit a negative extra offset in the u32 field
438 // initially, and the relocation will add to it.
439 sink.put4(-(i32::from(bytes_at_end)) as u32);
440 }
441 }
442}
443
444#[derive(Copy, Clone)]
445enum Imm {
446 None,
447 Imm8(i8),
448 Imm32(i32),
449}
450
451impl Imm {
452 /// Classifies the 32-bit immediate `val` as how this can be encoded
453 /// with ModRM/SIB bytes.
454 ///
455 /// For `evex_scaling` according to Section 2.7.5 of Intel's manual:
456 ///
457 /// > EVEX-encoded instructions always use a compressed displacement scheme
458 /// > by multiplying disp8 in conjunction with a scaling factor N that is
459 /// > determined based on the vector length, the value of EVEX.b bit
460 /// > (embedded broadcast) and the input element size of the instruction
461 ///
462 /// The `evex_scaling` factor provided here is `Some(N)` for EVEX
463 /// instructions. This is taken into account where the `Imm` value
464 /// contained is the raw byte offset.
465 fn new(val: i32, evex_scaling: Option<i8>) -> Imm {
466 if val == 0 {
467 return Imm::None;
468 }
469 match evex_scaling {
470 Some(scaling) => {
471 if val % i32::from(scaling) == 0 {
472 let scaled = val / i32::from(scaling);
473 if low8_will_sign_extend_to_32(scaled as u32) {
474 return Imm::Imm8(scaled as i8);
475 }
476 }
477 Imm::Imm32(val)
478 }
479 None => match i8::try_from(val) {
480 Ok(val) => Imm::Imm8(val),
481 Err(_) => Imm::Imm32(val),
482 },
483 }
484 }
485
486 /// Forces `Imm::None` to become `Imm::Imm8(0)`, used for special cases
487 /// where some base registers require an immediate.
488 fn force_immediate(&mut self) {
489 if let Imm::None = self {
490 *self = Imm::Imm8(0);
491 }
492 }
493
494 /// Returns the two "mod" bits present at the upper bits of the mod/rm
495 /// byte.
496 fn m0d(&self) -> u8 {
497 match self {
498 Imm::None => 0b00,
499 Imm::Imm8(_) => 0b01,
500 Imm::Imm32(_) => 0b10,
501 }
502 }
503
504 fn emit(&self, sink: &mut MachBuffer<Inst>) {
505 match self {
506 Imm::None => {}
507 Imm::Imm8(n) => sink.put1(*n as u8),
508 Imm::Imm32(n) => sink.put4(*n as u32),
509 }
510 }
511}
512
513/// This is the core 'emit' function for instructions that do not reference memory.
514///
515/// This is conceptually the same as emit_modrm_sib_enc_ge, except it is for the case where the E
516/// operand is a register rather than memory. Hence it is much simpler.
517pub(crate) fn emit_std_enc_enc(
518 sink: &mut MachBuffer<Inst>,
519 prefixes: LegacyPrefixes,
520 opcodes: u32,
521 mut num_opcodes: usize,
522 enc_g: u8,
523 enc_e: u8,
524 rex: RexFlags,
525) {
526 // EncG and EncE can be derived from registers of any class, and they
527 // don't even have to be from the same class. For example, for an
528 // integer-to-FP conversion insn, one might be RegClass::I64 and the other
529 // RegClass::V128.
530
531 // The legacy prefixes.
532 prefixes.emit(sink);
533
534 // The rex byte.
535 rex.emit_two_op(sink, enc_g, enc_e);
536
537 // All other prefixes and opcodes.
538 while num_opcodes > 0 {
539 num_opcodes -= 1;
540 sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
541 }
542
543 // Now the mod/rm byte. The instruction we're generating doesn't access
544 // memory, so there is no SIB byte or immediate -- we're done.
545 sink.put1(encode_modrm(0b11, enc_g & 7, enc_e & 7));
546}
547
548// These are merely wrappers for the above two functions that facilitate passing
549// actual `Reg`s rather than their encodings.
550
551pub(crate) fn emit_std_reg_mem(
552 sink: &mut MachBuffer<Inst>,
553 prefixes: LegacyPrefixes,
554 opcodes: u32,
555 num_opcodes: usize,
556 reg_g: Reg,
557 mem_e: &Amode,
558 rex: RexFlags,
559 bytes_at_end: u8,
560) {
561 let enc_g = reg_enc(reg_g);
562 emit_std_enc_mem(
563 sink,
564 prefixes,
565 opcodes,
566 num_opcodes,
567 enc_g,
568 mem_e,
569 rex,
570 bytes_at_end,
571 );
572}
573
574pub(crate) fn emit_std_reg_reg(
575 sink: &mut MachBuffer<Inst>,
576 prefixes: LegacyPrefixes,
577 opcodes: u32,
578 num_opcodes: usize,
579 reg_g: Reg,
580 reg_e: Reg,
581 rex: RexFlags,
582) {
583 let enc_g = reg_enc(reg_g);
584 let enc_e = reg_enc(reg_e);
585 emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
586}
587
588/// Write a suitable number of bits from an imm64 to the sink.
589pub(crate) fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
590 match size {
591 8 | 4 => sink.put4(simm32),
592 2 => sink.put2(simm32 as u16),
593 1 => sink.put1(simm32 as u8),
594 _ => unreachable!(),
595 }
596}