Skip to content

Commit 51c351f

Browse files
authored
[X86][MC] Support decoding of EGPR for APX (#72102)
#70958 adds registers R16-R31 (EGPR). This patch:
1. Supports decoding of EGPR for instructions with a REX2 prefix.
2. Supports decoding of EGPR for instructions with an EVEX prefix.

For simplicity's sake, we:
1. Simulate the REX prefix with the 1st payload of REX2.
2. Simulate the REX2 prefix with the 2nd and 3rd payloads of EVEX.

RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

Explanations for some changes:
1. invalid-EVEX-R2.txt is deleted because `0x62 0xe1 0xff 0x08 0x79 0xc0` is now valid and decodes to `vcvtsd2usi %xmm0, %r16`.
2. One line in x86-64-err.txt is removed because APX relaxes the limitation on the 1st and 2nd payloads of the EVEX prefix, so the error message changes.
1 parent 8434b0b commit 51c351f

File tree

7 files changed

+988
-175
lines changed

7 files changed

+988
-175
lines changed

llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp

+84-42
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206206
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
207207
}
208208

209+
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
210+
return insn->mode == MODE_64BIT && prefix == 0xd5;
211+
}
212+
209213
// Consumes all of an instruction's prefix bytes, and marks the
210214
// instruction as having them. Also sets the instruction's default operand,
211215
// address, and other relevant data sizes to report operands correctly.
@@ -337,8 +341,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
337341
return -1;
338342
}
339343

340-
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
341-
((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
344+
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
342345
insn->vectorExtensionType = TYPE_EVEX;
343346
} else {
344347
--insn->readerCursor; // unconsume byte1
@@ -357,13 +360,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
357360
return -1;
358361
}
359362

360-
// We simulate the REX prefix for simplicity's sake
361363
if (insn->mode == MODE_64BIT) {
364+
// We simulate the REX prefix for simplicity's sake
362365
insn->rexPrefix = 0x40 |
363366
(wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
364367
(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
365368
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
366369
(bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
370+
371+
// We simulate the REX2 prefix for simplicity's sake
372+
insn->rex2ExtensionPrefix[1] =
373+
(r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
374+
(x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
375+
(b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
367376
}
368377

369378
LLVM_DEBUG(
@@ -474,6 +483,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
474483
insn->vectorExtensionPrefix[1],
475484
insn->vectorExtensionPrefix[2]));
476485
}
486+
} else if (isREX2(insn, byte)) {
487+
uint8_t byte1;
488+
if (peek(insn, byte1)) {
489+
LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
490+
return -1;
491+
}
492+
insn->rex2ExtensionPrefix[0] = byte;
493+
consume(insn, insn->rex2ExtensionPrefix[1]);
494+
495+
// We simulate the REX prefix for simplicity's sake
496+
insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
497+
(rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
498+
(xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
499+
(bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
500+
LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
501+
insn->rex2ExtensionPrefix[0],
502+
insn->rex2ExtensionPrefix[1]));
477503
} else if (isREX(insn, byte)) {
478504
if (peek(insn, nextByte))
479505
return -1;
@@ -532,7 +558,8 @@ static int readSIB(struct InternalInstruction *insn) {
532558
if (consume(insn, insn->sib))
533559
return -1;
534560

535-
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
561+
index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
562+
(x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
536563

537564
if (index == 0x4) {
538565
insn->sibIndex = SIB_INDEX_NONE;
@@ -542,7 +569,8 @@ static int readSIB(struct InternalInstruction *insn) {
542569

543570
insn->sibScale = 1 << scaleFromSIB(insn->sib);
544571

545-
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
572+
base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
573+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
546574

547575
switch (base) {
548576
case 0x5:
@@ -604,7 +632,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
604632

605633
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
606634
static int readModRM(struct InternalInstruction *insn) {
607-
uint8_t mod, rm, reg, evexrm;
635+
uint8_t mod, rm, reg;
608636
LLVM_DEBUG(dbgs() << "readModRM()");
609637

610638
if (insn->consumedModRM)
@@ -636,14 +664,13 @@ static int readModRM(struct InternalInstruction *insn) {
636664
break;
637665
}
638666

639-
reg |= rFromREX(insn->rexPrefix) << 3;
640-
rm |= bFromREX(insn->rexPrefix) << 3;
667+
reg |= (rFromREX(insn->rexPrefix) << 3) |
668+
(r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
669+
rm |= (bFromREX(insn->rexPrefix) << 3) |
670+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
641671

642-
evexrm = 0;
643-
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
672+
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
644673
reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
645-
evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
646-
}
647674

648675
insn->reg = (Reg)(insn->regBase + reg);
649676

@@ -731,7 +758,7 @@ static int readModRM(struct InternalInstruction *insn) {
731758
break;
732759
case 0x3:
733760
insn->eaDisplacement = EA_DISP_NONE;
734-
insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
761+
insn->eaBase = (EABase)(insn->eaRegBase + rm);
735762
break;
736763
}
737764
break;
@@ -741,7 +768,7 @@ static int readModRM(struct InternalInstruction *insn) {
741768
return 0;
742769
}
743770

744-
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
771+
#define GENERIC_FIXUP_FUNC(name, base, prefix) \
745772
static uint16_t name(struct InternalInstruction *insn, OperandType type, \
746773
uint8_t index, uint8_t *valid) { \
747774
*valid = 1; \
@@ -753,28 +780,15 @@ static int readModRM(struct InternalInstruction *insn) {
753780
case TYPE_Rv: \
754781
return base + index; \
755782
case TYPE_R8: \
756-
index &= mask; \
757-
if (index > 0xf) \
758-
*valid = 0; \
759-
if (insn->rexPrefix && index >= 4 && index <= 7) { \
783+
if (insn->rexPrefix && index >= 4 && index <= 7) \
760784
return prefix##_SPL + (index - 4); \
761-
} else { \
785+
else \
762786
return prefix##_AL + index; \
763-
} \
764787
case TYPE_R16: \
765-
index &= mask; \
766-
if (index > 0xf) \
767-
*valid = 0; \
768788
return prefix##_AX + index; \
769789
case TYPE_R32: \
770-
index &= mask; \
771-
if (index > 0xf) \
772-
*valid = 0; \
773790
return prefix##_EAX + index; \
774791
case TYPE_R64: \
775-
index &= mask; \
776-
if (index > 0xf) \
777-
*valid = 0; \
778792
return prefix##_RAX + index; \
779793
case TYPE_ZMM: \
780794
return prefix##_ZMM0 + index; \
@@ -824,8 +838,8 @@ static int readModRM(struct InternalInstruction *insn) {
824838
// @param valid - The address of a uint8_t. The target is set to 1 if the
825839
// field is valid for the register class; 0 if not.
826840
// @return - The proper value.
827-
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
828-
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
841+
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
842+
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
829843

830844
// Consult an operand specifier to determine which of the fixup*Value functions
831845
// to use in correcting readModRM()'s interpretation.
@@ -855,8 +869,31 @@ static int fixupReg(struct InternalInstruction *insn,
855869
if (!valid)
856870
return -1;
857871
break;
858-
case ENCODING_SIB:
859872
CASE_ENCODING_RM:
873+
if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
874+
modFromModRM(insn->modRM) == 3) {
875+
// EVEX_X can extend the register id to 32 for a non-GPR register that is
876+
// encoded in RM.
877+
// mode : MODE_64BIT
878+
// Only 8 vector registers are available in 32 bit mode
879+
// mod : 3
880+
// RM encodes a register
881+
switch (op->type) {
882+
case TYPE_Rv:
883+
case TYPE_R8:
884+
case TYPE_R16:
885+
case TYPE_R32:
886+
case TYPE_R64:
887+
break;
888+
default:
889+
insn->eaBase =
890+
(EABase)(insn->eaBase +
891+
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
892+
break;
893+
}
894+
}
895+
[[fallthrough]];
896+
case ENCODING_SIB:
860897
if (insn->eaBase >= insn->eaRegBase) {
861898
insn->eaBase = (EABase)fixupRMValue(
862899
insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
@@ -945,6 +982,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
945982
insn->opcodeType = XOPA_MAP;
946983
return consume(insn, insn->opcode);
947984
}
985+
} else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
986+
// m bit indicates opcode map 1
987+
insn->opcodeType = TWOBYTE;
988+
return consume(insn, insn->opcode);
948989
}
949990

950991
if (consume(insn, current))
@@ -1388,10 +1429,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
13881429
if (size == 0)
13891430
size = insn->registerSize;
13901431

1432+
auto setOpcodeRegister = [&](unsigned base) {
1433+
insn->opcodeRegister =
1434+
(Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
1435+
(b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
1436+
(insn->opcode & 7)));
1437+
};
1438+
13911439
switch (size) {
13921440
case 1:
1393-
insn->opcodeRegister = (Reg)(
1394-
MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1441+
setOpcodeRegister(MODRM_REG_AL);
13951442
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
13961443
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
13971444
insn->opcodeRegister =
@@ -1400,18 +1447,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
14001447

14011448
break;
14021449
case 2:
1403-
insn->opcodeRegister = (Reg)(
1404-
MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1450+
setOpcodeRegister(MODRM_REG_AX);
14051451
break;
14061452
case 4:
1407-
insn->opcodeRegister =
1408-
(Reg)(MODRM_REG_EAX +
1409-
((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1453+
setOpcodeRegister(MODRM_REG_EAX);
14101454
break;
14111455
case 8:
1412-
insn->opcodeRegister =
1413-
(Reg)(MODRM_REG_RAX +
1414-
((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
1456+
setOpcodeRegister(MODRM_REG_RAX);
14151457
break;
14161458
}
14171459

0 commit comments

Comments
 (0)