@@ -206,6 +206,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
206
206
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f ;
207
207
}
208
208
209
// Returns true when the decoder is in 64-bit mode and `prefix` is 0xd5, the
// byte that the prefix reader stores as the first byte of a REX2 prefix.
// In any other mode the byte is never reported as REX2.
+ static bool isREX2 (struct InternalInstruction *insn, uint8_t prefix) {
210
+ return insn->mode == MODE_64BIT && prefix == 0xd5 ;
211
+ }
212
+
209
213
// Consumes all of an instruction's prefix bytes, and marks the
210
214
// instruction as having them. Also sets the instruction's default operand,
211
215
// address, and other relevant data sizes to report operands correctly.
@@ -337,8 +341,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
337
341
return -1 ;
338
342
}
339
343
340
- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 ) &&
341
- ((~byte1 & 0x8 ) == 0x8 ) && ((byte2 & 0x4 ) == 0x4 )) {
344
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0 ) == 0xc0 )) {
342
345
insn->vectorExtensionType = TYPE_EVEX;
343
346
} else {
344
347
--insn->readerCursor ; // unconsume byte1
@@ -357,13 +360,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
357
360
return -1 ;
358
361
}
359
362
360
- // We simulate the REX prefix for simplicity's sake
361
363
if (insn->mode == MODE_64BIT) {
364
+ // We simulate the REX prefix for simplicity's sake
362
365
insn->rexPrefix = 0x40 |
363
366
(wFromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 3 ) |
364
367
(rFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 2 ) |
365
368
(xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 1 ) |
366
369
(bFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 0 );
370
+
371
+ // We simulate the REX2 prefix for simplicity's sake
372
+ insn->rex2ExtensionPrefix [1 ] =
373
+ (r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 6 ) |
374
+ (x2FromEVEX3of4 (insn->vectorExtensionPrefix [2 ]) << 5 ) |
375
+ (b2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 );
367
376
}
368
377
369
378
LLVM_DEBUG (
@@ -474,6 +483,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
474
483
insn->vectorExtensionPrefix [1 ],
475
484
insn->vectorExtensionPrefix [2 ]));
476
485
}
486
+ } else if (isREX2 (insn, byte)) {
487
+ uint8_t byte1;
488
+ if (peek (insn, byte1)) {
489
+ LLVM_DEBUG (dbgs () << " Couldn't read second byte of REX2" );
490
+ return -1 ;
491
+ }
492
+ insn->rex2ExtensionPrefix [0 ] = byte;
493
+ consume (insn, insn->rex2ExtensionPrefix [1 ]);
494
+
495
+ // We simulate the REX prefix for simplicity's sake
496
+ insn->rexPrefix = 0x40 | (wFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 3 ) |
497
+ (rFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 2 ) |
498
+ (xFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 1 ) |
499
+ (bFromREX2 (insn->rex2ExtensionPrefix [1 ]) << 0 );
500
+ LLVM_DEBUG (dbgs () << format (" Found REX2 prefix 0x%hhx 0x%hhx" ,
501
+ insn->rex2ExtensionPrefix [0 ],
502
+ insn->rex2ExtensionPrefix [1 ]));
477
503
} else if (isREX (insn, byte)) {
478
504
if (peek (insn, nextByte))
479
505
return -1 ;
@@ -532,7 +558,8 @@ static int readSIB(struct InternalInstruction *insn) {
532
558
if (consume (insn, insn->sib ))
533
559
return -1 ;
534
560
535
- index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 );
561
+ index = indexFromSIB (insn->sib ) | (xFromREX (insn->rexPrefix ) << 3 ) |
562
+ (x2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
536
563
537
564
if (index == 0x4 ) {
538
565
insn->sibIndex = SIB_INDEX_NONE;
@@ -542,7 +569,8 @@ static int readSIB(struct InternalInstruction *insn) {
542
569
543
570
insn->sibScale = 1 << scaleFromSIB (insn->sib );
544
571
545
- base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 );
572
+ base = baseFromSIB (insn->sib ) | (bFromREX (insn->rexPrefix ) << 3 ) |
573
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
546
574
547
575
switch (base) {
548
576
case 0x5 :
@@ -604,7 +632,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
604
632
605
633
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
606
634
static int readModRM (struct InternalInstruction *insn) {
607
- uint8_t mod, rm, reg, evexrm ;
635
+ uint8_t mod, rm, reg;
608
636
LLVM_DEBUG (dbgs () << " readModRM()" );
609
637
610
638
if (insn->consumedModRM )
@@ -636,14 +664,13 @@ static int readModRM(struct InternalInstruction *insn) {
636
664
break ;
637
665
}
638
666
639
- reg |= rFromREX (insn->rexPrefix ) << 3 ;
640
- rm |= bFromREX (insn->rexPrefix ) << 3 ;
667
+ reg |= (rFromREX (insn->rexPrefix ) << 3 ) |
668
+ (r2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
669
+ rm |= (bFromREX (insn->rexPrefix ) << 3 ) |
670
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 );
641
671
642
- evexrm = 0 ;
643
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
672
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
644
673
reg |= r2FromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
645
- evexrm = xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ;
646
- }
647
674
648
675
insn->reg = (Reg)(insn->regBase + reg);
649
676
@@ -731,7 +758,7 @@ static int readModRM(struct InternalInstruction *insn) {
731
758
break ;
732
759
case 0x3 :
733
760
insn->eaDisplacement = EA_DISP_NONE;
734
- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm );
761
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
735
762
break ;
736
763
}
737
764
break ;
@@ -741,7 +768,7 @@ static int readModRM(struct InternalInstruction *insn) {
741
768
return 0 ;
742
769
}
743
770
744
- #define GENERIC_FIXUP_FUNC (name, base, prefix, mask ) \
771
+ #define GENERIC_FIXUP_FUNC (name, base, prefix ) \
745
772
static uint16_t name (struct InternalInstruction *insn, OperandType type, \
746
773
uint8_t index, uint8_t *valid) { \
747
774
*valid = 1 ; \
@@ -753,28 +780,15 @@ static int readModRM(struct InternalInstruction *insn) {
753
780
case TYPE_Rv: \
754
781
return base + index ; \
755
782
case TYPE_R8: \
756
- index &= mask; \
757
- if (index > 0xf ) \
758
- *valid = 0 ; \
759
- if (insn->rexPrefix && index >= 4 && index <= 7 ) { \
783
+ if (insn->rexPrefix && index >= 4 && index <= 7 ) \
760
784
return prefix##_SPL + (index - 4 ); \
761
- } else { \
785
+ else \
762
786
return prefix##_AL + index ; \
763
- } \
764
787
case TYPE_R16: \
765
- index &= mask; \
766
- if (index > 0xf ) \
767
- *valid = 0 ; \
768
788
return prefix##_AX + index ; \
769
789
case TYPE_R32: \
770
- index &= mask; \
771
- if (index > 0xf ) \
772
- *valid = 0 ; \
773
790
return prefix##_EAX + index ; \
774
791
case TYPE_R64: \
775
- index &= mask; \
776
- if (index > 0xf ) \
777
- *valid = 0 ; \
778
792
return prefix##_RAX + index ; \
779
793
case TYPE_ZMM: \
780
794
return prefix##_ZMM0 + index ; \
@@ -824,8 +838,8 @@ static int readModRM(struct InternalInstruction *insn) {
824
838
// @param valid - The address of a uint8_t. The target is set to 1 if the
825
839
// field is valid for the register class; 0 if not.
826
840
// @return - The proper value.
827
- GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG, 0x1f )
828
- GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf )
841
+ GENERIC_FIXUP_FUNC (fixupRegValue, insn->regBase, MODRM_REG)
842
+ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
829
843
830
844
// Consult an operand specifier to determine which of the fixup*Value functions
831
845
// to use in correcting readModRM()'s interpretation.
@@ -855,8 +869,31 @@ static int fixupReg(struct InternalInstruction *insn,
855
869
if (!valid)
856
870
return -1 ;
857
871
break ;
858
- case ENCODING_SIB:
859
872
CASE_ENCODING_RM:
873
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
874
+ modFromModRM (insn->modRM ) == 3 ) {
875
+ // EVEX_X can extend the register id to 32 for a non-GPR register that is
876
+ // encoded in RM.
877
+ // mode : MODE_64BIT
878
+ // Only 8 vector registers are available in 32 bit mode
879
+ // mod : 3
880
+ // RM encodes a register
881
+ switch (op->type ) {
882
+ case TYPE_Rv:
883
+ case TYPE_R8:
884
+ case TYPE_R16:
885
+ case TYPE_R32:
886
+ case TYPE_R64:
887
+ break ;
888
+ default :
889
+ insn->eaBase =
890
+ (EABase)(insn->eaBase +
891
+ (xFromEVEX2of4 (insn->vectorExtensionPrefix [1 ]) << 4 ));
892
+ break ;
893
+ }
894
+ }
895
+ [[fallthrough]];
896
+ case ENCODING_SIB:
860
897
if (insn->eaBase >= insn->eaRegBase ) {
861
898
insn->eaBase = (EABase)fixupRMValue (
862
899
insn, (OperandType)op->type , insn->eaBase - insn->eaRegBase , &valid);
@@ -945,6 +982,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
945
982
insn->opcodeType = XOPA_MAP;
946
983
return consume (insn, insn->opcode );
947
984
}
985
+ } else if (mFromREX2 (insn->rex2ExtensionPrefix [1 ])) {
986
+ // m bit indicates opcode map 1
987
+ insn->opcodeType = TWOBYTE;
988
+ return consume (insn, insn->opcode );
948
989
}
949
990
950
991
if (consume (insn, current))
@@ -1388,10 +1429,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1388
1429
if (size == 0 )
1389
1430
size = insn->registerSize ;
1390
1431
1432
+ auto setOpcodeRegister = [&](unsigned base) {
1433
+ insn->opcodeRegister =
1434
+ (Reg)(base + ((bFromREX (insn->rexPrefix ) << 3 ) |
1435
+ (b2FromREX2 (insn->rex2ExtensionPrefix [1 ]) << 4 ) |
1436
+ (insn->opcode & 7 )));
1437
+ };
1438
+
1391
1439
switch (size) {
1392
1440
case 1 :
1393
- insn->opcodeRegister = (Reg)(
1394
- MODRM_REG_AL + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1441
+ setOpcodeRegister (MODRM_REG_AL);
1395
1442
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1396
1443
insn->opcodeRegister < MODRM_REG_AL + 0x8 ) {
1397
1444
insn->opcodeRegister =
@@ -1400,18 +1447,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1400
1447
1401
1448
break ;
1402
1449
case 2 :
1403
- insn->opcodeRegister = (Reg)(
1404
- MODRM_REG_AX + ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1450
+ setOpcodeRegister (MODRM_REG_AX);
1405
1451
break ;
1406
1452
case 4 :
1407
- insn->opcodeRegister =
1408
- (Reg)(MODRM_REG_EAX +
1409
- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1453
+ setOpcodeRegister (MODRM_REG_EAX);
1410
1454
break ;
1411
1455
case 8 :
1412
- insn->opcodeRegister =
1413
- (Reg)(MODRM_REG_RAX +
1414
- ((bFromREX (insn->rexPrefix ) << 3 ) | (insn->opcode & 7 )));
1456
+ setOpcodeRegister (MODRM_REG_RAX);
1415
1457
break ;
1416
1458
}
1417
1459
0 commit comments