@@ -103,7 +103,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
103
103
getActionDefinitionsBuilder (G_BSWAP)
104
104
.legalFor ({s32, s64, v4s32, v2s32, v2s64})
105
105
.clampScalar (0 , s32, s64)
106
- .widenScalarToNextPow2 (0 );
106
+ .widenScalarToNextPow2 (0 )
107
+ .customIf (typeIs (0 , v2s16)); // custom lower as G_REV32 + G_LSHR
107
108
108
109
getActionDefinitionsBuilder ({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
109
110
.legalFor ({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
@@ -791,6 +792,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
791
792
case TargetOpcode::G_LOAD:
792
793
case TargetOpcode::G_STORE:
793
794
return legalizeLoadStore (MI, MRI, MIRBuilder, Observer);
795
+ case TargetOpcode::G_BSWAP:
796
+ return legalizeBSwap (MI, MRI, MIRBuilder);
794
797
case TargetOpcode::G_SHL:
795
798
case TargetOpcode::G_ASHR:
796
799
case TargetOpcode::G_LSHR:
@@ -1001,6 +1004,46 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
1001
1004
return true ;
1002
1005
}
1003
1006
1007
/// Custom-legalize a G_BSWAP whose operand is a <2 x s16> vector.
///
/// \param MI         the G_BSWAP instruction to replace (erased on success).
/// \param MRI        register info used to query the operand's LLT type.
/// \param MIRBuilder builder positioned at \p MI; all replacement
///                   instructions are emitted through it, in order.
/// \returns true — this lowering always succeeds.
bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  assert(MI.getOpcode() == TargetOpcode::G_BSWAP);

  // The <2 x half> case needs special lowering because there isn't an
  // instruction that does that directly. Instead, we widen to <8 x i8>
  // and emit a G_REV32 followed by a G_LSHR knowing that instruction selection
  // will later match them as:
  //
  //   rev32.8b v0, v0
  //   ushr.2s v0, v0, #16
  //
  // We could emit those here directly, but it seems better to keep things as
  // generic as possible through legalization, and avoid committing layering
  // violations by legalizing & selecting here at the same time.

  // Source operand must be exactly v2s16 — the customIf() predicate that
  // routes here only fires for that type.
  Register ValReg = MI.getOperand(1).getReg();
  assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg));
  const LLT v2s32 = LLT::fixed_vector(2, 32);
  const LLT v8s8 = LLT::fixed_vector(8, 8);
  const LLT s32 = LLT::scalar(32);

  // Place the 32-bit v2s16 payload into the low 32 bits (ssub) of an
  // undef 64-bit v8s8 vector so G_REV32 can operate on whole d-registers.
  auto Undef = MIRBuilder.buildUndef(v8s8);
  auto Insert =
      MIRBuilder
          .buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg})
          .addImm(AArch64::ssub);
  // REV32 byte-reverses each 32-bit chunk; for two adjacent s16 lanes that
  // both swaps each lane's bytes and swaps the two lanes. The shift below
  // undoes the lane swap.
  auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert});
  auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32);
  // Logical shift right by 16 within each 32-bit element moves the swapped
  // lanes back to their original positions (matched later as ushr.2s #16).
  auto Amt = MIRBuilder.buildConstant(v2s32, 16);
  auto UShr =
      MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt});
  // The v2s16 result lives in the low 32 bits: extract element 0 as s32 and
  // bitcast it back to the original destination register's type.
  auto Zero = MIRBuilder.buildConstant(s32, 0);
  auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero);
  MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract);
  MI.eraseFromParent();
  return true;
}
1046
+
1004
1047
bool AArch64LegalizerInfo::legalizeVaArg (MachineInstr &MI,
1005
1048
MachineRegisterInfo &MRI,
1006
1049
MachineIRBuilder &MIRBuilder) const {
0 commit comments