
Commit 3da1af8

[Xe] Add scalar load/store with shared addrspace (intel#1540)
Add MIR definitions for scalar load/store with the SLM address mode, and add the corresponding selection functions. The base case for SLM scalar load/store is XeSLM_ScalarA32, in which the scalar address is encoded in the $addroff operand rather than in $addrbase as the stateless mode does, since SLM addresses must be 32b and the HW has no 32b $addrbase.
1 parent 24b54ae commit 3da1af8
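For quick context, the two scalar address modes now differ as follows (both defs are excerpted verbatim from the XeInstrInfoSend.td hunk below): the stateless form keeps its 64b base in $addrbase and has no $addroff, while the new SLM form has no $addrbase and carries the 32b scalar address in $addroff.

    // Scalar address modes: MemoryType, $addrbase, $addroff, AsmName, Handler, complexity
    def XeStateless_Base64 : XeAddressMode<XeStateless, XeScalarA64, XeAddrNull, "a64", StatelessBase64Handler, 2>;
    def XeSLM_ScalarA32    : XeAddressMode<XeSLM,       XeAddrNull,  XeScalarA32, "",   SLMScalarA32Handler,    2>;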

File tree

10 files changed: +153 -171 lines changed


llvm/lib/Target/Xe/XeInstrInfoSend.td

+40 -27
@@ -96,8 +96,8 @@ def XeA32v3 : XeAddrRC<GRF_3x32, "32v3">;
 def XeA32v4 : XeAddrRC<GRF_4x32, "32v4">;
 
 // SRF address operand (set for $addrbase or $addroff)
-// TODO: All supported messages currently have 64b srf address
-def XeScalarA64 : XeAddrRC<SRF_2x32, "64">;
+def XeScalarA32 : XeAddrRC<SRF_32, "32">;
+def XeScalarA64 : XeAddrRC<SRF_2x32, "64">;
 
 class AddressOpndHandler<string Fn> {
   string HandlerFn = Fn;
@@ -109,11 +109,11 @@ def StatelessBase64A64Handler : AddressOpndHandler<"selectStatelessBase64A64">;
 def StatelessBase64A32SHandler : AddressOpndHandler<"selectStatelessBase64A32S">;
 def StatelessBase64A32UHandler : AddressOpndHandler<"selectStatelessBase64A32U">;
 def SLMA32Handler : AddressOpndHandler<"selectSLMA32">;
-def SLMBase64A32Handler : AddressOpndHandler<"selectSLMBase64A32">;
 def GenericBase64A64Handler : AddressOpndHandler<"selectGenericBase64A64">;
 def NullHandler : AddressOpndHandler<"/* Unused */">;
 // scalar address handler
-def StatelessBase64Handler : AddressOpndHandler<"selectStatelessBase64">;
+def StatelessBase64Handler : AddressOpndHandler<"selectStatelessBase64">;
+def SLMScalarA32Handler : AddressOpndHandler<"selectSLMScalarA32">;
 
 class AddrPattern<ValueType VT, int NumOps, AddressOpndHandler H, int complexity>
   : ComplexPattern<VT, NumOps, H.HandlerFn> {
@@ -176,10 +176,20 @@ class toAddrMIROp<string memName, XeAddrRC baseRC, XeAddrRC offRC, string asmNam
                          true : "");
 
   // address offset name not present if RC is null. Or it is the asmName if exists.
-  // Otherwise it's "a32" or "a64" accroding to RC size
+  // Otherwise it's "a" + offRC.Size, e.g. "a32" or "a64".
+  // Additionally, prefix scalar form address offset with "scalar"
+  // Some examples:
+  //   asmName   offRC.Size   vectorName   scalarName
+  //   --------------------------------------------------
+  //   ""        "32"         "a32"        "scalara32"
+  //   ""        "32v2"       "a32v2"      "sclara32v2"
+  //   "a64"     --           "a64"        "scalara64"
+  //   "a32s"    --           "a32s"       "scalara32s"
+  //   "sa32"    --           "sa32"       "scalarsa32"
+  defvar OffPrefix = !if(!eq(!find(!cast<string>(offRC), "Scalar"), -1), "", "scalar");
   defvar OffName = !cond(!eq(offRC, XeAddrNull) : "",
-                         !eq(asmName, "") : "_a"#offRC.Size,
-                         true : "_"#asmName);
+                         !eq(asmName, "") : "_"#OffPrefix#"a"#offRC.Size,
+                         true : "_"#OffPrefix#asmName);
 
   string AddrName = memName#BaseName#OffName;
 }
@@ -207,25 +217,25 @@ class XeAddressMode<XeMemoryType mt, XeAddrRC baseRC, XeAddrRC offsetRC, string
   XeAddrOpnd AddrOpnd = toXeAddrOpnd<mt.Size, baseRC, offsetRC, H, complexity>.Opnd;
 }
 
-// MemoryType, $addrbase, $addroff, AsmName, AddressOpndHandler, complexity
-// ------------------------------------------------------------------------------------
+// MemoryType, $addrbase, $addroff, AsmName, AddressOpndHandler, complexity
+// ---------------------------------------------------------------------------------------
 // Vector address
-def XeStateful_Base64_A32 : XeAddressMode<XeStateful, XeScalarA64, XeA32, "sa32", StatefulBase64A32Handler, 1>;
-def XeStateless_A64 : XeAddressMode<XeStateless, XeAddrNull, XeA64, "a64", StatelessA64Hanlder, 1>;
-def XeStateless_Base64_A64 : XeAddressMode<XeStateless, XeScalarA64, XeA64, "a64", StatelessBase64A64Handler, 1>;
-def XeStateless_Base64_A32S : XeAddressMode<XeStateless, XeScalarA64, XeA32, "a32s", StatelessBase64A32SHandler, 6>;
-def XeStateless_Base64_A32U : XeAddressMode<XeStateless, XeScalarA64, XeA32, "a32u", StatelessBase64A32UHandler, 6>;
-def XeSLM_Base64_A32 : XeAddressMode<XeSLM, XeScalarA64, XeA32, "", SLMBase64A32Handler, 3>;
-def XeSLM_A32 : XeAddressMode<XeSLM, XeAddrNull, XeA32, "", SLMA32Handler, 1>;
-def XeGeneric_Base64_A64 : XeAddressMode<XeGeneric, XeScalarA64, XeA64, "a64", GenericBase64A64Handler, 1>;
-def XeURB_A32 : XeAddressMode<XeURB, XeAddrNull, XeA32, "", NullHandler, 1>;
+def XeStateful_Base64_A32 : XeAddressMode<XeStateful, XeScalarA64, XeA32, "sa32", StatefulBase64A32Handler, 1>;
+def XeStateless_A64 : XeAddressMode<XeStateless, XeAddrNull, XeA64, "a64", StatelessA64Hanlder, 1>;
+def XeStateless_Base64_A64 : XeAddressMode<XeStateless, XeScalarA64, XeA64, "a64", StatelessBase64A64Handler, 1>;
+def XeStateless_Base64_A32S : XeAddressMode<XeStateless, XeScalarA64, XeA32, "a32s", StatelessBase64A32SHandler, 6>;
+def XeStateless_Base64_A32U : XeAddressMode<XeStateless, XeScalarA64, XeA32, "a32u", StatelessBase64A32UHandler, 6>;
+def XeSLM_A32 : XeAddressMode<XeSLM, XeAddrNull, XeA32, "", SLMA32Handler, 1>;
+def XeGeneric_Base64_A64 : XeAddressMode<XeGeneric, XeScalarA64, XeA64, "a64", GenericBase64A64Handler, 1>;
+def XeURB_A32 : XeAddressMode<XeURB, XeAddrNull, XeA32, "", NullHandler, 1>;
 // TGM has multiple address payload sizes depending on surface type
-def XeTGM_Base64_A32 : XeAddressMode<XeTGM, XeScalarA64, XeA32, "", NullHandler, 1>;
-def XeTGM_Base64_A32v2 : XeAddressMode<XeTGM, XeScalarA64, XeA32v2, "", NullHandler, 1>;
-def XeTGM_Base64_A32v3 : XeAddressMode<XeTGM, XeScalarA64, XeA32v3, "", NullHandler, 1>;
-def XeTGM_Base64_A32v4 : XeAddressMode<XeTGM, XeScalarA64, XeA32v4, "", NullHandler, 1>;
+def XeTGM_Base64_A32 : XeAddressMode<XeTGM, XeScalarA64, XeA32, "", NullHandler, 1>;
+def XeTGM_Base64_A32v2 : XeAddressMode<XeTGM, XeScalarA64, XeA32v2, "", NullHandler, 1>;
+def XeTGM_Base64_A32v3 : XeAddressMode<XeTGM, XeScalarA64, XeA32v3, "", NullHandler, 1>;
+def XeTGM_Base64_A32v4 : XeAddressMode<XeTGM, XeScalarA64, XeA32v4, "", NullHandler, 1>;
 // Scalar address
-def XeStateless_Base64 : XeAddressMode<XeStateless, XeScalarA64, XeAddrNull, "a64", StatelessBase64Handler, 2>;
+def XeStateless_Base64 : XeAddressMode<XeStateless, XeScalarA64, XeAddrNull, "a64", StatelessBase64Handler, 2>;
+def XeSLM_ScalarA32 : XeAddressMode<XeSLM, XeAddrNull, XeScalarA32, "", SLMScalarA32Handler, 2>;
 
 class StorePat<XeAddressMode AM, SDPatternOperator Op> : PatFrag<(ops node:$data, node:$ptr), (Op node:$data, node:$ptr)> {
   let IsStore = 1;
@@ -243,7 +253,10 @@ class AtomPat<XeAddressMode AM, PatFrag pat> :
   let AddressSpaces = AM.MemTy.AsNum;
 }
 
-foreach Am = [XeStateless_Base64_A32S, XeStateless_Base64_A32U, XeStateless_Base64_A64, XeStateless_A64, XeGeneric_Base64_A64, XeSLM_A32, XeSLM_Base64_A32, XeStateful_Base64_A32, XeStateless_Base64] in {
+foreach Am = [XeStateless_Base64_A32S, XeStateless_Base64_A32U, XeStateless_Base64_A64, XeStateless_A64, XeStateless_Base64,
+              XeGeneric_Base64_A64,
+              XeSLM_A32, XeSLM_ScalarA32,
+              XeStateful_Base64_A32] in {
   // Complex operand matchers for GISel.
   def gi_AddrPattern_#Am.MIRName : GIComplexOperandMatcher<VTtoLLT<Am.MemTy.Size>.Ty, Am.AddrOpnd.Handler.HandlerFn>, GIComplexPatternEquiv<Am.AddrOpnd>;
 }
@@ -579,13 +592,13 @@ multiclass TypedStore<XeAddressMode Am, list<XeIOType> IOTypes> {
 }
 
 // vector ld/st
-foreach as = [XeStateless_Base64_A32S, XeStateless_Base64_A32U, XeStateless_Base64_A64, XeStateless_A64, XeGeneric_Base64_A64, XeSLM_A32, XeSLM_Base64_A32, XeStateful_Base64_A32] in {
+foreach as = [XeStateless_Base64_A32S, XeStateless_Base64_A32U, XeStateless_Base64_A64, XeStateless_A64, XeGeneric_Base64_A64, XeSLM_A32, XeStateful_Base64_A32] in {
   defm LD: Load<as, XeIOTypes.LdSt>;
   defm ST: Store<as, XeIOTypes.LdSt>;
 }
 
 // scalar ld/st
-foreach as = [XeStateless_Base64] in {
+foreach as = [XeStateless_Base64, XeSLM_ScalarA32] in {
   defm SLD: Load<as, XeIOTypes.Sldst, 1>;
   defm SST: Store<as, XeIOTypes.Sldst, 1>;
 }
@@ -616,7 +629,7 @@ foreach as = [XeStateless_Base64_A32S, XeStateless_Base64_A32U, XeStateless_Base
 }
 
 // shared atomic
-foreach as = [XeSLM_A32, XeSLM_Base64_A32] in {
+foreach as = [XeSLM_A32] in {
   defm IATOM: Atom<as, "IATOM", XeIOTypes.Atom1SrcNo64, ["aincwrap", "adecwrap", "aadd", "asub", "asmin", "asmax", "aumin", "aumax", "aand", "aor", "axor", "ast"]>;
   defm IATOM: Atom<as, "IATOM", XeIOTypes.Atom2Src, ["acxg"]>;
   defm FATOM: Atom<as, "FATOM", XeIOTypes.Atom1SrcNo64, ["afadd", "afsub", "afmin", "afmax"]>;
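As a side note, here is a minimal standalone TableGen sketch (hypothetical class and def names, assuming offRC.Size is the string passed to XeAddrRC) of how the OffPrefix/OffName logic above expands: with an empty asmName and an offset RC whose record name contains "Scalar", the suffix becomes "_scalara32", so the SLM scalar instructions should come out with names along the lines of SLD_slm_scalara32_* / SST_slm_scalara32_*, consistent with the ^(LD|SLD)_slm scheduling regex added in XeSchedule.td below.

    // Hypothetical, self-contained sketch of the offset-name computation (not the
    // real toAddrMIROp class); the base-register part of AddrName is omitted.
    class NameSketch<string memName, string offRCName, string offSize, string asmName> {
      defvar OffPrefix = !if(!eq(!find(offRCName, "Scalar"), -1), "", "scalar");
      defvar OffName = !cond(!eq(offRCName, "XeAddrNull") : "",
                             !eq(asmName, "") : "_"#OffPrefix#"a"#offSize,
                             true : "_"#OffPrefix#asmName);
      string AddrName = memName#OffName;
    }
    def SLMScalarName : NameSketch<"slm", "XeScalarA32", "32", "">;  // AddrName = "slm_scalara32"
    def SLMVectorName : NameSketch<"slm", "XeA32",       "32", "">;  // AddrName = "slm_a32"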

llvm/lib/Target/Xe/XeInstructionSelector.cpp

+55 -107
@@ -190,17 +190,20 @@ class XeInstructionSelector : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectSLMA32(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns
-  selectSLMBase64A32(MachineOperand &Root) const;
-  InstructionSelector::ComplexRendererFns
   selectGenericBase64A64(MachineOperand &Root) const;
   // scalar address selector
   InstructionSelector::ComplexRendererFns
   selectStatelessBase64(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSLMScalarA32(MachineOperand &Root) const;
   // helper functions for address selector
   InstructionSelector::ComplexRendererFns
   selectStatelessBase64A32Impl(MachineOperand &Root, bool IsSigned) const;
   InstructionSelector::ComplexRendererFns
   selectParamLoadImpl(MachineOperand &Root, MachineInstr &MemInst) const;
+  InstructionSelector::ComplexRendererFns
+  selectSLMA32Impl(MachineOperand &Root, bool IsScalarAddrOff) const;
+  bool canMemInstBeScalar(const MachineInstr &MemInst, Register Addr) const;
 
   template <MVT::SimpleValueType T>
   InstructionSelector::ComplexRendererFns
@@ -1769,32 +1772,6 @@ XeInstructionSelector::selectOpAndAddrOpndForAtomicCas(MachineOperand &Addr,
     }
     llvm_unreachable("unknown fatom cas op");
   };
-  auto getSLMBase64A32 = [&DataSize, &IsFloat]() {
-    if (IsFloat) {
-      switch (DataSize) {
-      case 16:
-        return Xe::FATOM_afcxg_slm_base64_a32_d16u32;
-      case 32:
-        return Xe::FATOM_afcxg_slm_base64_a32_d32;
-      case 64:
-        return Xe::FATOM_afcxg_slm_base64_a32_d64;
-      default:
-        break;
-      }
-    } else {
-      switch (DataSize) {
-      case 16:
-        return Xe::IATOM_acxg_slm_base64_a32_d16u32;
-      case 32:
-        return Xe::IATOM_acxg_slm_base64_a32_d32;
-      case 64:
-        return Xe::IATOM_acxg_slm_base64_a32_d64;
-      default:
-        break;
-      }
-    }
-    llvm_unreachable("unknown fatom cas op");
-  };
   auto getGenericBase64A64 = [&DataSize, &IsFloat]() {
     if (IsFloat) {
       switch (DataSize) {
@@ -1840,10 +1817,7 @@ XeInstructionSelector::selectOpAndAddrOpndForAtomicCas(MachineOperand &Addr,
     }
     return std::nullopt;
   } else if (AS == Xe::AddressSpace::SHARED) {
-    if (ComplexRendererFns Fns = selectSLMBase64A32(Addr)) {
-      AtomOp = getSLMBase64A32();
-      return Fns;
-    } else if (ComplexRendererFns Fns = selectSLMA32(Addr)) {
+    if (ComplexRendererFns Fns = selectSLMA32(Addr)) {
      AtomOp = getSLMA32();
      return Fns;
    }
@@ -3009,64 +2983,30 @@ XeInstructionSelector::selectStatelessBase64A64(MachineOperand &Root) const {
 }
 
 InstructionSelector::ComplexRendererFns
-XeInstructionSelector::selectSLMBase64A32(MachineOperand &Root) const {
-  MachineBasicBlock *BB = Root.getParent()->getParent();
-  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+XeInstructionSelector::selectSLMA32Impl(MachineOperand &Root,
+                                        bool IsScalarAddrOff) const {
+  // Try match optional global offset
+  AddrImmOffset AddrImmOff;
   MachineInstr *MemInst = Root.getParent();
+  auto GOffMatch = matchSendGlobalOffset(*MemInst, Root, AddrImmOff);
+  Register Addr = GOffMatch ? *GOffMatch : Root.getReg();
 
-  assert(MRI.getType(getAddrOpnd(*MemInst).getReg()).getAddressSpace() ==
-         unsigned(Xe::AddressSpace::SHARED));
-
-  // Match address pattern with 32b base address and 32b index
-  // e.g.
-  //   %7:grf(s32) = G_CONSTANT i32 2
-  //   %4:grf(s32) = G_SHL %1:srf, %7:grf(s32)
-  //   %5:grf(p3) = G_PTR_ADD %0:srf, %4:grf(s32)
-  //   G_STORE %2:srf(s32), %5:grf(p3) :: (store (s32) into %ir.p, addrspace 3)
-  // FIXME: do we need to match global_offset?
-
-  Register Base;
-  Register PtrAddSrc1;
-  if (!mi_match(Root.getReg(), MRI, m_GPtrAdd(m_Reg(Base), m_Reg(PtrAddSrc1))))
-    return std::nullopt;
-
-  // TODO: create COPY for using SRF_2x32
-  // Base address must be SRF64 for this pattern
-  if (TRI.getRegClassForTypeOnBank(Base, MRI) != &Xe::SRF_2x32RegClass)
-    return std::nullopt;
-
-  // Try match offset scaling value
-  std::optional<int64_t> ScaleVal{0};
-  Register AddrIdx;
-  if (!mi_match(PtrAddSrc1, MRI, m_GShl(m_Reg(AddrIdx), m_ICst(*ScaleVal))))
-    return std::nullopt;
-
-  // Address index must be 32 for this pattern
-  if (MRI.getType(AddrIdx).getSizeInBits() != 32)
+  // check if address/data RC are valid for selecting scalar ld/st
+  if (IsScalarAddrOff && !canMemInstBeScalar(*MemInst, Addr))
    return std::nullopt;
 
-  AddrImmOffset AddrImmOff;
-  if (ScaleVal) {
-    // ScaleVal is matched from SHL
-    auto Val = 1 << *ScaleVal;
-    if (!AddrImmOff.trySetOffsetScalingFromLogicalVal(Val,
-                                                      getMemSize(*MemInst)))
-      return std::nullopt;
-  }
-
-  return {{{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
-            [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrIdx); },
-            [=](MachineInstrBuilder &MIB) { MIB.addImm(AddrImmOff.val()); }}}};
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Addr); },
+           [=](MachineInstrBuilder &MIB) { MIB.addImm(AddrImmOff.val()); }}};
 }
 
 InstructionSelector::ComplexRendererFns
 XeInstructionSelector::selectSLMA32(MachineOperand &Root) const {
-  // Try match optional global offset
-  AddrImmOffset AddrImmOff;
-  auto GOffMatch = matchSendGlobalOffset(*Root.getParent(), Root, AddrImmOff);
-  Register Addr = GOffMatch ? *GOffMatch : Root.getReg();
-  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Addr); },
-           [=](MachineInstrBuilder &MIB) { MIB.addImm(AddrImmOff.val()); }}};
+  return selectSLMA32Impl(Root, false);
+}
+
+InstructionSelector::ComplexRendererFns
+XeInstructionSelector::selectSLMScalarA32(MachineOperand &Root) const {
+  return selectSLMA32Impl(Root, true);
 }
 
 InstructionSelector::ComplexRendererFns
@@ -3146,6 +3086,38 @@ XeInstructionSelector::selectParamLoadImpl(MachineOperand &Root,
            [=](MachineInstrBuilder &MIB) { MIB.addImm(AddrImmOff.val()); }}}};
 }
 
+bool XeInstructionSelector::canMemInstBeScalar(const MachineInstr &MemInst,
+                                               Register Addr) const {
+  // Skip non-4-bytes aligned load/store
+  // HW supports scalar load/store with aligned address only
+  assert(!MemInst.memoperands_empty());
+  MachineMemOperand *MMO = *MemInst.memoperands_begin();
+  if (commonAlignment(MMO->getAlign(), 4) != Align(4))
+    return false;
+
+  unsigned Op = MemInst.getOpcode();
+  // TODO: Currently only supoprt G_LOAD and G_STORE with scalar form
+  if (Op != TargetOpcode::G_LOAD && Op != TargetOpcode::G_STORE)
+    return false;
+
+  if (Op == TargetOpcode::G_LOAD) {
+    // there shouldn't be a case that a load has grf dst and srf address
+    assert(
+        RBI.getRegBankFromReg(Addr, *MRI).getID() != Xe::SRFRegBankID ||
+        RBI.getRegBankFromReg(MemInst.getOperand(0).getReg(), *MRI).getID() !=
+            Xe::GRFRegBankID);
+    // For load, dst needs to be uniform one
+    if (RBI.getRegBankFromReg(MemInst.getOperand(0).getReg(), *MRI).getID() !=
+        Xe::SRFRegBankID)
+      return false;
+  } else if (Op == TargetOpcode::G_STORE) {
+    // For store, address needs to be uniform
+    if (RBI.getRegBankFromReg(Addr, *MRI).getID() != Xe::SRFRegBankID)
+      return false;
+  }
+  return true;
+}
+
 InstructionSelector::ComplexRendererFns
 XeInstructionSelector::selectStatelessBase64(MachineOperand &Root) const {
   // Scalar load/store pattern
@@ -3163,33 +3135,9 @@ XeInstructionSelector::selectStatelessBase64(MachineOperand &Root) const {
   if (GOffMatch)
     Base = *GOffMatch;
 
-  // Skip non-4-bytes aligned load/store
-  // HW supports scalar load/store with aligned address only
-  assert(!MemInst->memoperands_empty());
-  MachineMemOperand *MMO = *MemInst->memoperands_begin();
-  if (commonAlignment(MMO->getAlign(), 4) != Align(4))
+  if (!canMemInstBeScalar(*MemInst, Base))
    return std::nullopt;
 
-  unsigned Op = MemInst->getOpcode();
-  if (Op == TargetOpcode::G_LOAD) {
-    // there shouldn't be a case that a load has grf dst and srf address
-    assert(
-        RBI.getRegBankFromReg(Base, *MRI).getID() != Xe::SRFRegBankID ||
-        RBI.getRegBankFromReg(MemInst->getOperand(0).getReg(), *MRI).getID() !=
-            Xe::GRFRegBankID);
-    // For load, dst needs to be uniform one
-    if (RBI.getRegBankFromReg(MemInst->getOperand(0).getReg(), *MRI).getID() !=
-        Xe::SRFRegBankID)
-      return std::nullopt;
-  } else if (Op == TargetOpcode::G_STORE) {
-    // For store, base (address) needs to be uniform
-    if (RBI.getRegBankFromReg(Base, *MRI).getID() != Xe::SRFRegBankID)
-      return std::nullopt;
-  } else {
-    // TODO: Currently only supoprt G_LOAD and G_STORE with scalar form
-    return std::nullopt;
-  }
-
   return {{{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(AddrImmOff.val()); }}}};
 }

llvm/lib/Target/Xe/XeRegisterBankInfo.cpp

+3 -2
@@ -119,9 +119,10 @@ XeRegisterBankInfo::determineRegBank(const MachineInstr &MI,
     break;
   case TargetOpcode::G_LOAD: {
     const LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
-    // Skip non-GLOBAL/CONSTANT space so far
+    // Skip non-GLOBAL/CONSTANT/SHARED space so far
     if (PtrTy.getAddressSpace() != unsigned(Xe::AddressSpace::GLOBAL) &&
-        PtrTy.getAddressSpace() != unsigned(Xe::AddressSpace::CONSTANT))
+        PtrTy.getAddressSpace() != unsigned(Xe::AddressSpace::CONSTANT) &&
+        PtrTy.getAddressSpace() != unsigned(Xe::AddressSpace::SHARED))
      return nullptr;
 
     // Skip non-4-bytes aligned load

llvm/lib/Target/Xe/XeSchedule.td

+2 -2
@@ -194,7 +194,7 @@ def : InstRW<[WriteNop], (instrs ILLEGAL)>;
 def : InstRW<[WriteBr], (instrs JOIN)>;
 
 // load
-def : InstRW<[WriteSLM], (instregex "^LD_slm")>;
+def : InstRW<[WriteSLM], (instregex "^(LD|SLD)_slm")>;
 def : InstRW<[WriteStateful], (instregex "^LD_stateful")>;
 def : InstRW<[WriteStateless], (instregex "^(LD|SLD)_stateless")>;
 def : InstRW<[WriteStateless], (instregex "^LD_generic")>;
@@ -304,7 +304,7 @@ def : InstRW<[WriteL], (instregex "^SMOV(_comp)?_b64")>;
 def : InstRW<[WriteI], (instregex "^SSETA", "^SETA")>;
 
 // store
-def : InstRW<[WriteSLM], (instregex "^ST_slm")>;
+def : InstRW<[WriteSLM], (instregex "^(ST|SST)_slm")>;
 def : InstRW<[WriteStateful], (instregex "^ST_stateful")>;
 def : InstRW<[WriteStateless], (instregex "^(ST|SST)_stateless")>;
 def : InstRW<[WriteStateless], (instregex "^ST_generic")>;
