Skip to content

Commit 4bd50e7

Browse files
authored
[SYCL][ESIMD] Add support for lsc mem access APIs (#5512)
* [SYCL][ESIMD] Add support for lsc mem access APIs Signed-off-by: Sergey Dmitriev <[email protected]> * Removed XeHP_SDV from the list of supported platforms * Removed DG2 from the list of supported platforms for 2d intrinsics * Removed cache hints from user-visible lsc SLM APIs * Replaced "flat-address" with "USM pointer" * Removed Transposed and Transformed params from lsc_store2d template * Removed L1 cache hint from atomic operations * Removed NElts from atomic operations * Reordered parameters for lsc atomic templates to make them consistent with regular atomics * Added static asserts to check data sizes for Transformed and Transposed messages * Added checks for allowed cache hints * Add special handling for u8 and u16 data types * Remove 'Transposed' and 'Transformed' parameters from prefetch 2d
1 parent 2ceeba5 commit 4bd50e7

File tree

6 files changed

+2583
-1
lines changed

6 files changed

+2583
-1
lines changed

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

+127
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,42 @@ ModulePass *llvm::createSYCLLowerESIMDPass() {
7979
}
8080

8181
namespace {
82+
// Sub-opcode values for lsc (memory access) operations, stored as uint8_t.
// In this file an enumerator is turned into an 8-bit constant argument rule
// through the c8(lsc_subopcode) overload and used in the "lsc.*" entries of
// the intrinsic descriptor table.
// NOTE(review): the numeric values presumably mirror the backend's LSC
// message encoding - confirm against the backend definition before changing.
enum class lsc_subopcode : uint8_t {
83+
// Loads and stores.
load = 0x00,
84+
load_strided = 0x01,
85+
load_quad = 0x02,
86+
load_block2d = 0x03,
87+
store = 0x04,
88+
store_strided = 0x05,
89+
store_quad = 0x06,
90+
store_block2d = 0x07,
91+
// Atomic operations.
92+
atomic_iinc = 0x08,
93+
atomic_idec = 0x09,
94+
atomic_load = 0x0a,
95+
atomic_store = 0x0b,
96+
atomic_iadd = 0x0c,
97+
atomic_isub = 0x0d,
98+
atomic_smin = 0x0e,
99+
atomic_smax = 0x0f,
100+
atomic_umin = 0x10,
101+
atomic_umax = 0x11,
102+
atomic_icas = 0x12,
103+
atomic_fadd = 0x13,
104+
atomic_fsub = 0x14,
105+
atomic_fmin = 0x15,
106+
atomic_fmax = 0x16,
107+
atomic_fcas = 0x17,
108+
atomic_and = 0x18,
109+
atomic_or = 0x19,
110+
atomic_xor = 0x1a,
111+
// Remaining (non-load/store, non-atomic) sub-opcodes.
112+
load_status = 0x1b,
113+
store_uncompressed = 0x1c,
114+
ccs_update = 0x1d,
115+
read_state_info = 0x1e,
116+
fence = 0x1f,
117+
};
82118
// The regexp for ESIMD intrinsics:
83119
// /^_Z(\d+)__esimd_\w+/
84120
static constexpr char ESIMD_INTRIN_PREF0[] = "_Z";
@@ -227,6 +263,10 @@ class ESIMDIntrinDescTable {
227263
return ESIMDIntrinDesc::ArgRule{ESIMDIntrinDesc::CONST_INT8, {{N, {}}}};
228264
}
229265

266+
// Convenience overload of c8 for lsc sub-opcodes: converts the enumerator to
// its underlying uint8_t value and forwards it to another c8 overload, so
// table entries can be written as c8(lsc_subopcode::load) rather than with a
// raw number.
static constexpr ESIMDIntrinDesc::ArgRule c8(lsc_subopcode OpCode) {
267+
return c8(static_cast<uint8_t>(OpCode));
268+
}
269+
230270
// Builds an argument rule of kind CONST_INT16 that carries the constant N.
// NOTE(review): presumably this makes the translated intrinsic call receive N
// as a 16-bit integer constant - confirm against the ArgRule consumer.
static constexpr ESIMDIntrinDesc::ArgRule c16(int16_t N) {
231271
return ESIMDIntrinDesc::ArgRule{ESIMDIntrinDesc::CONST_INT16, {{N, {}}}};
232272
}
@@ -454,6 +494,87 @@ class ESIMDIntrinDescTable {
454494
{"nbarrier", {"nbarrier", {a(0), a(1), a(2)}}},
455495
{"raw_send_nbarrier_signal",
456496
{"raw.send.noresult", {a(0), ai1(4), a(1), a(2), a(3)}}},
497+
{"lsc_load_slm",
498+
{"lsc.load.slm",
499+
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
500+
t8(6), t8(7), c8(0), a(1), c32(0)}}},
501+
{"lsc_load_bti",
502+
{"lsc.load.bti",
503+
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
504+
t8(6), t8(7), c8(0), a(1), aSI(2)}}},
505+
{"lsc_load_stateless",
506+
{"lsc.load.stateless",
507+
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
508+
t8(6), t8(7), c8(0), a(1), c32(0)}}},
509+
{"lsc_prefetch_bti",
510+
{"lsc.prefetch.bti",
511+
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
512+
t8(6), t8(7), c8(0), a(1), aSI(2)}}},
513+
{"lsc_prefetch_stateless",
514+
{"lsc.prefetch.stateless",
515+
{ai1(0), c8(lsc_subopcode::load), t8(1), t8(2), t16(3), t32(4), t8(5),
516+
t8(6), t8(7), c8(0), a(1), c32(0)}}},
517+
{"lsc_store_slm",
518+
{"lsc.store.slm",
519+
{ai1(0), c8(lsc_subopcode::store), t8(1), t8(2), t16(3), t32(4),
520+
t8(5), t8(6), t8(7), c8(0), a(1), a(2), c32(0)}}},
521+
{"lsc_store_bti",
522+
{"lsc.store.bti",
523+
{ai1(0), c8(lsc_subopcode::store), t8(1), t8(2), t16(3), t32(4),
524+
t8(5), t8(6), t8(7), c8(0), a(1), a(2), aSI(3)}}},
525+
{"lsc_store_stateless",
526+
{"lsc.store.stateless",
527+
{ai1(0), c8(lsc_subopcode::store), t8(1), t8(2), t16(3), t32(4),
528+
t8(5), t8(6), t8(7), c8(0), a(1), a(2), c32(0)}}},
529+
{"lsc_load2d_stateless",
530+
{"lsc.load2d.stateless",
531+
{ai1(0), t8(1), t8(2), t8(3), t8(4), t8(5), t16(6), t16(7), t8(8),
532+
a(1), a(2), a(3), a(4), a(5), a(6)}}},
533+
{"lsc_prefetch2d_stateless",
534+
{"lsc.prefetch2d.stateless",
535+
{ai1(0), t8(1), t8(2), t8(3), t8(4), t8(5), t16(6), t16(7), t8(8),
536+
a(1), a(2), a(3), a(4), a(5), a(6)}}},
537+
{"lsc_store2d_stateless",
538+
{"lsc.store2d.stateless",
539+
{ai1(0), t8(1), t8(2), t8(3), t8(4), t8(5), t16(6), t16(7), t8(8),
540+
a(1), a(2), a(3), a(4), a(5), a(6), a(7)}}},
541+
{"lsc_xatomic_slm_0",
542+
{"lsc.xatomic.slm",
543+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
544+
c8(0), a(1), u(-1), u(-1), c32(0), u(-1)}}},
545+
{"lsc_xatomic_slm_1",
546+
{"lsc.xatomic.slm",
547+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
548+
c8(0), a(1), a(2), u(-1), c32(0), u(-1)}}},
549+
{"lsc_xatomic_slm_2",
550+
{"lsc.xatomic.slm",
551+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
552+
c8(0), a(1), a(2), a(3), c32(0), u(-1)}}},
553+
{"lsc_xatomic_bti_0",
554+
{"lsc.xatomic.bti",
555+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
556+
c8(0), a(1), u(-1), u(-1), aSI(2), u(-1)}}},
557+
{"lsc_xatomic_bti_1",
558+
{"lsc.xatomic.bti",
559+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
560+
c8(0), a(1), a(2), u(-1), aSI(3), u(-1)}}},
561+
{"lsc_xatomic_bti_2",
562+
{"lsc.xatomic.bti",
563+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
564+
c8(0), a(1), a(2), a(3), aSI(4), u(-1)}}},
565+
{"lsc_xatomic_stateless_0",
566+
{"lsc.xatomic.stateless",
567+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
568+
c8(0), a(1), u(-1), u(-1), c32(0), u(-1)}}},
569+
{"lsc_xatomic_stateless_1",
570+
{"lsc.xatomic.stateless",
571+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
572+
c8(0), a(1), a(2), u(-1), c32(0), u(-1)}}},
573+
{"lsc_xatomic_stateless_2",
574+
{"lsc.xatomic.stateless",
575+
{ai1(0), t8(1), t8(2), t8(3), t16(4), t32(5), t8(6), t8(7), t8(8),
576+
c8(0), a(1), a(2), a(3), c32(0), u(-1)}}},
577+
{"lsc_fence", {"lsc.fence", {ai1(0), t8(0), t8(1), t8(2)}}},
457578
{"sat", {"sat", {a(0)}}},
458579
{"fptoui_sat", {"fptoui.sat", {a(0)}}},
459580
{"fptosi_sat", {"fptosi.sat", {a(0)}}},
@@ -723,6 +844,12 @@ static std::string getESIMDIntrinSuffix(id::FunctionEncoding *FE,
723844
case 0x12:
724845
Suff = ".fcmpwr";
725846
break;
847+
case 0x13:
848+
Suff = ".fadd";
849+
break;
850+
case 0x14:
851+
Suff = ".fsub";
852+
break;
726853
case 0xff:
727854
Suff = ".predec";
728855
break;

0 commit comments

Comments
 (0)