Skip to content

Commit 5443ea9

Browse files
committed
[LLVM][Clang][XTHeadVector] Add indexed load/store intrinsics (llvm#54)
* [Clang][XTHeadVector] Rename multiclass to avoid confusion * [Clang][XTHeadVector] Add `vlxb`, `vlxh`, `vlxw` and corresponding unsigned version * [Clang][XTHeadVector] Add `vloxei` intrinsic * [Clang][XTHeadVector] Add `vsoxei` intrinsic * [Clang][XTHeadVector] Rename * [Clang][XTHeadVector] Add `vsxb`, `vsxh`, `vsxw` * [Clang][XTHeadVector] Fix typo * [Clang][XTHeadVector] Fix multiclass * [Clang][XTHeadVector] Add todo * [Clang][XTHeadVector] Use `RVVOutOp1Builtin` * [Clang][XTHeadVector] Add simple handcrafted tests * [Clang][XTHeadVector] Add generate tests * [Clang][XTHeadVector] Add generate tests * [Clang][XTHeadVector] Add wrapper macros * [Clang][XTHeadVector] Add more tests * [NFC][XTHeadVector] Update README
1 parent e81563e commit 5443ea9

File tree

15 files changed

+6698
-16
lines changed

15 files changed

+6698
-16
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ Any feature not listed below but present in the specification should be consider
4646
- (WIP) `7. Vector Load/Store`
4747
- (Done) `7.1. Vector Unit-Stride Operations`
4848
- (Done) `7.2. Vector Strided Load/Store Operations`
49+
- (Done) `7.3. Vector Indexed Load/Store Operations`
4950
- (Done) `7.4. Unit-stride Fault-Only-First Loads Operations`
5051

5152
## Q & A

clang/include/clang/Basic/riscv_vector_xtheadv.td

+146-16
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ class RVVOutBuiltin<string suffix, string prototype, string type_range>
1919
let IntrinsicTypes = [-1];
2020
}
2121

22+
class RVVOutOp1Builtin<string suffix, string prototype, string type_range>
23+
: RVVBuiltin<suffix, prototype, type_range> {
24+
let IntrinsicTypes = [-1, 1];
25+
}
26+
2227
multiclass RVVBuiltinSet<string intrinsic_name, string type_range,
2328
list<list<string>> suffixes_prototypes,
2429
list<int> intrinsic_types> {
@@ -53,6 +58,11 @@ multiclass RVVIntBinBuiltinSet
5358
: RVVSignedBinBuiltinSet,
5459
RVVUnsignedBinBuiltinSet;
5560

61+
defvar TypeList = ["c", "s", "i", "l", "x", "f", "d"];
62+
defvar EEWList = [["8", "(Log2EEW:3)"],
63+
["16", "(Log2EEW:4)"],
64+
["32", "(Log2EEW:5)"],
65+
["64", "(Log2EEW:6)"]];
5666

5767
//===----------------------------------------------------------------------===//
5868
// 6. Configuration-Setting and Utility
@@ -193,7 +203,7 @@ let SupportOverloading = false,
193203
}
194204

195205
// 7.1 Unit-stride load: vlb/h/w/bu/hu/wu
196-
multiclass RVVVLXBuiltin<string ir, list<string> types> {
206+
multiclass RVVVLBHWBuiltin<string ir, list<string> types> {
197207
foreach type = types in {
198208
// `vPCe` is type `const T * -> {VL} -> VectorType`
199209
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
@@ -227,7 +237,7 @@ let SupportOverloading = false,
227237
}
228238

229239
// 7.2 Strided load: vlsb/h/w/bu/hu/wu
230-
multiclass RVVVLSXBuiltin<string ir, list<string> types> {
240+
multiclass RVVVLSBHWBuiltin<string ir, list<string> types> {
231241
foreach type = types in {
232242
// `vPCez` is type `const T * -> SizeT -> {VL} -> VectorType`
233243
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
@@ -243,6 +253,24 @@ let SupportOverloading = false,
243253
}
244254
}
245255

256+
257+
// 7.3 Indexed Load Operations: vlxb/h/w/bu/hu/wu
258+
multiclass RVVVLXBHWBuiltin<string ir, list<string> types> {
259+
foreach type = types in {
260+
// `vPCeUv` is type `const T * -> unsigned VectorType -> {VL} -> VectorType`
261+
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
262+
let Name = NAME # "_v",
263+
IRName = ir,
264+
MaskedIRName = ir # "_mask" in
265+
def : RVVOutOp1Builtin<"v", "vPCeUv", type>;
266+
// `UvPCUeUv` is type `const unsigned T * -> unsigned VectorType -> {VL} -> unsigned VectorType`
267+
let Name = NAME # "u_v",
268+
IRName = ir # "u",
269+
MaskedIRName = ir # "u_mask" in
270+
def : RVVOutOp1Builtin<"Uv", "UvPCUeUv", type>;
271+
}
272+
}
273+
246274
// 7.4. Unit-stride Fault-Only-First Loads Operations
247275
multiclass RVVVLEFFBuiltin<string ir, list<string> types> {
248276
let Name = NAME # "_v",
@@ -292,6 +320,36 @@ let SupportOverloading = false,
292320
}
293321
}
294322

323+
// 7.3 Indexed Load Operations: vloxei<eew>
324+
multiclass RVVVLXEEWBuiltin<string ir, list<string> types> {
325+
let UnMaskedPolicyScheme = HasPassthruOperand in {
326+
foreach type = types in {
327+
foreach eew_list = EEWList in {
328+
defvar eew = eew_list[0];
329+
defvar eew_type = eew_list[1];
330+
let Name = NAME # eew # "_v",
331+
IRName = ir,
332+
MaskedIRName = ir # "_mask" in {
333+
// Compare the following two signatures of vloxei:
334+
// vint8m1_t vloxei8_v_i8m1 (const int8_t *base, vuint8m1_t bindex, size_t vl);
335+
// vint8m1_t vloxei16_v_i8m1 (const int8_t *base, vuint16m2_t bindex, size_t vl);
336+
// The type of `bindex` should not be computed from `type` (aka, i8m1, i8m2, etc.),
337+
// which is not the same as what we do in other intrinsics.
338+
339+
// `vPCe<eew>Uv` is type `const T * -> unsigned <EEW> VectorType -> {VL} -> VectorType`
340+
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
341+
def: RVVOutOp1Builtin<"v", "vPCe" # eew_type # "Uv", type>;
342+
if !not(IsFloat<type>.val) then {
343+
// `UvPCUe<eew>Uv` is type `const unsigned T * -> unsigned <EEW> VectorType -> {VL} -> unsigned VectorType`
344+
def: RVVOutOp1Builtin<"Uv", "UvPCUe" # eew_type # "Uv", type>;
345+
}
346+
}
347+
}
348+
}
349+
}
350+
}
351+
352+
// 7.1 Unit-stride Store Operations
295353
let HasMaskedOffOperand = false,
296354
MaskedPolicyScheme = NonePolicy,
297355
ManualCodegen = [{
@@ -326,7 +384,7 @@ let HasMaskedOffOperand = false,
326384
}
327385

328386
// 7.1 Unit-stride store: vsb/h/w/bu/hu/wu
329-
multiclass RVVVSXBuiltin<string ir, list<string> types> {
387+
multiclass RVVVSBHWBuiltin<string ir, list<string> types> {
330388
let Name = NAME # "_v",
331389
IRName = ir,
332390
MaskedIRName = ir # "_mask" in {
@@ -341,6 +399,7 @@ let HasMaskedOffOperand = false,
341399
}
342400
}
343401

402+
// 7.2 Strided Store Operations
344403
let HasMaskedOffOperand = false,
345404
MaskedPolicyScheme = NonePolicy,
346405
ManualCodegen = [{
@@ -375,7 +434,7 @@ let HasMaskedOffOperand = false,
375434
}
376435

377436
// 7.2 Strided store: vssb/h/w/bu/hu/wu
378-
multiclass RVVVSSXBuiltin<string ir, list<string> types> {
437+
multiclass RVVVSSBHWBuiltin<string ir, list<string> types> {
379438
let Name = NAME # "_v",
380439
IRName = ir,
381440
MaskedIRName = ir # "_mask" in {
@@ -390,40 +449,111 @@ let HasMaskedOffOperand = false,
390449
}
391450
}
392451

452+
// 7.3 Indexed Store Operations
453+
let HasMaskedOffOperand = false,
454+
MaskedPolicyScheme = NonePolicy,
455+
ManualCodegen = [{
456+
if (IsMasked) {
457+
// Builtin: (mask, ptr, index, value, vl). Intrinsic: (value, ptr, index, mask, vl)
458+
std::swap(Ops[0], Ops[3]);
459+
} else {
460+
// Builtin: (ptr, index, value, vl). Intrinsic: (value, ptr, index, vl)
461+
std::rotate(Ops.begin(), Ops.begin() + 2, Ops.begin() + 3);
462+
}
463+
Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo());
464+
if (IsMasked)
465+
IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType(), Ops[4]->getType()};
466+
else
467+
IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType(), Ops[3]->getType()};
468+
}] in {
469+
multiclass RVVVSXEEWBuiltin<string ir, list<string> types> {
470+
// 7.3 Indexed store: vsoxei<eew>
471+
foreach type = types in {
472+
foreach eew_list = EEWList in {
473+
defvar eew = eew_list[0];
474+
defvar eew_type = eew_list[1];
475+
let Name = NAME # eew # "_v",
476+
IRName = ir,
477+
MaskedIRName = ir # "_mask" in {
478+
// `0Pe<eew>Uvv` is type `T * -> unsigned <EEW> VectorType -> VectorType -> {VL} -> void`
479+
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
480+
def : RVVBuiltin<"v", "0Pe" # eew_type # "Uvv", type>;
481+
if !not(IsFloat<type>.val) then {
482+
// `0PUe<eew>UvUv` is type `unsigned T * -> unsigned <EEW> VectorType -> unsigned VectorType -> {VL} -> void`
483+
def : RVVBuiltin<"Uv", "0PUe" # eew_type # "UvUv", type>;
484+
}
485+
}
486+
}
487+
}
488+
}
489+
490+
// 7.3 Indexed store: vsxb/h/w/bu/hu/wu
491+
multiclass RVVVSXBHWBuiltin<string ir, list<string> types> {
492+
let Name = NAME # "_v",
493+
IRName = ir,
494+
MaskedIRName = ir # "_mask" in {
495+
foreach type = types in {
496+
// `0PeUvv` is type `T * -> unsigned VectorType -> VectorType -> {VL} -> void`
497+
// Note: the last operand {VL} is inserted by `RVVIntrinsic::computeBuiltinTypes`
498+
def : RVVBuiltin<"v", "0PeUvv", type>;
499+
// `0PUeUvUv` is type `unsigned T * -> unsigned VectorType -> unsigned VectorType -> {VL} -> void`
500+
def : RVVBuiltin<"Uv", "0PUeUvUv", type>;
501+
}
502+
}
503+
}
504+
}
505+
393506
// 7.1. Vector Unit-Stride Operations
394-
defm th_vlb : RVVVLXBuiltin<"th_vlb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
395-
defm th_vlh : RVVVLXBuiltin<"th_vlh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
396-
defm th_vlw : RVVVLXBuiltin<"th_vlw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
507+
defm th_vlb : RVVVLBHWBuiltin<"th_vlb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
508+
defm th_vlh : RVVVLBHWBuiltin<"th_vlh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
509+
defm th_vlw : RVVVLBHWBuiltin<"th_vlw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
397510
defm th_vle8 : RVVVLEBuiltin<"th_vle", ["c"]>; // i8
398511
defm th_vle16: RVVVLEBuiltin<"th_vle", ["s","x"]>; // i16, f16
399512
defm th_vle32: RVVVLEBuiltin<"th_vle", ["i","f"]>; // i32, f32
400513
defm th_vle64: RVVVLEBuiltin<"th_vle", ["l","d"]>; // i64, f64
401514

402-
defm th_vsb : RVVVSXBuiltin<"th_vsb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
403-
defm th_vsh : RVVVSXBuiltin<"th_vsh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
404-
defm th_vsw : RVVVSXBuiltin<"th_vsw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
515+
defm th_vsb : RVVVSBHWBuiltin<"th_vsb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
516+
defm th_vsh : RVVVSBHWBuiltin<"th_vsh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
517+
defm th_vsw : RVVVSBHWBuiltin<"th_vsw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
405518
defm th_vse8 : RVVVSEBuiltin<"th_vse", ["c"]>; // i8
406519
defm th_vse16: RVVVSEBuiltin<"th_vse", ["s","x"]>; // i16, f16
407520
defm th_vse32: RVVVSEBuiltin<"th_vse", ["i","f"]>; // i32, f32
408521
defm th_vse64: RVVVSEBuiltin<"th_vse", ["l","d"]>; // i64, f64
409522

410523
// 7.2. Vector Strided Load/Store Operations
411-
defm th_vlsb : RVVVLSXBuiltin<"th_vlsb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
412-
defm th_vlsh : RVVVLSXBuiltin<"th_vlsh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
413-
defm th_vlsw : RVVVLSXBuiltin<"th_vlsw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
524+
defm th_vlsb : RVVVLSBHWBuiltin<"th_vlsb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
525+
defm th_vlsh : RVVVLSBHWBuiltin<"th_vlsh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
526+
defm th_vlsw : RVVVLSBHWBuiltin<"th_vlsw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
414527
defm th_vlse8 : RVVVLSEBuiltin<"th_vlse", ["c"]>; // i8
415528
defm th_vlse16: RVVVLSEBuiltin<"th_vlse", ["s","x"]>; // i16, f16
416529
defm th_vlse32: RVVVLSEBuiltin<"th_vlse", ["i","f"]>; // i32, f32
417530
defm th_vlse64: RVVVLSEBuiltin<"th_vlse", ["l","d"]>; // i64, f64
418531

419-
defm th_vssb : RVVVSSXBuiltin<"th_vssb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
420-
defm th_vssh : RVVVSSXBuiltin<"th_vssh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
421-
defm th_vssw : RVVVSSXBuiltin<"th_vssw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
532+
defm th_vssb : RVVVSSBHWBuiltin<"th_vssb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
533+
defm th_vssh : RVVVSSBHWBuiltin<"th_vssh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
534+
defm th_vssw : RVVVSSBHWBuiltin<"th_vssw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
422535
defm th_vsse8 : RVVVSSEBuiltin<"th_vsse", ["c"]>; // i8
423536
defm th_vsse16: RVVVSSEBuiltin<"th_vsse", ["s","x"]>; // i16, f16
424537
defm th_vsse32: RVVVSSEBuiltin<"th_vsse", ["i","f"]>; // i32, f32
425538
defm th_vsse64: RVVVSSEBuiltin<"th_vsse", ["l","d"]>; // i64, f64
426539

540+
// 7.3 Vector Indexed Load/Store Operations
541+
defm th_vlxb : RVVVLXBHWBuiltin<"th_vlxb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
542+
defm th_vlxh : RVVVLXBHWBuiltin<"th_vlxh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
543+
defm th_vlxw : RVVVLXBHWBuiltin<"th_vlxw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
544+
defm th_vloxei : RVVVLXEEWBuiltin<"th_vlxe", TypeList>; // all types
545+
546+
defm th_vsxb : RVVVSXBHWBuiltin<"th_vsxb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
547+
defm th_vsxh : RVVVSXBHWBuiltin<"th_vsxh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
548+
defm th_vsxw : RVVVSXBHWBuiltin<"th_vsxw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
549+
defm th_vsoxei : RVVVSXEEWBuiltin<"th_vsxe", TypeList>; // all types
550+
551+
// TODO: LLVM intrinsic th_vsuxb, th_vsuxh, th_vsuxw, th_vsuxe for the following:
552+
//defm th_vsuxb : RVVVSXBHWBuiltin<"th_vsuxb", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
553+
//defm th_vsuxh : RVVVSXBHWBuiltin<"th_vsuxh", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
554+
//defm th_vsuxw : RVVVSXBHWBuiltin<"th_vsuxw", ["c", "s", "i", "l"]>; // i8, i16, i32, i64
555+
//defm th_vsuxei : RVVVSXEEWBuiltin<"th_vsuxe", TypeList>; // all types
556+
427557
// 7.4. Unit-stride Fault-Only-First Loads Operations
428558
defm th_vle8ff : RVVVLEFFBuiltin<"th_vleff", ["c"]>; // i8
429559
defm th_vle16ff: RVVVLEFFBuiltin<"th_vleff", ["s","x"]>; // i16, f16

0 commit comments

Comments
 (0)