Skip to content

Commit a5b9804

Browse files
authored
[SYCL][FPGA] Enable a set of loop attributes (#1312)
This patch introduces the following loop attributes: - loop_coalesce: Indicates that the loop nest should be coalesced into a single loop without affecting functionality - speculated_iterations: Specifies the number of concurrent speculated iterations that will be in flight for a loop invocation - disable_loop_pipelining: Disables pipelining of the loop data path, causing the loop to be executed serially - max_interleaving: Places a maximum limit N on the number of interleaved invocations of an inner loop by an outer loop Signed-off-by: Viktoria Maksimova <[email protected]>
1 parent 2b6f4f4 commit a5b9804

File tree

10 files changed

+572
-32
lines changed

10 files changed

+572
-32
lines changed

clang/include/clang/Basic/Attr.td

+51
Original file line numberDiff line numberDiff line change
@@ -1660,6 +1660,57 @@ def SYCLIntelFPGAMaxConcurrency : Attr {
16601660
let Documentation = [SYCLIntelFPGAMaxConcurrencyAttrDocs];
16611661
}
16621662

1663+
def SYCLIntelFPGALoopCoalesce : Attr {
1664+
let Spellings = [CXX11<"intelfpga","loop_coalesce">];
1665+
let Args = [ExprArgument<"NExpr">];
1666+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1667+
let HasCustomTypeTransform = 1;
1668+
let AdditionalMembers = [{
1669+
static const char *getName() {
1670+
return "loop_coalesce";
1671+
}
1672+
}];
1673+
let Documentation = [SYCLIntelFPGALoopCoalesceAttrDocs];
1674+
}
1675+
1676+
def SYCLIntelFPGADisableLoopPipelining : Attr {
1677+
let Spellings = [CXX11<"intelfpga","disable_loop_pipelining">];
1678+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1679+
let HasCustomTypeTransform = 1;
1680+
let AdditionalMembers = [{
1681+
static const char *getName() {
1682+
return "disable_loop_pipelining";
1683+
}
1684+
}];
1685+
let Documentation = [SYCLIntelFPGADisableLoopPipeliningAttrDocs];
1686+
}
1687+
1688+
def SYCLIntelFPGAMaxInterleaving : Attr {
1689+
let Spellings = [CXX11<"intelfpga","max_interleaving">];
1690+
let Args = [ExprArgument<"NExpr">];
1691+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1692+
let HasCustomTypeTransform = 1;
1693+
let AdditionalMembers = [{
1694+
static const char *getName() {
1695+
return "max_interleaving";
1696+
}
1697+
}];
1698+
let Documentation = [SYCLIntelFPGAMaxInterleavingAttrDocs];
1699+
}
1700+
1701+
def SYCLIntelFPGASpeculatedIterations : Attr {
1702+
let Spellings = [CXX11<"intelfpga","speculated_iterations">];
1703+
let Args = [ExprArgument<"NExpr">];
1704+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1705+
let HasCustomTypeTransform = 1;
1706+
let AdditionalMembers = [{
1707+
static const char *getName() {
1708+
return "speculated_iterations";
1709+
}
1710+
}];
1711+
let Documentation = [SYCLIntelFPGASpeculatedIterationsAttrDocs];
1712+
}
1713+
16631714
def IntelFPGALocalNonConstVar : SubsetSubject<Var,
16641715
[{S->hasLocalStorage() &&
16651716
S->getKind() != Decl::ImplicitParam &&

clang/include/clang/Basic/AttrDocs.td

+47
Original file line numberDiff line numberDiff line change
@@ -2140,6 +2140,53 @@ be applied multiple times to the same loop.
21402140
}];
21412141
}
21422142

2143+
def SYCLIntelFPGALoopCoalesceAttrDocs : Documentation {
2144+
let Category = DocCatVariable;
2145+
let Heading = "loop_coalesce";
2146+
let Content = [{
2147+
This attribute applies to a loop. Indicates that the loop nest should be
2148+
coalesced into a single loop without affecting functionality. Parameter N is
2149+
optional. If specified, it shall be a positive integer, and indicates how many
2150+
of the nested loop levels should be coalesced.
2151+
}];
2152+
}
2153+
2154+
def SYCLIntelFPGADisableLoopPipeliningAttrDocs : Documentation {
2155+
let Category = DocCatVariable;
2156+
let Heading = "disable_loop_pipelining";
2157+
let Content = [{
2158+
This attribute applies to a loop. Disables pipelining of the loop data path,
2159+
causing the loop to be executed serially. Cannot be used on the same loop in
2160+
conjunction with max_interleaving, speculated_iterations, max_concurrency, ii
2161+
or ivdep.
2162+
}];
2163+
}
2164+
2165+
def SYCLIntelFPGAMaxInterleavingAttrDocs : Documentation {
2166+
let Category = DocCatVariable;
2167+
let Heading = "max_interleaving";
2168+
let Content = [{
2169+
This attribute applies to a loop. Places a maximum limit N on the number of
2170+
interleaved invocations of an inner loop by an outer loop (note, this does not
2171+
mean that this attribute can only be applied to inner loops in user code - outer
2172+
loops in user code may still be contained in an implicit loop due to NDRange).
2173+
Parameter N is mandatory, and shall be a non-negative integer. Cannot be
2174+
used on the same loop in conjunction with disable_loop_pipelining.
2175+
}];
2176+
}
2177+
2178+
def SYCLIntelFPGASpeculatedIterationsAttrDocs : Documentation {
2179+
let Category = DocCatVariable;
2180+
let Heading = "speculated_iterations";
2181+
let Content = [{
2182+
This attribute applies to a loop. Specifies the number of concurrent speculated
2183+
iterations that will be in flight for a loop invocation (i.e. the exit
2184+
condition for these iterations will not have been evaluated yet).
2185+
Parameter N is mandatory, and may either be 0, or a positive integer. Cannot be
2186+
used on the same loop in conjunction with disable_loop_pipelining.
2187+
}];
2188+
}
2189+
21432190
def SYCLDeviceIndirectlyCallableDocs : Documentation {
21442191
let Category = DocCatFunction;
21452192
let Heading = "intel::device_indirectly_callable";

clang/include/clang/Sema/Sema.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1754,7 +1754,7 @@ class Sema final {
17541754
Expr *Expr2);
17551755
template <typename FPGALoopAttrT>
17561756
FPGALoopAttrT *BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A,
1757-
Expr *E);
1757+
Expr *E = nullptr);
17581758

17591759
LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A,
17601760
Expr *E);

clang/lib/CodeGen/CGLoopInfo.cpp

+124-10
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,6 @@ MDNode *LoopInfo::createMetadata(
532532

533533
// Setting ii attribute with an initiation interval
534534
if (Attrs.SYCLIInterval > 0) {
535-
LLVMContext &Ctx = Header->getContext();
536535
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.ii.count"),
537536
ConstantAsMetadata::get(ConstantInt::get(
538537
llvm::Type::getInt32Ty(Ctx), Attrs.SYCLIInterval))};
@@ -541,14 +540,52 @@ MDNode *LoopInfo::createMetadata(
541540

542541
// Setting max_concurrency attribute with number of threads
543542
if (Attrs.SYCLMaxConcurrencyEnable) {
544-
LLVMContext &Ctx = Header->getContext();
545543
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.max_concurrency.count"),
546544
ConstantAsMetadata::get(ConstantInt::get(
547545
llvm::Type::getInt32Ty(Ctx),
548546
Attrs.SYCLMaxConcurrencyNThreads))};
549547
LoopProperties.push_back(MDNode::get(Ctx, Vals));
550548
}
551549

550+
if (Attrs.SYCLLoopCoalesceEnable) {
551+
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.coalesce.enable")};
552+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
553+
}
554+
555+
if (Attrs.SYCLLoopCoalesceNLevels > 0) {
556+
Metadata *Vals[] = {
557+
MDString::get(Ctx, "llvm.loop.coalesce.count"),
558+
ConstantAsMetadata::get(ConstantInt::get(
559+
llvm::Type::getInt32Ty(Ctx), Attrs.SYCLLoopCoalesceNLevels))};
560+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
561+
}
562+
563+
// disable_loop_pipelining attribute corresponds to
564+
// 'llvm.loop.intel.pipelining.enable, i32 0' metadata
565+
if (Attrs.SYCLLoopPipeliningDisable) {
566+
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.intel.pipelining.enable"),
567+
ConstantAsMetadata::get(
568+
ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 0))};
569+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
570+
}
571+
572+
if (Attrs.SYCLMaxInterleavingEnable) {
573+
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.max_interleaving.count"),
574+
ConstantAsMetadata::get(ConstantInt::get(
575+
llvm::Type::getInt32Ty(Ctx),
576+
Attrs.SYCLMaxInterleavingNInvocations))};
577+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
578+
}
579+
580+
if (Attrs.SYCLSpeculatedIterationsEnable) {
581+
Metadata *Vals[] = {
582+
MDString::get(Ctx, "llvm.loop.intel.speculated.iterations.count"),
583+
ConstantAsMetadata::get(
584+
ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
585+
Attrs.SYCLSpeculatedIterationsNIterations))};
586+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
587+
}
588+
552589
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
553590
AdditionalLoopProperties.end());
554591
return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms);
@@ -560,9 +597,13 @@ LoopAttributes::LoopAttributes(bool IsParallel)
560597
UnrollAndJamEnable(LoopAttributes::Unspecified),
561598
VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
562599
InterleaveCount(0), SYCLIInterval(0), SYCLMaxConcurrencyEnable(false),
563-
SYCLMaxConcurrencyNThreads(0), UnrollCount(0), UnrollAndJamCount(0),
564-
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
565-
PipelineInitiationInterval(0) {}
600+
SYCLMaxConcurrencyNThreads(0), SYCLLoopCoalesceEnable(false),
601+
SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false),
602+
SYCLMaxInterleavingEnable(false), SYCLMaxInterleavingNInvocations(0),
603+
SYCLSpeculatedIterationsEnable(false),
604+
SYCLSpeculatedIterationsNIterations(0), UnrollCount(0),
605+
UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified),
606+
PipelineDisabled(false), PipelineInitiationInterval(0) {}
566607

567608
void LoopAttributes::clear() {
568609
IsParallel = false;
@@ -572,6 +613,13 @@ void LoopAttributes::clear() {
572613
SYCLIInterval = 0;
573614
SYCLMaxConcurrencyEnable = false;
574615
SYCLMaxConcurrencyNThreads = 0;
616+
SYCLLoopCoalesceEnable = false;
617+
SYCLLoopCoalesceNLevels = 0;
618+
SYCLLoopPipeliningDisable = false;
619+
SYCLMaxInterleavingEnable = false;
620+
SYCLMaxInterleavingNInvocations = 0;
621+
SYCLSpeculatedIterationsEnable = false;
622+
SYCLSpeculatedIterationsNIterations = 0;
575623
InterleaveCount = 0;
576624
UnrollCount = 0;
577625
UnrollAndJamCount = 0;
@@ -599,9 +647,16 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
599647
if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
600648
Attrs.InterleaveCount == 0 && !Attrs.GlobalSYCLIVDepInfo.hasValue() &&
601649
Attrs.ArraySYCLIVDepInfo.empty() && Attrs.SYCLIInterval == 0 &&
602-
Attrs.SYCLMaxConcurrencyEnable == false && Attrs.UnrollCount == 0 &&
603-
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
604-
Attrs.PipelineInitiationInterval == 0 &&
650+
Attrs.SYCLMaxConcurrencyEnable == false &&
651+
Attrs.SYCLLoopCoalesceEnable == false &&
652+
Attrs.SYCLLoopCoalesceNLevels == 0 &&
653+
Attrs.SYCLLoopPipeliningDisable == false &&
654+
Attrs.SYCLMaxInterleavingEnable == false &&
655+
Attrs.SYCLMaxInterleavingNInvocations == 0 &&
656+
Attrs.SYCLSpeculatedIterationsEnable == false &&
657+
Attrs.SYCLSpeculatedIterationsNIterations == 0 &&
658+
Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 &&
659+
!Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 &&
605660
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
606661
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
607662
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
@@ -903,15 +958,36 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
903958
// n - 'llvm.loop.ii.count, i32 n' metadata will be emitted
904959
// For attribute max_concurrency:
905960
// n - 'llvm.loop.max_concurrency.count, i32 n' metadata will be emitted
961+
// For attribute loop_coalesce:
962+
// without parameter - 'llvm.loop.coalesce.enable' metadata will be emitted
963+
// n - 'llvm.loop.coalesce.count, i32 n' metadata will be emitted
964+
// For attribute disable_loop_pipelining:
965+
// 'llvm.loop.intel.pipelining.enable, i32 0' metadata will be emitted
966+
// For attribute max_interleaving:
967+
// n - 'llvm.loop.max_interleaving.count, i32 n' metadata will be emitted
968+
// For attribute speculated_iterations:
969+
// n - 'llvm.loop.intel.speculated.iterations.count, i32 n' metadata will be
970+
// emitted
906971
for (const auto *Attr : Attrs) {
907972
const SYCLIntelFPGAIVDepAttr *IntelFPGAIVDep =
908973
dyn_cast<SYCLIntelFPGAIVDepAttr>(Attr);
909974
const SYCLIntelFPGAIIAttr *IntelFPGAII =
910975
dyn_cast<SYCLIntelFPGAIIAttr>(Attr);
911976
const SYCLIntelFPGAMaxConcurrencyAttr *IntelFPGAMaxConcurrency =
912977
dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(Attr);
913-
914-
if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency)
978+
const SYCLIntelFPGALoopCoalesceAttr *IntelFPGALoopCoalesce =
979+
dyn_cast<SYCLIntelFPGALoopCoalesceAttr>(Attr);
980+
const SYCLIntelFPGADisableLoopPipeliningAttr
981+
*IntelFPGADisableLoopPipelining =
982+
dyn_cast<SYCLIntelFPGADisableLoopPipeliningAttr>(Attr);
983+
const SYCLIntelFPGAMaxInterleavingAttr *IntelFPGAMaxInterleaving =
984+
dyn_cast<SYCLIntelFPGAMaxInterleavingAttr>(Attr);
985+
const SYCLIntelFPGASpeculatedIterationsAttr *IntelFPGASpeculatedIterations =
986+
dyn_cast<SYCLIntelFPGASpeculatedIterationsAttr>(Attr);
987+
988+
if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency &&
989+
!IntelFPGALoopCoalesce && !IntelFPGADisableLoopPipelining &&
990+
!IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations)
915991
continue;
916992

917993
if (IntelFPGAIVDep) {
@@ -944,6 +1020,44 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
9441020
setSYCLMaxConcurrencyEnable();
9451021
setSYCLMaxConcurrencyNThreads(ArgVal.getSExtValue());
9461022
}
1023+
1024+
if (IntelFPGALoopCoalesce) {
1025+
llvm::APSInt ArgVal(32);
1026+
if (auto *LCE = IntelFPGALoopCoalesce->getNExpr()) {
1027+
bool IsValid = LCE->isIntegerConstantExpr(ArgVal, Ctx);
1028+
assert(IsValid && "Not an integer constant expression");
1029+
(void)IsValid;
1030+
setSYCLLoopCoalesceNLevels(ArgVal.getSExtValue());
1031+
} else {
1032+
setSYCLLoopCoalesceEnable();
1033+
}
1034+
}
1035+
1036+
if (IntelFPGADisableLoopPipelining) {
1037+
setSYCLLoopPipeliningDisable();
1038+
}
1039+
1040+
if (IntelFPGAMaxInterleaving) {
1041+
llvm::APSInt ArgVal(32);
1042+
bool IsValid =
1043+
IntelFPGAMaxInterleaving->getNExpr()->isIntegerConstantExpr(ArgVal,
1044+
Ctx);
1045+
assert(IsValid && "Not an integer constant expression");
1046+
(void)IsValid;
1047+
setSYCLMaxInterleavingEnable();
1048+
setSYCLMaxInterleavingNInvocations(ArgVal.getSExtValue());
1049+
}
1050+
1051+
if (IntelFPGASpeculatedIterations) {
1052+
llvm::APSInt ArgVal(32);
1053+
bool IsValid =
1054+
IntelFPGASpeculatedIterations->getNExpr()->isIntegerConstantExpr(
1055+
ArgVal, Ctx);
1056+
assert(IsValid && "Not an integer constant expression");
1057+
(void)IsValid;
1058+
setSYCLSpeculatedIterationsEnable();
1059+
setSYCLSpeculatedIterationsNIterations(ArgVal.getSExtValue());
1060+
}
9471061
}
9481062

9491063
/// Stage the attributes.

clang/lib/CodeGen/CGLoopInfo.h

+56
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,27 @@ struct LoopAttributes {
113113
/// Value for llvm.loop.max_concurrency.count metadata.
114114
unsigned SYCLMaxConcurrencyNThreads;
115115

116+
/// Flag for llvm.loop.coalesce.enable metadata.
117+
bool SYCLLoopCoalesceEnable;
118+
119+
/// Value for llvm.loop.coalesce.count metadata.
120+
unsigned SYCLLoopCoalesceNLevels;
121+
122+
/// Flag for llvm.loop.intel.pipelining.enable, i32 0 metadata.
123+
bool SYCLLoopPipeliningDisable;
124+
125+
/// Flag for llvm.loop.max_interleaving.count metadata.
126+
bool SYCLMaxInterleavingEnable;
127+
128+
/// Value for llvm.loop.max_interleaving.count metadata.
129+
unsigned SYCLMaxInterleavingNInvocations;
130+
131+
/// Flag for llvm.loop.intel.speculated.iterations.count metadata.
132+
bool SYCLSpeculatedIterationsEnable;
133+
134+
/// Value for llvm.loop.intel.speculated.iterations.count metadata.
135+
unsigned SYCLSpeculatedIterationsNIterations;
136+
116137
/// llvm.unroll.
117138
unsigned UnrollCount;
118139

@@ -333,6 +354,41 @@ class LoopInfoStack {
333354
StagedAttrs.SYCLMaxConcurrencyNThreads = C;
334355
}
335356

357+
/// Set flag of loop_coalesce for the next loop pushed.
358+
void setSYCLLoopCoalesceEnable() {
359+
StagedAttrs.SYCLLoopCoalesceEnable = true;
360+
}
361+
362+
/// Set value of coalesced levels for the next loop pushed.
363+
void setSYCLLoopCoalesceNLevels(unsigned C) {
364+
StagedAttrs.SYCLLoopCoalesceNLevels = C;
365+
}
366+
367+
/// Set flag of disable_loop_pipelining for the next loop pushed.
368+
void setSYCLLoopPipeliningDisable() {
369+
StagedAttrs.SYCLLoopPipeliningDisable = true;
370+
}
371+
372+
/// Set flag of max_interleaving for the next loop pushed.
373+
void setSYCLMaxInterleavingEnable() {
374+
StagedAttrs.SYCLMaxInterleavingEnable = true;
375+
}
376+
377+
/// Set value of max interleaved invocations for the next loop pushed.
378+
void setSYCLMaxInterleavingNInvocations(unsigned C) {
379+
StagedAttrs.SYCLMaxInterleavingNInvocations = C;
380+
}
381+
382+
/// Set flag of speculated_iterations for the next loop pushed.
383+
void setSYCLSpeculatedIterationsEnable() {
384+
StagedAttrs.SYCLSpeculatedIterationsEnable = true;
385+
}
386+
387+
/// Set value of concurrent speculated iterations for the next loop pushed.
388+
void setSYCLSpeculatedIterationsNIterations(unsigned C) {
389+
StagedAttrs.SYCLSpeculatedIterationsNIterations = C;
390+
}
391+
336392
/// Set the unroll count for the next loop pushed.
337393
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
338394

0 commit comments

Comments
 (0)