Skip to content

[SYCL] Add Clang support for FPGA loop fusion function attributes #2876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,40 @@ def SYCLIntelNoGlobalWorkOffset : InheritableAttr {
let PragmaAttributeSupport = 0;
}

// Attribute record for the C++11-style spelling ``[[intel::loop_fuse]]``.
// It accepts one expression argument named "Value", which may be omitted.
def SYCLIntelLoopFuse : InheritableAttr {
let Spellings = [CXX11<"intel","loop_fuse">];
// Single expression argument; the trailing 1 marks it as optional.
let Args = [ExprArgument<"Value", /*optional=*/ 1>];
// Listed for both the SYCL device and SYCL host language options.
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// Only functions are valid subjects.
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [SYCLIntelLoopFuseDocs];
let PragmaAttributeSupport = 0;
// Static helpers returning the bounds 0 and 1024*1024 for the argument.
// NOTE(review): where these bounds are enforced is not visible in this
// record - confirm against the Sema code that consumes them.
let AdditionalMembers = [{
static unsigned getMinValue() {
return 0;
}
static unsigned getMaxValue() {
return 1024*1024;
}
}];
}

// Attribute record for the C++11-style spelling
// ``[[intel::loop_fuse_independent]]``. It accepts one expression argument
// named "Value", which may be omitted.
def SYCLIntelLoopFuseIndependent : InheritableAttr {
let Spellings = [CXX11<"intel","loop_fuse_independent">];
// Single expression argument; the trailing 1 marks it as optional.
let Args = [ExprArgument<"Value", /*optional=*/ 1>];
// Listed for both the SYCL device and SYCL host language options.
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// Only functions are valid subjects.
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [SYCLIntelLoopFuseIndependentDocs];
let PragmaAttributeSupport = 0;
// Static helpers returning the bounds 0 and 1024*1024 for the argument.
// NOTE(review): where these bounds are enforced is not visible in this
// record - confirm against the Sema code that consumes them.
let AdditionalMembers = [{
static unsigned getMinValue() {
return 0;
}
static unsigned getMaxValue() {
return 1024*1024;
}
}];
}

def C11NoReturn : InheritableAttr {
let Spellings = [Keyword<"_Noreturn">];
let Subjects = SubjectList<[Function], ErrorDiag>;
Expand Down
51 changes: 51 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2608,6 +2608,57 @@ loop should not be fused with any adjacent loop.
}];
}

// User-facing documentation for ``[[intel::loop_fuse]]``; referenced from the
// ``Documentation`` field of the SYCLIntelLoopFuse record in Attr.td.
def SYCLIntelLoopFuseDocs : Documentation {
  let Category = DocCatFunction;
  let Heading = "intel::loop_fuse";
  let Content = [{
The ``[[intel::loop_fuse(N)]]`` attribute applies to a function or a lambda. It
is a strong request to fuse, to the extent possible, the loops within the
function that are contained in at most N-1 other loops within the function. If
the optional parameter N is omitted, it is a strong request to fuse, to the
extent possible, the loops within the function that are not contained in any
other loop within the function. This attribute should be passed through to the
FPGA backend and ignored by the emulator. ``[[intel::loop_fuse(N)]]`` should not
be propagated to the caller.

.. code-block:: c++

  [[intel::loop_fuse(N)]]
  void foo() {}

``[[intel::loop_fuse(N)]]`` takes one optional parameter, a constant integral
expression N with value greater than or equal to 0. The parameter N may be a
template parameter.

``[[intel::loop_fuse]]`` and ``[[intel::loop_fuse_independent]]`` are
incompatible with each other and cannot both be applied to the same function.

  }];
}

// User-facing documentation for ``[[intel::loop_fuse_independent]]``;
// referenced from the ``Documentation`` field of the
// SYCLIntelLoopFuseIndependent record in Attr.td.
def SYCLIntelLoopFuseIndependentDocs : Documentation {
  let Category = DocCatFunction;
  let Heading = "intel::loop_fuse_independent";
  let Content = [{
The ``[[intel::loop_fuse_independent(N)]]`` attribute applies to a function or a
lambda. It is a strong request to fuse, to the extent possible, the loops within
the function that are contained in at most N-1 other loops within the function.
It also guarantees that fusion safety analysis can ignore negative-distance
dependences between these loops. If the optional parameter N is omitted, it is a
strong request to fuse, to the extent possible, the loops within the function
that are not contained in any other loop within the function. This attribute
should be passed through to the FPGA backend and ignored by the emulator.
``[[intel::loop_fuse_independent(N)]]`` should not be propagated to the caller.

.. code-block:: c++

  [[intel::loop_fuse_independent(N)]]
  void foo() {}

``[[intel::loop_fuse_independent(N)]]`` takes one optional parameter, a constant
integral expression N with value greater than or equal to 0. The parameter N may
be a template parameter.

``[[intel::loop_fuse_independent]]`` and ``[[intel::loop_fuse]]`` are
incompatible with each other and cannot both be applied to the same function.

  }];
}

def SYCLDeviceIndirectlyCallableDocs : Documentation {
let Category = DocCatFunction;
let Heading = "intel::device_indirectly_callable";
Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,9 @@ class AttributeCommonInfo {
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize ||
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
ParsedAttr == AT_SYCLIntelUseStallEnableClusters)
ParsedAttr == AT_SYCLIntelUseStallEnableClusters ||
ParsedAttr == AT_SYCLIntelLoopFuse ||
ParsedAttr == AT_SYCLIntelLoopFuseIndependent)
return true;

return false;
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -3356,6 +3356,12 @@ class Sema final {
WebAssemblyImportModuleAttr *mergeImportModuleAttr(
Decl *D, const WebAssemblyImportModuleAttr &AL);

/// Merges a SYCLIntelLoopFuseAttr onto \p D.
///
/// \param D    Declaration receiving the attribute.
/// \param Attr Attribute being merged; used as the source for the new node.
/// \param E    Value expression to store in the new attribute.
/// \returns nullptr when the mutual-exclusion check against
///          SYCLIntelLoopFuseIndependentAttr reports a conflict or when \p D
///          already carries a SYCLIntelLoopFuseAttr; otherwise a newly
///          allocated SYCLIntelLoopFuseAttr.
SYCLIntelLoopFuseAttr *
mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr,
Expr *E);
/// Merges a SYCLIntelLoopFuseIndependentAttr onto \p D.
///
/// \param D    Declaration receiving the attribute.
/// \param Attr Attribute being merged; used as the source for the new node.
/// \param E    Value expression to store in the new attribute.
/// \returns nullptr when the mutual-exclusion check against
///          SYCLIntelLoopFuseAttr reports a conflict or when \p D already
///          carries a SYCLIntelLoopFuseIndependentAttr; otherwise a newly
///          allocated SYCLIntelLoopFuseIndependentAttr.
SYCLIntelLoopFuseIndependentAttr *mergeSYCLIntelLoopFuseIndependentAttr(
Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E);

void mergeDeclAttributes(NamedDecl *New, Decl *Old,
AvailabilityMergeKind AMK = AMK_Redeclaration);
void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
Expand Down
39 changes: 39 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,45 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (getLangOpts().SYCLIsHost && D && D->hasAttr<SYCLKernelAttr>())
Fn->addFnAttr("sycl_kernel");

if (getLangOpts().SYCL && D) {
  // Attaches "loop_fuse" function metadata with two i32 operands: the fusion
  // depth (1 when the attribute argument was omitted) followed by a flag that
  // is 0 for loop_fuse and 1 for loop_fuse_independent.
  auto EmitLoopFuseMD = [&](Expr *DepthExpr, unsigned IndependentFlag) {
    llvm::ConstantInt *Depth =
        DepthExpr
            ? Builder.getInt32(
                  DepthExpr->getIntegerConstantExpr(D->getASTContext())
                      ->getSExtValue())
            : Builder.getInt32(1);

    llvm::Metadata *Operands[] = {
        llvm::ConstantAsMetadata::get(Depth),
        llvm::ConstantAsMetadata::get(Builder.getInt32(IndependentFlag))};
    Fn->setMetadata("loop_fuse",
                    llvm::MDNode::get(getLLVMContext(), Operands));
  };

  if (auto *FuseAttr = D->getAttr<SYCLIntelLoopFuseAttr>())
    EmitLoopFuseMD(FuseAttr->getValue(), 0);

  if (auto *FuseIndependentAttr =
          D->getAttr<SYCLIntelLoopFuseIndependentAttr>())
    EmitLoopFuseMD(FuseIndependentAttr->getValue(), 1);
}

if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2614,6 +2614,11 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.mergeImportModuleAttr(D, *IMA);
else if (const auto *INA = dyn_cast<WebAssemblyImportNameAttr>(Attr))
NewAttr = S.mergeImportNameAttr(D, *INA);
else if (const auto *LFA = dyn_cast<SYCLIntelLoopFuseAttr>(Attr))
NewAttr = S.mergeSYCLIntelLoopFuseAttr(D, *LFA, LFA->getValue());
else if (const auto *LFIA = dyn_cast<SYCLIntelLoopFuseIndependentAttr>(Attr))
NewAttr =
S.mergeSYCLIntelLoopFuseIndependentAttr(D, *LFIA, LFIA->getValue());
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));

Expand Down
86 changes: 73 additions & 13 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,19 @@ static bool checkAttrMutualExclusion(Sema &S, Decl *D, const Attr &AL) {
return false;
}

/// Reports a duplicate when \p D already carries an explicitly written
/// attribute of kind \c AttrType.
///
/// \returns true if the duplicate warning was issued, false when \p D has no
///          such attribute or only an implicit one.
template <typename AttrType>
static bool checkForDuplicateAttribute(Sema &S, Decl *D,
                                       const ParsedAttr &Attr) {
  auto *Existing = D->getAttr<AttrType>();

  // Attributes that were added implicitly do not count as duplicates.
  if (!Existing || Existing->isImplicit())
    return false;

  S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << Existing;
  return true;
}

static bool checkDeprecatedSYCLAttributeSpelling(Sema &S,
const ParsedAttr &Attr) {
if (Attr.getScopeName()->isStr("intelfpga"))
Expand Down Expand Up @@ -3091,6 +3104,58 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D,
E);
}

/// Produces the SYCLIntelLoopFuseAttr to attach to \p D during attribute
/// merging, or nullptr when nothing should be added: either the
/// mutual-exclusion check against SYCLIntelLoopFuseIndependentAttr reports a
/// conflict, or \p D already has a SYCLIntelLoopFuseAttr.
SYCLIntelLoopFuseAttr *
Sema::mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr,
                                 Expr *E) {
  // The exclusion check always runs first; the presence test is only reached
  // when no conflict was reported.
  const bool Conflicts =
      checkAttrMutualExclusion<SYCLIntelLoopFuseIndependentAttr>(*this, D,
                                                                 Attr);
  if (Conflicts || D->hasAttr<SYCLIntelLoopFuseAttr>())
    return nullptr;

  return ::new (Context) SYCLIntelLoopFuseAttr(Context, Attr, E);
}

/// Produces the SYCLIntelLoopFuseIndependentAttr to attach to \p D during
/// attribute merging, or nullptr when nothing should be added: either the
/// mutual-exclusion check against SYCLIntelLoopFuseAttr reports a conflict, or
/// \p D already has a SYCLIntelLoopFuseIndependentAttr.
SYCLIntelLoopFuseIndependentAttr *Sema::mergeSYCLIntelLoopFuseIndependentAttr(
    Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E) {
  // The exclusion check always runs first; the presence test is only reached
  // when no conflict was reported.
  const bool Conflicts =
      checkAttrMutualExclusion<SYCLIntelLoopFuseAttr>(*this, D, Attr);
  if (Conflicts || D->hasAttr<SYCLIntelLoopFuseIndependentAttr>())
    return nullptr;

  return ::new (Context) SYCLIntelLoopFuseIndependentAttr(Context, Attr, E);
}

// Shared handler for loop_fuse and loop_fuse_independent, which are
// incompatible with each other. AttrType is the attribute being processed and
// ConflictingAttrType is the one it must not be combined with.
template <typename AttrType, typename ConflictingAttrType>
static void handleLoopFusionAttr(Sema &S, Decl *D, const ParsedAttr &Attr) {
  if (D->isInvalidDecl())
    return;

  // The duplicate check runs before the conflict check; either one returning
  // true stops processing of this attribute.
  if (checkForDuplicateAttribute<AttrType>(S, D, Attr) ||
      checkAttrMutualExclusion<ConflictingAttrType>(S, D, Attr))
    return;

  // NOTE(review): the trailing 0 is streamed as the second diagnostic
  // argument - confirm it is the intended value for an attribute that accepts
  // one optional argument.
  if (Attr.getNumArgs() > 1) {
    S.Diag(Attr.getLoc(), diag::warn_attribute_too_many_arguments) << Attr << 0;
    return;
  }

  // Without an expression argument, attach the attribute with no value.
  if (!Attr.isArgExpr(0)) {
    D->addAttr(::new (S.Context) AttrType(S.Context, Attr));
    return;
  }

  // Otherwise hand the supplied expression to the constant-value helper.
  S.AddOneConstantValueAttr<AttrType>(D, Attr, Attr.getArgAsExpr(0));
}

static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.hasParsedType()) {
S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1;
Expand Down Expand Up @@ -5256,19 +5321,6 @@ static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D,
AL.getMustBeNull()));
}

/// Reports a duplicate when \p D already carries an explicitly written
/// attribute of kind \c AttrType.
///
/// \returns true if the duplicate warning was issued, false when \p D has no
///          such attribute or only an implicit one.
template <typename AttrType>
static bool checkForDuplicateAttribute(Sema &S, Decl *D,
                                       const ParsedAttr &Attr) {
  auto *Existing = D->getAttr<AttrType>();

  // Attributes that were added implicitly do not count as duplicates.
  if (!Existing || Existing->isImplicit())
    return false;

  S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << Existing;
  return true;
}

static void handleNoGlobalWorkOffsetAttr(Sema &S, Decl *D,
const ParsedAttr &Attr) {
if (S.LangOpts.SYCLIsHost)
Expand Down Expand Up @@ -8388,6 +8440,14 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelUseStallEnableClusters:
handleUseStallEnableClustersAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelLoopFuse:
handleLoopFusionAttr<SYCLIntelLoopFuseAttr,
SYCLIntelLoopFuseIndependentAttr>(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelLoopFuseIndependent:
handleLoopFusionAttr<SYCLIntelLoopFuseIndependentAttr,
SYCLIntelLoopFuseAttr>(S, D, AL);
break;
case ParsedAttr::AT_VecTypeHint:
handleVecTypeHint(S, D, AL);
break;
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
(KernelBody != FD) && !FD->hasAttr<SYCLSimdAttr>())
FD->addAttr(SYCLSimdAttr::CreateImplicit(SemaRef.getASTContext()));

// "loop_fuse" and "loop_fuse_independent" may be written explicitly on the
// kernel function. They are collected only when the parent is the kernel
// itself, so they are not propagated from device functions to the kernel.
if (ParentFD == SYCLKernel) {
  if (auto *FuseAttr = FD->getAttr<SYCLIntelLoopFuseAttr>())
    Attrs.insert(FuseAttr);

  if (auto *FuseIndependentAttr =
          FD->getAttr<SYCLIntelLoopFuseIndependentAttr>())
    Attrs.insert(FuseIndependentAttr);
}

// TODO: vec_len_hint should be handled here

CallGraphNode *N = SYCLCG.getNode(FD);
Expand Down Expand Up @@ -3283,6 +3300,8 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLIntelLoopFuseIndependent:
case attr::Kind::SYCLSimd: {
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
!KernelBody->getAttr<SYCLSimdAttr>()) {
Expand Down
15 changes: 14 additions & 1 deletion clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,16 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelSchedulerTargetFmaxMhz, New);
continue;
}
// Instantiate [[intel::loop_fuse]] with the current template arguments, then
// move on to the next template attribute.
if (const auto *SYCLIntelLoopFuse =
        dyn_cast<SYCLIntelLoopFuseAttr>(TmplAttr)) {
  instantiateIntelSYCLFunctionAttr<SYCLIntelLoopFuseAttr>(
      *this, TemplateArgs, SYCLIntelLoopFuse, New);
  continue;
}
// Same for [[intel::loop_fuse_independent]]. This branch needs its own
// `continue` and closing brace: without them the braces are unbalanced and
// the checks that follow end up nested inside this `if`.
if (const auto *SYCLIntelLoopFuseIndependent =
        dyn_cast<SYCLIntelLoopFuseIndependentAttr>(TmplAttr)) {
  instantiateIntelSYCLFunctionAttr<SYCLIntelLoopFuseIndependentAttr>(
      *this, TemplateArgs, SYCLIntelLoopFuseIndependent, New);
  continue;
}
if (const auto *SYCLIntelMaxGlobalWorkDim =
dyn_cast<SYCLIntelMaxGlobalWorkDimAttr>(TmplAttr)) {
instantiateIntelSYCLFunctionAttr<SYCLIntelMaxGlobalWorkDimAttr>(
Expand Down Expand Up @@ -6198,7 +6208,10 @@ static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) {
if (S.LangOpts.SYCLIsDevice) {
S.ConstructOpenCLKernel(FD, MC);
} else if (S.LangOpts.SYCLIsHost) {
CXXRecordDecl *CRD = (*FD->param_begin())->getType()->getAsCXXRecordDecl();
QualType KernelParamTy = (*FD->param_begin())->getType();
const CXXRecordDecl *CRD = (KernelParamTy->isReferenceType()
? KernelParamTy->getPointeeCXXRecordDecl()
: KernelParamTy->getAsCXXRecordDecl());
for (auto *Method : CRD->methods())
if (Method->getOverloadedOperator() == OO_Call &&
!Method->hasAttr<AlwaysInlineAttr>())
Expand Down
Loading