Skip to content

[SYCL] Add Clang support for FPGA loop fusion function attributes #2877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 22, 2020
Merged
19 changes: 19 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,25 @@ def SYCLIntelNoGlobalWorkOffset : InheritableAttr {
let PragmaAttributeSupport = 0;
}

// Function attribute backing [[intel::loop_fuse]] and
// [[intel::loop_fuse_independent]]. Both spellings share this single
// attribute class; the isIndependent() accessor tells them apart.
def SYCLIntelLoopFuse : InheritableAttr {
let Spellings = [CXX11<"intel", "loop_fuse">,
CXX11<"intel", "loop_fuse_independent">];
// A single expression argument named "Value"; it is marked optional, so the
// attribute may be written with no argument list.
let Args = [ExprArgument<"Value", /*optional=*/ 1>];
// NOTE(review): presumably the attribute is recognized when either SYCL
// language option is enabled -- confirm LangOpts list semantics.
let LangOpts = [SYCLIsDevice, SYCLIsHost];
// Only functions are valid subjects; any other subject is a hard error.
let Subjects = SubjectList<[Function], ErrorDiag>;
// isIndependent() is tied to the loop_fuse_independent spelling.
let Accessors = [Accessor<"isIndependent",
[CXX11<"intel", "loop_fuse_independent">]>];
let Documentation = [SYCLIntelLoopFuseDocs];
// Inclusive bounds for the argument value (0 .. 1024*1024).
// NOTE(review): presumably consumed by Sema::checkRangedIntegralArgument --
// confirm against its definition.
let AdditionalMembers = [{
static unsigned getMinValue() {
return 0;
}
static unsigned getMaxValue() {
return 1024*1024;
}
}];
}

def C11NoReturn : InheritableAttr {
let Spellings = [Keyword<"_Noreturn">];
let Subjects = SubjectList<[Function], ErrorDiag>;
Expand Down
28 changes: 28 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2608,6 +2608,34 @@ loop should not be fused with any adjacent loop.
}];
}

def SYCLIntelLoopFuseDocs : Documentation {
let Category = DocCatFunction;
let Heading = "loop_fuse, loop_fuse_independent";
let Content = [{
The ``[[intel::loop_fuse(N)]]`` and ``[[intel::loop_fuse_independent(N)]]`` attributes
apply to a function or a lambda function. Each attribute is a strong request, to the
extent possible, to fuse the loops within the function that are contained in at most
N-1 other loops within the function. If the optional parameter N is omitted, the
attribute is a strong request, to the extent possible, to fuse the loops within the
function that are not contained in any other loop within the function (this is
equivalent to N = 1). ``[[intel::loop_fuse_independent(N)]]`` additionally guarantees
that fusion safety analysis can ignore negative-distance dependences between these loops.

.. code-block:: c++

[[intel::loop_fuse(N)]]
int foo() {}

[[intel::loop_fuse_independent(N)]]
int foo() {}


``[[intel::loop_fuse(N)]]`` and ``[[intel::loop_fuse_independent(N)]]`` each take one
optional parameter, which must be an unsigned integer constant expression. The parameter
N may be a template parameter.

}];
}

def SYCLDeviceIndirectlyCallableDocs : Documentation {
let Category = DocCatFunction;
let Heading = "intel::device_indirectly_callable";
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,8 @@ class AttributeCommonInfo {
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize ||
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
ParsedAttr == AT_SYCLIntelUseStallEnableClusters)
ParsedAttr == AT_SYCLIntelUseStallEnableClusters ||
ParsedAttr == AT_SYCLIntelLoopFuse)
return true;

return false;
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -3363,6 +3363,8 @@ class Sema final {
WebAssemblyImportModuleAttr *mergeImportModuleAttr(
Decl *D, const WebAssemblyImportModuleAttr &AL);

/// Merges a loop_fuse / loop_fuse_independent attribute (spelling info \p CI,
/// argument \p E) with any SYCLIntelLoopFuseAttr already present on \p D.
/// Returns the attribute to attach, or nullptr when \p D already carries one.
SYCLIntelLoopFuseAttr *
mergeSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E);
void mergeDeclAttributes(NamedDecl *New, Decl *Old,
AvailabilityMergeKind AMK = AMK_Redeclaration);
void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New,
Expand Down Expand Up @@ -10204,6 +10206,8 @@ class Sema final {
/// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular
/// declaration.
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID);
/// addSYCLIntelLoopFuseAttr - Validates the argument \p E and, unless this is
/// a SYCL host compilation, adds a loop_fuse / loop_fuse_independent attribute
/// to a particular declaration.
void addSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
Expr *E);

bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
bool checkAllowedSYCLInitializer(VarDecl *VD,
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
if (getLangOpts().SYCLIsHost && D && D->hasAttr<SYCLKernelAttr>())
Fn->addFnAttr("sycl_kernel");

if (getLangOpts().SYCLIsDevice && D) {
if (const auto *A = D->getAttr<SYCLIntelLoopFuseAttr>()) {
Expr *E = A->getValue();
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(
E->getIntegerConstantExpr(D->getASTContext())->getZExtValue())),
llvm::ConstantAsMetadata::get(
A->isIndependent() ? Builder.getInt32(1) : Builder.getInt32(0))};
Fn->setMetadata("loop_fuse",
llvm::MDNode::get(getLLVMContext(), AttrMDArgs));
}
}

if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) {
// Add metadata for a kernel function.
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Sema/SemaDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2614,6 +2614,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.mergeImportModuleAttr(D, *IMA);
else if (const auto *INA = dyn_cast<WebAssemblyImportNameAttr>(Attr))
NewAttr = S.mergeImportNameAttr(D, *INA);
else if (const auto *LFA = dyn_cast<SYCLIntelLoopFuseAttr>(Attr))
NewAttr = S.mergeSYCLIntelLoopFuseAttr(D, *LFA, LFA->getValue());
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));

Expand Down
87 changes: 87 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3091,6 +3091,90 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D,
E);
}

/// Merge a loop_fuse / loop_fuse_independent attribute, described by \p CI
/// with argument \p E, into the attributes already present on \p D.
///
/// \returns a newly allocated SYCLIntelLoopFuseAttr when \p D does not carry
/// one yet. Returns nullptr when \p D already has the attribute: a different
/// spelling is diagnosed as an error, a different argument value is diagnosed
/// as a warning, and an identical duplicate is silently dropped.
SYCLIntelLoopFuseAttr *
Sema::mergeSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
                                 Expr *E) {
  const auto *ExistingAttr = D->getAttr<SYCLIntelLoopFuseAttr>();
  if (!ExistingAttr)
    return ::new (Context) SYCLIntelLoopFuseAttr(Context, CI, E);

  // [[intel::loop_fuse]] and [[intel::loop_fuse_independent]] are
  // incompatible.
  // FIXME: If additional spellings are provided for this attribute,
  // this code will do the wrong thing.
  if (ExistingAttr->getAttributeSpellingListIndex() !=
      CI.getAttributeSpellingListIndex()) {
    Diag(CI.getLoc(), diag::err_attributes_are_not_compatible)
        << CI << ExistingAttr;
    Diag(ExistingAttr->getLocation(), diag::note_conflicting_attribute);
    return nullptr;
  }

  // The values can only be compared once BOTH arguments are non-dependent.
  // The argument of the attribute already on the declaration may still be
  // value-dependent (e.g. it names a template parameter), and a
  // value-dependent expression must not be evaluated as an integer constant
  // expression.
  const Expr *ExistingE = ExistingAttr->getValue();
  if (!E->isValueDependent() && !ExistingE->isValueDependent()) {
    Optional<llvm::APSInt> ArgVal = E->getIntegerConstantExpr(Context);
    Optional<llvm::APSInt> ExistingArgVal =
        ExistingE->getIntegerConstantExpr(Context);

    assert(ArgVal && ExistingArgVal &&
           "Argument should be an integer constant expression");
    // Compare attribute argument value and warn if there is a mismatch.
    // The explicit guard keeps builds without assertions from dereferencing
    // an empty Optional.
    if (ArgVal && ExistingArgVal &&
        ArgVal->getExtValue() != ExistingArgVal->getExtValue())
      Diag(ExistingAttr->getLoc(), diag::warn_duplicate_attribute)
          << ExistingAttr;
  }

  // If there is no mismatch, silently ignore duplicate attribute.
  return nullptr;
}

/// Validates the argument \p E of a loop_fuse / loop_fuse_independent
/// attribute described by \p CI.
///
/// \returns true when a problem was found, false when \p E is acceptable or
/// cannot be checked yet because it is value-dependent.
static bool checkSYCLIntelLoopFuseArgument(Sema &S,
                                           const AttributeCommonInfo &CI,
                                           Expr *E) {
  // Value-dependent arguments are validated later, at instantiation time.
  if (E->isValueDependent())
    return false;

  // The argument has to fold to an integer constant.
  if (!E->getIntegerConstantExpr(S.Context)) {
    S.Diag(E->getExprLoc(), diag::err_attribute_argument_type)
        << CI << AANT_ArgumentIntegerConstant << E->getSourceRange();
    return true;
  }

  // The range check takes an attribute object, so build a temporary one on
  // the stack purely for that call.
  SYCLIntelLoopFuseAttr RangeCheckAttr(S.Context, CI, E);
  ExprResult Converted;
  return S.checkRangedIntegralArgument<SYCLIntelLoopFuseAttr>(
      E, &RangeCheckAttr, Converted);
}

/// Validates \p E and attaches a loop_fuse / loop_fuse_independent attribute
/// to \p D. Nothing is attached when validation fails, when compiling for the
/// SYCL host, or when merging with an existing attribute yields no new one.
void Sema::addSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI,
                                    Expr *E) {
  assert(E && "argument has unexpected null value");

  // Validation always runs first, so it also happens for host compilation.
  const bool ArgIsInvalid = checkSYCLIntelLoopFuseArgument(*this, CI, E);

  // Attribute should not be added during host compilation.
  if (ArgIsInvalid || getLangOpts().SYCLIsHost)
    return;

  if (SYCLIntelLoopFuseAttr *Merged = mergeSYCLIntelLoopFuseAttr(D, CI, E))
    D->addAttr(Merged);
}

// Handles [[intel::loop_fuse]] and [[intel::loop_fuse_independent]].
// When the attribute is written without an argument, a 32-bit integer literal
// with value 1 is synthesized at the attribute's location and used instead.
static void handleLoopFuseAttr(Sema &S, Decl *D, const ParsedAttr &Attr) {
  Expr *E = nullptr;
  if (Attr.isArgExpr(0)) {
    E = Attr.getArgAsExpr(0);
  } else {
    // Default argument value is set to 1.
    E = IntegerLiteral::Create(S.Context, llvm::APInt(32, 1), S.Context.IntTy,
                               Attr.getLoc());
  }

  S.addSYCLIntelLoopFuseAttr(D, Attr, E);
}

static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!AL.hasParsedType()) {
S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1;
Expand Down Expand Up @@ -8379,6 +8463,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelUseStallEnableClusters:
handleUseStallEnableClustersAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelLoopFuse:
handleLoopFuseAttr(S, D, AL);
break;
case ParsedAttr::AT_VecTypeHint:
handleVecTypeHint(S, D, AL);
break;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
(KernelBody != FD) && !FD->hasAttr<SYCLSimdAttr>())
FD->addAttr(SYCLSimdAttr::CreateImplicit(SemaRef.getASTContext()));

// Attribute "loop_fuse" can be applied explicitly on kernel function.
// Attribute should not be propagated from device functions to kernel.
if (auto *A = FD->getAttr<SYCLIntelLoopFuseAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
}
}

// TODO: vec_len_hint should be handled here

CallGraphNode *N = SYCLCG.getNode(FD);
Expand Down Expand Up @@ -3335,6 +3343,7 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLSimd: {
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
!KernelBody->getAttr<SYCLSimdAttr>()) {
Expand Down
21 changes: 20 additions & 1 deletion clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,16 @@ static void instantiateSYCLIntelPipeIOAttr(
S.addSYCLIntelPipeIOAttr(New, *Attr, Result.getAs<Expr>());
}

/// Instantiates a loop_fuse / loop_fuse_independent attribute from a template
/// pattern: substitutes \p TemplateArgs into the attribute's argument and, if
/// substitution succeeds, adds the resulting attribute to \p New.
static void instantiateSYCLIntelLoopFuseAttr(
    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
    const SYCLIntelLoopFuseAttr *Attr, Decl *New) {
  // The argument is substituted in a constant-evaluated context.
  EnterExpressionEvaluationContext ConstantEvalCtx(
      S, Sema::ExpressionEvaluationContext::ConstantEvaluated);

  ExprResult Substituted = S.SubstExpr(Attr->getValue(), TemplateArgs);
  if (Substituted.isInvalid())
    return;

  S.addSYCLIntelLoopFuseAttr(New, *Attr, Substituted.getAs<Expr>());
}

template <typename AttrName>
static void instantiateIntelSYCLFunctionAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
Expand Down Expand Up @@ -775,6 +785,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelMaxGlobalWorkDim, New);
continue;
}
if (const auto *SYCLIntelLoopFuse =
dyn_cast<SYCLIntelLoopFuseAttr>(TmplAttr)) {
instantiateSYCLIntelLoopFuseAttr(*this, TemplateArgs, SYCLIntelLoopFuse,
New);
continue;
}
if (const auto *SYCLIntelNoGlobalWorkOffset =
dyn_cast<SYCLIntelNoGlobalWorkOffsetAttr>(TmplAttr)) {
instantiateIntelSYCLFunctionAttr<SYCLIntelNoGlobalWorkOffsetAttr>(
Expand Down Expand Up @@ -6204,7 +6220,10 @@ static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) {
if (S.LangOpts.SYCLIsDevice) {
S.ConstructOpenCLKernel(FD, MC);
} else if (S.LangOpts.SYCLIsHost) {
CXXRecordDecl *CRD = (*FD->param_begin())->getType()->getAsCXXRecordDecl();
QualType KernelParamTy = (*FD->param_begin())->getType();
const CXXRecordDecl *CRD = (KernelParamTy->isReferenceType()
? KernelParamTy->getPointeeCXXRecordDecl()
: KernelParamTy->getAsCXXRecordDecl());
for (auto *Method : CRD->methods())
if (Method->getOverloadedOperator() == OO_Call &&
!Method->hasAttr<AlwaysInlineAttr>())
Expand Down
52 changes: 52 additions & 0 deletions clang/test/CodeGenSYCL/loop_fuse_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Free function carrying loop_fuse(5). It is called from kernel_name_6 below
// to verify that the metadata stays on this function and is not propagated to
// the calling kernel.
[[intel::loop_fuse(5)]] void foo() {}

// Kernel functor whose call operator takes its loop_fuse argument from the
// template parameter SIZE, exercising the template-instantiation path.
template <int SIZE>
class KernelFunctor5 {
public:
[[intel::loop_fuse(SIZE)]] void operator()() const {}
};

// Submits six kernels. The FileCheck expectations that follow this function
// match the kernels by name and in this order, so the order of the
// single_task calls matters.
void bar() {

q.submit([&](handler &h) {
// Test template argument.
// kernel_name_1: argument comes from KernelFunctor5<5>; expects {5, 0}.
KernelFunctor5<5> f5;
h.single_task<class kernel_name_1>(f5);

// Test different argument sizes.
// Emit 1 if there is no argument.
// kernel_name_2: no argument; expects the same metadata node as loop_fuse(1).
h.single_task<class kernel_name_2>(
[]() [[intel::loop_fuse]]{});
// kernel_name_3: explicit 0; expects {0, 0}.
h.single_task<class kernel_name_3>(
[]() [[intel::loop_fuse(0)]]{});
// kernel_name_4: explicit 1; expects {1, 0}.
h.single_task<class kernel_name_4>(
[]() [[intel::loop_fuse(1)]]{});
// kernel_name_5: explicit 10; expects {10, 0}.
h.single_task<class kernel_name_5>(
[]() [[intel::loop_fuse(10)]]{});

// Test attribute is not propagated.
// kernel_name_6 has no attribute of its own and only calls foo(); the kernel
// definition must carry no loop_fuse metadata, while foo itself keeps its own.
h.single_task<class kernel_name_6>(
[]() { foo(); });
});
}

// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LF5:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LF1:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LF0:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LF1]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LF10:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"()
// CHECK-NOT: !loop_fuse
// CHECK-SAME: {
// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LF5]]
// CHECK: ![[LF5]] = !{i32 5, i32 0}
// CHECK: ![[LF1]] = !{i32 1, i32 0}
// CHECK: ![[LF0]] = !{i32 0, i32 0}
// CHECK: ![[LF10]] = !{i32 10, i32 0}
52 changes: 52 additions & 0 deletions clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Free function carrying loop_fuse_independent(5). It is called from
// kernel_name_6 below to verify that the metadata stays on this function and
// is not propagated to the calling kernel.
[[intel::loop_fuse_independent(5)]] void foo() {}

// Kernel functor whose call operator takes its loop_fuse_independent argument
// from the template parameter SIZE, exercising the template-instantiation
// path.
template <int SIZE>
class KernelFunctor5 {
public:
[[intel::loop_fuse_independent(SIZE)]] void operator()() const {}
};

// Submits six kernels. The FileCheck expectations that follow this function
// match the kernels by name and in this order, so the order of the
// single_task calls matters. The second metadata operand is expected to be 1
// throughout because the loop_fuse_independent spelling is used.
void bar() {

q.submit([&](handler &h) {
// Test template argument.
// kernel_name_1: argument comes from KernelFunctor5<5>; expects {5, 1}.
KernelFunctor5<5> f5;
h.single_task<class kernel_name_1>(f5);

// Test different argument sizes.
// Emit 1 if there is no argument.
// kernel_name_2: no argument; expects the same metadata node as an explicit 1.
h.single_task<class kernel_name_2>(
[]() [[intel::loop_fuse_independent]]{});
// kernel_name_3: explicit 0; expects {0, 1}.
h.single_task<class kernel_name_3>(
[]() [[intel::loop_fuse_independent(0)]]{});
// kernel_name_4: explicit 1; expects {1, 1}.
h.single_task<class kernel_name_4>(
[]() [[intel::loop_fuse_independent(1)]]{});
// kernel_name_5: explicit 10; expects {10, 1}.
h.single_task<class kernel_name_5>(
[]() [[intel::loop_fuse_independent(10)]]{});

// Test attribute is not propagated.
// kernel_name_6 has no attribute of its own and only calls foo(); the kernel
// definition must carry no loop_fuse metadata, while foo itself keeps its own.
h.single_task<class kernel_name_6>(
[]() { foo(); });
});
}

// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LFI5:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LFI1:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LFI0:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LFI1]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LFI10:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"()
// CHECK-NOT: !loop_fuse
// CHECK-SAME: {
// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LFI5]]
// CHECK: ![[LFI5]] = !{i32 5, i32 1}
// CHECK: ![[LFI1]] = !{i32 1, i32 1}
// CHECK: ![[LFI0]] = !{i32 0, i32 1}
// CHECK: ![[LFI10]] = !{i32 10, i32 1}
Loading