Skip to content

Commit b970a77

Browse files
committed
Set anonymous struct flag for lambda types only. Small refactoring.
1 parent 1a61c46 commit b970a77

File tree

1 file changed

+41
-37
lines changed

1 file changed

+41
-37
lines changed

clang/lib/Sema/SemaSYCL.cpp

+41-37
Original file line numberDiff line numberDiff line change
@@ -2505,38 +2505,44 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
25052505
return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {});
25062506
}
25072507

2508-
void markParallelWorkItemCalls() {
2509-
if (getKernelInvocationKind(KernelCallerFunc) ==
2510-
InvokeParallelForWorkGroup) {
2511-
// Fetch the kernel object and the associated call operator
2512-
// (of either the lambda or the function object).
2513-
CXXRecordDecl *KernelObj =
2514-
GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl();
2515-
CXXMethodDecl *WGLambdaFn = nullptr;
2516-
if (KernelObj->isLambda())
2517-
WGLambdaFn = KernelObj->getLambdaCallOperator();
2518-
else
2519-
WGLambdaFn = getOperatorParens(KernelObj);
2520-
assert(WGLambdaFn && "non callable object is passed as kernel obj");
2521-
// Mark the function that it "works" in a work group scope:
2522-
// NOTE: In case of parallel_for_work_item the marker call itself is
2523-
// marked with work item scope attribute, here the '()' operator of the
2524-
// object passed as parameter is marked. This is an optimization -
2525-
// there are a lot of locals created at parallel_for_work_group
2526-
// scope before calling the lambda - it is more efficient to have
2527-
all of them in the private address space rather than sharing via
2528-
// the local AS. See parallel_for_work_group implementation in the
2529-
// SYCL headers.
2530-
if (!WGLambdaFn->hasAttr<SYCLScopeAttr>()) {
2531-
WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit(
2532-
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));
2533-
// Search and mark parallel_for_work_item calls:
2534-
MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext());
2535-
MarkWIScope.TraverseDecl(WGLambdaFn);
2536-
// Now mark local variables declared in the PFWG lambda with work group
2537-
// scope attribute
2538-
addScopeAttrToLocalVars(*WGLambdaFn);
2539-
}
2508+
// Annotates the declarations involved in a parallel_for_work_group kernel
// invocation. Does nothing unless getKernelInvocationKind(KernelCallerFunc)
// is InvokeParallelForWorkGroup. Otherwise it:
//  1. adds an implicit work-group SYCLScopeAttr to KernelObjClone;
//  2. looks up the kernel object's call operator (the lambda call operator,
//     or the result of getOperatorParens for a function object);
//  3. if that operator has no SYCLScopeAttr yet, adds a work-group one, runs
//     MarkWIScopeFnVisitor over it, and calls addScopeAttrToLocalVars on it.
void annotateHierarchicalParallelismAPICalls() {
2509+
// Is this a hierarchical parallelism kernel invocation? If not, there is
// nothing to annotate.
2510+
if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup)
2511+
return;
2512+
2513+
// Mark kernel object with work-group scope attribute to avoid work-item
2514+
// scope memory allocation.
2515+
KernelObjClone->addAttr(SYCLScopeAttr::CreateImplicit(
2516+
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));
2517+
2518+
// Fetch the kernel object and the associated call operator
2519+
// (of either the lambda or the function object).
2520+
CXXRecordDecl *KernelObj =
2521+
GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl();
2522+
CXXMethodDecl *WGLambdaFn = nullptr;
2523+
if (KernelObj->isLambda())
2524+
WGLambdaFn = KernelObj->getLambdaCallOperator();
2525+
else
2526+
WGLambdaFn = getOperatorParens(KernelObj);
2527+
assert(WGLambdaFn && "non callable object is passed as kernel obj");
2528+
// Mark the function that it "works" in a work group scope:
2529+
// NOTE: In case of parallel_for_work_item the marker call itself is
2530+
// marked with work item scope attribute, here the '()' operator of the
2531+
// object passed as parameter is marked. This is an optimization -
2532+
// there are a lot of locals created at parallel_for_work_group
2533+
// scope before calling the lambda - it is more efficient to have
2534+
// all of them in the private address space rather than sharing via
2535+
// the local AS. See parallel_for_work_group implementation in the
2536+
// SYCL headers.
2537+
// An operator that already carries a SYCLScopeAttr is skipped entirely, so
// the attribute, the traversal and the local-variable marking below are
// applied only when no scope attribute is present yet.
if (!WGLambdaFn->hasAttr<SYCLScopeAttr>()) {
2538+
WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit(
2539+
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));
2540+
// Search and mark parallel_for_work_item calls:
2541+
MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext());
2542+
MarkWIScope.TraverseDecl(WGLambdaFn);
2543+
// Now mark local variables declared in the PFWG lambda with work group
2544+
// scope attribute
2545+
addScopeAttrToLocalVars(*WGLambdaFn);
25402546
}
25412547
}
25422548

@@ -2768,13 +2774,11 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
27682774
TypeSourceInfo *TSInfo =
27692775
KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr;
27702776
auto Type = QualType(KernelObj->getTypeForDecl(), 0);
2771-
Type->getAsRecordDecl()->setAnonymousStructOrUnion(true);
2777+
if (KernelObj->isLambda())
2778+
Type->getAsRecordDecl()->setAnonymousStructOrUnion(true);
27722779
VarDecl *VD = VarDecl::Create(
27732780
Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(),
27742781
KernelObj->getIdentifier(), Type, TSInfo, SC_None);
2775-
if (getKernelInvocationKind(KernelCallerFunc) == InvokeParallelForWorkGroup)
2776-
VD->addAttr(
2777-
SYCLScopeAttr::CreateImplicit(Ctx, SYCLScopeAttr::Level::WorkGroup));
27782782
return VD;
27792783
}
27802784

@@ -2856,7 +2860,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
28562860
KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc),
28572861
KernelCallerSrcLoc(KernelCallerFunc->getLocation()) {
28582862
CollectionInitExprs.push_back(createInitListExpr(KernelObj));
2859-
markParallelWorkItemCalls();
2863+
annotateHierarchicalParallelismAPICalls();
28602864

28612865
Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone),
28622866
KernelCallerSrcLoc, KernelCallerSrcLoc);

0 commit comments

Comments
 (0)