Skip to content

[SYCL] Refactor SYCL kernel object handling in hierarchical parallelism #6212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 45 additions & 37 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2505,38 +2505,44 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {});
}

void markParallelWorkItemCalls() {
if (getKernelInvocationKind(KernelCallerFunc) ==
InvokeParallelForWorkGroup) {
// Fetch the kernel object and the associated call operator
// (of either the lambda or the function object).
CXXRecordDecl *KernelObj =
GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl();
CXXMethodDecl *WGLambdaFn = nullptr;
if (KernelObj->isLambda())
WGLambdaFn = KernelObj->getLambdaCallOperator();
else
WGLambdaFn = getOperatorParens(KernelObj);
assert(WGLambdaFn && "non callable object is passed as kernel obj");
// Mark the function that it "works" in a work group scope:
// NOTE: In case of parallel_for_work_item the marker call itself is
// marked with work item scope attribute, here the '()' operator of the
// object passed as parameter is marked. This is an optimization -
// there are a lot of locals created at parallel_for_work_group
// scope before calling the lambda - it is more efficient to have
// all of them in the private address space rather then sharing via
// the local AS. See parallel_for_work_group implementation in the
// SYCL headers.
if (!WGLambdaFn->hasAttr<SYCLScopeAttr>()) {
WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit(
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));
// Search and mark parallel_for_work_item calls:
MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext());
MarkWIScope.TraverseDecl(WGLambdaFn);
// Now mark local variables declared in the PFWG lambda with work group
// scope attribute
addScopeAttrToLocalVars(*WGLambdaFn);
}
void annotateHierarchicalParallelismAPICalls() {
// Is this a hierarchical parallelism kernel invocation?
if (getKernelInvocationKind(KernelCallerFunc) != InvokeParallelForWorkGroup)
return;

// Mark kernel object with work-group scope attribute to avoid work-item
// scope memory allocation.
KernelObjClone->addAttr(SYCLScopeAttr::CreateImplicit(
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));

// Fetch the kernel object and the associated call operator
// (of either the lambda or the function object).
CXXRecordDecl *KernelObj =
GetSYCLKernelObjectType(KernelCallerFunc)->getAsCXXRecordDecl();
CXXMethodDecl *WGLambdaFn = nullptr;
if (KernelObj->isLambda())
WGLambdaFn = KernelObj->getLambdaCallOperator();
else
WGLambdaFn = getOperatorParens(KernelObj);
assert(WGLambdaFn && "non callable object is passed as kernel obj");
// Mark the function that it "works" in a work group scope:
// NOTE: In case of parallel_for_work_item the marker call itself is
// marked with work item scope attribute, here the '()' operator of the
// object passed as parameter is marked. This is an optimization -
// there are a lot of locals created at parallel_for_work_group
// scope before calling the lambda - it is more efficient to have
// all of them in the private address space rather then sharing via
// the local AS. See parallel_for_work_group implementation in the
// SYCL headers.
if (!WGLambdaFn->hasAttr<SYCLScopeAttr>()) {
WGLambdaFn->addAttr(SYCLScopeAttr::CreateImplicit(
SemaRef.getASTContext(), SYCLScopeAttr::Level::WorkGroup));
// Search and mark parallel_for_work_item calls:
MarkWIScopeFnVisitor MarkWIScope(SemaRef.getASTContext());
MarkWIScope.TraverseDecl(WGLambdaFn);
// Now mark local variables declared in the PFWG lambda with work group
// scope attribute
addScopeAttrToLocalVars(*WGLambdaFn);
}
}

Expand Down Expand Up @@ -2766,11 +2772,13 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
const CXXRecordDecl *KernelObj) {
TypeSourceInfo *TSInfo =
KernelObj->isLambda() ? KernelObj->getLambdaTypeInfo() : nullptr;
VarDecl *VD = VarDecl::Create(
Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(),
KernelObj->getIdentifier(), QualType(KernelObj->getTypeForDecl(), 0),
TSInfo, SC_None);
IdentifierInfo *Ident = KernelObj->getIdentifier();
if (!Ident)
Ident = &Ctx.Idents.get("__SYCLKernel");

VarDecl *VD = VarDecl::Create(
Ctx, DC, KernelObj->getLocation(), KernelObj->getLocation(), Ident,
QualType(KernelObj->getTypeForDecl(), 0), TSInfo, SC_None);
return VD;
}

Expand Down Expand Up @@ -2851,7 +2859,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler {
KernelObj(KernelObj), KernelCallerFunc(KernelCallerFunc),
KernelCallerSrcLoc(KernelCallerFunc->getLocation()) {
CollectionInitExprs.push_back(createInitListExpr(KernelObj));
markParallelWorkItemCalls();
annotateHierarchicalParallelismAPICalls();

Stmt *DS = new (S.Context) DeclStmt(DeclGroupRef(KernelObjClone),
KernelCallerSrcLoc, KernelCallerSrcLoc);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/accessor_inheritance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ int main() {
// CHECK: [[ARG_C]].addr.ascast = addrspacecast ptr [[ARG_C]].addr to ptr addrspace(4)
//
// Lambda object alloca
// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_]+]] = addrspacecast ptr [[KERNEL]] to ptr addrspace(4)
// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[KERNEL]] to ptr addrspace(4)
//
// Kernel argument stores
// CHECK: store i32 [[ARG_A]], ptr addrspace(4) [[ARG_A]].addr.ascast
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ int main() {
// Check alloca for pointer argument
// CHECK: [[MEM_ARG]].addr = alloca ptr addrspace(1)
// Check lambda object alloca
// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon
// CHECK: [[ANONALLOCA:%[a-zA-Z0-9_]+]] = alloca %class.anon
// Check allocas for ranges
// CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id"
// CHECK: [[ANON:%[0-9]+]] = addrspacecast ptr [[ANONALLOCA]] to ptr addrspace(4)
// CHECK: [[ANON:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANONALLOCA]] to ptr addrspace(4)
// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast ptr [[ARANGEA]] to ptr addrspace(4)
// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast ptr [[MRANGEA]] to ptr addrspace(4)
// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast ptr [[OIDA]] to ptr addrspace(4)
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ int main() {
}

// CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{
// CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{[0-9]+}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]]
// CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]]
// CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]]
// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]]
// CHECK: ret void, !dbg [[LINE_C0:![0-9]+]]
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/kernel-param-acc-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int main() {
// CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca ptr addrspace(1), align 8

// CHECK lambda object alloca
// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class.anon, align 4
// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class.anon, align 4

// CHECK allocas for ranges
// CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range"
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ int main() {
// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca ptr addrspace(1), align 8

// Check lambda object alloca
// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class{{.*}}.anon, align 4
// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class{{.*}}.anon, align 4

// Check allocas for ranges
// CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range"
Expand All @@ -53,7 +53,7 @@ int main() {
// CHECK: [[OFFSET2A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::id"

// Check lambda object addrspacecast
// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast ptr %0 to ptr addrspace(4)
// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast ptr %__SYCLKernel to ptr addrspace(4)

// Check addrspacecast for ranges
// CHECK: [[ACC_RANGE1AS:%.*]] = addrspacecast ptr [[ACC_RANGE1A]] to ptr addrspace(4)
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGenSYCL/kernel-param-pod-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ int main() {
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
Expand All @@ -74,8 +74,8 @@ int main() {
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
Expand All @@ -98,8 +98,8 @@ int main() {
// CHECK-SAME:(ptr noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast ptr %[[LOCAL_OBJECTA]] to ptr addrspace(4)

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, ptr addrspace(4) %[[LOCAL_OBJECT]], i32 0, i32 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ int main() {
// CHECK: [[ARG_C]].addr.ascast = addrspacecast i32* [[ARG_C]].addr to i32 addrspace(4)*
//
// Lambda object alloca
// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_]+]] = addrspacecast %class{{.*}}.anon* [[KERNEL]] to %class{{.*}}.anon addrspace(4)*
// CHECK: [[KERNEL_OBJ:%[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon* [[KERNEL]] to %class{{.*}}.anon addrspace(4)*
//
// Kernel argument stores
// CHECK: store i32 [[ARG_A]], i32 addrspace(4)* [[ARG_A]].addr.ascast
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ int main() {
// Check alloca for pointer argument
// CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)*
// Check lambda object alloca
// CHECK: [[ANONALLOCA:%[0-9]+]] = alloca %class.anon
// CHECK: [[ANONALLOCA:%[a-zA-Z0-9_]+]] = alloca %class.anon
// Check allocas for ranges
// CHECK: [[ARANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
// CHECK: [[MRANGEA:%agg.tmp.*]] = alloca %"struct.cl::sycl::range"
// CHECK: [[OIDA:%agg.tmp.*]] = alloca %"struct.cl::sycl::id"
// CHECK: [[ANON:%[0-9]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)*
// CHECK: [[ANON:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANONALLOCA]] to %class.anon addrspace(4)*
// CHECK: [[ARANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[ARANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
// CHECK: [[MRANGET:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::range"* [[MRANGEA]] to %"struct.cl::sycl::range" addrspace(4)*
// CHECK: [[OIDT:%agg.tmp.*]] = addrspacecast %"struct.cl::sycl::id"* [[OIDA]] to %"struct.cl::sycl::id" addrspace(4)*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int main() {
// CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8

// CHECK lambda object alloca
// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class.anon, align 4
// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class.anon, align 4

// CHECK allocas for ranges
// CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ int main() {
// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8

// Check lambda object alloca
// CHECK: [[LOCAL_OBJECTA:%0]] = alloca %class{{.*}}.anon, align 4
// CHECK: [[LOCAL_OBJECTA:%__SYCLKernel]] = alloca %class{{.*}}.anon, align 4

// Check allocas for ranges
// CHECK: [[ACC_RANGE1A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::range"
Expand All @@ -53,7 +53,7 @@ int main() {
// CHECK: [[OFFSET2A:%[a-zA-Z0-9_.]+]] = alloca %"struct.cl::sycl::id"

// Check lambda object addrspacecast
// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast %class{{.*}}.anon* %0 to %class{{.*}}.anon addrspace(4)*
// CHECK: [[LOCAL_OBJECT:%.*]] = addrspacecast %class{{.*}}.anon* %__SYCLKernel to %class{{.*}}.anon addrspace(4)*

// Check addrspacecast for ranges
// CHECK: [[ACC_RANGE1AS:%.*]] = addrspacecast %"struct.cl::sycl::range"* [[ACC_RANGE1A]] to %"struct.cl::sycl::range" addrspace(4)*
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGenSYCL/no_opaque_kernel-param-pod-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ int main() {
// CHECK-SAME:(%struct{{.*}}.__wrapper_class* noundef byval(%struct{{.*}}.__wrapper_class) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon addrspace(4)*
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon addrspace(4)*

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon, %class{{.*}}.anon addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0
Expand All @@ -74,8 +74,8 @@ int main() {
// CHECK-SAME:(%struct{{.*}}.__wrapper_class{{.*}}* noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)*
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)*

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, %class{{.*}}.anon{{.*}} addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0
Expand All @@ -100,8 +100,8 @@ int main() {
// CHECK-SAME:(%struct{{.*}}.__wrapper_class{{.*}}* noundef byval(%struct{{.*}}.__wrapper_class{{.*}}) align 4 %[[ARR_ARG:.*]])

// Check local lambda object alloca
// CHECK: %[[LOCAL_OBJECTA:[0-9]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[0-9]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)*
// CHECK: %[[LOCAL_OBJECTA:[a-zA-Z0-9_]+]] = alloca %class{{.*}}.anon{{.*}}, align 4
// CHECK: %[[LOCAL_OBJECT:[a-zA-Z0-9_.]+]] = addrspacecast %class{{.*}}.anon{{.*}}* %[[LOCAL_OBJECTA]] to %class{{.*}}.anon{{.*}} addrspace(4)*

// Check for Array init loop
// CHECK: %[[LAMBDA_PTR:.+]] = getelementptr inbounds %class{{.*}}.anon{{.*}}, %class{{.*}}.anon{{.*}} addrspace(4)* %[[LOCAL_OBJECT]], i32 0, i32 0
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenSYCL/no_opaque_sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8
// CHECK: [[ANON:%[0-9]+]] = alloca %class.anon, align 8
// CHECK: [[ANONCAST:%[0-9]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)*
// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8
// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)*
// CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8
// CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8*
// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4
Expand All @@ -17,8 +17,8 @@
// Check alloca
// CHECK: [[SAMPLER_ARG_WRAPPED]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8
// CHECK: [[ARG_A]].addr = alloca i32, align 4
// CHECK: [[LAMBDAA:%[0-9]+]] = alloca %class.anon.0, align 8
// CHECK: [[LAMBDA:%[0-9]+]] = addrspacecast %class.anon.0* [[LAMBDAA]] to %class.anon.0 addrspace(4)*
// CHECK: [[LAMBDAA:%[a-zA-Z0-9_]+]] = alloca %class.anon.0, align 8
// CHECK: [[LAMBDA:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon.0* [[LAMBDAA]] to %class.anon.0 addrspace(4)*

// Check argument store
// CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG_WRAPPED]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG_WRAPPED]].addr.ascast, align 8
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ int main() {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_A(%union.MyUnion* noundef byval(%union.MyUnion) align 4 [[MEM_ARG:%[a-zA-Z0-9_]+]])

// Check lambda object alloca
// CHECK: [[LOCAL_OBJECT:%0]] = alloca %class.anon, align 4
// CHECK: [[LOCAL_OBJECT:%__SYCLKernel]] = alloca %class.anon, align 4

// CHECK: [[LOCAL_OBJECTAS:%.*]] = addrspacecast %class.anon* [[LOCAL_OBJECT]] to %class.anon addrspace(4)*
// CHECK: [[MEM_ARGAS:%.*]] = addrspacecast %union.MyUnion* [[MEM_ARG]] to %union.MyUnion addrspace(4)*
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenSYCL/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]])
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8
// CHECK: [[ANON:%[0-9]+]] = alloca %class.anon, align 8
// CHECK: [[ANONCAST:%[0-9]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4)
// CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8
// CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4)
// CHECK: store ptr addrspace(2) [[SAMPLER_ARG]], ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) #4
// CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[ANONCAST]], i32 0, i32 0
Expand All @@ -16,8 +16,8 @@
// Check alloca
// CHECK: [[SAMPLER_ARG_WRAPPED]].addr = alloca ptr addrspace(2), align 8
// CHECK: [[ARG_A]].addr = alloca i32, align 4
// CHECK: [[LAMBDAA:%[0-9]+]] = alloca %class.anon.0, align 8
// CHECK: [[LAMBDA:%[0-9]+]] = addrspacecast ptr [[LAMBDAA]] to ptr addrspace(4)
// CHECK: [[LAMBDAA:%[a-zA-Z0-9_]+]] = alloca %class.anon.0, align 8
// CHECK: [[LAMBDA:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[LAMBDAA]] to ptr addrspace(4)

// Check argument store
// CHECK: store ptr addrspace(2) [[SAMPLER_ARG_WRAPPED]], ptr addrspace(4) [[SAMPLER_ARG_WRAPPED]].addr.ascast, align 8
Expand Down
6 changes: 1 addition & 5 deletions clang/test/CodeGenSYCL/union-kernel-param.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

// This test checks a kernel argument that is union with both array and non-array fields.

#include "Inputs/sycl.hpp"

using namespace cl::sycl;

union MyUnion {
int FldInt;
char FldChar;
Expand All @@ -31,7 +27,7 @@ int main() {
// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_A(ptr noundef byval(%union.MyUnion) align 4 [[MEM_ARG:%[a-zA-Z0-9_]+]])

// Check lambda object alloca
// CHECK: [[LOCAL_OBJECT:%0]] = alloca %class.anon, align 4
// CHECK: [[LOCAL_OBJECT:%__SYCLKernel]] = alloca %class.anon, align 4

// CHECK: [[LOCAL_OBJECTAS:%.*]] = addrspacecast ptr [[LOCAL_OBJECT]] to ptr addrspace(4)
// CHECK: [[MEM_ARGAS:%.*]] = addrspacecast ptr [[MEM_ARG]] to ptr addrspace(4)
Expand Down
Loading