diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index c424f81a84e00..14087420587e3 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -265,6 +265,7 @@ static void guardBlockWithIsLeaderCheck(BasicBlock *IfBB, BasicBlock *TrueBB, auto *Ty = LinearLocalID->getType(); Value *Zero = Constant::getNullValue(Ty); IRBuilder<> Builder(IfBB->getContext()); + spirv::genWGBarrier(*(IfBB->getTerminator()), TT); Builder.SetInsertPoint(IfBB->getTerminator()); Value *Cmp = Builder.CreateICmpEQ(LinearLocalID, Zero, "cmpz"); Builder.SetCurrentDebugLocation(DbgLoc); diff --git a/llvm/test/SYCLLowerIR/byval_arg.ll b/llvm/test/SYCLLowerIR/byval_arg.ll index 03c4bb2892f64..df3cc8062a796 100644 --- a/llvm/test/SYCLLowerIR/byval_arg.ll +++ b/llvm/test/SYCLLowerIR/byval_arg.ll @@ -11,6 +11,7 @@ define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !work_group_scope !0 { ; CHECK-LABEL: @wibble( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: diff --git a/llvm/test/SYCLLowerIR/byval_arg_cast.ll b/llvm/test/SYCLLowerIR/byval_arg_cast.ll index 14699c070a673..ef94ccd8e9461 100644 --- a/llvm/test/SYCLLowerIR/byval_arg_cast.ll +++ b/llvm/test/SYCLLowerIR/byval_arg_cast.ll @@ -20,6 +20,7 @@ define dso_local spir_func void @wombat(%struct.widget* byval(%struct.widget) al ; CHECK-LABEL: @wombat( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ1:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMPZ1]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: @@ -31,6 +32,7 @@ define dso_local spir_func void @wombat(%struct.widget* byval(%struct.widget) al ; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.widget* [[ARG]] to i8* ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP2]], i8 addrspace(3)* align 16 bitcast (%struct.widget addrspace(3)* @[[SHADOW]] to i8 addrspace(3)*), i64 32, i1 false) ; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: diff --git a/llvm/test/SYCLLowerIR/convergent.ll b/llvm/test/SYCLLowerIR/convergent.ll index 7c8d70c08d2a9..be9c9bd687776 100644 --- a/llvm/test/SYCLLowerIR/convergent.ll +++ b/llvm/test/SYCLLowerIR/convergent.ll @@ -19,8 +19,8 @@ define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !w ; CHECK-PTX: declare i64 @_Z27__spirv_LocalInvocationId_zv() ; CHECK: ; Function Attrs: convergent -; CHECK: declare void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) #1 +; CHECK: declare void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) #[[ATTR_NUM:[0-9]+]] -; CHECK: attributes #1 = { convergent } +; CHECK: attributes #[[ATTR_NUM]] = { convergent } !0 = !{} diff --git a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll index 615d11190d4cc..fdbdca4686f57 100644 --- a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll +++ b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll @@ -25,6 +25,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: @@ -40,6 +41,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: [[TMP4:%.*]] = bitcast [[STRUCT_BAR]] addrspace(4)* [[ARG]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 8 [[TMP4]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.bar, [[STRUCT_BAR]] addrspace(3)* @[[PFWG_SHADOW]], i32 0, i32 0), i64 1, i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: @@ -50,6 +52,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: br label [[WG_CF]] ; CHECK: wg_cf: ; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] ; CHECK: TestMat: diff --git a/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll b/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll new file mode 100644 index 0000000000000..84575dc77f27c --- /dev/null +++ b/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -LowerWGScope -S | FileCheck %s + +%struct.snork = type { i32 } +%struct.eggs = type { i8 } +%struct.snork.0 = type { %struct.widget, %struct.widget, %struct.widget, %struct.ham } +%struct.widget = type { %struct.wibble } +%struct.wibble = type { [3 x i64] } +%struct.ham = type { %struct.wibble } + +@global = internal addrspace(3) global [12 x %struct.snork] zeroinitializer, align 4 + +; CHECK: @[[WG_NEXT:[a-zA-Z0-9_.]+]] = internal unnamed_addr addrspace(3) global %struct.snork addrspace(4)* undef, align 8 +; CHECK: @[[WG_DONE:[a-zA-Z0-9_.]+]] = internal unnamed_addr addrspace(3) global i1 undef, align 1 + +define internal spir_func void @spam(%struct.eggs addrspace(4)* %arg, %struct.snork.0* byval(%struct.snork.0) align 8 %arg1) align 2 !work_group_scope !0 { +; CHECK: arrayctor.loop: +; CHECK-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi [[STRUCT_SNORK:%.*]] addrspace(4)* [ getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), [[WG_CF:%.*]] ], [ [[WG_VAL_ARRAYCTOR_NEXT:%.*]], [[WG_CF2:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[CMPZ3]], label [[WG_LEADER1:%.*]], label [[WG_CF2]] +; CHECK: wg_leader1: +; CHECK-NEXT: call spir_func void @bar(%struct.snork addrspace(4)* [[ARRAYCTOR_CUR]]) +; CHECK-NEXT: [[ARRAYCTOR_NEXT:%.*]] = getelementptr inbounds [[STRUCT_SNORK]], [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_CUR]], i64 1 +; CHECK-NEXT: store [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_NEXT]], [[STRUCT_SNORK]] addrspace(4)* addrspace(3)* @[[WG_NEXT]], align 8 +; CHECK-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_NEXT]], getelementptr inbounds (%struct.snork, [[STRUCT_SNORK]] addrspace(4)* getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), i64 12) +; CHECK-NEXT: store i1 [[ARRAYCTOR_DONE]], i1 addrspace(3)* @[[WG_DONE]], align 1 +; CHECK-NEXT: br label [[WG_CF2]] +; CHECK: wg_cf2: +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #0 +; CHECK-NEXT: [[WG_VAL_ARRAYCTOR_DONE:%.*]] = load i1, i1 addrspace(3)* @[[WG_DONE]], align 1 +; CHECK-NEXT: [[WG_VAL_ARRAYCTOR_NEXT]] = load [[STRUCT_SNORK]] addrspace(4)*, [[STRUCT_SNORK]] addrspace(4)* addrspace(3)* @[[WG_NEXT]], align 8 +; CHECK-NEXT: br i1 [[WG_VAL_ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP:%.*]] +; CHECK: arrayctor.cont: +; CHECK-NEXT: ret void +; +entry: + %tmp = alloca %struct.eggs addrspace(4)*, align 8 + store %struct.eggs addrspace(4)* %arg, %struct.eggs addrspace(4)** %tmp, align 8 + %tmp2 = load %struct.eggs addrspace(4)*, %struct.eggs addrspace(4)** %tmp, align 8 + br label %arrayctor.loop + +arrayctor.loop: ; preds = %arrayctor.loop, %entry + %arrayctor.cur = phi %struct.snork addrspace(4)* [ getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), %entry ], [ %arrayctor.next, %arrayctor.loop ] + call spir_func void @bar(%struct.snork addrspace(4)* %arrayctor.cur) + %arrayctor.next = getelementptr inbounds %struct.snork, %struct.snork addrspace(4)* %arrayctor.cur, i64 1 + %arrayctor.done = icmp eq %struct.snork addrspace(4)* %arrayctor.next, getelementptr inbounds (%struct.snork, %struct.snork addrspace(4)* getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), i64 12) + br i1 %arrayctor.done, label %arrayctor.cont, label %arrayctor.loop + +arrayctor.cont: ; preds = %arrayctor.loop + ret void +} + +define linkonce_odr dso_local spir_func void @bar(%struct.snork addrspace(4)* %arg) unnamed_addr align 2 { +bb: + %tmp = alloca %struct.snork addrspace(4)*, align 8 + store %struct.snork addrspace(4)* %arg, %struct.snork addrspace(4)** %tmp, align 8 + %tmp1 = load %struct.snork addrspace(4)*, %struct.snork addrspace(4)** %tmp, align 8 + %tmp2 = getelementptr inbounds %struct.snork, %struct.snork addrspace(4)* %tmp1, i32 0, i32 0 + store i32 0, i32 addrspace(4)* %tmp2, align 4 + ret void +} + +!0 = !{}