@@ -685,7 +685,7 @@ static void fixupPrivateMemoryPFWILambdaCaptures(CallInst *PFWICall) {
685
685
686
686
// Go through "byval" parameters which are passed as AS(0) pointers
687
687
// and: (1) create local shadows for them, (2) initialize them from the
688
- // leader's copy and (3) replace usages with pointer to the shadow
688
+ // leader's copy and (3) materialize the value in the local variable before use
689
689
//
690
690
// Do the same for 'this' pointer which points to PFWG lambda object which is
691
691
// allocated in the caller. Caller is a kernel function which is generated by
@@ -707,7 +707,7 @@ static void sharePFWGPrivateObjects(Function &F, const Triple &TT) {
707
707
BasicBlock *LeaderBB = EntryBB->splitBasicBlock (SplitPoint, " leader" );
708
708
BasicBlock *MergeBB = LeaderBB->splitBasicBlock (&LeaderBB->front (), " merge" );
709
709
710
- // 1) rewire the above basic blocks so that LeaderBB is executed only for the
710
+ // Rewire the above basic blocks so that LeaderBB is executed only for the
711
711
// leader workitem
712
712
guardBlockWithIsLeaderCheck (EntryBB, LeaderBB, MergeBB,
713
713
EntryBB->back ().getDebugLoc (), TT);
@@ -719,50 +719,34 @@ static void sharePFWGPrivateObjects(Function &F, const Triple &TT) {
719
719
IRBuilder<> Builder (Ctx);
720
720
Builder.SetInsertPoint (&LeaderBB->front ());
721
721
722
- // 2) create the shared copy - "shadow" - for current arg
722
+ // Create the shared copy - "shadow" - for current arg
723
723
GlobalVariable *Shadow = nullptr ;
724
- Value *RepVal = nullptr ;
725
724
if (Arg.hasByValAttr ()) {
726
725
assert (Arg.getType ()->getPointerAddressSpace () ==
727
726
asUInt (spirv::AddrSpace::Private));
728
727
T = Arg.getParamByValType ();
729
728
Shadow = spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
730
- RepVal = Shadow;
731
- if (TT.isNVPTX ()) {
732
- // For NVPTX target address space inference for kernel arguments and
733
- // allocas is happening in the backend (NVPTXLowerArgs and
734
- // NVPTXLowerAlloca passes). After the frontend these pointers are in
735
- // LLVM default address space 0 which is the generic address space for
736
- // NVPTX target.
737
- assert (Arg.getType ()->getPointerAddressSpace () == 0 );
738
-
739
- // Cast a pointer in the shared address space to the generic address
740
- // space.
741
- RepVal = ConstantExpr::getPointerBitCastOrAddrSpaceCast (Shadow,
742
- Arg.getType ());
743
- }
744
729
}
745
730
// Process 'this' pointer which points to PFWG lambda object
746
731
else if (Arg.getArgNo () == 0 ) {
747
732
PointerType *PtrT = dyn_cast<PointerType>(Arg.getType ());
748
733
assert (PtrT && " Expected this pointer as the first argument" );
749
734
T = PtrT->getElementType ();
750
735
Shadow = spirv::createWGLocalVariable (*F.getParent (), T, " ArgShadow" );
751
- RepVal =
752
- Builder.CreatePointerBitCastOrAddrSpaceCast (Shadow, Arg.getType ());
753
736
}
754
737
755
- if (!Shadow || !RepVal )
738
+ if (!Shadow)
756
739
continue ;
757
740
758
- // 3) replace argument with shadow in all uses
759
- for (auto *U : Arg.users ())
760
- U->replaceUsesOfWith (&Arg, RepVal);
761
-
762
741
copyBetweenPrivateAndShadow (&Arg, Shadow, Builder,
763
742
true /* private->shadow*/ );
743
+ // Materialize the value in the local variable before use
744
+ Builder.SetInsertPoint (&MergeBB->front ());
745
+ copyBetweenPrivateAndShadow (&Arg, Shadow, Builder,
746
+ false /* shadow->private*/ );
764
747
}
765
- // 5) make sure workers use up-to-date shared values written by the leader
748
+ // Insert barrier to make sure workers use up-to-date shared values written by
749
+ // the leader
766
750
spirv::genWGBarrier (MergeBB->front (), TT);
767
751
}
768
752
0 commit comments