diff --git a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp b/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp index 76d31e76a75b2..04400ab01a45c 100644 --- a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp +++ b/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp @@ -40,10 +40,10 @@ // local "shadow" variable. Before each PFWI invocation leader WI stores its // private copy of the variable into the shadow (under "is leader" guard), then // all WIs (ouside of "is leader" guard) load the shadow value into their -// private copies ("materialize" the private copy). This works becase these +// private copies ("materialize" the private copy). This works because these // variables are uniform - i.e. have the same value in all WIs and are not // changed within PFWI. The only exceptions are captures of private_memory -// isntances - see next. +// instances - see next. // ** Kind 1: // Even though WG-scope locals are supposed to be uniform, there is one // exception - capture of local of kind 1. It is always captured by non-const @@ -52,7 +52,7 @@ // of kind 1 variable's alloca is stored within the PFWI lambda. // Materialization of the lambda object value writes result of alloca of the // leader WI's private variable into the private copy of the lambda object, -// which is wrong. So for tese variables this pass adds a write of the private +// which is wrong. So for these variables this pass adds a write of the private // variable's address into the private copy of the lambda object right after its // materialization: // if (is_leader()) @@ -120,9 +120,6 @@ class SYCLLowerWGScopeLegacyPass : public FunctionPass { // run the LowerWGScope pass on the specified module bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - FunctionAnalysisManager FAM; auto PA = Impl.run(F, FAM); return !PA.areAllPreserved(); @@ -479,7 +476,7 @@ static void materializeLocalsInWIScopeBlocksImpl( // Checks if there is a need to materialize value of given local in given work // item-scope basic block. static bool localMustBeMaterialized(const AllocaInst *L, const BasicBlock &BB) { - // TODO this is overly convervative - see speculations below. + // TODO this is overly conservative - see speculations below. return true; } @@ -745,9 +742,9 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, for (; I->getOpcode() == Instruction::Alloca; I = I->getNextNode()) { auto *AllocaI = dyn_cast(I); // Allocas marked with "work_item_scope" are those originating from - // cl::sycl::private_memory variables, which must in private. No - // shadows/materialization is needed for them because they can be updated - // only within PFWIs + // cl::sycl::private_memory variables, which must be in private memory. + // No shadows/materialization is needed for them because they can be + // updated only within PFWIs if (!AllocaI->getMetadata(WI_SCOPE_MD)) Allocas.insert(AllocaI); } @@ -801,7 +798,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, } // There can be allocas not corresponding to any variable declared in user // code but generated by the compiler - e.g. for non-trivially typed - // parameters passed by by value. There can be WG scope stores into such + // parameters passed by value. There can be WG scope stores into such // allocas, which need to be made visible to all WIs. This is done via // creating a "shadow" workgroup-shared variable and using it to propagate // the value of the alloca'ed variable to worker WIs from the leader. @@ -815,7 +812,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, // Now materialize the locals: materializeLocalsInWIScopeBlocks(Allocas, WIScopeBBs); - // Fixup captured addresses of private_memory isntances in current WI + // Fixup captured addresses of private_memory instances in current WI for (auto *PFWICall : PFWICalls) fixupPrivateMemoryPFWILambdaCaptures(PFWICall); diff --git a/sycl/test/hier_par/hier_par_wgscope.cpp b/sycl/test/hier_par/hier_par_wgscope.cpp index 49dfa0cb33016..aafe02fdfec01 100644 --- a/sycl/test/hier_par/hier_par_wgscope.cpp +++ b/sycl/test/hier_par/hier_par_wgscope.cpp @@ -12,6 +12,12 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -O0 -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + // This test checks correctness of hierarchical kernel execution when there is // code and data in the work group scope.