From 0d8fedba3c256163cd1c0a1e93ba43ce5384e0dc Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 20 Feb 2020 10:59:55 -0800 Subject: [PATCH] [SYCL] LowerWGScope pass should not be skipped when -O0 is used LowerWGScope pass performs required transformations to enable hierarchical parallelism semantics. This pass should not be skipped even if optimizations are disabled. Also some typos in the comments are fixed. Signed-off-by: Artur Gainullin --- .../lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp | 21 ++++++++----------- sycl/test/hier_par/hier_par_wgscope.cpp | 6 ++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp b/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp index 76d31e76a75b2..04400ab01a45c 100644 --- a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp +++ b/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp @@ -40,10 +40,10 @@ // local "shadow" variable. Before each PFWI invocation leader WI stores its // private copy of the variable into the shadow (under "is leader" guard), then // all WIs (ouside of "is leader" guard) load the shadow value into their -// private copies ("materialize" the private copy). This works becase these +// private copies ("materialize" the private copy). This works because these // variables are uniform - i.e. have the same value in all WIs and are not // changed within PFWI. The only exceptions are captures of private_memory -// isntances - see next. +// instances - see next. // ** Kind 1: // Even though WG-scope locals are supposed to be uniform, there is one // exception - capture of local of kind 1. It is always captured by non-const @@ -52,7 +52,7 @@ // of kind 1 variable's alloca is stored within the PFWI lambda. // Materialization of the lambda object value writes result of alloca of the // leader WI's private variable into the private copy of the lambda object, -// which is wrong. So for tese variables this pass adds a write of the private +// which is wrong. So for these variables this pass adds a write of the private // variable's address into the private copy of the lambda object right after its // materialization: // if (is_leader()) @@ -120,9 +120,6 @@ class SYCLLowerWGScopeLegacyPass : public FunctionPass { // run the LowerWGScope pass on the specified module bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - FunctionAnalysisManager FAM; auto PA = Impl.run(F, FAM); return !PA.areAllPreserved(); @@ -479,7 +476,7 @@ static void materializeLocalsInWIScopeBlocksImpl( // Checks if there is a need to materialize value of given local in given work // item-scope basic block. static bool localMustBeMaterialized(const AllocaInst *L, const BasicBlock &BB) { - // TODO this is overly convervative - see speculations below. + // TODO this is overly conservative - see speculations below. return true; } @@ -745,9 +742,9 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, for (; I->getOpcode() == Instruction::Alloca; I = I->getNextNode()) { auto *AllocaI = dyn_cast(I); // Allocas marked with "work_item_scope" are those originating from - // cl::sycl::private_memory variables, which must in private. No - // shadows/materialization is needed for them because they can be updated - // only within PFWIs + // cl::sycl::private_memory variables, which must be in private memory. + // No shadows/materialization is needed for them because they can be + // updated only within PFWIs if (!AllocaI->getMetadata(WI_SCOPE_MD)) Allocas.insert(AllocaI); } @@ -801,7 +798,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, } // There can be allocas not corresponding to any variable declared in user // code but generated by the compiler - e.g. for non-trivially typed - // parameters passed by by value. There can be WG scope stores into such + // parameters passed by value. There can be WG scope stores into such // allocas, which need to be made visible to all WIs. This is done via // creating a "shadow" workgroup-shared variable and using it to propagate // the value of the alloca'ed variable to worker WIs from the leader. @@ -815,7 +812,7 @@ PreservedAnalyses SYCLLowerWGScopePass::run(Function &F, // Now materialize the locals: materializeLocalsInWIScopeBlocks(Allocas, WIScopeBBs); - // Fixup captured addresses of private_memory isntances in current WI + // Fixup captured addresses of private_memory instances in current WI for (auto *PFWICall : PFWICalls) fixupPrivateMemoryPFWILambdaCaptures(PFWICall); diff --git a/sycl/test/hier_par/hier_par_wgscope.cpp b/sycl/test/hier_par/hier_par_wgscope.cpp index 49dfa0cb33016..aafe02fdfec01 100644 --- a/sycl/test/hier_par/hier_par_wgscope.cpp +++ b/sycl/test/hier_par/hier_par_wgscope.cpp @@ -12,6 +12,12 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -O0 -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + // This test checks correctness of hierarchical kernel execution when there is // code and data in the work group scope.