Skip to content

Commit 0b45aea

Browse files
committed
[CIR][Transform] Add constant load elimination pass
This patch tries to give a simple initial implementation for eliminating redundant loads of constant objects, an idea originally posted by OfekShilon. Specifically, this patch consists of two parts: * It adds a new unit attribute `const` to the `cir.alloca` operation. Presence of this attribute indicates that the alloca-ed object is declared `const` in the input source program. CIRGen is updated accordingly to start emitting this new attribute. * It adds a new pass to the CIR optimization pipeline. This new pass runs on function level, and identifies and eliminates all redundant loads of a constant alloca-ed object.
1 parent 52323c1 commit 0b45aea

File tree

9 files changed

+250
-7
lines changed

9 files changed

+250
-7
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

+5
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,9 @@ def AllocaOp : CIR_Op<"alloca", [
469469
cases, the first use contains the initialization (a cir.store, a cir.call
470470
to a ctor, etc).
471471

472+
The presence of the `const` attribute indicates that the local variable is
473+
declared with the C/C++ `const` keyword.
474+
472475
The `dynAllocSize` specifies the size to dynamically allocate on the stack
473476
and ignores the allocation size based on the original type. This is useful
474477
when handling VLAs and is omitted when declaring regular local variables.
@@ -492,6 +495,7 @@ def AllocaOp : CIR_Op<"alloca", [
492495
TypeAttr:$allocaType,
493496
StrAttr:$name,
494497
UnitAttr:$init,
498+
UnitAttr:$constant,
495499
ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$alignment,
496500
OptionalAttr<ASTVarDeclInterface>:$ast
497501
);
@@ -529,6 +533,7 @@ def AllocaOp : CIR_Op<"alloca", [
529533
($dynAllocSize^ `:` type($dynAllocSize) `,`)?
530534
`[` $name
531535
(`,` `init` $init^)?
536+
(`,` `const` $constant^)?
532537
`]`
533538
(`ast` $ast^)? attr-dict
534539
}];

clang/include/clang/CIR/Dialect/Passes.h

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ std::unique_ptr<Pass> createLifetimeCheckPass(ArrayRef<StringRef> remark,
2828
clang::ASTContext *astCtx);
2929
std::unique_ptr<Pass> createCIRCanonicalizePass();
3030
std::unique_ptr<Pass> createCIRSimplifyPass();
31+
std::unique_ptr<Pass> createConstLoadEliminationPass();
3132
std::unique_ptr<Pass> createDropASTPass();
3233
std::unique_ptr<Pass> createSCFPreparePass();
3334
std::unique_ptr<Pass> createLoweringPreparePass();

clang/include/clang/CIR/Dialect/Passes.td

+18
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,24 @@ def CIRSimplify : Pass<"cir-simplify"> {
4343
let dependentDialects = ["cir::CIRDialect"];
4444
}
4545

46+
def ConstLoadElimination : Pass<"cir-const-load-elimination"> {
47+
let summary = "Eliminate redundant loads of constant objects";
48+
let description = [{
49+
This pass eliminates redundant loads of objects that are known to be
50+
constant.
51+
52+
The value of an object declared with `const` cannot change during the
53+
object's whole lifetime. Thus multiple loads of a `const` object can be
54+
merged into a single load when the resulting load dominates all the original
55+
loads.
56+
57+
This pass is a function pass and it processes a single function within a
58+
single run.
59+
}];
60+
let constructor = "mlir::createConstLoadEliminationPass()";
61+
let dependentDialects = ["cir::CIRDialect"];
62+
}
63+
4664
def LifetimeCheck : Pass<"cir-lifetime-check"> {
4765
let summary = "Check lifetime safety and generate diagnostics";
4866
let description = [{

clang/lib/CIR/CodeGen/CIRGenFunction.cpp

+8-6
Original file line numberDiff line numberDiff line change
@@ -305,10 +305,11 @@ mlir::LogicalResult CIRGenFunction::declare(const Decl *var, QualType ty,
305305
assert(!symbolTable.count(var) && "not supposed to be available just yet");
306306

307307
addr = buildAlloca(namedVar->getName(), ty, loc, alignment);
308-
if (isParam) {
309-
auto allocaOp = cast<mlir::cir::AllocaOp>(addr.getDefiningOp());
308+
auto allocaOp = cast<mlir::cir::AllocaOp>(addr.getDefiningOp());
309+
if (isParam)
310310
allocaOp.setInitAttr(mlir::UnitAttr::get(builder.getContext()));
311-
}
311+
if (ty.isConstQualified())
312+
allocaOp.setConstantAttr(mlir::UnitAttr::get(builder.getContext()));
312313

313314
symbolTable.insert(var, addr);
314315
return mlir::success();
@@ -324,10 +325,11 @@ mlir::LogicalResult CIRGenFunction::declare(Address addr, const Decl *var,
324325
assert(!symbolTable.count(var) && "not supposed to be available just yet");
325326

326327
addrVal = addr.getPointer();
327-
if (isParam) {
328-
auto allocaOp = cast<mlir::cir::AllocaOp>(addrVal.getDefiningOp());
328+
auto allocaOp = cast<mlir::cir::AllocaOp>(addrVal.getDefiningOp());
329+
if (isParam)
329330
allocaOp.setInitAttr(mlir::UnitAttr::get(builder.getContext()));
330-
}
331+
if (ty.isConstQualified())
332+
allocaOp.setConstantAttr(mlir::UnitAttr::get(builder.getContext()));
331333

332334
symbolTable.insert(var, addrVal);
333335
return mlir::success();

clang/lib/CIR/CodeGen/CIRPasses.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,10 @@ mlir::LogicalResult runCIRToCIRPasses(
6666
pm.addPass(std::move(libOpPass));
6767
}
6868

69-
if (enableCIRSimplify)
69+
if (enableCIRSimplify) {
7070
pm.addPass(mlir::createCIRSimplifyPass());
71+
pm.addPass(mlir::createConstLoadEliminationPass());
72+
}
7173

7274
pm.addPass(mlir::createLoweringPreparePass(&astCtx));
7375

clang/lib/CIR/Dialect/Transforms/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_clang_library(MLIRCIRTransforms
55
LoweringPrepare.cpp
66
CIRCanonicalize.cpp
77
CIRSimplify.cpp
8+
ConstLoadElimination.cpp
89
DropAST.cpp
910
IdiomRecognizer.cpp
1011
LibOpt.cpp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
//===- ConstLoadElimination.cpp - performs redundant load elimination -----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "PassDetail.h"
10+
#include "mlir/IR/Dominance.h"
11+
#include "mlir/Pass/Pass.h"
12+
#include "clang/CIR/Dialect/IR/CIRDialect.h"
13+
#include "clang/CIR/Dialect/Passes.h"
14+
15+
using namespace mlir;
16+
using namespace cir;
17+
18+
namespace {
19+
20+
/// Eliminates redundant loads from a single alloca that is marked `const`.
///
/// Two rewrites are applied to the non-volatile loads of \p alloca:
///  - a load dominated by a store into the alloca is replaced with the value
///    that store writes;
///  - any other load that is properly dominated by another load of the alloca
///    is replaced with the load found at the end of its dominator chain.
/// Volatile loads are never erased.
///
/// \param dom    Dominance information used to compare the loads and stores.
/// \param alloca The alloca to process; it must carry the `constant` attribute.
void processConstAlloca(DominanceInfo &dom, AllocaOp alloca) {
  assert(alloca.getConstant() && "must be a constant alloca");

  mlir::Value slot = alloca.getResult();

  // Collect the loads from the slot and the stores *into* the slot.
  SmallVector<LoadOp> allLoads;
  SmallVector<StoreOp> allStores;
  for (Operation *user : alloca->getUsers()) {
    if (auto load = dyn_cast<LoadOp>(user)) {
      allLoads.push_back(load);
    } else if (auto store = dyn_cast<StoreOp>(user)) {
      // A store can also use the alloca as the *stored value* (its address is
      // written somewhere else). Such a store does not initialize the slot,
      // and its value operand has the pointer type rather than the element
      // type, so it must not be used to replace a load.
      if (store.getAddr() == slot)
        allStores.push_back(store);
    }
  }

  // Step 1: forward stored values. Loads that are not erased are gathered in
  // `survivingLoads`, so the later steps never look at an erased operation.
  SmallVector<LoadOp> survivingLoads;
  for (LoadOp load : allLoads) {
    // Volatile loads are not candidates for elimination.
    if (!load.getIsVolatile()) {
      auto storeIt = llvm::find_if(allStores, [&](StoreOp store) {
        return dom.dominates(store.getOperation(), load.getOperation());
      });
      if (storeIt != allStores.end()) {
        load.getResult().replaceAllUsesWith(storeIt->getValue());
        load.erase();
        continue;
      }
    }
    survivingLoads.push_back(load);
  }

  // Step 2: for every surviving load record one load that properly dominates
  // it. The recorded load is not necessarily the closest dominator; following
  // the links transitively in step 3 still ends at a load that has no
  // dominating load of its own.
  DenseMap<LoadOp, LoadOp> dominatingLoad;
  for (LoadOp load : survivingLoads) {
    for (LoadOp candidate : survivingLoads) {
      if (dom.properlyDominates(candidate.getOperation(),
                                load.getOperation())) {
        dominatingLoad[load] = candidate;
        break;
      }
    }
  }

  // Step 3: replace each non-volatile load with the load at the end of its
  // dominator chain. That final load has no link of its own, so it is never
  // erased here and stays valid as a replacement for later iterations.
  for (LoadOp load : survivingLoads) {
    if (load.getIsVolatile())
      continue;

    LoadOp target = load;
    for (auto it = dominatingLoad.find(target); it != dominatingLoad.end();
         it = dominatingLoad.find(target))
      target = it->second;

    if (target != load) {
      load.getResult().replaceAllUsesWith(target.getResult());
      load.erase();
    }
  }
}
92+
93+
/// Runs constant-load elimination on every `const` alloca nested in \p func.
void processFunc(mlir::cir::FuncOp func) {
  // Gather the candidates first and rewrite afterwards, so the traversal
  // itself never runs while operations are being erased.
  SmallVector<AllocaOp> constAllocas;
  func->walk([&constAllocas](AllocaOp op) {
    if (!op.getConstant())
      return;
    constAllocas.push_back(op);
  });

  // One dominance-info object is shared by all allocas of this function.
  DominanceInfo domInfo;
  for (AllocaOp op : constAllocas)
    processConstAlloca(domInfo, op);
}
104+
105+
struct ConstLoadEliminationPass
106+
: public ConstLoadEliminationBase<ConstLoadEliminationPass> {
107+
using ConstLoadEliminationBase::ConstLoadEliminationBase;
108+
109+
void runOnOperation() override { getOperation()->walk(processFunc); }
110+
};
111+
112+
} // namespace
113+
114+
std::unique_ptr<Pass> mlir::createConstLoadEliminationPass() {
115+
return std::make_unique<ConstLoadEliminationPass>();
116+
}
+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --input-file=%t.cir %s
3+
4+
int produce_int();
5+
6+
void local_const_int() {
7+
const int x = produce_int();
8+
}
9+
10+
// CHECK-LABEL: @_Z15local_const_intv
11+
// CHECK: %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
12+
// CHECK: }
13+
14+
void param_const_int(const int x) {}
15+
16+
// CHECK-LABEL: @_Z15param_const_inti
17+
// CHECK: %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
18+
// CHECK: }
19+
20+
struct Foo {
21+
int a;
22+
int b;
23+
};
24+
25+
Foo produce_foo();
26+
27+
void local_const_struct() {
28+
const Foo x = produce_foo();
29+
}
30+
31+
// CHECK-LABEL: @_Z18local_const_structv
32+
// CHECK: %{{.+}} = cir.alloca !ty_Foo, !cir.ptr<!ty_Foo>, ["x", init, const]
33+
// CHECK: }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -fclangir-mem2reg -emit-cir %s -o %t.cir
4+
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=MEM2REG
5+
6+
int produce_int();
7+
void blackbox(const int &);
8+
void blackbox(const volatile int &);
9+
10+
int load_local_const_int() {
11+
const int x = produce_int();
12+
int a = x;
13+
blackbox(x);
14+
int b = x;
15+
return a + b;
16+
}
17+
18+
// CHECK-LABEL: @_Z20load_local_const_intv
19+
// CHECK: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64}
20+
// CHECK-NEXT: %[[#a_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
21+
// CHECK-NEXT: %[[#b_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
22+
// CHECK-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
23+
// CHECK-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
24+
// CHECK-NEXT: cir.store %[[#init]], %[[#a_slot]] : !s32i, !cir.ptr<!s32i>
25+
// CHECK-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
26+
// CHECK-NEXT: cir.store %[[#init]], %[[#b_slot]] : !s32i, !cir.ptr<!s32i>
27+
// CHECK: }
28+
29+
// MEM2REG-LABEL: @_Z20load_local_const_intv
30+
// MEM2REG-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64}
31+
// MEM2REG-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
32+
// MEM2REG-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
33+
// MEM2REG-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
34+
// MEM2REG-NEXT: %{{.+}} = cir.binop(add, %[[#init]], %[[#init]]) nsw : !s32i
35+
// MEM2REG: }
36+
37+
int load_volatile_local_const_int() {
38+
const volatile int x = produce_int();
39+
int a = x;
40+
blackbox(x);
41+
int b = x;
42+
return a + b;
43+
}
44+
45+
// CHECK-LABEL: @_Z29load_volatile_local_const_intv
46+
// CHECK: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64}
47+
// CHECK-NEXT: %[[#a_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
48+
// CHECK-NEXT: %[[#b_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
49+
// CHECK-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
50+
// CHECK-NEXT: cir.store volatile %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
51+
// CHECK-NEXT: %[[#reload_1:]] = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
52+
// CHECK-NEXT: cir.store %[[#reload_1]], %[[#a_slot]] : !s32i, !cir.ptr<!s32i>
53+
// CHECK-NEXT: cir.call @_Z8blackboxRVKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
54+
// CHECK-NEXT: %[[#reload_2:]] = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
55+
// CHECK-NEXT: cir.store %[[#reload_2]], %[[#b_slot]] : !s32i, !cir.ptr<!s32i>
56+
// CHECK: }
57+
58+
// MEM2REG-LABEL: @_Z29load_volatile_local_const_intv
59+
// MEM2REG-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64}
60+
// MEM2REG-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
61+
// MEM2REG-NEXT: cir.store volatile %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
62+
// MEM2REG-NEXT: %{{.+}} = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
63+
// MEM2REG-NEXT: cir.call @_Z8blackboxRVKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
64+
// MEM2REG-NEXT: %{{.+}} = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
65+
// MEM2REG: }

0 commit comments

Comments
 (0)