Skip to content

Commit 7e160d4

Browse files
committed
Auto merge of rust-lang#126444 - tesuji:gvn-const-arrays, r=<try>
[WIP] gvn: Promote/propagate const local array Rewriting of rust-lang#125916 which used PromoteTemps pass. Fix rust-lang#73825 ### Current status - [ ] Waiting for [consensus](https://rust-lang.zulipchat.com/#narrow/stream/136281-t-opsem/topic/Could.20const.20read-only.20arrays.20be.20const.20promoted.3F). r? ghost
2 parents f9515fd + 6c6de58 commit 7e160d4

File tree

6 files changed

+298
-18
lines changed

6 files changed

+298
-18
lines changed

Diff for: compiler/rustc_mir_transform/src/gvn.rs

+82-13
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,15 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
365365

366366
#[instrument(level = "trace", skip(self), ret)]
367367
fn eval_to_const(&mut self, value: VnIndex) -> Option<OpTy<'tcx>> {
368+
use abi::HasDataLayout;
368369
use Value::*;
369-
let op = match *self.get(value) {
370+
// LLVM optimizes the load of `sizeof(size_t) * 2` as a single `mov`,
371+
// which is cheap. Bigger values make more `mov` instructions generated.
372+
// After GVN, it becomes a single load (`lea`) of an address in `.rodata`.
373+
let stack_threshold = self.tcx.data_layout().pointer_size * 2;
374+
let vvalue = self.get(value);
375+
debug!(?vvalue);
376+
let op = match *vvalue {
370377
Opaque(_) => return None,
371378
// Do not bother evaluating repeat expressions. This would uselessly consume memory.
372379
Repeat(..) => return None,
@@ -375,14 +382,26 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
375382
self.ecx.eval_mir_constant(value, DUMMY_SP, None).ok()?
376383
}
377384
Aggregate(kind, variant, ref fields) => {
385+
debug!(?kind, ?variant, ?fields);
378386
let fields = fields
379387
.iter()
380388
.map(|&f| self.evaluated[f].as_ref())
381-
.collect::<Option<Vec<_>>>()?;
389+
.collect::<Option<Vec<&OpTy<'_>>>>()?;
382390
let ty = match kind {
383391
AggregateTy::Array => {
384-
assert!(fields.len() > 0);
385-
Ty::new_array(self.tcx, fields[0].layout.ty, fields.len() as u64)
392+
let [field, ..] = fields.as_slice() else {
393+
bug!("fields.len() == 0");
394+
};
395+
let field_ty = field.layout.ty;
396+
// Ignore nested array
397+
if field_ty.is_array() {
398+
trace!(
399+
"ignoring nested array of type: [{field_ty}; {len}]",
400+
len = fields.len(),
401+
);
402+
return None;
403+
}
404+
Ty::new_array(self.tcx, field_ty, fields.len() as u64)
386405
}
387406
AggregateTy::Tuple => {
388407
Ty::new_tup_from_iter(self.tcx, fields.iter().map(|f| f.layout.ty))
@@ -406,6 +425,32 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
406425
};
407426
let ptr_imm = Immediate::new_pointer_with_meta(data, meta, &self.ecx);
408427
ImmTy::from_immediate(ptr_imm, ty).into()
428+
} else if matches!(kind, AggregateTy::Array) {
429+
if ty.layout.size() <= stack_threshold {
430+
return None;
431+
}
432+
let mut mplace = None;
433+
let alloc_id = self
434+
.ecx
435+
.intern_with_temp_alloc(ty, |ecx, dest| {
436+
// FIXME: Can we speed it up by using `ecx.write_immediate(.ScalarPair(_), dest)`?
437+
for (field_index, op) in fields.iter().copied().enumerate() {
438+
let field_dest = ecx.project_field(dest, field_index)?;
439+
ecx.copy_op(op, &field_dest)?;
440+
}
441+
442+
let dest =
443+
dest.assert_mem_place().map_provenance(|prov| prov.as_immutable());
444+
mplace.replace(dest);
445+
Ok(())
446+
})
447+
.ok()?;
448+
let GlobalAlloc::Memory(_alloc) = self.tcx.global_alloc(alloc_id) else {
449+
bug!()
450+
};
451+
let mplace = mplace.unwrap();
452+
debug!(?mplace);
453+
return Some(mplace.into());
409454
} else if matches!(ty.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
410455
let dest = self.ecx.allocate(ty, MemoryKind::Stack).ok()?;
411456
let variant_dest = if let Some(variant) = variant {
@@ -428,6 +473,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
428473
}
429474

430475
Projection(base, elem) => {
476+
debug!(?base, ?elem);
431477
let value = self.evaluated[base].as_ref()?;
432478
let elem = match elem {
433479
ProjectionElem::Deref => ProjectionElem::Deref,
@@ -449,6 +495,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
449495
self.ecx.project(value, elem).ok()?
450496
}
451497
Address { place, kind, provenance: _ } => {
498+
debug!(?place, ?kind);
452499
if !place.is_indirect_first_projection() {
453500
return None;
454501
}
@@ -708,7 +755,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
708755
place.projection = self.tcx.mk_place_elems(&projection);
709756
}
710757

711-
trace!(?place);
758+
trace!(after_place = ?place);
712759
}
713760

714761
/// Represent the *value* which would be read from `place`, and point `place` to a preexisting
@@ -1232,6 +1279,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
12321279
}
12331280
}
12341281

1282+
#[instrument(level = "trace", skip(ecx), ret)]
12351283
fn op_to_prop_const<'tcx>(
12361284
ecx: &mut InterpCx<'tcx, DummyMachine>,
12371285
op: &OpTy<'tcx>,
@@ -1247,7 +1295,8 @@ fn op_to_prop_const<'tcx>(
12471295
}
12481296

12491297
// Do not synthetize too large constants. Codegen will just memcpy them, which we'd like to avoid.
1250-
if !matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)) {
1298+
if !(op.layout.ty.is_array() || matches!(op.layout.abi, Abi::Scalar(..) | Abi::ScalarPair(..)))
1299+
{
12511300
return None;
12521301
}
12531302

@@ -1301,20 +1350,32 @@ fn op_to_prop_const<'tcx>(
13011350

13021351
impl<'tcx> VnState<'_, 'tcx> {
13031352
/// If `index` is a `Value::Constant`, return the `Constant` to be put in the MIR.
1353+
#[instrument(level = "trace", skip(self, index), ret)]
13041354
fn try_as_constant(&mut self, index: VnIndex) -> Option<ConstOperand<'tcx>> {
13051355
// This was already constant in MIR, do not change it.
1306-
if let Value::Constant { value, disambiguator: _ } = *self.get(index)
1307-
// If the constant is not deterministic, adding an additional mention of it in MIR will
1308-
// not give the same value as the former mention.
1356+
let value = self.get(index);
1357+
debug!(?index, ?value);
1358+
// If the constant is not deterministic, adding an additional mention of it in MIR will
1359+
// not give the same value as the former mention.
1360+
if let Value::Constant { value, disambiguator: _ } = value
13091361
&& value.is_deterministic()
13101362
{
1311-
return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: value });
1363+
return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: *value });
13121364
}
13131365

13141366
let op = self.evaluated[index].as_ref()?;
1315-
if op.layout.is_unsized() {
1316-
// Do not attempt to propagate unsized locals.
1317-
return None;
1367+
1368+
if let Either::Left(mplace) = op.as_mplace_or_imm()
1369+
&& let ty::Array(ty, _const) = mplace.layout.ty.kind()
1370+
{
1371+
// ignore nested arrays
1372+
if ty.is_array() {
1373+
return None;
1374+
}
1375+
// ignore promoted arrays
1376+
else if let Value::Projection(_index, ProjectionElem::Deref) = value {
1377+
return None;
1378+
}
13181379
}
13191380

13201381
let value = op_to_prop_const(&mut self.ecx, op)?;
@@ -1325,6 +1386,10 @@ impl<'tcx> VnState<'_, 'tcx> {
13251386
assert!(!value.may_have_provenance(self.tcx, op.layout.size));
13261387

13271388
let const_ = Const::Val(value, op.layout.ty);
1389+
// cache the propagated const
1390+
if let Some(new_index) = self.insert_constant(const_) {
1391+
self.values.swap_indices(index.as_usize(), new_index.as_usize());
1392+
}
13281393
Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_ })
13291394
}
13301395

@@ -1352,6 +1417,7 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
13521417
self.simplify_operand(operand, location);
13531418
}
13541419

1420+
#[instrument(level = "trace", skip(self, stmt))]
13551421
fn visit_statement(&mut self, stmt: &mut Statement<'tcx>, location: Location) {
13561422
if let StatementKind::Assign(box (ref mut lhs, ref mut rvalue)) = stmt.kind {
13571423
self.simplify_place_projection(lhs, location);
@@ -1365,8 +1431,10 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
13651431
.as_local()
13661432
.and_then(|local| self.locals[local])
13671433
.or_else(|| self.simplify_rvalue(rvalue, location));
1434+
debug!(?value);
13681435
let Some(value) = value else { return };
13691436

1437+
debug!(before_rvalue = ?rvalue);
13701438
if let Some(const_) = self.try_as_constant(value) {
13711439
*rvalue = Rvalue::Use(Operand::Constant(Box::new(const_)));
13721440
} else if let Some(local) = self.try_as_local(value, location)
@@ -1375,6 +1443,7 @@ impl<'tcx> MutVisitor<'tcx> for VnState<'_, 'tcx> {
13751443
*rvalue = Rvalue::Use(Operand::Copy(local.into()));
13761444
self.reused_locals.insert(local);
13771445
}
1446+
debug!(after_rvalue = ?rvalue);
13781447

13791448
return;
13801449
}

Diff for: tests/codegen/issue-73825-gvn-const-local-array.rs

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// issue: <https://github.com/rust-lang/rust/issues/73825>
2+
//@ compile-flags: -C opt-level=1
3+
#![crate_type = "lib"]
4+
5+
// CHECK-LABEL: @foo
6+
// CHECK-NEXT: {{.*}}:
7+
// CHECK-NEXT: and
8+
// CHECK-NEXT: getelementptr inbounds
9+
// CHECK-NEXT: load i32
10+
// CHECK-NEXT: ret i32
11+
#[no_mangle]
12+
#[rustfmt::skip]
13+
pub fn foo(x: usize) -> i32 {
14+
let base: [i32; 64] = [
15+
67, 754, 860, 559, 368, 870, 548, 972,
16+
141, 731, 351, 664, 32, 4, 996, 741,
17+
203, 292, 237, 480, 151, 940, 777, 540,
18+
143, 587, 747, 65, 152, 517, 882, 880,
19+
712, 595, 370, 901, 237, 53, 789, 785,
20+
912, 650, 896, 367, 316, 392, 62, 473,
21+
675, 691, 281, 192, 445, 970, 225, 425,
22+
628, 324, 322, 206, 912, 867, 462, 92
23+
];
24+
base[x % 64]
25+
}

Diff for: tests/mir-opt/const_array_locals.main.GVN.diff

+138
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
- // MIR for `main` before GVN
2+
+ // MIR for `main` after GVN
3+
4+
fn main() -> () {
5+
let mut _0: ();
6+
let _1: [i32; 32];
7+
let mut _4: [i32; 12];
8+
let mut _5: [i32; 12];
9+
let mut _7: &[i32; 32];
10+
let _8: [i32; 32];
11+
let _9: ();
12+
let mut _10: [u16; 32];
13+
let mut _12: [f32; 8];
14+
let _13: [[i32; 3]; 3];
15+
let mut _14: [i32; 3];
16+
let mut _15: [i32; 3];
17+
let mut _16: [i32; 3];
18+
scope 1 {
19+
debug _arr => _1;
20+
let _2: [i32; 32];
21+
scope 2 {
22+
debug _barr => _2;
23+
let _3: [[i32; 12]; 2];
24+
scope 3 {
25+
debug _foo => _3;
26+
let _6: [i32; 32];
27+
let mut _17: &[i32; 32];
28+
scope 4 {
29+
debug _darr => _6;
30+
let _11: F32x8;
31+
scope 5 {
32+
debug _f => _11;
33+
}
34+
}
35+
}
36+
}
37+
}
38+
39+
bb0: {
40+
StorageLive(_1);
41+
- _1 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32, const 251_i32, const 191_i32, const 178_i32, const 9_i32, const 4_i32, const 56_i32, const 221_i32, const 193_i32, const 164_i32, const 194_i32, const 197_i32, const 6_i32, const 243_i32, const 218_i32, const 171_i32, const 87_i32, const 247_i32, const 104_i32, const 159_i32, const 22_i32, const 157_i32, const 105_i32, const 31_i32, const 96_i32, const 173_i32, const 50_i32, const 1_i32];
42+
+ _1 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32, 251_i32, 191_i32, 178_i32, 9_i32, 4_i32, 56_i32, 221_i32, 193_i32, 164_i32, 194_i32, 197_i32, 6_i32, 243_i32, 218_i32, 171_i32, 87_i32, 247_i32, 104_i32, 159_i32, 22_i32, 157_i32, 105_i32, 31_i32, 96_i32, 173_i32, 50_i32, 1_i32];
43+
StorageLive(_2);
44+
- _2 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32, const 251_i32, const 191_i32, const 178_i32, const 9_i32, const 4_i32, const 56_i32, const 221_i32, const 193_i32, const 164_i32, const 194_i32, const 197_i32, const 6_i32, const 243_i32, const 218_i32, const 171_i32, const 87_i32, const 247_i32, const 104_i32, const 159_i32, const 22_i32, const 157_i32, const 105_i32, const 31_i32, const 96_i32, const 173_i32, const 50_i32, const 1_i32];
45+
+ _2 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32, 251_i32, 191_i32, 178_i32, 9_i32, 4_i32, 56_i32, 221_i32, 193_i32, 164_i32, 194_i32, 197_i32, 6_i32, 243_i32, 218_i32, 171_i32, 87_i32, 247_i32, 104_i32, 159_i32, 22_i32, 157_i32, 105_i32, 31_i32, 96_i32, 173_i32, 50_i32, 1_i32];
46+
StorageLive(_3);
47+
StorageLive(_4);
48+
- _4 = [const 255_i32, const 105_i32, const 15_i32, const 39_i32, const 62_i32, const 251_i32, const 191_i32, const 178_i32, const 9_i32, const 4_i32, const 56_i32, const 221_i32];
49+
+ _4 = const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32, 251_i32, 191_i32, 178_i32, 9_i32, 4_i32, 56_i32, 221_i32];
50+
StorageLive(_5);
51+
- _5 = [const 193_i32, const 164_i32, const 194_i32, const 197_i32, const 6_i32, const 243_i32, const 218_i32, const 171_i32, const 87_i32, const 247_i32, const 104_i32, const 42_i32];
52+
- _3 = [move _4, move _5];
53+
+ _5 = const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32, 243_i32, 218_i32, 171_i32, 87_i32, 247_i32, 104_i32, 42_i32];
54+
+ _3 = [const [255_i32, 105_i32, 15_i32, 39_i32, 62_i32, 251_i32, 191_i32, 178_i32, 9_i32, 4_i32, 56_i32, 221_i32], const [193_i32, 164_i32, 194_i32, 197_i32, 6_i32, 243_i32, 218_i32, 171_i32, 87_i32, 247_i32, 104_i32, 42_i32]];
55+
StorageDead(_5);
56+
StorageDead(_4);
57+
StorageLive(_6);
58+
StorageLive(_7);
59+
_17 = const main::promoted[0];
60+
_7 = &(*_17);
61+
- _6 = (*_7);
62+
+ _6 = (*_17);
63+
StorageDead(_7);
64+
StorageLive(_9);
65+
StorageLive(_10);
66+
- _10 = [const 255_u16, const 105_u16, const 15_u16, const 39_u16, const 62_u16, const 251_u16, const 191_u16, const 178_u16, const 9_u16, const 4_u16, const 56_u16, const 221_u16, const 193_u16, const 164_u16, const 194_u16, const 197_u16, const 6_u16, const 243_u16, const 218_u16, const 171_u16, const 87_u16, const 247_u16, const 104_u16, const 159_u16, const 22_u16, const 157_u16, const 105_u16, const 31_u16, const 96_u16, const 173_u16, const 50_u16, const 1_u16];
67+
- _9 = consume(move _10) -> [return: bb1, unwind continue];
68+
+ _10 = const [255_u16, 105_u16, 15_u16, 39_u16, 62_u16, 251_u16, 191_u16, 178_u16, 9_u16, 4_u16, 56_u16, 221_u16, 193_u16, 164_u16, 194_u16, 197_u16, 6_u16, 243_u16, 218_u16, 171_u16, 87_u16, 247_u16, 104_u16, 159_u16, 22_u16, 157_u16, 105_u16, 31_u16, 96_u16, 173_u16, 50_u16, 1_u16];
69+
+ _9 = consume(const [255_u16, 105_u16, 15_u16, 39_u16, 62_u16, 251_u16, 191_u16, 178_u16, 9_u16, 4_u16, 56_u16, 221_u16, 193_u16, 164_u16, 194_u16, 197_u16, 6_u16, 243_u16, 218_u16, 171_u16, 87_u16, 247_u16, 104_u16, 159_u16, 22_u16, 157_u16, 105_u16, 31_u16, 96_u16, 173_u16, 50_u16, 1_u16]) -> [return: bb1, unwind continue];
70+
}
71+
72+
bb1: {
73+
StorageDead(_10);
74+
StorageDead(_9);
75+
StorageLive(_11);
76+
StorageLive(_12);
77+
- _12 = [const 1f32, const 2f32, const 3f32, const 1f32, const 1f32, const 1f32, const 1f32, const 42f32];
78+
- _11 = F32x8(move _12);
79+
+ _12 = const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32];
80+
+ _11 = F32x8(const [1f32, 2f32, 3f32, 1f32, 1f32, 1f32, 1f32, 42f32]);
81+
StorageDead(_12);
82+
StorageLive(_13);
83+
StorageLive(_14);
84+
_14 = [const 1_i32, const 0_i32, const 0_i32];
85+
StorageLive(_15);
86+
_15 = [const 0_i32, const 1_i32, const 0_i32];
87+
StorageLive(_16);
88+
_16 = [const 0_i32, const 0_i32, const 1_i32];
89+
_13 = [move _14, move _15, move _16];
90+
StorageDead(_16);
91+
StorageDead(_15);
92+
StorageDead(_14);
93+
StorageDead(_13);
94+
_0 = const ();
95+
StorageDead(_11);
96+
StorageDead(_6);
97+
StorageDead(_3);
98+
StorageDead(_2);
99+
StorageDead(_1);
100+
return;
101+
}
102+
+ }
103+
+
104+
+ ALLOC0 (size: 32, align: 4) {
105+
+ 0x00 │ 00 00 80 3f 00 00 00 40 00 00 40 40 00 00 80 3f │ ...?...@..@@...?
106+
+ 0x10 │ 00 00 80 3f 00 00 80 3f 00 00 80 3f 00 00 28 42 │ ...?...?...?..(B
107+
+ }
108+
+
109+
+ ALLOC1 (size: 64, align: 2) {
110+
+ 0x00 │ ff 00 69 00 0f 00 27 00 3e 00 fb 00 bf 00 b2 00 │ ..i...'.>.......
111+
+ 0x10 │ 09 00 04 00 38 00 dd 00 c1 00 a4 00 c2 00 c5 00 │ ....8...........
112+
+ 0x20 │ 06 00 f3 00 da 00 ab 00 57 00 f7 00 68 00 9f 00 │ ........W...h...
113+
+ 0x30 │ 16 00 9d 00 69 00 1f 00 60 00 ad 00 32 00 01 00 │ ....i...`...2...
114+
+ }
115+
+
116+
+ ALLOC2 (size: 48, align: 4) {
117+
+ 0x00 │ c1 00 00 00 a4 00 00 00 c2 00 00 00 c5 00 00 00 │ ................
118+
+ 0x10 │ 06 00 00 00 f3 00 00 00 da 00 00 00 ab 00 00 00 │ ................
119+
+ 0x20 │ 57 00 00 00 f7 00 00 00 68 00 00 00 2a 00 00 00 │ W.......h...*...
120+
+ }
121+
+
122+
+ ALLOC3 (size: 48, align: 4) {
123+
+ 0x00 │ ff 00 00 00 69 00 00 00 0f 00 00 00 27 00 00 00 │ ....i.......'...
124+
+ 0x10 │ 3e 00 00 00 fb 00 00 00 bf 00 00 00 b2 00 00 00 │ >...............
125+
+ 0x20 │ 09 00 00 00 04 00 00 00 38 00 00 00 dd 00 00 00 │ ........8.......
126+
+ }
127+
+
128+
+ ALLOC4 (size: 128, align: 4) {
129+
+ 0x00 │ ff 00 00 00 69 00 00 00 0f 00 00 00 27 00 00 00 │ ....i.......'...
130+
+ 0x10 │ 3e 00 00 00 fb 00 00 00 bf 00 00 00 b2 00 00 00 │ >...............
131+
+ 0x20 │ 09 00 00 00 04 00 00 00 38 00 00 00 dd 00 00 00 │ ........8.......
132+
+ 0x30 │ c1 00 00 00 a4 00 00 00 c2 00 00 00 c5 00 00 00 │ ................
133+
+ 0x40 │ 06 00 00 00 f3 00 00 00 da 00 00 00 ab 00 00 00 │ ................
134+
+ 0x50 │ 57 00 00 00 f7 00 00 00 68 00 00 00 9f 00 00 00 │ W.......h.......
135+
+ 0x60 │ 16 00 00 00 9d 00 00 00 69 00 00 00 1f 00 00 00 │ ........i.......
136+
+ 0x70 │ 60 00 00 00 ad 00 00 00 32 00 00 00 01 00 00 00 │ `.......2.......
137+
}
138+

0 commit comments

Comments
 (0)