rustc_codegen_ssa: Use llvm.invariant intrinsics on arguments that are

pcwalton · pcwalton · commit 5327e78e86a2 · 2022-10-14T17:17:52.000-07:00
immutable references; off by default.

Optimization failures around reloads and memcpy optimizations are frequently
traceable to LLVM's failure to prove that memory can't be mutated by a call or
store. This problem is especially acute in Rust, where large values tend to be
memcpy'd more often than in C++. Thankfully, Rust has stronger guarantees on
mutability available than C++ does, via the strong immutability of `&amp;`
references. This should allow LLVM to prove that memory can't be modified by
stores and calls in more cases.

We're already using LLVM's `readonly` parameter attribute on such calls.
However, the semantics of `readonly` are akin to `const` in C++, in that they
only promise to LLVM that the function won't mutate the parameter *through that
pointer*, not that the pointed-to memory is immutable for the entire duration
of the function. These weak semantics limit the applicability of `readonly` to
LLVM's alias analysis. Instead of `readonly`, the correct way to express strong
immutability guarantees on memory is through the `llvm.invariant.start` and
`llvm.invariant.end` intrinsics. These enable a frontend like `rustc` to
describe immutability of memory regions in an expressive, flow-sensitive
manner.

Unfortunately, LLVM doesn't use the `llvm.invariant.start` and
`llvm.invariant.end` intrinsics for much at the moment. It's only used in one
optimization in loop-invariant code motion at this time. Follow-up work will
need to be done in LLVM to integrate these intrinsics into alias analysis.
Possibly there will need to be some sort of "MemoryInvarianceAnalysis" that
uses graph reachability algorithms to analyze the extent of the guarantees
provided by these intrinsics to the control flow graph.

Regardless, this front-end work needs to happen as a prerequisite for any LLVM
work, so that the improvements to LLVM can be measured and tested. So this
commit makes `rustc` use `llvm.invariant` in a minimal way: on immutable
references to "freeze" types (i.e. not transitively containing UnsafeCell)
passed directly as parameters to functions. This is off by default, gated
behind the non-default `-Z emit-invariant-markers=yes` flag.

Obviously, a lot more can be done to use `llvm.invariant` more liberally in the
future, but this can be added over time, especially once more LLVM optimization
passes use that infrastructure. This is simply the bare minimum for now. Once
LLVM uses those intrinsics for more optimizations, the effects of more
`llvm.invariant` use can be measured more precisely.
diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs
@@ -1235,6 +1235,16 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // TODO(antoyo)
     }
 
+    fn invariant_start(&mut self, _value_ptr: RValue<'gcc>, _size: Size) -> RValue<'gcc> {
+        // TODO
+        unimplemented!()
+    }
+
+    fn invariant_end(&mut self, _marker_ptr: RValue<'gcc>, _value_ptr: RValue<'gcc>, _size: Size) {
+        // TODO
+        unimplemented!()
+    }
+
     fn call(
         &mut self,
         _typ: Type<'gcc>,
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1113,6 +1113,21 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         self.call_lifetime_intrinsic("llvm.lifetime.end.p0i8", ptr, size);
     }
 
+    fn invariant_start(&mut self, value_ptr: &'ll Value, size: Size) -> &'ll Value {
+        let size = size.bytes();
+        let value_ptr = self.pointercast(value_ptr, self.cx.type_i8p());
+        self.call_intrinsic("llvm.invariant.start.p0i8", &[self.cx.const_u64(size), value_ptr])
+    }
+
+    fn invariant_end(&mut self, marker_ptr: &'ll Value, value_ptr: &'ll Value, size: Size) {
+        let size = size.bytes();
+        let value_ptr = self.pointercast(value_ptr, self.cx.type_i8p());
+        self.call_intrinsic(
+            "llvm.invariant.end.p0i8",
+            &[marker_ptr, self.cx.const_u64(size), value_ptr],
+        );
+    }
+
     fn instrprof_increment(
         &mut self,
         fn_name: &'ll Value,
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
@@ -629,6 +629,7 @@ impl<'ll> CodegenCx<'ll, '_> {
         }
 
         let i8p = self.type_i8p();
+        let unitp = self.type_ptr_to(self.type_struct(&[], false));
         let void = self.type_void();
         let i1 = self.type_i1();
         let t_i8 = self.type_i8();
@@ -840,6 +841,9 @@ impl<'ll> CodegenCx<'ll, '_> {
         ifn!("llvm.lifetime.start.p0i8", fn(t_i64, i8p) -> void);
         ifn!("llvm.lifetime.end.p0i8", fn(t_i64, i8p) -> void);
 
+        ifn!("llvm.invariant.start.p0i8", fn(t_i64, i8p) -> unitp);
+        ifn!("llvm.invariant.end.p0i8", fn(unitp, t_i64, i8p) -> void);
+
         ifn!("llvm.expect.i1", fn(i1, i1) -> i1);
         ifn!("llvm.eh.typeid.for", fn(i8p) -> t_i32);
         ifn!("llvm.localescape", fn(...) -> void);
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -249,6 +249,8 @@ impl<'a, 'tcx> TerminatorCodegenHelper<'tcx> {
 impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     /// Generates code for a `Resume` terminator.
     fn codegen_resume_terminator(&mut self, helper: TerminatorCodegenHelper<'tcx>, mut bx: Bx) {
+        self.emit_invariant_end_markers(&mut bx);
+
         if let Some(funclet) = helper.funclet(self) {
             bx.cleanup_ret(funclet, None);
         } else {
@@ -306,6 +308,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     }
 
     fn codegen_return_terminator(&mut self, mut bx: Bx) {
+        self.emit_invariant_end_markers(&mut bx);
+
         // Call `va_end` if this is the definition of a C-variadic function.
         if self.fn_abi.c_variadic {
             // The `VaList` "spoofed" argument is just after all the real arguments.
@@ -1723,6 +1727,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     }
 }
 
+impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
+    fn emit_invariant_end_markers(&mut self, bx: &mut Bx) {
+        for invariant_value in self.invariant_values.iter().rev() {
+            bx.invariant_end(
+                invariant_value.marker_ptr,
+                invariant_value.value_ptr,
+                invariant_value.size,
+            )
+        }
+    }
+}
+
 enum ReturnDest<'tcx, V> {
     // Do nothing; the return value is indirect or ignored.
     Nothing,
diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -1,9 +1,11 @@
 use crate::traits::*;
-use rustc_middle::mir;
 use rustc_middle::mir::interpret::ErrorHandled;
-use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, TyAndLayout};
+use rustc_middle::mir::{self, Mutability};
+use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TypeFoldable, TypeVisitable};
-use rustc_target::abi::call::{FnAbi, PassMode};
+use rustc_span::DUMMY_SP;
+use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode};
+use rustc_target::abi::Size;
 
 use std::iter;
 
@@ -87,6 +89,11 @@ pub struct FunctionCx<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> {
 
     /// Caller location propagated if this function has `#[track_caller]`.
     caller_location: Option<OperandRef<'tcx, Bx::Value>>,
+
+    /// Information about pointers that are known immutable for the entire execution of the
+    /// function. We need to keep this information around so that we can call `invariant_end()`
+    /// before exiting this function.
+    invariant_values: Vec<InvariantValue<Bx::Value>>,
 }
 
 impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
@@ -103,6 +110,15 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     }
 }
 
+/// Information about pointers that are known immutable for the entire execution of the
+/// function. We need to keep this information around so that we can call `invariant_end()`
+/// before exiting this function.
+struct InvariantValue<V> {
+    marker_ptr: V,
+    value_ptr: V,
+    size: Size,
+}
+
 enum LocalRef<'tcx, V> {
     Place(PlaceRef<'tcx, V>),
     /// `UnsizedPlace(p)`: `p` itself is a thin pointer (indirect place).
@@ -178,6 +194,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
         debug_context,
         per_local_var_debug_info: None,
         caller_location: None,
+        invariant_values: vec![],
     };
 
     fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut bx);
@@ -325,6 +342,7 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
                     PassMode::Direct(_) => {
                         let llarg = bx.get_param(llarg_idx);
                         llarg_idx += 1;
+                        emit_invariant_markers_for(bx, fx, arg, llarg);
                         return local(OperandRef::from_immediate_or_packed_pair(
                             bx, llarg, arg.layout,
                         ));
@@ -398,6 +416,23 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     args
 }
 
+fn emit_invariant_markers_for<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    bx: &mut Bx,
+    fx: &mut FunctionCx<'a, 'tcx, Bx>,
+    arg: &ArgAbi<'tcx, Ty<'tcx>>,
+    value_ptr: Bx::Value,
+) {
+    if bx.sess().opts.unstable_opts.emit_invariant_markers {
+        if let ty::Ref(_, subty, Mutability::Not) = arg.layout.ty.kind() {
+            if subty.is_freeze(bx.tcx().at(DUMMY_SP), ty::ParamEnv::reveal_all()) {
+                let size = bx.cx().layout_of(*subty).size;
+                let marker_ptr = bx.invariant_start(value_ptr, size);
+                fx.invariant_values.push(InvariantValue { marker_ptr, value_ptr, size });
+            }
+        }
+    }
+}
+
 mod analyze;
 mod block;
 pub mod constant;
diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -310,6 +310,9 @@ pub trait BuilderMethods<'a, 'tcx>:
     /// Called for `StorageDead`
     fn lifetime_end(&mut self, ptr: Self::Value, size: Size);
 
+    fn invariant_start(&mut self, ptr: Self::Value, size: Size) -> Self::Value;
+    fn invariant_end(&mut self, marker_ptr: Self::Value, value_ptr: Self::Value, size: Size);
+
     fn instrprof_increment(
         &mut self,
         fn_name: Self::Value,
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
@@ -1295,6 +1295,8 @@ options! {
         an additional `.html` file showing the computed coverage spans."),
     dwarf_version: Option<u32> = (None, parse_opt_number, [TRACKED],
         "version of DWARF debug information to emit (default: 2 or 4, depending on platform)"),
+    emit_invariant_markers: bool = (false, parse_bool, [UNTRACKED],
+        "emit `llvm.invariant` markers, which may enable better optimization (default: no)"),
     emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],
         "emit a section containing stack size metadata (default: no)"),
     emit_thin_lto: bool = (true, parse_bool, [TRACKED],
diff --git a/src/test/codegen/invariant-basic.rs b/src/test/codegen/invariant-basic.rs
@@ -0,0 +1,12 @@
+// compile-flags: -C opt-level=0 -Z emit-invariant-markers=yes
+#![crate_type="lib"]
+
+#[no_mangle]
+pub fn foo(x: &i32) {
+    // CHECK-LABEL: @foo
+    // CHECK: call {{.*}} @llvm.invariant.start{{[^(]*}}(i64 4
+    // CHECK: call void @{{.*}}drop{{.*}}
+    // CHECK: call void @llvm.invariant.end
+    // CHECK: ret void
+    drop(x);
+}
diff --git a/src/test/rustdoc-ui/z-help.stdout b/src/test/rustdoc-ui/z-help.stdout
@@ -36,6 +36,7 @@
     -Z                       dump-mir-graphviz=val -- in addition to `.mir` files, create graphviz `.dot` files (and with `-Z instrument-coverage`, also create a `.dot` file for the MIR-derived coverage graph) (default: no)
     -Z                       dump-mir-spanview=val -- in addition to `.mir` files, create `.html` files to view spans for all `statement`s (including terminators), only `terminator` spans, or computed `block` spans (one span encompassing a block's terminator and all statements). If `-Z instrument-coverage` is also enabled, create an additional `.html` file showing the computed coverage spans.
     -Z                           dwarf-version=val -- version of DWARF debug information to emit (default: 2 or 4, depending on platform)
+    -Z                  emit-invariant-markers=val -- emit `llvm.invariant` markers, which may enable better optimization (default: no)
     -Z                        emit-stack-sizes=val -- emit a section containing stack size metadata (default: no)
     -Z                           emit-thin-lto=val -- emit the bc module with thin LTO info (default: yes)
     -Z               export-executable-symbols=val -- export symbols from executables, as if they were dynamic libraries