From def3fd8e9224128ffdf4cb0a7f08306f757a059a Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 13:29:45 +0100 Subject: [PATCH 1/8] Add -Zvirtual-function-elimination flag Adds the virtual-function-elimination unstable compiler flag and a check that this flag is only used in combination with -Clto. LLVM can only apply this optimization with fat LTO. --- compiler/rustc_interface/src/tests.rs | 1 + compiler/rustc_session/src/options.rs | 3 +++ compiler/rustc_session/src/session.rs | 12 ++++++------ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 3747fb5eca0cc..30a29ed6ed38f 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -797,6 +797,7 @@ fn test_debugging_options_tracking_hash() { tracked!(unleash_the_miri_inside_of_you, true); tracked!(use_ctors_section, Some(true)); tracked!(verify_llvm_ir, true); + tracked!(virtual_function_elimination, true); tracked!(wasi_exec_model, Some(WasiExecModel::Reactor)); macro_rules! tracked_no_crate_hash { diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 007fa87189fb0..0f60ffda2ee57 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1579,6 +1579,9 @@ options! { "in general, enable more debug printouts (default: no)"), verify_llvm_ir: bool = (false, parse_bool, [TRACKED], "verify LLVM IR (default: no)"), + virtual_function_elimination: bool = (false, parse_bool, [TRACKED], + "enables dead virtual function elimination optimization. 
\ + Requires `-Clto[=[fat,yes]]`"), wasi_exec_model: Option = (None, parse_wasi_exec_model, [TRACKED], "whether to build a wasi command or reactor"), diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index b2c23cda6aae5..18aa717f6768c 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -1431,14 +1431,14 @@ fn validate_commandline_args_with_session_available(sess: &Session) { ); } - // LLVM CFI requires LTO. - if sess.is_sanitizer_cfi_enabled() { - if sess.opts.cg.lto == config::LtoCli::Unspecified - || sess.opts.cg.lto == config::LtoCli::No - || sess.opts.cg.lto == config::LtoCli::Thin - { + // LLVM CFI and VFE both require LTO. + if sess.lto() != config::Lto::Fat { + if sess.is_sanitizer_cfi_enabled() { sess.err("`-Zsanitizer=cfi` requires `-Clto`"); } + if sess.opts.debugging_opts.virtual_function_elimination { + sess.err("`-Zvirtual-function-elimination` requires `-Clto`"); + } } if sess.opts.debugging_opts.stack_protector != StackProtector::None { From 20f597ffcd29d71d116c617def76291689549d6c Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 13:35:40 +0100 Subject: [PATCH 2/8] Add LLVM module flags required for the VFE opt To apply the optimization the `Virtual Function Elim` module flag has to be set. To apply this optimization post-link the `LTOPostLink` module flag has to be set. 
--- compiler/rustc_codegen_llvm/src/back/lto.rs | 14 +++++++++++++- compiler/rustc_codegen_llvm/src/context.rs | 9 +++++++++ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 1 + compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp | 5 +++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index c7497bfd355e5..38402e0431379 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -586,9 +586,21 @@ pub(crate) fn run_pass_manager( // LTO-specific optimization passes that LLVM provides. // // This code is based off the code found in llvm's LTO code generator: - // tools/lto/LTOCodeGenerator.cpp + // llvm/lib/LTO/LTOCodeGenerator.cpp debug!("running the pass manager"); unsafe { + if !llvm::LLVMRustHasModuleFlag( + module.module_llvm.llmod(), + "LTOPostLink".as_ptr().cast(), + 11, + ) { + llvm::LLVMRustAddModuleFlag( + module.module_llvm.llmod(), + llvm::LLVMModFlagBehavior::Error, + "LTOPostLink\0".as_ptr().cast(), + 1, + ); + } if llvm_util::should_use_new_llvm_pass_manager( &config.new_llvm_pass_manager, &cgcx.target_arch, diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 5544f0d3f6058..27fe60161d599 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -326,6 +326,15 @@ pub unsafe fn create_module<'ll>( ) } + if sess.opts.debugging_opts.virtual_function_elimination { + llvm::LLVMRustAddModuleFlag( + llmod, + llvm::LLVMModFlagBehavior::Error, + "Virtual Function Elim\0".as_ptr().cast(), + 1, + ); + } + llmod } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 1d9a4655db637..4f7b082dac4c2 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1936,6 +1936,7 @@ extern "C" { name: *const c_char, value: 
u32, ); + pub fn LLVMRustHasModuleFlag(M: &Module, name: *const c_char, len: size_t) -> bool; pub fn LLVMRustMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value; diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index f90bb7f236868..a2dd0a7bcdc61 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -672,6 +672,11 @@ extern "C" void LLVMRustAddModuleFlag( unwrap(M)->addModuleFlag(MergeBehavior, Name, Value); } +extern "C" bool LLVMRustHasModuleFlag(LLVMModuleRef M, const char *Name, + size_t Len) { + return unwrap(M)->getModuleFlag(StringRef(Name, Len)) != nullptr; +} + extern "C" LLVMValueRef LLVMRustMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD) { return wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD))); } From d55787a155a34753c1dd8751dd2b77804aa8f442 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 13:42:46 +0100 Subject: [PATCH 3/8] Add typeid_for_trait_ref function This function computes an Itanium-like typeid for a trait_ref. This is required for the VFE optimization in LLVM. It is used to map `llvm.type.checked.load` invocations, that is loading the function from a vtable, to the vtables this function could be from. It is important to note that `typeid`s are not unique. So multiple vtables of the same trait can share `typeid`s.
--- compiler/rustc_symbol_mangling/src/lib.rs | 7 +++++++ compiler/rustc_symbol_mangling/src/v0.rs | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index 46f70bb1674df..bed0e81e66e14 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -155,6 +155,13 @@ pub fn typeid_for_fnabi<'tcx>(tcx: TyCtxt<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) v0::mangle_typeid_for_fnabi(tcx, fn_abi) } +pub fn typeid_for_trait_ref<'tcx>( + tcx: TyCtxt<'tcx>, + trait_ref: ty::PolyExistentialTraitRef<'tcx>, +) -> String { + v0::mangle_typeid_for_trait_ref(tcx, trait_ref) +} + /// Computes the symbol name for the given instance. This function will call /// `compute_instantiating_crate` if it needs to factor the instantiating crate /// into the symbol name. diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs index dc1946bcdc2eb..a00b86e79acb7 100644 --- a/compiler/rustc_symbol_mangling/src/v0.rs +++ b/compiler/rustc_symbol_mangling/src/v0.rs @@ -95,6 +95,24 @@ pub(super) fn mangle_typeid_for_fnabi<'tcx>( format!("typeid{}", arg_count) } +pub(super) fn mangle_typeid_for_trait_ref<'tcx>( + tcx: TyCtxt<'tcx>, + trait_ref: ty::PolyExistentialTraitRef<'tcx>, +) -> String { + // FIXME(flip1995): See comment in `mangle_typeid_for_fnabi`. + let mut cx = &mut SymbolMangler { + tcx, + start_offset: 0, + paths: FxHashMap::default(), + types: FxHashMap::default(), + consts: FxHashMap::default(), + binders: vec![], + out: String::new(), + }; + cx = cx.print_def_path(trait_ref.def_id(), &[]).unwrap(); + std::mem::take(&mut cx.out) +} + struct BinderLevel { /// The range of distances from the root of what's /// being printed, to the lifetimes in a binder. 
From e1c1d0f8c2fde787e346420b47c9205f52dc04d6 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 13:58:25 +0100 Subject: [PATCH 4/8] Add llvm.type.checked.load intrinsic Add the intrinsic declare {i8*, i1} @llvm.type.checked.load(i8* %ptr, i32 %offset, metadata %type) This is used in the VFE optimization when lowering loading functions from vtables to LLVM IR. The `metadata` is used to map the function to all vtables this function could belong to. This ensures that functions from vtables that might be used somewhere won't get removed. --- .../rustc_codegen_gcc/src/intrinsic/mod.rs | 10 ++++ compiler/rustc_codegen_llvm/src/context.rs | 8 +-- compiler/rustc_codegen_llvm/src/intrinsic.rs | 10 ++++ compiler/rustc_codegen_ssa/src/meth.rs | 52 +++++++++++++++---- compiler/rustc_codegen_ssa/src/mir/block.rs | 18 +++++-- .../rustc_codegen_ssa/src/traits/intrinsic.rs | 8 +++ 6 files changed, 88 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index c6681de68e267..5fbdedac0c45c 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -356,6 +356,16 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { self.context.new_rvalue_from_int(self.int_type, 0) } + fn type_checked_load( + &mut self, + _llvtable: Self::Value, + _vtable_byte_offset: u64, + _typeid: Self::Value, + ) -> Self::Value { + // Unsupported. 
+ self.context.new_rvalue_from_int(self.int_type, 0) + } + fn va_start(&mut self, _va_list: RValue<'gcc>) -> RValue<'gcc> { unimplemented!(); } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 27fe60161d599..b5c31fcebe0c2 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -665,6 +665,7 @@ impl<'ll> CodegenCx<'ll, '_> { let t_isize = self.type_isize(); let t_f32 = self.type_f32(); let t_f64 = self.type_f64(); + let t_metadata = self.type_metadata(); ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32); ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32); @@ -890,11 +891,12 @@ impl<'ll> CodegenCx<'ll, '_> { ifn!("llvm.instrprof.increment", fn(i8p, t_i64, t_i32, t_i32) -> void); } - ifn!("llvm.type.test", fn(i8p, self.type_metadata()) -> i1); + ifn!("llvm.type.test", fn(i8p, t_metadata) -> i1); + ifn!("llvm.type.checked.load", fn(i8p, t_i32, t_metadata) -> mk_struct! 
{i8p, i1}); if self.sess().opts.debuginfo != DebugInfo::None { - ifn!("llvm.dbg.declare", fn(self.type_metadata(), self.type_metadata()) -> void); - ifn!("llvm.dbg.value", fn(self.type_metadata(), t_i64, self.type_metadata()) -> void); + ifn!("llvm.dbg.declare", fn(t_metadata, t_metadata) -> void); + ifn!("llvm.dbg.value", fn(t_metadata, t_i64, t_metadata) -> void); } None } diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 9927f5f399bcd..a18f5b9dd7f9c 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -406,6 +406,16 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> { self.call_intrinsic("llvm.type.test", &[bitcast, typeid]) } + fn type_checked_load( + &mut self, + llvtable: &'ll Value, + vtable_byte_offset: u64, + typeid: &'ll Value, + ) -> Self::Value { + let vtable_byte_offset = self.const_i32(vtable_byte_offset as i32); + self.call_intrinsic("llvm.type.checked.load", &[llvtable, vtable_byte_offset, typeid]) + } + fn va_start(&mut self, va_list: &'ll Value) -> &'ll Value { self.call_intrinsic("llvm.va_start", &[va_list]) } diff --git a/compiler/rustc_codegen_ssa/src/meth.rs b/compiler/rustc_codegen_ssa/src/meth.rs index 00f101595f27a..5203ebfad75de 100644 --- a/compiler/rustc_codegen_ssa/src/meth.rs +++ b/compiler/rustc_codegen_ssa/src/meth.rs @@ -1,6 +1,8 @@ use crate::traits::*; -use rustc_middle::ty::{self, Ty}; +use rustc_middle::ty::{self, subst::GenericArgKind, ExistentialPredicate, Ty, TyCtxt}; +use rustc_session::config::Lto; +use rustc_symbol_mangling::typeid_for_trait_ref; use rustc_target::abi::call::FnAbi; #[derive(Copy, Clone, Debug)] @@ -15,20 +17,32 @@ impl<'a, 'tcx> VirtualIndex { self, bx: &mut Bx, llvtable: Bx::Value, + ty: Ty<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, ) -> Bx::Value { // Load the data pointer from the object. 
- debug!("get_fn({:?}, {:?})", llvtable, self); - + debug!("get_fn({llvtable:?}, {ty:?}, {self:?})"); let llty = bx.fn_ptr_backend_type(fn_abi); let llvtable = bx.pointercast(llvtable, bx.type_ptr_to(llty)); - let ptr_align = bx.tcx().data_layout.pointer_align.abi; - let gep = bx.inbounds_gep(llty, llvtable, &[bx.const_usize(self.0)]); - let ptr = bx.load(llty, gep, ptr_align); - bx.nonnull_metadata(ptr); - // Vtable loads are invariant. - bx.set_invariant_load(ptr); - ptr + + if bx.cx().sess().opts.debugging_opts.virtual_function_elimination + && bx.cx().sess().lto() == Lto::Fat + { + let typeid = + bx.typeid_metadata(typeid_for_trait_ref(bx.tcx(), get_trait_ref(bx.tcx(), ty))); + let vtable_byte_offset = self.0 * bx.data_layout().pointer_size.bytes(); + let type_checked_load = bx.type_checked_load(llvtable, vtable_byte_offset, typeid); + let func = bx.extract_value(type_checked_load, 0); + bx.pointercast(func, llty) + } else { + let ptr_align = bx.tcx().data_layout.pointer_align.abi; + let gep = bx.inbounds_gep(llty, llvtable, &[bx.const_usize(self.0)]); + let ptr = bx.load(llty, gep, ptr_align); + bx.nonnull_metadata(ptr); + // Vtable loads are invariant. + bx.set_invariant_load(ptr); + ptr + } } pub fn get_usize>( @@ -50,6 +64,24 @@ impl<'a, 'tcx> VirtualIndex { } } +fn get_trait_ref<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> ty::PolyExistentialTraitRef<'tcx> { + for arg in ty.peel_refs().walk() { + if let GenericArgKind::Type(ty) = arg.unpack() { + if let ty::Dynamic(trait_refs, _) = ty.kind() { + return trait_refs[0].map_bound(|trait_ref| match trait_ref { + ExistentialPredicate::Trait(tr) => tr, + ExistentialPredicate::Projection(proj) => proj.trait_ref(tcx), + ExistentialPredicate::AutoTrait(_) => { + bug!("auto traits don't have functions") + } + }); + } + } + } + + bug!("expected a `dyn Trait` ty, found {ty:?}") +} + /// Creates a dynamic vtable for the given type and vtable origin. /// This is used only for objects. 
/// diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index 03ef6d50d44cd..db348f2bdd507 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -401,7 +401,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args = &args[..1]; ( meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) - .get_fn(&mut bx, vtable, &fn_abi), + .get_fn(&mut bx, vtable, ty, &fn_abi), fn_abi, ) } @@ -819,9 +819,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { // the data pointer as the first argument match op.val { Pair(data_ptr, meta) => { - llfn = Some( - meth::VirtualIndex::from_index(idx).get_fn(&mut bx, meta, &fn_abi), - ); + llfn = Some(meth::VirtualIndex::from_index(idx).get_fn( + &mut bx, + meta, + op.layout.ty, + &fn_abi, + )); llargs.push(data_ptr); continue 'make_args; } @@ -829,7 +832,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } } else if let Ref(data_ptr, Some(meta), _) = op.val { // by-value dynamic dispatch - llfn = Some(meth::VirtualIndex::from_index(idx).get_fn(&mut bx, meta, &fn_abi)); + llfn = Some(meth::VirtualIndex::from_index(idx).get_fn( + &mut bx, + meta, + op.layout.ty, + &fn_abi, + )); llargs.push(data_ptr); continue; } else { diff --git a/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs b/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs index 02be6cd360c72..7755e67938c32 100644 --- a/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs +++ b/compiler/rustc_codegen_ssa/src/traits/intrinsic.rs @@ -22,6 +22,14 @@ pub trait IntrinsicCallMethods<'tcx>: BackendTypes { fn expect(&mut self, cond: Self::Value, expected: bool) -> Self::Value; /// Trait method used to test whether a given pointer is associated with a type identifier. 
fn type_test(&mut self, pointer: Self::Value, typeid: Self::Value) -> Self::Value; + /// Trait method used to load a function while testing if it is associated with a type + /// identifier. + fn type_checked_load( + &mut self, + llvtable: Self::Value, + vtable_byte_offset: u64, + typeid: Self::Value, + ) -> Self::Value; /// Trait method used to inject `va_start` on the "spoofed" `VaListImpl` in /// Rust defined C-variadic functions. fn va_start(&mut self, val: Self::Value) -> Self::Value; From e96e6e2c89265613e136369ccf314c408e1eb002 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 14:02:54 +0100 Subject: [PATCH 5/8] Add metadata generation for vtables when using VFE This adds the typeid and `vcall_visibility` metadata to vtables when the -Zvirtual-function-elimination flag is set. The typeid is generated in the same way as for the `llvm.type.checked.load` intrinsic from the trait_ref. The offset that is added to the typeid is always 0. This is because LLVM assumes that vtables are constructed according to the definition in the Itanium ABI. This includes an "address point" of the vtable. In C++ this is the offset in the vtable where information for RTTI is placed. Since there is no RTTI information in Rust's vtables, this "address point" is always 0. This "address point" in combination with the offset passed to the `llvm.type.checked.load` intrinsic determines the final function that should be loaded from the vtable in the `WholeProgramDevirtualization` pass in LLVM. That's why the `llvm.type.checked.load` intrinsics are generated with the typeid of the trait, rather than with that of the function that is called. This matches what `clang` does for C++. The vcall_visibility metadata depends on three factors: 1. LTO level: Currently this is always fat LTO, because LLVM only supports this optimization with fat LTO. 2. Visibility of the trait: If the trait is publicly visible, VFE can only act on its vtables after linking. 3.
Number of CGUs: if there is more than one CGU, also vtables with restricted visibility could be seen outside of the CGU, so VFE can only act on them after linking. To reflect this, there are three visibility levels: Public, LinkageUnit, and TranslationUnit. --- Cargo.lock | 1 + compiler/rustc_codegen_llvm/Cargo.toml | 1 + .../src/debuginfo/metadata.rs | 99 +++++++++++++++++-- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 7 ++ .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 5 + 5 files changed, 106 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5716ee2fc1499..f586e9facc62a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3695,6 +3695,7 @@ dependencies = [ "rustc_serialize", "rustc_session", "rustc_span", + "rustc_symbol_mangling", "rustc_target", "smallvec", "tracing", diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 67183ff5887e9..9bded25c09e07 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -19,6 +19,7 @@ rustc-demangle = "0.1.21" rustc_arena = { path = "../rustc_arena" } rustc_attr = { path = "../rustc_attr" } rustc_codegen_ssa = { path = "../rustc_codegen_ssa" } +rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_fs_util = { path = "../rustc_fs_util" } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index f5cbbc7ca9198..d5f39a4567066 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -30,20 +30,21 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_index::vec::{Idx, IndexVec}; use rustc_middle::bug; use rustc_middle::mir::{self, GeneratorLayout}; -use rustc_middle::ty::layout::LayoutOf; -use rustc_middle::ty::layout::TyAndLayout; +use 
rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; use rustc_middle::ty::subst::GenericArgKind; -use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt}; -use rustc_session::config::{self, DebugInfo}; +use rustc_middle::ty::{ + self, AdtKind, Instance, ParamEnv, PolyExistentialTraitRef, Ty, TyCtxt, Visibility, +}; +use rustc_session::config::{self, DebugInfo, Lto}; use rustc_span::symbol::Symbol; use rustc_span::FileName; -use rustc_span::FileNameDisplayPreference; -use rustc_span::{self, SourceFile}; +use rustc_span::{self, FileNameDisplayPreference, SourceFile}; +use rustc_symbol_mangling::typeid_for_trait_ref; use rustc_target::abi::{Align, Size}; use smallvec::smallvec; use tracing::debug; -use libc::{c_longlong, c_uint}; +use libc::{c_char, c_longlong, c_uint}; use std::borrow::Cow; use std::fmt::{self, Write}; use std::hash::{Hash, Hasher}; @@ -1468,6 +1469,84 @@ fn build_vtable_type_di_node<'ll, 'tcx>( .di_node } +fn vcall_visibility_metadata<'ll, 'tcx>( + cx: &CodegenCx<'ll, 'tcx>, + ty: Ty<'tcx>, + trait_ref: Option>, + vtable: &'ll Value, +) { + enum VCallVisibility { + Public = 0, + LinkageUnit = 1, + TranslationUnit = 2, + } + + let Some(trait_ref) = trait_ref else { return }; + + let trait_ref_self = trait_ref.with_self_ty(cx.tcx, ty); + let trait_ref_self = cx.tcx.erase_regions(trait_ref_self); + let trait_def_id = trait_ref_self.def_id(); + let trait_vis = cx.tcx.visibility(trait_def_id); + + let cgus = cx.sess().codegen_units(); + let single_cgu = cgus == 1; + + let lto = cx.sess().lto(); + + // Since LLVM requires full LTO for the virtual function elimination optimization to apply, + // only the `Lto::Fat` cases are relevant currently. + let vcall_visibility = match (lto, trait_vis, single_cgu) { + // If there is not LTO and the visibility in public, we have to assume that the vtable can + // be seen from anywhere. With multiple CGUs, the vtable is quasi-public. 
+ (Lto::No | Lto::ThinLocal, Visibility::Public, _) + | (Lto::No, Visibility::Restricted(_) | Visibility::Invisible, false) => { + VCallVisibility::Public + } + // With LTO and a quasi-public visibility, the usages of the functions of the vtable are + // all known by the `LinkageUnit`. + // FIXME: LLVM only supports this optimization for `Lto::Fat` currently. Once it also + // supports `Lto::Thin` the `VCallVisibility` may have to be adjusted for those. + (Lto::Fat | Lto::Thin, Visibility::Public, _) + | ( + Lto::ThinLocal | Lto::Thin | Lto::Fat, + Visibility::Restricted(_) | Visibility::Invisible, + false, + ) => VCallVisibility::LinkageUnit, + // If there is only one CGU, private vtables can only be seen by that CGU/translation unit + // and therefore we know of all usages of functions in the vtable. + (_, Visibility::Restricted(_) | Visibility::Invisible, true) => { + VCallVisibility::TranslationUnit + } + }; + + let trait_ref_typeid = typeid_for_trait_ref(cx.tcx, trait_ref); + + unsafe { + let typeid = llvm::LLVMMDStringInContext( + cx.llcx, + trait_ref_typeid.as_ptr() as *const c_char, + trait_ref_typeid.as_bytes().len() as c_uint, + ); + let v = [cx.const_usize(0), typeid]; + llvm::LLVMRustGlobalAddMetadata( + vtable, + llvm::MD_type as c_uint, + llvm::LLVMValueAsMetadata(llvm::LLVMMDNodeInContext( + cx.llcx, + v.as_ptr(), + v.len() as c_uint, + )), + ); + let vcall_visibility = llvm::LLVMValueAsMetadata(cx.const_u64(vcall_visibility as u64)); + let vcall_visibility_metadata = llvm::LLVMMDNodeInContext2(cx.llcx, &vcall_visibility, 1); + llvm::LLVMGlobalSetMetadata( + vtable, + llvm::MetadataType::MD_vcall_visibility as c_uint, + vcall_visibility_metadata, + ); + } +} + /// Creates debug information for the given vtable, which is for the /// given type. 
/// @@ -1478,6 +1557,12 @@ pub fn create_vtable_di_node<'ll, 'tcx>( poly_trait_ref: Option>, vtable: &'ll Value, ) { + // FIXME(flip1995): The virtual function elimination optimization only works with full LTO in + // LLVM at the moment. + if cx.sess().opts.debugging_opts.virtual_function_elimination && cx.sess().lto() == Lto::Fat { + vcall_visibility_metadata(cx, ty, poly_trait_ref, vtable); + } + if cx.dbg_cx.is_none() { return; } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 4f7b082dac4c2..b831423994f24 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -442,6 +442,7 @@ pub enum MetadataType { MD_nonnull = 11, MD_align = 17, MD_type = 19, + MD_vcall_visibility = 28, MD_noundef = 29, } @@ -1067,6 +1068,7 @@ extern "C" { pub fn LLVMReplaceAllUsesWith<'a>(OldVal: &'a Value, NewVal: &'a Value); pub fn LLVMSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Node: &'a Value); pub fn LLVMGlobalSetMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata); + pub fn LLVMRustGlobalAddMetadata<'a>(Val: &'a Value, KindID: c_uint, Metadata: &'a Metadata); pub fn LLVMValueAsMetadata(Node: &Value) -> &Metadata; // Operations on constants of any type @@ -1080,6 +1082,11 @@ extern "C" { Vals: *const &'a Value, Count: c_uint, ) -> &'a Value; + pub fn LLVMMDNodeInContext2<'a>( + C: &'a Context, + Vals: *const &'a Metadata, + Count: size_t, + ) -> &'a Metadata; pub fn LLVMAddNamedMetadataOperand<'a>(M: &'a Module, Name: *const c_char, Val: &'a Value); // Operations on scalar constants diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index a2dd0a7bcdc61..a52d534024206 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -681,6 +681,11 @@ extern "C" LLVMValueRef LLVMRustMetadataAsValue(LLVMContextRef C, LLVMMetadataRe return 
wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD))); } +extern "C" void LLVMRustGlobalAddMetadata( + LLVMValueRef Global, unsigned Kind, LLVMMetadataRef MD) { + unwrap(Global)->addMetadata(Kind, *unwrap(MD)); +} + extern "C" LLVMRustDIBuilderRef LLVMRustDIBuilderCreate(LLVMModuleRef M) { return new DIBuilder(*unwrap(M)); } From 996c6b7964a5d587f253a9b469cb00e20dc1b5fe Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 21 Apr 2022 14:16:02 +0100 Subject: [PATCH 6/8] Add test for VFE optimization --- .../codegen/virtual-function-elimination.rs | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 src/test/codegen/virtual-function-elimination.rs diff --git a/src/test/codegen/virtual-function-elimination.rs b/src/test/codegen/virtual-function-elimination.rs new file mode 100644 index 0000000000000..46f96d505b882 --- /dev/null +++ b/src/test/codegen/virtual-function-elimination.rs @@ -0,0 +1,99 @@ +// compile-flags: -Zvirtual-function-elimination -Clto -O -Csymbol-mangling-version=v0 + +// CHECK: @vtable.0 = {{.*}}, !type ![[TYPE0:[0-9]+]], !vcall_visibility ![[VCALL_VIS0:[0-9]+]] +// CHECK: @vtable.1 = {{.*}}, !type ![[TYPE1:[0-9]+]], !vcall_visibility ![[VCALL_VIS0:[0-9]+]] +// CHECK: @vtable.2 = {{.*}}, !type ![[TYPE2:[0-9]+]], !vcall_visibility ![[VCALL_VIS2:[0-9]+]] + +#![crate_type = "lib"] +#![allow(incomplete_features)] +#![feature(unsized_locals)] + +use std::rc::Rc; + +trait T { + // CHECK-LABEL: ; ::used + fn used(&self) -> i32 { + 1 + } + // CHECK-LABEL: ; ::used_through_sub_trait + fn used_through_sub_trait(&self) -> i32 { + 3 + } + // CHECK-LABEL: ; ::by_rc + fn by_rc(self: Rc) -> i32 { + self.used() + self.used() + } + // CHECK-LABEL-NOT: {{.*}}::unused + fn unused(&self) -> i32 { + 2 + } + // CHECK-LABEL-NOT: {{.*}}::by_rc_unused + fn by_rc_unused(self: Rc) -> i32 { + self.by_rc() + } +} + +trait U: T { + // CHECK-LABEL: ; ::subtrait_used + fn subtrait_used(&self) -> i32 { + 4 + } + // CHECK-LABEL-NOT: {{.*}}::subtrait_unused + fn 
subtrait_unused(&self) -> i32 { + 5 + } +} + +pub trait V { + // CHECK-LABEL: ; ::public_function + fn public_function(&self) -> i32; +} + +#[derive(Copy, Clone)] +struct S; + +impl T for S {} + +impl U for S {} + +impl V for S { + fn public_function(&self) -> i32 { + 6 + } +} + +fn taking_t(t: &dyn T) -> i32 { + // CHECK: @llvm.type.checked.load({{.*}}, i32 24, metadata !"[[MANGLED_TYPE0:[0-9a-zA-Z_]+]]") + t.used() +} + +fn taking_rc_t(t: Rc) -> i32 { + // CHECK: @llvm.type.checked.load({{.*}}, i32 40, metadata !"[[MANGLED_TYPE0:[0-9a-zA-Z_]+]]") + t.by_rc() +} + +fn taking_u(u: &dyn U) -> i32 { + // CHECK: @llvm.type.checked.load({{.*}}, i32 64, metadata !"[[MANGLED_TYPE1:[0-9a-zA-Z_]+]]") + // CHECK: @llvm.type.checked.load({{.*}}, i32 24, metadata !"[[MANGLED_TYPE1:[0-9a-zA-Z_]+]]") + // CHECK: @llvm.type.checked.load({{.*}}, i32 32, metadata !"[[MANGLED_TYPE1:[0-9a-zA-Z_]+]]") + u.subtrait_used() + u.used() + u.used_through_sub_trait() +} + +pub fn taking_v(v: &dyn V) -> i32 { + // CHECK: @llvm.type.checked.load({{.*}}, i32 24, metadata !"NtCsfRpWlKdQPZn_28virtual_function_elimination1V") + v.public_function() +} + +pub fn main() { + let s = S; + taking_t(&s); + taking_rc_t(Rc::new(s)); + taking_u(&s); + taking_v(&s); +} + +// CHECK: ![[TYPE0]] = !{i64 0, !"[[MANGLED_TYPE0]]"} +// CHECK: ![[VCALL_VIS0]] = !{i64 2} +// CHECK: ![[TYPE1]] = !{i64 0, !"[[MANGLED_TYPE1]]"} +// CHECK: ![[TYPE2]] = !{i64 0, !"NtCsfRpWlKdQPZn_28virtual_function_elimination1V"} +// CHECK: ![[VCALL_VIS2]] = !{i64 1} From a93ea7ebc83560c2c62243a9190eff09dd3a7e62 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Mon, 2 May 2022 10:55:39 +0100 Subject: [PATCH 7/8] Add user documentation for -Zvirtual-function-elimination --- .../virtual-function-elimination.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/doc/unstable-book/src/compiler-flags/virtual-function-elimination.md diff --git a/src/doc/unstable-book/src/compiler-flags/virtual-function-elimination.md 
b/src/doc/unstable-book/src/compiler-flags/virtual-function-elimination.md new file mode 100644 index 0000000000000..c6516d838ddc8 --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/virtual-function-elimination.md @@ -0,0 +1,39 @@ +# `virtual-function-elimination` + +This option controls whether LLVM runs the Virtual Function Elimination (VFE) +optimization. This optimization is only available with LTO, so this flag can +only be passed if [`-Clto`][Clto] is also passed. + +VFE makes it possible to remove functions from vtables that are never +dynamically called by the rest of the code. Without this flag, LLVM makes the +really conservative assumption that if any function in a vtable is called, no +function that is referenced by this vtable can be removed. With this flag +additional information is given to LLVM, so that it can determine which +functions are actually called and remove the unused functions. + +## Limitations + +At the time of writing, this flag may remove vtable functions too eagerly. One +such example is in this code: + +```rust +trait Foo { fn foo(&self) { println!("foo") } } + +impl Foo for usize {} + +pub struct FooBox(Box<dyn Foo>); + +pub fn make_foo() -> FooBox { FooBox(Box::new(0)) } + +#[inline] +pub fn f(a: FooBox) { a.0.foo() } +``` + +In the above code the `Foo` trait is private, so an assumption is made that its +functions can only be seen/called from the current crate and can therefore get +optimized out, if unused. However, with `make_foo` you can produce a wrapped +`dyn Foo` type outside of the current crate, which can then be used in `f`. Due +to inlining of `f`, `Foo::foo` can then be called from a foreign crate. This can +lead to miscompilations.
+ +[Clto]: https://doc.rust-lang.org/rustc/codegen-options/index.html#lto From 195f2082002c9db456e0fde8c1d5db79929ae293 Mon Sep 17 00:00:00 2001 From: flip1995 Date: Thu, 12 May 2022 11:04:42 +0100 Subject: [PATCH 8/8] Add VFE test for 32 bit The offset in the llvm.type.checked.load intrinsic differs on 32 bit platforms --- .../virtual-function-elimination-32bit.rs | 35 +++++++++++++++++++ .../codegen/virtual-function-elimination.rs | 1 + 2 files changed, 36 insertions(+) create mode 100644 src/test/codegen/virtual-function-elimination-32bit.rs diff --git a/src/test/codegen/virtual-function-elimination-32bit.rs b/src/test/codegen/virtual-function-elimination-32bit.rs new file mode 100644 index 0000000000000..6f963363a998c --- /dev/null +++ b/src/test/codegen/virtual-function-elimination-32bit.rs @@ -0,0 +1,35 @@ +// compile-flags: -Zvirtual-function-elimination -Clto -O -Csymbol-mangling-version=v0 +// ignore-64bit + +// CHECK: @vtable.0 = {{.*}}, !type ![[TYPE0:[0-9]+]], !vcall_visibility ![[VCALL_VIS0:[0-9]+]] + +#![crate_type = "lib"] + +trait T { + // CHECK-LABEL: ; ::used + fn used(&self) -> i32 { + 1 + } + // CHECK-LABEL-NOT: {{.*}}::unused + fn unused(&self) -> i32 { + 2 + } +} + +#[derive(Copy, Clone)] +struct S; + +impl T for S {} + +fn taking_t(t: &dyn T) -> i32 { + // CHECK: @llvm.type.checked.load({{.*}}, i32 12, metadata !"[[MANGLED_TYPE0:[0-9a-zA-Z_]+]]") + t.used() +} + +pub fn main() { + let s = S; + taking_t(&s); +} + +// CHECK: ![[TYPE0]] = !{i32 0, !"[[MANGLED_TYPE0]]"} +// CHECK: ![[VCALL_VIS0]] = !{i64 2} diff --git a/src/test/codegen/virtual-function-elimination.rs b/src/test/codegen/virtual-function-elimination.rs index 46f96d505b882..4cf7e12fee215 100644 --- a/src/test/codegen/virtual-function-elimination.rs +++ b/src/test/codegen/virtual-function-elimination.rs @@ -1,4 +1,5 @@ // compile-flags: -Zvirtual-function-elimination -Clto -O -Csymbol-mangling-version=v0 +// ignore-32bit // CHECK: @vtable.0 = {{.*}}, !type ![[TYPE0:[0-9]+]], 
!vcall_visibility ![[VCALL_VIS0:[0-9]+]] // CHECK: @vtable.1 = {{.*}}, !type ![[TYPE1:[0-9]+]], !vcall_visibility ![[VCALL_VIS0:[0-9]+]]