Skip to content

Commit 5ad978d

Browse files
committed
Auto merge of rust-lang#122582 - scottmcm:swap-intrinsic-v2, r=<try>
Let codegen decide when to `mem::swap` with immediates Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea. Thus this PR introduces a new `typed_swap` intrinsic with a fallback body, and replaces that fallback implementation when swapping immediates or scalar pairs. r? oli-obk Replaces rust-lang#111744, and means we'll never need more libs PRs like rust-lang#111803 or rust-lang#107140
2 parents 35dfc67 + 6d2cb39 commit 5ad978d

File tree

16 files changed

+349
-33
lines changed

16 files changed

+349
-33
lines changed

compiler/rustc_codegen_ssa/src/mir/intrinsic.rs

+24
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::traits::*;
1010
use crate::MemFlags;
1111

1212
use rustc_middle::ty::{self, Ty, TyCtxt};
13+
use rustc_session::config::OptLevel;
1314
use rustc_span::{sym, Span};
1415
use rustc_target::abi::{
1516
call::{FnAbi, PassMode},
@@ -76,6 +77,29 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
7677
let name = bx.tcx().item_name(def_id);
7778
let name_str = name.as_str();
7879

80+
// If we're swapping something that's *not* an `OperandValue::Ref`,
81+
// then we can do it directly and avoid the alloca.
82+
// Otherwise, we'll let the fallback MIR body take care of it.
83+
if let sym::typed_swap = name {
84+
let pointee_ty = fn_args.type_at(0);
85+
let pointee_layout = bx.layout_of(pointee_ty);
86+
if !bx.is_backend_ref(pointee_layout)
87+
// But if we're not going to optimize, trying to use the fallback
88+
// body just makes things worse, so don't bother.
89+
|| bx.sess().opts.optimize == OptLevel::No
90+
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
91+
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
92+
// block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
93+
// into the (unoptimized) direct swapping implementation, so we disable it.
94+
|| bx.sess().target.arch == "spirv"
95+
{
96+
let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
97+
let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
98+
bx.typed_place_swap(x_place, y_place);
99+
return Ok(());
100+
}
101+
}
102+
79103
let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
80104
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
81105

compiler/rustc_codegen_ssa/src/traits/builder.rs

+52-2
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,24 @@
11
use super::abi::AbiBuilderMethods;
22
use super::asm::AsmBuilderMethods;
3+
use super::consts::ConstMethods;
34
use super::coverageinfo::CoverageInfoBuilderMethods;
45
use super::debuginfo::DebugInfoBuilderMethods;
56
use super::intrinsic::IntrinsicCallMethods;
67
use super::misc::MiscMethods;
7-
use super::type_::{ArgAbiMethods, BaseTypeMethods};
8+
use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
89
use super::{HasCodegen, StaticBuilderMethods};
910

1011
use crate::common::{
1112
AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
1213
};
13-
use crate::mir::operand::OperandRef;
14+
use crate::mir::operand::{OperandRef, OperandValue};
1415
use crate::mir::place::PlaceRef;
1516
use crate::MemFlags;
1617

1718
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
1819
use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
1920
use rustc_middle::ty::Ty;
21+
use rustc_session::config::OptLevel;
2022
use rustc_span::Span;
2123
use rustc_target::abi::call::FnAbi;
2224
use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
@@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
267269
flags: MemFlags,
268270
);
269271

272+
/// *Typed* copy for non-overlapping places.
273+
///
274+
/// Has a default implementation in terms of `memcpy`, but specific backends
275+
/// can override to do something smarter if possible.
276+
///
277+
/// (For example, typed load-stores with alias metadata.)
278+
fn typed_place_copy(
279+
&mut self,
280+
dst: PlaceRef<'tcx, Self::Value>,
281+
src: PlaceRef<'tcx, Self::Value>,
282+
) {
283+
debug_assert!(src.llextra.is_none());
284+
debug_assert!(dst.llextra.is_none());
285+
debug_assert_eq!(dst.layout.size, src.layout.size);
286+
if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
287+
// If we're not optimizing, the aliasing information from `memcpy`
288+
// isn't useful, so just load-store the value for smaller code.
289+
let temp = self.load_operand(src);
290+
temp.val.store(self, dst);
291+
} else if !dst.layout.is_zst() {
292+
let bytes = self.const_usize(dst.layout.size.bytes());
293+
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
294+
}
295+
}
296+
297+
/// *Typed* swap for non-overlapping places.
298+
///
299+
/// Avoids `alloca`s for Immediates and ScalarPairs.
300+
///
301+
/// FIXME: Maybe do something smarter for Ref types too?
302+
/// For now, the `typed_swap` intrinsic just doesn't call this for those
303+
/// cases (in non-debug), preferring the fallback body instead.
304+
fn typed_place_swap(
305+
&mut self,
306+
left: PlaceRef<'tcx, Self::Value>,
307+
right: PlaceRef<'tcx, Self::Value>,
308+
) {
309+
let mut temp = self.load_operand(left);
310+
if let OperandValue::Ref(..) = temp.val {
311+
// The SSA value isn't stand-alone, so we need to copy it elsewhere
312+
let alloca = PlaceRef::alloca(self, left.layout);
313+
self.typed_place_copy(alloca, left);
314+
temp = self.load_operand(alloca);
315+
}
316+
self.typed_place_copy(left, right);
317+
temp.val.store(self, right);
318+
}
319+
270320
fn select(
271321
&mut self,
272322
cond: Self::Value,

compiler/rustc_codegen_ssa/src/traits/type_.rs

+14
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
120120
immediate: bool,
121121
) -> Self::Type;
122122

123+
/// A type that produces an [`OperandValue::Ref`] when loaded.
124+
///
125+
/// AKA one that's not a ZST, not `is_backend_immediate`, and
126+
/// not `is_backend_scalar_pair`. For such a type, a
127+
/// [`load_operand`] doesn't actually `load` anything.
128+
///
129+
/// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
130+
/// [`load_operand`]: super::BuilderMethods::load_operand
131+
fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
132+
!(layout.is_zst()
133+
|| self.is_backend_immediate(layout)
134+
|| self.is_backend_scalar_pair(layout))
135+
}
136+
123137
/// A type that can be used in a [`super::BuilderMethods::load`] +
124138
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
125139
/// such as a MIR `*_0 = *_1`.

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ use rustc_span::symbol::{sym, Symbol};
2121
use rustc_target::abi::Size;
2222

2323
use super::{
24-
util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, MPlaceTy, Machine, OpTy,
25-
Pointer,
24+
memory::MemoryKind, util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx,
25+
MPlaceTy, Machine, OpTy, Pointer,
2626
};
2727

2828
use crate::fluent_generated as fluent;
@@ -415,6 +415,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
415415
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
416416
self.write_scalar(result, dest)?;
417417
}
418+
sym::typed_swap => {
419+
self.typed_swap_intrinsic(&args[0], &args[1])?;
420+
}
418421

419422
sym::vtable_size => {
420423
let ptr = self.read_pointer(&args[0])?;
@@ -608,6 +611,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
608611
self.mem_copy(src, dst, size, nonoverlapping)
609612
}
610613

614+
/// Does a *typed* swap of `*left` and `*right`.
615+
fn typed_swap_intrinsic(
616+
&mut self,
617+
left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
618+
right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
619+
) -> InterpResult<'tcx> {
620+
let left = self.deref_pointer(left)?;
621+
let right = self.deref_pointer(right)?;
622+
debug_assert_eq!(left.layout, right.layout);
623+
let kind = MemoryKind::Stack;
624+
let temp = self.allocate(left.layout, kind)?;
625+
self.copy_op(&left, &temp)?;
626+
self.copy_op(&right, &left)?;
627+
self.copy_op(&temp, &right)?;
628+
self.deallocate_ptr(temp.ptr(), None, kind)?;
629+
Ok(())
630+
}
631+
611632
pub(crate) fn write_bytes_intrinsic(
612633
&mut self,
613634
dst: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+2
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,8 @@ pub fn check_intrinsic_type(
500500
(1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
501501
}
502502

503+
sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),
504+
503505
sym::discriminant_value => {
504506
let assoc_items = tcx.associated_item_def_ids(
505507
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,7 @@ symbols! {
18351835
type_macros,
18361836
type_name,
18371837
type_privacy_lints,
1838+
typed_swap,
18381839
u128,
18391840
u128_legacy_const_max,
18401841
u128_legacy_const_min,

library/core/src/intrinsics.rs

+22
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
use crate::marker::DiscriminantKind;
6767
use crate::marker::Tuple;
6868
use crate::mem::align_of;
69+
use crate::ptr;
6970

7071
pub mod mir;
7172
pub mod simd;
@@ -2628,6 +2629,27 @@ pub const fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
26282629
false
26292630
}
26302631

2632+
/// Non-overlapping *typed* swap of a single value.
2633+
///
2634+
/// The codegen backends will replace this with a better implementation when
2635+
/// `T` is a simple type that can be loaded and stored as an immediate.
2636+
///
2637+
/// The stabilized form of this intrinsic is [`crate::mem::swap`].
2638+
///
2639+
/// # Safety
2640+
///
2641+
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
2642+
#[rustc_nounwind]
2643+
#[inline]
2644+
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
2645+
// This has fallback `const fn` MIR, so shouldn't need stability, see #122652
2646+
#[rustc_const_unstable(feature = "const_typed_swap", issue = "none")]
2647+
pub const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
2648+
// SAFETY: The caller provided single non-overlapping items behind
2649+
// pointers, so swapping them with `count: 1` is fine.
2650+
unsafe { ptr::swap_nonoverlapping(x, y, 1) };
2651+
}
2652+
26312653
/// Returns whether we should check for library UB. This evaluate to the value of `cfg!(debug_assertions)`
26322654
/// during monomorphization.
26332655
///

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@
169169
#![feature(const_try)]
170170
#![feature(const_type_id)]
171171
#![feature(const_type_name)]
172+
#![feature(const_typed_swap)]
172173
#![feature(const_unicode_case_lookup)]
173174
#![feature(const_unsafecell_get_mut)]
174175
#![feature(const_waker)]

library/core/src/mem/mod.rs

+3-25
Original file line numberDiff line numberDiff line change
@@ -726,31 +726,9 @@ pub unsafe fn uninitialized<T>() -> T {
726726
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
727727
#[rustc_diagnostic_item = "mem_swap"]
728728
pub const fn swap<T>(x: &mut T, y: &mut T) {
729-
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
730-
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
731-
// block optimization in `swap_slice` is hard to rewrite back
732-
// into the (unoptimized) direct swapping implementation, so we disable it.
733-
#[cfg(not(any(target_arch = "spirv")))]
734-
{
735-
// For types that are larger multiples of their alignment, the simple way
736-
// tends to copy the whole thing to stack rather than doing it one part
737-
// at a time, so instead treat them as one-element slices and piggy-back
738-
// the slice optimizations that will split up the swaps.
739-
if const { size_of::<T>() / align_of::<T>() > 2 } {
740-
// SAFETY: exclusive references always point to one non-overlapping
741-
// element and are non-null and properly aligned.
742-
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
743-
}
744-
}
745-
746-
// If a scalar consists of just a small number of alignment units, let
747-
// the codegen just swap those pieces directly, as it's likely just a
748-
// few instructions and anything else is probably overcomplicated.
749-
//
750-
// Most importantly, this covers primitives and simd types that tend to
751-
// have size=align where doing anything else can be a pessimization.
752-
// (This will also be used for ZSTs, though any solution works for them.)
753-
swap_simple(x, y);
729+
// SAFETY: `&mut` guarantees these are typed readable and writable
730+
// as well as non-overlapping.
731+
unsafe { intrinsics::typed_swap(x, y) }
754732
}
755733

756734
/// Same as [`swap`] semantically, but always uses the simple implementation.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#![feature(core_intrinsics)]
2+
#![feature(rustc_attrs)]
3+
4+
use std::intrinsics::typed_swap;
5+
use std::ptr::addr_of_mut;
6+
7+
fn invalid_array() {
8+
let mut a = [1_u8; 100];
9+
let mut b = [2_u8; 100];
10+
unsafe {
11+
let a = addr_of_mut!(a).cast::<[bool; 100]>();
12+
let b = addr_of_mut!(b).cast::<[bool; 100]>();
13+
typed_swap(a, b); //~ERROR: constructing invalid value
14+
}
15+
}
16+
17+
fn main() {
18+
invalid_array();
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: Undefined Behavior: constructing invalid value at [0]: encountered 0x02, but expected a boolean
2+
--> $DIR/typed-swap-invalid-array.rs:LL:CC
3+
|
4+
LL | typed_swap(a, b);
5+
| ^^^^^^^^^^^^^^^^ constructing invalid value at [0]: encountered 0x02, but expected a boolean
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `invalid_array` at $DIR/typed-swap-invalid-array.rs:LL:CC
11+
note: inside `main`
12+
--> $DIR/typed-swap-invalid-array.rs:LL:CC
13+
|
14+
LL | invalid_array();
15+
| ^^^^^^^^^^^^^^^
16+
17+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
18+
19+
error: aborting due to 1 previous error
20+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#![feature(core_intrinsics)]
2+
#![feature(rustc_attrs)]
3+
4+
use std::intrinsics::typed_swap;
5+
use std::ptr::addr_of_mut;
6+
7+
fn invalid_scalar() {
8+
let mut a = 1_u8;
9+
let mut b = 2_u8;
10+
unsafe {
11+
let a = addr_of_mut!(a).cast::<bool>();
12+
let b = addr_of_mut!(b).cast::<bool>();
13+
typed_swap(a, b); //~ERROR: constructing invalid value
14+
}
15+
}
16+
17+
fn main() {
18+
invalid_scalar();
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: Undefined Behavior: constructing invalid value: encountered 0x02, but expected a boolean
2+
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
3+
|
4+
LL | typed_swap(a, b);
5+
| ^^^^^^^^^^^^^^^^ constructing invalid value: encountered 0x02, but expected a boolean
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `invalid_scalar` at $DIR/typed-swap-invalid-scalar.rs:LL:CC
11+
note: inside `main`
12+
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
13+
|
14+
LL | invalid_scalar();
15+
| ^^^^^^^^^^^^^^^^
16+
17+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
18+
19+
error: aborting due to 1 previous error
20+

0 commit comments

Comments
 (0)