Skip to content

Commit 42043c1

Browse files
committed
De-LLVM the unchecked shifts [MCP#693]
This is just one part of the MCP, but it's the one that IMHO removes the most noise from the standard library code. It seems net simpler this way: MIR already supported heterogeneous shifts (mismatched LHS and RHS types) anyway, so this is no more work for backends than before.
1 parent 69fa40c commit 42043c1

File tree

36 files changed

+432
-576
lines changed

36 files changed

+432
-576
lines changed

compiler/rustc_codegen_ssa/src/base.rs

+28-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::back::write::{
55
compute_per_cgu_lto_type, start_async_codegen, submit_codegened_module_to_llvm,
66
submit_post_lto_module_to_llvm, submit_pre_lto_module_to_llvm, ComputedLtoType, OngoingCodegen,
77
};
8-
use crate::common::{IntPredicate, RealPredicate, TypeKind};
8+
use crate::common::{self, IntPredicate, RealPredicate, TypeKind};
99
use crate::errors;
1010
use crate::meth;
1111
use crate::mir;
@@ -33,7 +33,7 @@ use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem};
3333
use rustc_middle::query::Providers;
3434
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, TyAndLayout};
3535
use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
36-
use rustc_session::config::{self, CrateType, EntryFnType, OutputType};
36+
use rustc_session::config::{self, CrateType, EntryFnType, OptLevel, OutputType};
3737
use rustc_session::Session;
3838
use rustc_span::symbol::sym;
3939
use rustc_span::Symbol;
@@ -300,14 +300,32 @@ pub fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
300300
}
301301
}
302302

303-
pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
303+
/// Shifts in MIR are all allowed to have mismatched LHS & RHS types.
304+
///
305+
/// This does all the appropriate conversions needed to pass it to the builder's
306+
/// shift methods, which are UB for out-of-range shifts.
307+
///
308+
/// If `is_unchecked` is false, this masks the RHS to ensure it stays in-bounds.
309+
/// For 32- and 64-bit types, this matches the semantics
310+
/// of Java. (See related discussion on #1877 and #10183.)
311+
///
312+
/// If `is_unchecked` is true, this does no masking, and adds sufficient `assume`
313+
/// calls or operation flags to preserve as much freedom to optimize as possible.
314+
pub fn build_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
304315
bx: &mut Bx,
305316
lhs: Bx::Value,
306-
rhs: Bx::Value,
317+
mut rhs: Bx::Value,
318+
is_unchecked: bool,
307319
) -> Bx::Value {
308320
// Shifts may have any size int on the rhs
309321
let mut rhs_llty = bx.cx().val_ty(rhs);
310322
let mut lhs_llty = bx.cx().val_ty(lhs);
323+
324+
let mask = common::shift_mask_val(bx, lhs_llty, rhs_llty, false);
325+
if !is_unchecked {
326+
rhs = bx.and(rhs, mask);
327+
}
328+
311329
if bx.cx().type_kind(rhs_llty) == TypeKind::Vector {
312330
rhs_llty = bx.cx().element_type(rhs_llty)
313331
}
@@ -317,6 +335,12 @@ pub fn cast_shift_expr_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
317335
let rhs_sz = bx.cx().int_width(rhs_llty);
318336
let lhs_sz = bx.cx().int_width(lhs_llty);
319337
if lhs_sz < rhs_sz {
338+
if is_unchecked && bx.sess().opts.optimize != OptLevel::No {
339+
// FIXME: Use `trunc nuw` once that's available
340+
let inrange = bx.icmp(IntPredicate::IntULE, rhs, mask);
341+
bx.assume(inrange);
342+
}
343+
320344
bx.trunc(rhs, lhs_llty)
321345
} else if lhs_sz > rhs_sz {
322346
// We zero-extend even if the RHS is signed. So e.g. `(x: i32) << -1i8` will zero-extend the

compiler/rustc_codegen_ssa/src/common.rs

+1-40
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
use rustc_hir::LangItem;
44
use rustc_middle::mir;
55
use rustc_middle::ty::Instance;
6-
use rustc_middle::ty::{self, layout::TyAndLayout, Ty, TyCtxt};
6+
use rustc_middle::ty::{self, layout::TyAndLayout, TyCtxt};
77
use rustc_span::Span;
88

9-
use crate::base;
109
use crate::traits::*;
1110

1211
#[derive(Copy, Clone)]
@@ -128,44 +127,6 @@ pub fn build_langcall<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
128127
(bx.fn_abi_of_instance(instance, ty::List::empty()), bx.get_fn_addr(instance), instance)
129128
}
130129

131-
// To avoid UB from LLVM, these two functions mask RHS with an
132-
// appropriate mask unconditionally (i.e., the fallback behavior for
133-
// all shifts). For 32- and 64-bit types, this matches the semantics
134-
// of Java. (See related discussion on #1877 and #10183.)
135-
136-
pub fn build_masked_lshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
137-
bx: &mut Bx,
138-
lhs: Bx::Value,
139-
rhs: Bx::Value,
140-
) -> Bx::Value {
141-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
142-
// #1877, #10183: Ensure that input is always valid
143-
let rhs = shift_mask_rhs(bx, rhs);
144-
bx.shl(lhs, rhs)
145-
}
146-
147-
pub fn build_masked_rshift<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
148-
bx: &mut Bx,
149-
lhs_t: Ty<'tcx>,
150-
lhs: Bx::Value,
151-
rhs: Bx::Value,
152-
) -> Bx::Value {
153-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
154-
// #1877, #10183: Ensure that input is always valid
155-
let rhs = shift_mask_rhs(bx, rhs);
156-
let is_signed = lhs_t.is_signed();
157-
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
158-
}
159-
160-
fn shift_mask_rhs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
161-
bx: &mut Bx,
162-
rhs: Bx::Value,
163-
) -> Bx::Value {
164-
let rhs_llty = bx.val_ty(rhs);
165-
let shift_val = shift_mask_val(bx, rhs_llty, rhs_llty, false);
166-
bx.and(rhs, shift_val)
167-
}
168-
169130
pub fn shift_mask_val<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
170131
bx: &mut Bx,
171132
llty: Bx::Type,

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

+5-7
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use super::place::PlaceRef;
33
use super::{FunctionCx, LocalRef};
44

55
use crate::base;
6-
use crate::common::{self, IntPredicate};
6+
use crate::common::IntPredicate;
77
use crate::traits::*;
88
use crate::MemFlags;
99

@@ -860,14 +860,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
860860
bx.inbounds_gep(llty, lhs, &[rhs])
861861
}
862862
}
863-
mir::BinOp::Shl => common::build_masked_lshift(bx, lhs, rhs),
864-
mir::BinOp::ShlUnchecked => {
865-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
863+
mir::BinOp::Shl | mir::BinOp::ShlUnchecked => {
864+
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShlUnchecked);
866865
bx.shl(lhs, rhs)
867866
}
868-
mir::BinOp::Shr => common::build_masked_rshift(bx, input_ty, lhs, rhs),
869-
mir::BinOp::ShrUnchecked => {
870-
let rhs = base::cast_shift_expr_rhs(bx, lhs, rhs);
867+
mir::BinOp::Shr | mir::BinOp::ShrUnchecked => {
868+
let rhs = base::build_shift_expr_rhs(bx, lhs, rhs, op == mir::BinOp::ShrUnchecked);
871869
if is_signed { bx.ashr(lhs, rhs) } else { bx.lshr(lhs, rhs) }
872870
}
873871
mir::BinOp::Ne

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -454,9 +454,8 @@ pub fn check_intrinsic_type(
454454
sym::unchecked_div | sym::unchecked_rem | sym::exact_div => {
455455
(1, 0, vec![param(0), param(0)], param(0))
456456
}
457-
sym::unchecked_shl | sym::unchecked_shr | sym::rotate_left | sym::rotate_right => {
458-
(1, 0, vec![param(0), param(0)], param(0))
459-
}
457+
sym::unchecked_shl | sym::unchecked_shr => (2, 0, vec![param(0), param(1)], param(0)),
458+
sym::rotate_left | sym::rotate_right => (1, 0, vec![param(0), param(0)], param(0)),
460459
sym::unchecked_add | sym::unchecked_sub | sym::unchecked_mul => {
461460
(1, 0, vec![param(0), param(0)], param(0))
462461
}

compiler/rustc_middle/src/mir/syntax.rs

+16
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,22 @@ pub enum BinOp {
14421442
Offset,
14431443
}
14441444

1445+
impl BinOp {
1446+
/// If there's an `Unchecked` version of this `BinOp`, return that.
1447+
/// Otherwise return `None`.
1448+
pub fn to_unchecked(self) -> Option<Self> {
1449+
use BinOp::*;
1450+
Some(match self {
1451+
Add => AddUnchecked,
1452+
Sub => SubUnchecked,
1453+
Mul => MulUnchecked,
1454+
Shl => ShlUnchecked,
1455+
Shr => ShrUnchecked,
1456+
_ => return None,
1457+
})
1458+
}
1459+
}
1460+
14451461
// Some nodes are used a lot. Make sure they don't unintentionally get bigger.
14461462
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
14471463
mod size_asserts {

compiler/rustc_mir_build/src/build/custom/parse/instruction.rs

+8
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,14 @@ impl<'tcx, 'body> ParseCtxt<'tcx, 'body> {
200200
)),
201201
)
202202
},
203+
@call(mir_unchecked, args) => {
204+
parse_by_kind!(self, args[0], _, "binary op",
205+
ExprKind::Binary { op, lhs, rhs } => Ok(Rvalue::BinaryOp(
206+
op.to_unchecked().unwrap_or_else(|| bug!("No unchecked version of {op:?}")),
207+
Box::new((self.parse_operand(*lhs)?, self.parse_operand(*rhs)?)),
208+
)),
209+
)
210+
},
203211
@call(mir_offset, args) => {
204212
let ptr = self.parse_operand(args[0])?;
205213
let offset = self.parse_operand(args[1])?;

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,7 @@ symbols! {
11681168
mir_static_mut,
11691169
mir_storage_dead,
11701170
mir_storage_live,
1171+
mir_unchecked,
11711172
mir_unreachable,
11721173
mir_unwind_cleanup,
11731174
mir_unwind_continue,

library/core/src/intrinsics.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -2224,18 +2224,20 @@ extern "rust-intrinsic" {
22242224
/// Safe wrappers for this intrinsic are available on the integer
22252225
/// primitives via the `checked_shl` method. For example,
22262226
/// [`u32::checked_shl`]
2227+
#[cfg(not(bootstrap))]
22272228
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
22282229
#[rustc_nounwind]
2229-
pub fn unchecked_shl<T: Copy>(x: T, y: T) -> T;
2230+
pub fn unchecked_shl<T: Copy, U: Copy>(x: T, y: U) -> T;
22302231
/// Performs an unchecked right shift, resulting in undefined behavior when
22312232
/// `y < 0` or `y >= N`, where N is the width of T in bits.
22322233
///
22332234
/// Safe wrappers for this intrinsic are available on the integer
22342235
/// primitives via the `checked_shr` method. For example,
22352236
/// [`u32::checked_shr`]
2237+
#[cfg(not(bootstrap))]
22362238
#[rustc_const_stable(feature = "const_int_unchecked", since = "1.40.0")]
22372239
#[rustc_nounwind]
2238-
pub fn unchecked_shr<T: Copy>(x: T, y: T) -> T;
2240+
pub fn unchecked_shr<T: Copy, U: Copy>(x: T, y: U) -> T;
22392241

22402242
/// Returns the result of an unchecked addition, resulting in
22412243
/// undefined behavior when `x + y > T::MAX` or `x + y < T::MIN`.

library/core/src/intrinsics/mir.rs

+2
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@
235235
//! - Unary and binary operations use their normal Rust syntax - `a * b`, `!c`, etc.
236236
//! - The binary operation `Offset` can be created via [`Offset`].
237237
//! - Checked binary operations are represented by wrapping the associated binop in [`Checked`].
238+
//! - Unchecked binary operations are represented by wrapping the associated binop in [`Unchecked`].
238239
//! - Array repetition syntax (`[foo; 10]`) creates the associated rvalue.
239240
//!
240241
//! #### Terminators
@@ -359,6 +360,7 @@ define!("mir_storage_dead", fn StorageDead<T>(local: T));
359360
define!("mir_assume", fn Assume(operand: bool));
360361
define!("mir_deinit", fn Deinit<T>(place: T));
361362
define!("mir_checked", fn Checked<T>(binop: T) -> (T, bool));
363+
define!("mir_unchecked", fn Unchecked<T>(binop: T) -> T);
362364
define!("mir_len", fn Len<T>(place: T) -> usize);
363365
define!("mir_copy_for_deref", fn CopyForDeref<T>(place: T) -> T);
364366
define!("mir_retag", fn Retag<T>(place: T));

library/core/src/num/int_macros.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -1253,10 +1253,18 @@ macro_rules! int_impl {
12531253
#[inline(always)]
12541254
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
12551255
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
1256-
// SAFETY: the caller must uphold the safety contract for
1257-
// `unchecked_shl`.
1258-
// Any legal shift amount is losslessly representable in the self type.
1259-
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1256+
#[cfg(bootstrap)]
1257+
{
1258+
// For bootstrapping, just use built-in primitive shift.
1259+
// panicking is a legal manifestation of UB
1260+
self << rhs
1261+
}
1262+
#[cfg(not(bootstrap))]
1263+
{
1264+
// SAFETY: the caller must uphold the safety contract for
1265+
// `unchecked_shl`.
1266+
unsafe { intrinsics::unchecked_shl(self, rhs) }
1267+
}
12601268
}
12611269

12621270
/// Checked shift right. Computes `self >> rhs`, returning `None` if `rhs` is
@@ -1336,10 +1344,18 @@ macro_rules! int_impl {
13361344
#[inline(always)]
13371345
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13381346
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
1339-
// SAFETY: the caller must uphold the safety contract for
1340-
// `unchecked_shr`.
1341-
// Any legal shift amount is losslessly representable in the self type.
1342-
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1347+
#[cfg(bootstrap)]
1348+
{
1349+
// For bootstrapping, just use built-in primitive shift.
1350+
// panicking is a legal manifestation of UB
1351+
self >> rhs
1352+
}
1353+
#[cfg(not(bootstrap))]
1354+
{
1355+
// SAFETY: the caller must uphold the safety contract for
1356+
// `unchecked_shr`.
1357+
unsafe { intrinsics::unchecked_shr(self, rhs) }
1358+
}
13431359
}
13441360

13451361
/// Checked absolute value. Computes `self.abs()`, returning `None` if

library/core/src/num/mod.rs

-11
Original file line numberDiff line numberDiff line change
@@ -286,17 +286,6 @@ macro_rules! widening_impl {
286286
};
287287
}
288288

289-
macro_rules! conv_rhs_for_unchecked_shift {
290-
($SelfT:ty, $x:expr) => {{
291-
// If the `as` cast will truncate, ensure we still tell the backend
292-
// that the pre-truncation value was also small.
293-
if <$SelfT>::BITS < 32 {
294-
intrinsics::assume($x <= (<$SelfT>::MAX as u32));
295-
}
296-
$x as $SelfT
297-
}};
298-
}
299-
300289
impl i8 {
301290
int_impl! {
302291
Self = i8,

library/core/src/num/uint_macros.rs

+24-8
Original file line numberDiff line numberDiff line change
@@ -1313,10 +1313,18 @@ macro_rules! uint_impl {
13131313
#[inline(always)]
13141314
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13151315
pub const unsafe fn unchecked_shl(self, rhs: u32) -> Self {
1316-
// SAFETY: the caller must uphold the safety contract for
1317-
// `unchecked_shl`.
1318-
// Any legal shift amount is losslessly representable in the self type.
1319-
unsafe { intrinsics::unchecked_shl(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1316+
#[cfg(bootstrap)]
1317+
{
1318+
// For bootstrapping, just use built-in primitive shift.
1319+
// panicking is a legal manifestation of UB
1320+
self << rhs
1321+
}
1322+
#[cfg(not(bootstrap))]
1323+
{
1324+
// SAFETY: the caller must uphold the safety contract for
1325+
// `unchecked_shl`.
1326+
unsafe { intrinsics::unchecked_shl(self, rhs) }
1327+
}
13201328
}
13211329

13221330
/// Checked shift right. Computes `self >> rhs`, returning `None`
@@ -1396,10 +1404,18 @@ macro_rules! uint_impl {
13961404
#[inline(always)]
13971405
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
13981406
pub const unsafe fn unchecked_shr(self, rhs: u32) -> Self {
1399-
// SAFETY: the caller must uphold the safety contract for
1400-
// `unchecked_shr`.
1401-
// Any legal shift amount is losslessly representable in the self type.
1402-
unsafe { intrinsics::unchecked_shr(self, conv_rhs_for_unchecked_shift!($SelfT, rhs)) }
1407+
#[cfg(bootstrap)]
1408+
{
1409+
// For bootstrapping, just use built-in primitive shift.
1410+
// panicking is a legal manifestation of UB
1411+
self >> rhs
1412+
}
1413+
#[cfg(not(bootstrap))]
1414+
{
1415+
// SAFETY: the caller must uphold the safety contract for
1416+
// `unchecked_shr`.
1417+
unsafe { intrinsics::unchecked_shr(self, rhs) }
1418+
}
14031419
}
14041420

14051421
/// Checked exponentiation. Computes `self.pow(exp)`, returning `None` if

library/core/src/ptr/mod.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -1781,9 +1781,19 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
17811781
// FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
17821782
// 1, where the method versions of these operations are not inlined.
17831783
use intrinsics::{
1784-
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl,
1785-
unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
1784+
assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_sub,
1785+
wrapping_add, wrapping_mul, wrapping_sub,
17861786
};
1787+
#[cfg(bootstrap)]
1788+
const unsafe fn unchecked_shl(value: usize, shift: usize) -> usize {
1789+
value << shift
1790+
}
1791+
#[cfg(bootstrap)]
1792+
const unsafe fn unchecked_shr(value: usize, shift: usize) -> usize {
1793+
value >> shift
1794+
}
1795+
#[cfg(not(bootstrap))]
1796+
use intrinsics::{unchecked_shl, unchecked_shr};
17871797

17881798
/// Calculate multiplicative modular inverse of `x` modulo `m`.
17891799
///

0 commit comments

Comments
 (0)