Skip to content

Commit a9ca1ec

Browse files
committed
Auto merge of #72460 - RalfJung:rollup-28fs06y, r=RalfJung
Rollup of 4 pull requests Successful merges: - #71610 (InvalidUndefBytes: Track size of undef region used) - #72161 (Replace fcntl-based file lock with flock) - #72306 (Break tokens before checking if they are 'probably equal') - #72325 (Always generated object code for `#![no_builtins]`) Failed merges: r? @ghost
2 parents de6060b + 1119421 commit a9ca1ec

File tree

11 files changed

+252
-54
lines changed

11 files changed

+252
-54
lines changed

src/librustc_ast/tokenstream.rs

+67-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ use rustc_macros::HashStable_Generic;
2121
use rustc_span::{Span, DUMMY_SP};
2222
use smallvec::{smallvec, SmallVec};
2323

24+
use log::debug;
25+
2426
use std::{iter, mem};
2527

2628
/// When the main rust parser encounters a syntax-extension invocation, it
@@ -338,8 +340,71 @@ impl TokenStream {
338340
true
339341
}
340342

341-
let mut t1 = self.trees().filter(semantic_tree);
342-
let mut t2 = other.trees().filter(semantic_tree);
343+
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
344+
//
345+
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
346+
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
347+
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
348+
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
349+
// when determining if two `TokenStream`s are 'probably equal'.
350+
//
351+
// Therefore, we use `break_two_token_op` to convert all tokens
352+
// to the 'unglued' form (if it exists). This ensures that two
353+
// `TokenStream`s which differ only in how their tokens are glued
354+
// will be considered 'probably equal', which allows us to keep spans.
355+
//
356+
// This is important when the original `TokenStream` contained
357+
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
358+
// will be omitted when we pretty-print, which can cause the original
359+
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
360+
// leading to some tokens being 'glued' together in one stream but not
361+
// the other. See #68489 for more details.
362+
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
363+
// In almost all cases, we should have either zero or one levels
364+
// of 'unglueing'. However, in some unusual cases, we may need
365+
// to iterate breaking tokens multiple times. For example:
366+
// '[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]'
367+
let mut token_trees: SmallVec<[_; 2]>;
368+
if let TokenTree::Token(token) = &tree {
369+
let mut out = SmallVec::<[_; 2]>::new();
370+
out.push(token.clone());
371+
// Iterate to fixpoint:
372+
// * We start off with 'out' containing our initial token, and `temp` empty
373+
// * If we are able to break any tokens in `out`, then `changed` is set and
374+
// `temp` (which becomes the new `out`) has at least one more element,
375+
// so we will try to break tokens again.
376+
// * If we cannot break any tokens in 'out', we are done
377+
loop {
378+
let mut temp = SmallVec::<[_; 2]>::new();
379+
let mut changed = false;
380+
381+
for token in out.into_iter() {
382+
if let Some((first, second)) = token.kind.break_two_token_op() {
383+
temp.push(Token::new(first, DUMMY_SP));
384+
temp.push(Token::new(second, DUMMY_SP));
385+
changed = true;
386+
} else {
387+
temp.push(token);
388+
}
389+
}
390+
out = temp;
391+
if !changed {
392+
break;
393+
}
394+
}
395+
token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
396+
if token_trees.len() != 1 {
397+
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
398+
}
399+
} else {
400+
token_trees = SmallVec::new();
401+
token_trees.push(tree);
402+
}
403+
token_trees.into_iter()
404+
}
405+
406+
let mut t1 = self.trees().filter(semantic_tree).flat_map(break_tokens);
407+
let mut t2 = other.trees().filter(semantic_tree).flat_map(break_tokens);
343408
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
344409
if !t1.probably_equal_for_proc_macro(&t2) {
345410
return false;

src/librustc_codegen_ssa/back/write.rs

+15-1
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,22 @@ impl ModuleConfig {
142142
let emit_obj = if !should_emit_obj {
143143
EmitObj::None
144144
} else if sess.target.target.options.obj_is_bitcode
145-
|| sess.opts.cg.linker_plugin_lto.enabled()
145+
|| (sess.opts.cg.linker_plugin_lto.enabled() && !no_builtins)
146146
{
147+
// This case is selected if the target uses objects as bitcode, or
148+
// if linker plugin LTO is enabled. In the linker plugin LTO case
149+
// the assumption is that the final link-step will read the bitcode
150+
// and convert it to object code. This may be done by either the
151+
// native linker or rustc itself.
152+
//
153+
// Note, however, that the linker-plugin-lto requested here is
154+
// explicitly ignored for `#![no_builtins]` crates. These crates are
155+
// specifically ignored by rustc's LTO passes and wouldn't work if
156+
// loaded into the linker. These crates define symbols that LLVM
157+
// lowers intrinsics to, and these symbol dependencies aren't known
158+
// until after codegen. As a result any crate marked
159+
// `#![no_builtins]` is assumed to not participate in LTO and
160+
// instead goes on to generate object code.
147161
EmitObj::Bitcode
148162
} else if need_bitcode_in_object(sess) {
149163
EmitObj::ObjectCode(BitcodeSection::Full)

src/librustc_data_structures/flock.rs

+57-24
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,22 @@
77
#![allow(non_camel_case_types)]
88
#![allow(nonstandard_style)]
99

10+
use std::fs::{File, OpenOptions};
1011
use std::io;
1112
use std::path::Path;
1213

1314
cfg_if! {
14-
if #[cfg(unix)] {
15-
use std::ffi::{CString, OsStr};
16-
use std::mem;
15+
// We use `flock` rather than `fcntl` on Linux, because WSL1 does not support
16+
// `fcntl`-style advisory locks properly (rust-lang/rust#72157).
17+
//
18+
// For other Unix targets we still use `fcntl` because it's more portable than
19+
// `flock`.
20+
if #[cfg(target_os = "linux")] {
1721
use std::os::unix::prelude::*;
1822

1923
#[derive(Debug)]
2024
pub struct Lock {
21-
fd: libc::c_int,
25+
_file: File,
2226
}
2327

2428
impl Lock {
@@ -27,22 +31,55 @@ cfg_if! {
2731
create: bool,
2832
exclusive: bool)
2933
-> io::Result<Lock> {
30-
let os: &OsStr = p.as_ref();
31-
let buf = CString::new(os.as_bytes()).unwrap();
32-
let open_flags = if create {
33-
libc::O_RDWR | libc::O_CREAT
34+
let file = OpenOptions::new()
35+
.read(true)
36+
.write(true)
37+
.create(create)
38+
.mode(libc::S_IRWXU as u32)
39+
.open(p)?;
40+
41+
let mut operation = if exclusive {
42+
libc::LOCK_EX
3443
} else {
35-
libc::O_RDWR
36-
};
37-
38-
let fd = unsafe {
39-
libc::open(buf.as_ptr(), open_flags,
40-
libc::S_IRWXU as libc::c_int)
44+
libc::LOCK_SH
4145
};
46+
if !wait {
47+
operation |= libc::LOCK_NB
48+
}
4249

43-
if fd < 0 {
44-
return Err(io::Error::last_os_error());
50+
let ret = unsafe { libc::flock(file.as_raw_fd(), operation) };
51+
if ret == -1 {
52+
Err(io::Error::last_os_error())
53+
} else {
54+
Ok(Lock { _file: file })
4555
}
56+
}
57+
}
58+
59+
// Note that we don't need a Drop impl to execute `flock(fd, LOCK_UN)`. Lock acquired by
60+
// `flock` is associated with the file descriptor and closing the file releases it
61+
// automatically.
62+
} else if #[cfg(unix)] {
63+
use std::mem;
64+
use std::os::unix::prelude::*;
65+
66+
#[derive(Debug)]
67+
pub struct Lock {
68+
file: File,
69+
}
70+
71+
impl Lock {
72+
pub fn new(p: &Path,
73+
wait: bool,
74+
create: bool,
75+
exclusive: bool)
76+
-> io::Result<Lock> {
77+
let file = OpenOptions::new()
78+
.read(true)
79+
.write(true)
80+
.create(create)
81+
.mode(libc::S_IRWXU as u32)
82+
.open(p)?;
4683

4784
let lock_type = if exclusive {
4885
libc::F_WRLCK
@@ -58,14 +95,12 @@ cfg_if! {
5895

5996
let cmd = if wait { libc::F_SETLKW } else { libc::F_SETLK };
6097
let ret = unsafe {
61-
libc::fcntl(fd, cmd, &flock)
98+
libc::fcntl(file.as_raw_fd(), cmd, &flock)
6299
};
63100
if ret == -1 {
64-
let err = io::Error::last_os_error();
65-
unsafe { libc::close(fd); }
66-
Err(err)
101+
Err(io::Error::last_os_error())
67102
} else {
68-
Ok(Lock { fd })
103+
Ok(Lock { file })
69104
}
70105
}
71106
}
@@ -79,15 +114,13 @@ cfg_if! {
79114
flock.l_len = 0;
80115

81116
unsafe {
82-
libc::fcntl(self.fd, libc::F_SETLK, &flock);
83-
libc::close(self.fd);
117+
libc::fcntl(self.file.as_raw_fd(), libc::F_SETLK, &flock);
84118
}
85119
}
86120
}
87121
} else if #[cfg(windows)] {
88122
use std::mem;
89123
use std::os::windows::prelude::*;
90-
use std::fs::{File, OpenOptions};
91124

92125
use winapi::um::minwinbase::{OVERLAPPED, LOCKFILE_FAIL_IMMEDIATELY, LOCKFILE_EXCLUSIVE_LOCK};
93126
use winapi::um::fileapi::LockFileEx;

src/librustc_middle/mir/interpret/allocation.rs

+25-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use rustc_target::abi::{Align, HasDataLayout, Size};
1111

1212
use super::{
1313
read_target_uint, write_target_uint, AllocId, InterpResult, Pointer, Scalar, ScalarMaybeUninit,
14+
UninitBytesAccess,
1415
};
1516

1617
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
@@ -545,17 +546,23 @@ impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
545546
impl<'tcx, Tag: Copy, Extra> Allocation<Tag, Extra> {
546547
/// Checks whether the given range is entirely defined.
547548
///
548-
/// Returns `Ok(())` if it's defined. Otherwise returns the index of the byte
549-
/// at which the first undefined access begins.
550-
fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Size> {
549+
/// Returns `Ok(())` if it's defined. Otherwise returns the range of byte
550+
/// indexes of the first contiguous undefined access.
551+
fn is_defined(&self, ptr: Pointer<Tag>, size: Size) -> Result<(), Range<Size>> {
551552
self.init_mask.is_range_initialized(ptr.offset, ptr.offset + size) // `Size` addition
552553
}
553554

554-
/// Checks that a range of bytes is defined. If not, returns the `ReadUndefBytes`
555-
/// error which will report the first byte which is undefined.
555+
/// Checks that a range of bytes is defined. If not, returns the `InvalidUninitBytes`
556+
/// error which will report the first range of bytes which is undefined.
556557
fn check_defined(&self, ptr: Pointer<Tag>, size: Size) -> InterpResult<'tcx> {
557-
self.is_defined(ptr, size)
558-
.or_else(|idx| throw_ub!(InvalidUninitBytes(Some(Pointer::new(ptr.alloc_id, idx)))))
558+
self.is_defined(ptr, size).or_else(|idx_range| {
559+
throw_ub!(InvalidUninitBytes(Some(Box::new(UninitBytesAccess {
560+
access_ptr: ptr.erase_tag(),
561+
access_size: size,
562+
uninit_ptr: Pointer::new(ptr.alloc_id, idx_range.start),
563+
uninit_size: idx_range.end - idx_range.start, // `Size` subtraction
564+
}))))
565+
})
559566
}
560567

561568
pub fn mark_definedness(&mut self, ptr: Pointer<Tag>, size: Size, new_state: bool) {
@@ -758,19 +765,25 @@ impl InitMask {
758765

759766
/// Checks whether the range `start..end` (end-exclusive) is entirely initialized.
760767
///
761-
/// Returns `Ok(())` if it's initialized. Otherwise returns the index of the byte
762-
/// at which the first uninitialized access begins.
768+
/// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
769+
/// indexes for the first contiguous span of the uninitialized access.
763770
#[inline]
764-
pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Size> {
771+
pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Range<Size>> {
765772
if end > self.len {
766-
return Err(self.len);
773+
return Err(self.len..end);
767774
}
768775

769776
// FIXME(oli-obk): optimize this for allocations larger than a block.
770777
let idx = (start.bytes()..end.bytes()).map(Size::from_bytes).find(|&i| !self.get(i));
771778

772779
match idx {
773-
Some(idx) => Err(idx),
780+
Some(idx) => {
781+
let undef_end = (idx.bytes()..end.bytes())
782+
.map(Size::from_bytes)
783+
.find(|&i| self.get(i))
784+
.unwrap_or(end);
785+
Err(idx..undef_end)
786+
}
774787
None => Ok(()),
775788
}
776789
}

src/librustc_middle/mir/interpret/error.rs

+33-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::ty::query::TyCtxtAt;
66
use crate::ty::{self, layout, tls, FnSig, Ty};
77

88
use rustc_data_structures::sync::Lock;
9-
use rustc_errors::{struct_span_err, DiagnosticBuilder, ErrorReported};
9+
use rustc_errors::{pluralize, struct_span_err, DiagnosticBuilder, ErrorReported};
1010
use rustc_hir as hir;
1111
use rustc_hir::definitions::DefPathData;
1212
use rustc_macros::HashStable;
@@ -327,6 +327,19 @@ impl fmt::Display for CheckInAllocMsg {
327327
}
328328
}
329329

330+
/// Details of an access to uninitialized bytes where it is not allowed.
331+
#[derive(Debug)]
332+
pub struct UninitBytesAccess {
333+
/// Location of the original memory access.
334+
pub access_ptr: Pointer,
335+
/// Size of the original memory access.
336+
pub access_size: Size,
337+
/// Location of the first uninitialized byte that was accessed.
338+
pub uninit_ptr: Pointer,
339+
/// Number of consecutive uninitialized bytes that were accessed.
340+
pub uninit_size: Size,
341+
}
342+
330343
/// Error information for when the program caused Undefined Behavior.
331344
pub enum UndefinedBehaviorInfo<'tcx> {
332345
/// Free-form case. Only for errors that are never caught!
@@ -384,7 +397,7 @@ pub enum UndefinedBehaviorInfo<'tcx> {
384397
/// Using a string that is not valid UTF-8,
385398
InvalidStr(std::str::Utf8Error),
386399
/// Using uninitialized data where it is not allowed.
387-
InvalidUninitBytes(Option<Pointer>),
400+
InvalidUninitBytes(Option<Box<UninitBytesAccess>>),
388401
/// Working with a local that is not currently live.
389402
DeadLocal,
390403
/// Data size is not equal to target size.
@@ -455,10 +468,18 @@ impl fmt::Display for UndefinedBehaviorInfo<'_> {
455468
write!(f, "using {} as function pointer but it does not point to a function", p)
456469
}
457470
InvalidStr(err) => write!(f, "this string is not valid UTF-8: {}", err),
458-
InvalidUninitBytes(Some(p)) => write!(
471+
InvalidUninitBytes(Some(access)) => write!(
459472
f,
460-
"reading uninitialized memory at {}, but this operation requires initialized memory",
461-
p
473+
"reading {} byte{} of memory starting at {}, \
474+
but {} byte{} {} uninitialized starting at {}, \
475+
and this operation requires initialized memory",
476+
access.access_size.bytes(),
477+
pluralize!(access.access_size.bytes()),
478+
access.access_ptr,
479+
access.uninit_size.bytes(),
480+
pluralize!(access.uninit_size.bytes()),
481+
if access.uninit_size.bytes() != 1 { "are" } else { "is" },
482+
access.uninit_ptr,
462483
),
463484
InvalidUninitBytes(None) => write!(
464485
f,
@@ -556,6 +577,9 @@ impl dyn MachineStopType {
556577
}
557578
}
558579

580+
#[cfg(target_arch = "x86_64")]
581+
static_assert_size!(InterpError<'_>, 40);
582+
559583
pub enum InterpError<'tcx> {
560584
/// The program caused undefined behavior.
561585
UndefinedBehavior(UndefinedBehaviorInfo<'tcx>),
@@ -604,7 +628,10 @@ impl InterpError<'_> {
604628
InterpError::MachineStop(b) => mem::size_of_val::<dyn MachineStopType>(&**b) > 0,
605629
InterpError::Unsupported(UnsupportedOpInfo::Unsupported(_))
606630
| InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ValidationFailure(_))
607-
| InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_)) => true,
631+
| InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_))
632+
| InterpError::UndefinedBehavior(UndefinedBehaviorInfo::InvalidUninitBytes(Some(_))) => {
633+
true
634+
}
608635
_ => false,
609636
}
610637
}

src/librustc_middle/mir/interpret/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ use crate::ty::{self, Instance, Ty, TyCtxt};
119119
pub use self::error::{
120120
struct_error, CheckInAllocMsg, ConstEvalErr, ConstEvalRawResult, ConstEvalResult, ErrorHandled,
121121
FrameInfo, InterpError, InterpErrorInfo, InterpResult, InvalidProgramInfo, MachineStopType,
122-
ResourceExhaustionInfo, UndefinedBehaviorInfo, UnsupportedOpInfo,
122+
ResourceExhaustionInfo, UndefinedBehaviorInfo, UninitBytesAccess, UnsupportedOpInfo,
123123
};
124124

125125
pub use self::value::{get_slice_bytes, ConstValue, RawConst, Scalar, ScalarMaybeUninit};

0 commit comments

Comments
 (0)