Skip to content

Commit d573fe1

Browse files
committed
Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk
Infinite loop detection for const evaluation Resolves #50637. An `EvalContext` stores the transient state (stack, heap, etc.) of the MIRI virtual machine while it executing code. As long as MIRI only executes pure functions, we can detect if a program is in a state where it will never terminate by periodically taking a "snapshot" of this transient state and comparing it to previous ones. If any two states are exactly equal, the machine must be in an infinite loop. Instead of fully cloning a snapshot every time the detector is run, we store a snapshot's hash. Only when a hash collision occurs do we fully clone the interpreter state. Future snapshots which cause a collision will be compared against this clone, causing the interpreter to abort if they are equal. At the moment, snapshots are not taken until MIRI has progressed a certain amount. After this threshold, snapshots are taken every `DETECTOR_SNAPSHOT_PERIOD` steps. This means that an infinite loop with period `P` will be detected after a maximum of `2 * P * DETECTOR_SNAPSHOT_PERIOD` interpreter steps. The factor of 2 arises because we only clone a snapshot after it causes a hash collision.
2 parents 66787e0 + cf5eaa7 commit d573fe1

File tree

13 files changed

+301
-26
lines changed

13 files changed

+301
-26
lines changed

src/librustc/ich/impls_ty.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,8 @@ for ::mir::interpret::EvalErrorKind<'gcx, O> {
549549
RemainderByZero |
550550
DivisionByZero |
551551
GeneratorResumedAfterReturn |
552-
GeneratorResumedAfterPanic => {}
552+
GeneratorResumedAfterPanic |
553+
InfiniteLoop => {}
553554
ReferencedConstant(ref err) => err.hash_stable(hcx, hasher),
554555
MachineError(ref err) => err.hash_stable(hcx, hasher),
555556
FunctionPointerTyMismatch(a, b) => {

src/librustc/mir/interpret/error.rs

+3
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ pub enum EvalErrorKind<'tcx, O> {
264264
ReferencedConstant(Lrc<ConstEvalErr<'tcx>>),
265265
GeneratorResumedAfterReturn,
266266
GeneratorResumedAfterPanic,
267+
InfiniteLoop,
267268
}
268269

269270
pub type EvalResult<'tcx, T = ()> = Result<T, EvalError<'tcx>>;
@@ -398,6 +399,8 @@ impl<'tcx, O> EvalErrorKind<'tcx, O> {
398399
RemainderByZero => "attempt to calculate the remainder with a divisor of zero",
399400
GeneratorResumedAfterReturn => "generator resumed after completion",
400401
GeneratorResumedAfterPanic => "generator resumed after panicking",
402+
InfiniteLoop =>
403+
"duplicate interpreter state observed here, const evaluation will never terminate",
401404
}
402405
}
403406
}

src/librustc/mir/interpret/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use ty::codec::TyDecoder;
3636
use std::sync::atomic::{AtomicU32, Ordering};
3737
use std::num::NonZeroU32;
3838

39-
#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)]
39+
#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)]
4040
pub enum Lock {
4141
NoLock,
4242
WriteLock(DynamicLifetime),

src/librustc/mir/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1636,7 +1636,7 @@ impl Debug for ValidationOp {
16361636
}
16371637

16381638
// This is generic so that it can be reused by miri
1639-
#[derive(Clone, RustcEncodable, RustcDecodable)]
1639+
#[derive(Clone, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)]
16401640
pub struct ValidationOperand<'tcx, T> {
16411641
pub place: T,
16421642
pub ty: Ty<'tcx>,

src/librustc/ty/structural_impls.rs

+1
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ impl<'a, 'tcx, O: Lift<'tcx>> Lift<'tcx> for interpret::EvalErrorKind<'a, O> {
587587
RemainderByZero => RemainderByZero,
588588
GeneratorResumedAfterReturn => GeneratorResumedAfterReturn,
589589
GeneratorResumedAfterPanic => GeneratorResumedAfterPanic,
590+
InfiniteLoop => InfiniteLoop,
590591
})
591592
}
592593
}

src/librustc_mir/interpret/const_eval.rs

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ fn eval_body_using_ecx<'a, 'mir, 'tcx>(
185185
Ok((value, ptr, layout.ty))
186186
}
187187

188+
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
188189
pub struct CompileTimeEvaluator;
189190

190191
impl<'tcx> Into<EvalError<'tcx>> for ConstEvalError {

src/librustc_mir/interpret/eval_context.rs

+130-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::fmt::Write;
2+
use std::hash::{Hash, Hasher};
23
use std::mem;
34

45
use rustc::hir::def_id::DefId;
@@ -9,6 +10,7 @@ use rustc::ty::layout::{self, Size, Align, HasDataLayout, IntegerExt, LayoutOf,
910
use rustc::ty::subst::{Subst, Substs};
1011
use rustc::ty::{self, Ty, TyCtxt, TypeAndMut};
1112
use rustc::ty::query::TyCtxtAt;
13+
use rustc_data_structures::fx::{FxHashSet, FxHasher};
1214
use rustc_data_structures::indexed_vec::{IndexVec, Idx};
1315
use rustc::mir::interpret::{
1416
FrameInfo, GlobalId, Value, Scalar,
@@ -41,13 +43,17 @@ pub struct EvalContext<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
4143
/// The maximum number of stack frames allowed
4244
pub(crate) stack_limit: usize,
4345

44-
/// The maximum number of terminators that may be evaluated.
45-
/// This prevents infinite loops and huge computations from freezing up const eval.
46-
/// Remove once halting problem is solved.
47-
pub(crate) terminators_remaining: usize,
46+
/// When this value is negative, it indicates the number of interpreter
47+
/// steps *until* the loop detector is enabled. When it is positive, it is
48+
/// the number of steps after the detector has been enabled modulo the loop
49+
/// detector period.
50+
pub(crate) steps_since_detector_enabled: isize,
51+
52+
pub(crate) loop_detector: InfiniteLoopDetector<'a, 'mir, 'tcx, M>,
4853
}
4954

5055
/// A stack frame.
56+
#[derive(Clone)]
5157
pub struct Frame<'mir, 'tcx: 'mir> {
5258
////////////////////////////////////////////////////////////////////////////////
5359
// Function and callsite information
@@ -89,6 +95,121 @@ pub struct Frame<'mir, 'tcx: 'mir> {
8995
pub stmt: usize,
9096
}
9197

98+
impl<'mir, 'tcx: 'mir> Eq for Frame<'mir, 'tcx> {}
99+
100+
impl<'mir, 'tcx: 'mir> PartialEq for Frame<'mir, 'tcx> {
101+
fn eq(&self, other: &Self) -> bool {
102+
let Frame {
103+
mir: _,
104+
instance,
105+
span: _,
106+
return_to_block,
107+
return_place,
108+
locals,
109+
block,
110+
stmt,
111+
} = self;
112+
113+
// Some of these are constant during evaluation, but are included
114+
// anyways for correctness.
115+
*instance == other.instance
116+
&& *return_to_block == other.return_to_block
117+
&& *return_place == other.return_place
118+
&& *locals == other.locals
119+
&& *block == other.block
120+
&& *stmt == other.stmt
121+
}
122+
}
123+
124+
impl<'mir, 'tcx: 'mir> Hash for Frame<'mir, 'tcx> {
125+
fn hash<H: Hasher>(&self, state: &mut H) {
126+
let Frame {
127+
mir: _,
128+
instance,
129+
span: _,
130+
return_to_block,
131+
return_place,
132+
locals,
133+
block,
134+
stmt,
135+
} = self;
136+
137+
instance.hash(state);
138+
return_to_block.hash(state);
139+
return_place.hash(state);
140+
locals.hash(state);
141+
block.hash(state);
142+
stmt.hash(state);
143+
}
144+
}
145+
146+
/// The virtual machine state during const-evaluation at a given point in time.
147+
type EvalSnapshot<'a, 'mir, 'tcx, M>
148+
= (M, Vec<Frame<'mir, 'tcx>>, Memory<'a, 'mir, 'tcx, M>);
149+
150+
pub(crate) struct InfiniteLoopDetector<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
151+
/// The set of all `EvalSnapshot` *hashes* observed by this detector.
152+
///
153+
/// When a collision occurs in this table, we store the full snapshot in
154+
/// `snapshots`.
155+
hashes: FxHashSet<u64>,
156+
157+
/// The set of all `EvalSnapshot`s observed by this detector.
158+
///
159+
/// An `EvalSnapshot` will only be fully cloned once it has caused a
160+
/// collision in `hashes`. As a result, the detector must observe at least
161+
/// *two* full cycles of an infinite loop before it triggers.
162+
snapshots: FxHashSet<EvalSnapshot<'a, 'mir, 'tcx, M>>,
163+
}
164+
165+
impl<'a, 'mir, 'tcx, M> Default for InfiniteLoopDetector<'a, 'mir, 'tcx, M>
166+
where M: Machine<'mir, 'tcx>,
167+
'tcx: 'a + 'mir,
168+
{
169+
fn default() -> Self {
170+
InfiniteLoopDetector {
171+
hashes: FxHashSet::default(),
172+
snapshots: FxHashSet::default(),
173+
}
174+
}
175+
}
176+
177+
impl<'a, 'mir, 'tcx, M> InfiniteLoopDetector<'a, 'mir, 'tcx, M>
178+
where M: Machine<'mir, 'tcx>,
179+
'tcx: 'a + 'mir,
180+
{
181+
/// Returns `true` if the loop detector has not yet observed a snapshot.
182+
pub fn is_empty(&self) -> bool {
183+
self.hashes.is_empty()
184+
}
185+
186+
pub fn observe_and_analyze(
187+
&mut self,
188+
machine: &M,
189+
stack: &Vec<Frame<'mir, 'tcx>>,
190+
memory: &Memory<'a, 'mir, 'tcx, M>,
191+
) -> EvalResult<'tcx, ()> {
192+
let snapshot = (machine, stack, memory);
193+
194+
let mut fx = FxHasher::default();
195+
snapshot.hash(&mut fx);
196+
let hash = fx.finish();
197+
198+
if self.hashes.insert(hash) {
199+
// No collision
200+
return Ok(())
201+
}
202+
203+
if self.snapshots.insert((machine.clone(), stack.clone(), memory.clone())) {
204+
// Spurious collision or first cycle
205+
return Ok(())
206+
}
207+
208+
// Second cycle
209+
Err(EvalErrorKind::InfiniteLoop.into())
210+
}
211+
}
212+
92213
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
93214
pub enum StackPopCleanup {
94215
/// The stackframe existed to compute the initial value of a static/constant, make sure it
@@ -173,7 +294,7 @@ impl<'c, 'b, 'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> LayoutOf
173294
}
174295
}
175296

176-
const MAX_TERMINATORS: usize = 1_000_000;
297+
const STEPS_UNTIL_DETECTOR_ENABLED: isize = 1_000_000;
177298

178299
impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
179300
pub fn new(
@@ -189,16 +310,17 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
189310
memory: Memory::new(tcx, memory_data),
190311
stack: Vec::new(),
191312
stack_limit: tcx.sess.const_eval_stack_frame_limit,
192-
terminators_remaining: MAX_TERMINATORS,
313+
loop_detector: Default::default(),
314+
steps_since_detector_enabled: -STEPS_UNTIL_DETECTOR_ENABLED,
193315
}
194316
}
195317

196318
pub(crate) fn with_fresh_body<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R {
197319
let stack = mem::replace(&mut self.stack, Vec::new());
198-
let terminators_remaining = mem::replace(&mut self.terminators_remaining, MAX_TERMINATORS);
320+
let steps = mem::replace(&mut self.steps_since_detector_enabled, -STEPS_UNTIL_DETECTOR_ENABLED);
199321
let r = f(self);
200322
self.stack = stack;
201-
self.terminators_remaining = terminators_remaining;
323+
self.steps_since_detector_enabled = steps;
202324
r
203325
}
204326

@@ -538,8 +660,6 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
538660
}
539661

540662
Aggregate(ref kind, ref operands) => {
541-
self.inc_step_counter_and_check_limit(operands.len());
542-
543663
let (dest, active_field_index) = match **kind {
544664
mir::AggregateKind::Adt(adt_def, variant_index, _, active_field_index) => {
545665
self.write_discriminant_value(dest_ty, dest, variant_index)?;

src/librustc_mir/interpret/machine.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//! This separation exists to ensure that no fancy miri features like
33
//! interpreting common C functions leak into CTFE.
44
5+
use std::hash::Hash;
6+
57
use rustc::mir::interpret::{AllocId, EvalResult, Scalar, Pointer, AccessKind, GlobalId};
68
use super::{EvalContext, Place, ValTy, Memory};
79

@@ -13,9 +15,9 @@ use syntax::ast::Mutability;
1315

1416
/// Methods of this trait signifies a point where CTFE evaluation would fail
1517
/// and some use case dependent behaviour can instead be applied
16-
pub trait Machine<'mir, 'tcx>: Sized {
18+
pub trait Machine<'mir, 'tcx>: Clone + Eq + Hash {
1719
/// Additional data that can be accessed via the Memory
18-
type MemoryData;
20+
type MemoryData: Clone + Eq + Hash;
1921

2022
/// Additional memory kinds a machine wishes to distinguish from the builtin ones
2123
type MemoryKinds: ::std::fmt::Debug + PartialEq + Copy + Clone;

src/librustc_mir/interpret/memory.rs

+63-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::collections::VecDeque;
2+
use std::hash::{Hash, Hasher};
23
use std::ptr;
34

45
use rustc::hir::def_id::DefId;
@@ -9,7 +10,7 @@ use rustc::ty::layout::{self, Align, TargetDataLayout, Size};
910
use rustc::mir::interpret::{Pointer, AllocId, Allocation, AccessKind, Value,
1011
EvalResult, Scalar, EvalErrorKind, GlobalId, AllocType};
1112
pub use rustc::mir::interpret::{write_target_uint, write_target_int, read_target_uint};
12-
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
13+
use rustc_data_structures::fx::{FxHashSet, FxHashMap, FxHasher};
1314

1415
use syntax::ast::Mutability;
1516

@@ -19,7 +20,7 @@ use super::{EvalContext, Machine};
1920
// Allocations and pointers
2021
////////////////////////////////////////////////////////////////////////////////
2122

22-
#[derive(Debug, PartialEq, Copy, Clone)]
23+
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
2324
pub enum MemoryKind<T> {
2425
/// Error if deallocated except during a stack pop
2526
Stack,
@@ -31,6 +32,7 @@ pub enum MemoryKind<T> {
3132
// Top-level interpreter memory
3233
////////////////////////////////////////////////////////////////////////////////
3334

35+
#[derive(Clone)]
3436
pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
3537
/// Additional data required by the Machine
3638
pub data: M::MemoryData,
@@ -47,6 +49,64 @@ pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
4749
pub tcx: TyCtxtAt<'a, 'tcx, 'tcx>,
4850
}
4951

52+
impl<'a, 'mir, 'tcx, M> Eq for Memory<'a, 'mir, 'tcx, M>
53+
where M: Machine<'mir, 'tcx>,
54+
'tcx: 'a + 'mir,
55+
{}
56+
57+
impl<'a, 'mir, 'tcx, M> PartialEq for Memory<'a, 'mir, 'tcx, M>
58+
where M: Machine<'mir, 'tcx>,
59+
'tcx: 'a + 'mir,
60+
{
61+
fn eq(&self, other: &Self) -> bool {
62+
let Memory {
63+
data,
64+
alloc_kind,
65+
alloc_map,
66+
cur_frame,
67+
tcx: _,
68+
} = self;
69+
70+
*data == other.data
71+
&& *alloc_kind == other.alloc_kind
72+
&& *alloc_map == other.alloc_map
73+
&& *cur_frame == other.cur_frame
74+
}
75+
}
76+
77+
impl<'a, 'mir, 'tcx, M> Hash for Memory<'a, 'mir, 'tcx, M>
78+
where M: Machine<'mir, 'tcx>,
79+
'tcx: 'a + 'mir,
80+
{
81+
fn hash<H: Hasher>(&self, state: &mut H) {
82+
let Memory {
83+
data,
84+
alloc_kind: _,
85+
alloc_map: _,
86+
cur_frame,
87+
tcx: _,
88+
} = self;
89+
90+
data.hash(state);
91+
cur_frame.hash(state);
92+
93+
// We ignore some fields which don't change between evaluation steps.
94+
95+
// Since HashMaps which contain the same items may have different
96+
// iteration orders, we use a commutative operation (in this case
97+
// addition, but XOR would also work), to combine the hash of each
98+
// `Allocation`.
99+
self.allocations()
100+
.map(|allocs| {
101+
let mut h = FxHasher::default();
102+
allocs.hash(&mut h);
103+
h.finish()
104+
})
105+
.fold(0u64, |hash, x| hash.wrapping_add(x))
106+
.hash(state);
107+
}
108+
}
109+
50110
impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
51111
pub fn new(tcx: TyCtxtAt<'a, 'tcx, 'tcx>, data: M::MemoryData) -> Self {
52112
Memory {
@@ -866,7 +926,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
866926

867927
for i in 0..size.bytes() {
868928
let defined = undef_mask.get(src.offset + Size::from_bytes(i));
869-
929+
870930
for j in 0..repeat {
871931
dest_allocation.undef_mask.set(
872932
dest.offset + Size::from_bytes(i + (size.bytes() * j)),

0 commit comments

Comments
 (0)