Skip to content

optimize superset method of IntervalSet #97862

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 9, 2022
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 57 additions & 12 deletions compiler/rustc_index/src/interval.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::iter::Step;
use std::marker::PhantomData;
use std::ops::Bound;
use std::ops::RangeBounds;
use std::ops::{Bound, Range};

use crate::vec::Idx;
use crate::vec::IndexVec;
Expand All @@ -11,6 +11,10 @@ use smallvec::SmallVec;
mod tests;

/// Stores a set of intervals on the indices.
///
/// The elements in `map` are sorted and non-adjacent, which means
/// the second value of the previous element is *greater* than the
/// first value of the following element.
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
// Start, end
Expand Down Expand Up @@ -84,7 +88,7 @@ impl<I: Idx> IntervalSet<I> {
// continue to the next range. We're looking here for the first
// range which starts *non-adjacently* to our end.
let next = self.map.partition_point(|r| r.0 <= end + 1);
if let Some(right) = next.checked_sub(1) {
let result = if let Some(right) = next.checked_sub(1) {
let (prev_start, prev_end) = self.map[right];
if prev_end + 1 >= start {
// If the start for the inserted range is adjacent to the
Expand All @@ -99,25 +103,25 @@ impl<I: Idx> IntervalSet<I> {
if left != right {
self.map.drain(left..right);
}
return true;
true
} else {
// We overlap with the previous range, increase it to
// include us.
//
// Make sure we're actually going to *increase* it though --
// it may be that end is just inside the previously existing
// set.
return if end > prev_end {
if end > prev_end {
self.map[right].1 = end;
true
} else {
false
};
}
}
} else {
// Otherwise, we don't overlap, so just insert
self.map.insert(right + 1, (start, end));
return true;
true
}
} else {
if self.map.is_empty() {
Expand All @@ -127,8 +131,16 @@ impl<I: Idx> IntervalSet<I> {
} else {
self.map.insert(next, (start, end));
}
return true;
}
true
};
debug_assert!(
self.check_invariants(),
"wrong intervals after insert {:?}..={:?} to {:?}",
start,
end,
self
);
result
}

pub fn contains(&self, needle: I) -> bool {
Expand All @@ -145,9 +157,26 @@ impl<I: Idx> IntervalSet<I> {
where
I: Step,
{
// FIXME: Performance here is probably not great. We will be doing a lot
// of pointless tree traversals.
other.iter().all(|elem| self.contains(elem))
let mut sup_iter = self.iter_intervals();
let mut current = None;
let contains = |sup: Range<I>, sub: Range<I>, current: &mut Option<Range<I>>| {
if sup.end < sub.start {
// if `sup.end == sub.start`, the next sup doesn't contain `sub.start`
None // continue to the next sup
} else if sup.end >= sub.end && sup.start <= sub.start {
*current = Some(sup); // save the current sup
Some(true)
} else {
Some(false)
}
};
other.iter_intervals().all(|sub| {
current
.take()
.and_then(|sup| contains(sup, sub.clone(), &mut current))
.or_else(|| sup_iter.find_map(|sup| contains(sup, sub.clone(), &mut current)))
.unwrap_or(false)
})
}

pub fn is_empty(&self) -> bool {
Expand All @@ -174,7 +203,10 @@ impl<I: Idx> IntervalSet<I> {

pub fn insert_all(&mut self) {
self.clear();
self.map.push((0, self.domain.try_into().unwrap()));
if let Some(end) = self.domain.checked_sub(1) {
self.map.push((0, end.try_into().unwrap()));
}
debug_assert!(self.check_invariants());
}

pub fn union(&mut self, other: &IntervalSet<I>) -> bool
Expand All @@ -186,8 +218,21 @@ impl<I: Idx> IntervalSet<I> {
for range in other.iter_intervals() {
did_insert |= self.insert_range(range);
}
debug_assert!(self.check_invariants());
did_insert
}

// Check the intervals are valid, sorted and non-adjacent
fn check_invariants(&self) -> bool {
let mut current: Option<u32> = None;
for (start, end) in &self.map {
if start > end || current.map_or(false, |x| x + 1 >= *start) {
return false;
}
current = Some(*end);
}
current.map_or(true, |x| x < self.domain as u32)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems like that invariant isn't actually assumed rn? 🤔 generally, the domain seems mostly irrelevant, could "just" use u32::MAX instead for the range end.

🤷 don't really care how you deal with the test failure

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'd better make sure all intervals don't exceed domain, after all, both insert_all and insert_range use domain as the range maximum. I increased the domain in the test from 3000 to 10000, which I think should be reasonable

}
}

/// This data structure optimizes for cases where the stored bits in each row
Expand Down