From 0d032ba738914ce4c18cb1424f994f40877e6210 Mon Sep 17 00:00:00 2001 From: Kunal Sareen Date: Thu, 16 Jun 2022 23:20:57 +1000 Subject: [PATCH 01/15] GC space overhead --- src/plan/mod.rs | 2 +- src/plan/mutator_context.rs | 9 +- src/policy/copyspace.rs | 2 +- src/policy/immix/immixspace.rs | 2 +- src/policy/immortalspace.rs | 2 +- src/policy/largeobjectspace.rs | 2 +- src/policy/lockfreeimmortalspace.rs | 2 +- src/policy/mallocspace/global.rs | 55 ++++++++- src/policy/markcompactspace.rs | 2 +- src/policy/sft.rs | 4 +- src/util/alloc/allocator.rs | 3 +- src/util/analysis/mod.rs | 24 ++-- src/util/analysis/reserved_pages.rs | 74 +++++++++++++ src/util/statistics/counter/mod.rs | 2 + src/util/statistics/counter/single_counter.rs | 104 ++++++++++++++++++ src/util/statistics/stats.rs | 17 +++ 16 files changed, 273 insertions(+), 33 deletions(-) create mode 100644 src/util/analysis/reserved_pages.rs create mode 100644 src/util/statistics/counter/single_counter.rs diff --git a/src/plan/mod.rs b/src/plan/mod.rs index 5888436f50..acf74653c3 100644 --- a/src/plan/mod.rs +++ b/src/plan/mod.rs @@ -41,7 +41,7 @@ pub use tracing::{ObjectQueue, ObjectsClosure, VectorObjectQueue, VectorQueue}; mod generational; mod immix; mod markcompact; -mod marksweep; +pub mod marksweep; mod nogc; mod pageprotect; mod semispace; diff --git a/src/plan/mutator_context.rs b/src/plan/mutator_context.rs index 59dfd9f21d..4bad0cbb5e 100644 --- a/src/plan/mutator_context.rs +++ b/src/plan/mutator_context.rs @@ -97,18 +97,13 @@ impl MutatorContext for Mutator { } // Note that this method is slow, and we expect VM bindings that care about performance to implement allocation fastpath sequence in their bindings. - fn post_alloc( - &mut self, - refer: ObjectReference, - _bytes: usize, - allocator: AllocationSemantics, - ) { + fn post_alloc(&mut self, refer: ObjectReference, bytes: usize, allocator: AllocationSemantics) { unsafe { self.allocators .get_allocator_mut(self.config.allocator_mapping[allocator]) } .get_space() - .initialize_object_metadata(refer, true) + .initialize_object_metadata(refer, bytes, true) } fn get_tls(&self) -> VMMutatorThread { diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index 34368e01f7..d56514e4b2 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -45,7 +45,7 @@ impl SFT for CopySpace { !self.is_from_space() } - fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index f625275312..99a4249a1a 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -75,7 +75,7 @@ impl SFT for ImmixSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index 68d699b295..aaefe148b4 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -52,7 +52,7 @@ impl SFT for ImmortalSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, object: 
ObjectReference, _bytes: usize, _alloc: bool) { let old_value = VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.load_atomic::( object, None, diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index cbb7d9a76a..b2bd9f7f0b 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -50,7 +50,7 @@ impl SFT for LargeObjectSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, alloc: bool) { let old_value = VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.load_atomic::( object, None, diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index c836b32982..f0c6e3c5ed 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -57,7 +57,7 @@ impl SFT for LockFreeImmortalSpace { fn is_sane(&self) -> bool { unimplemented!() } - fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 1a9c248091..220e588988 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -36,6 +36,7 @@ const ASSERT_ALLOCATION: bool = false; pub struct MallocSpace { phantom: PhantomData, active_bytes: AtomicUsize, + pub active_pages: AtomicUsize, pub chunk_addr_min: AtomicUsize, // XXX: have to use AtomicUsize to represent an Address pub chunk_addr_max: AtomicUsize, metadata: SideMetadataContext, @@ -87,10 +88,10 @@ impl SFT for MallocSpace { has_object_alloced_by_malloc(addr) } - fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, bytes: usize, _alloc: bool) { trace!("initialize_object_metadata for object {}", object); let page_addr = conversions::page_align_down(object.to_address()); - set_page_mark(page_addr); + self.set_page_mark(page_addr, bytes); set_alloc_bit(object); } @@ -177,7 +178,7 @@ impl Space for MallocSpace { fn reserved_pages(&self) -> usize { // TODO: figure out a better way to get the total number of active pages from the metadata - let data_pages = conversions::bytes_to_pages_up(self.active_bytes.load(Ordering::SeqCst)); + let data_pages = self.active_pages.load(Ordering::SeqCst); let meta_pages = self.metadata.calculate_reserved_pages(data_pages); data_pages + meta_pages } @@ -214,6 +215,7 @@ impl MallocSpace { MallocSpace { phantom: PhantomData, active_bytes: AtomicUsize::new(0), + active_pages: AtomicUsize::new(0), chunk_addr_min: AtomicUsize::new(usize::max_value()), // XXX: have to use AtomicUsize to represent an Address chunk_addr_max: AtomicUsize::new(0), metadata: SideMetadataContext { @@ -235,6 +237,26 @@ impl MallocSpace { } } + fn set_page_mark(&self, page_addr: Address, size: usize) { + let mut page = page_addr; + while page < page_addr + size { + if !is_page_marked(page) { + set_page_mark(page); + self.active_pages.fetch_add(1, Ordering::SeqCst); + } + + page += BYTES_IN_PAGE; + } + } + + fn unset_page_mark(&self, page_addr: Address) { + if unsafe { is_page_marked_unsafe(page_addr) } { + self.active_pages.fetch_sub(1, Ordering::SeqCst); + } + + unsafe { unset_page_mark_unsafe(page_addr) }; + } + pub fn alloc(&self, tls: VMThread, size: 
usize, align: usize, offset: isize) -> Address { // TODO: Should refactor this and Space.acquire() if VM::VMActivePlan::global().poll(false, Some(self)) { @@ -400,6 +422,12 @@ impl MallocSpace { unsafe { unset_chunk_mark_unsafe(chunk_start) }; // Clear the SFT entry unsafe { crate::mmtk::SFT_MAP.clear(chunk_start) }; + + let mut page = chunk_start; + while page < chunk_start + BYTES_IN_CHUNK { + self.unset_page_mark(page); + page += BYTES_IN_PAGE; + } } /// Sweep an object if it is dead, and unset page marks for empty pages before this object. @@ -428,7 +456,7 @@ impl MallocSpace { let mut page = *empty_page_start; while page < current_page { - unsafe { unset_page_mark_unsafe(page) }; + self.unset_page_mark(page); page += BYTES_IN_PAGE; } } @@ -595,7 +623,9 @@ impl MallocSpace { MallocObjectSize, false, >::new(chunk_start, chunk_start + BYTES_IN_CHUNK); - for object in chunk_linear_scan { + let mut chunk_linear_scan_peek = chunk_linear_scan.peekable(); + + while let Some(object) = chunk_linear_scan_peek.next() { #[cfg(debug_assertions)] if ASSERT_ALLOCATION { let (obj_start, _, bytes) = Self::get_malloc_addr_size(object); @@ -625,6 +655,21 @@ impl MallocSpace { live_bytes += bytes; } } + + if let None = chunk_linear_scan_peek.peek() { + // This is for the edge case where we have a live object and then no other live + // objects afterwards till the end of the chunk. For example consider chunk + // 0x0-0x400000 where only one object at 0x100 is alive. We will unset page bits + // for 0x0-0x100 but then not unset it for the pages after 0x100. This if block + // will take care of this edge case + if !empty_page_start.is_zero() { + let mut page = empty_page_start; + while page < chunk_start + BYTES_IN_CHUNK { + self.unset_page_mark(page); + page += BYTES_IN_PAGE; + } + } + } } // If we never updated empty_page_start, the entire chunk is empty. diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index a6f3da1dd7..8e7c12d207 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -56,7 +56,7 @@ impl SFT for MarkCompactSpace { true } - fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, _alloc: bool) { crate::util::alloc_bit::set_alloc_bit(object); } diff --git a/src/policy/sft.rs b/src/policy/sft.rs index 7036d34f16..8b42115ded 100644 --- a/src/policy/sft.rs +++ b/src/policy/sft.rs @@ -82,7 +82,7 @@ pub trait SFT { } /// Initialize object metadata (in the header, or in the side metadata). - fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool); + fn initialize_object_metadata(&self, object: ObjectReference, bytes: usize, alloc: bool); /// Trace objects through SFT. 
This along with [`SFTProcessEdges`](mmtk/scheduler/gc_work/SFTProcessEdges) /// provides an easy way for most plans to trace objects without the need to implement any plan-specific @@ -151,7 +151,7 @@ impl SFT for EmptySpaceSFT { false } - fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, _alloc: bool) { panic!( "Called initialize_object_metadata() on {:x}, which maps to an empty space", object diff --git a/src/util/alloc/allocator.rs b/src/util/alloc/allocator.rs index 7ee0ab98ac..a87f42fe10 100644 --- a/src/util/alloc/allocator.rs +++ b/src/util/alloc/allocator.rs @@ -244,12 +244,13 @@ pub trait Allocator: Downcast { // This is the allocation hook for the analysis trait. If you want to call // an analysis counter specific allocation hook, then here is the place to do so #[cfg(feature = "analysis")] - if _allocation_bytes > *plan.options.analysis_factor { + if _allocation_bytes > *plan.options.stress_factor { trace!( "Analysis: allocation_bytes = {} more than analysis_factor = {}", _allocation_bytes, *plan.options.analysis_factor ); + plan.analysis_manager.alloc_hook(size, align, offset); } } diff --git a/src/util/analysis/mod.rs b/src/util/analysis/mod.rs index 4814afe880..2cd463be63 100644 --- a/src/util/analysis/mod.rs +++ b/src/util/analysis/mod.rs @@ -7,10 +7,12 @@ use std::sync::{Arc, Mutex}; pub mod gc_count; pub mod obj_num; pub mod obj_size; +pub mod reserved_pages; -use self::gc_count::GcCounter; -use self::obj_num::ObjectCounter; -use self::obj_size::PerSizeClassObjectCounter; +// use self::gc_count::GcCounter; +// use self::obj_num::ObjectCounter; +// use self::obj_size::PerSizeClassObjectCounter; +use self::reserved_pages::ReservedPagesCounter; /// /// This trait exposes hooks for developers to implement their own analysis routines. @@ -57,14 +59,14 @@ impl AnalysisManager { // Initializing all routines. 
If you want to add a new routine, here is the place // to do so fn initialize_routines(&mut self, stats: &Stats) { - let ctr = stats.new_event_counter("obj.num", true, true); - let gc_ctr = stats.new_event_counter("gc.num", true, true); - let obj_num = Arc::new(Mutex::new(ObjectCounter::new(true, ctr))); - let gc_count = Arc::new(Mutex::new(GcCounter::new(true, gc_ctr))); - let obj_size = Arc::new(Mutex::new(PerSizeClassObjectCounter::new(true))); - self.add_analysis_routine(obj_num); - self.add_analysis_routine(gc_count); - self.add_analysis_routine(obj_size); + let rss_max_ctr = stats.new_single_counter("reserved_pages.max", true, true); + let rss_avg_ctr = stats.new_single_counter("reserved_pages.avg", true, true); + let rss = Arc::new(Mutex::new(ReservedPagesCounter::new( + true, + rss_max_ctr, + rss_avg_ctr, + ))); + self.add_analysis_routine(rss); } pub fn add_analysis_routine(&mut self, routine: Arc + Send>>) { diff --git a/src/util/analysis/reserved_pages.rs b/src/util/analysis/reserved_pages.rs new file mode 100644 index 0000000000..50eb3547ce --- /dev/null +++ b/src/util/analysis/reserved_pages.rs @@ -0,0 +1,74 @@ +use crate::plan::marksweep::MarkSweep; +use crate::util::analysis::RtAnalysis; +use crate::util::statistics::counter::SingleCounter; +use crate::vm::{ActivePlan, VMBinding}; +use std::convert::TryInto; +use std::sync::{atomic::Ordering, Arc, Mutex}; + +pub struct ReservedPagesCounter { + running: bool, + reserved_pages_max: Mutex, + reserved_pages_max_ctr: Arc>, + reserved_pages_all_sum: Mutex, + reserved_pages_total: Mutex, + reserved_pages_avg_ctr: Arc>, +} + +impl ReservedPagesCounter { + pub fn new( + running: bool, + reserved_pages_max_ctr: Arc>, + reserved_pages_avg_ctr: Arc>, + ) -> Self { + Self { + running, + reserved_pages_max: Mutex::new(0), + reserved_pages_max_ctr, + reserved_pages_all_sum: Mutex::new(0), + reserved_pages_total: Mutex::new(0), + reserved_pages_avg_ctr, + } + } +} + +impl RtAnalysis for ReservedPagesCounter { + fn alloc_hook(&mut self, _size: usize, _align: usize, _offset: isize) { + if !self.running { + return; + } + + let plan = VM::VMActivePlan::global() + .downcast_ref::>() + .unwrap(); + let rss = plan.ms_space().active_pages.load(Ordering::SeqCst); + + { + let mut rss_max = self.reserved_pages_max.lock().unwrap(); + if rss > *rss_max { + *rss_max = rss; + self.reserved_pages_max_ctr + .lock() + .unwrap() + .set_count(rss.try_into().unwrap()); + } + } + + { + let mut rss_sum = self.reserved_pages_all_sum.lock().unwrap(); + let mut rss_total = self.reserved_pages_total.lock().unwrap(); + + *rss_sum += rss; + *rss_total += 1; + + let rss_avg = *rss_sum / *rss_total; + self.reserved_pages_avg_ctr + .lock() + .unwrap() + .set_count(rss_avg.try_into().unwrap()); + } + } + + fn set_running(&mut self, running: bool) { + self.running = running; + } +} diff --git a/src/util/statistics/counter/mod.rs b/src/util/statistics/counter/mod.rs index 2fdf53026f..358cd9938e 100644 --- a/src/util/statistics/counter/mod.rs +++ b/src/util/statistics/counter/mod.rs @@ -4,12 +4,14 @@ mod event_counter; mod long_counter; #[cfg(feature = "perf_counter")] mod perf_event; +mod single_counter; mod size_counter; pub use self::event_counter::EventCounter; pub use self::long_counter::{LongCounter, Timer}; #[cfg(feature = "perf_counter")] pub use self::perf_event::PerfEventDiffable; +pub use self::single_counter::SingleCounter; pub use self::size_counter::SizeCounter; /// An abstraction over how a specific Diffable value is counted diff --git 
a/src/util/statistics/counter/single_counter.rs b/src/util/statistics/counter/single_counter.rs new file mode 100644 index 0000000000..2397c33e5e --- /dev/null +++ b/src/util/statistics/counter/single_counter.rs @@ -0,0 +1,104 @@ +use super::*; +use crate::util::statistics::stats::SharedStats; +use std::sync::Arc; + +pub struct SingleCounter { + name: String, + pub implicitly_start: bool, + merge_phases: bool, + current_count: u64, + running: bool, + stats: Arc, +} + +impl SingleCounter { + pub fn new( + name: String, + stats: Arc, + implicitly_start: bool, + merge_phases: bool, + ) -> Self { + SingleCounter { + name, + implicitly_start, + merge_phases, + current_count: 0, + running: false, + stats, + } + } + + pub fn set_count(&mut self, value: u64) { + if self.running { + self.current_count = value; + } + } + + pub fn print_current(&self) { + self.print_value(self.current_count); + } + + fn print_value(&self, value: u64) { + print!("{}", value); + } +} + +impl Counter for SingleCounter { + fn start(&mut self) { + if !self.stats.get_gathering_stats() { + return; + } + debug_assert!(!self.running); + self.running = true; + } + + fn stop(&mut self) { + if !self.stats.get_gathering_stats() { + return; + } + debug_assert!(self.running); + self.running = false; + } + + /** + * The phase has changed (from GC to mutator or mutator to GC). + * Take action with respect to the last phase if necessary. + */ + fn phase_change(&mut self, _old_phase: usize) {} + + fn print_count(&self, _phase: usize) { + self.print_value(self.current_count); + } + + fn get_total(&self, _other: Option) -> u64 { + self.current_count + } + + fn print_total(&self, _mutator: Option) { + self.print_value(self.current_count); + } + + fn print_min(&self, _mutator: bool) { + self.print_value(self.current_count); + } + + fn print_max(&self, _mutator: bool) { + self.print_value(self.current_count); + } + + fn print_last(&self) { + self.print_value(self.current_count); + } + + fn merge_phases(&self) -> bool { + self.merge_phases + } + + fn implicitly_start(&self) -> bool { + self.implicitly_start + } + + fn name(&self) -> &String { + &self.name + } +} diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 6cf21985ff..1097bdc681 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -104,6 +104,23 @@ impl Stats { } } + pub fn new_single_counter( + &self, + name: &str, + implicit_start: bool, + merge_phases: bool, + ) -> Arc> { + let mut guard = self.counters.lock().unwrap(); + let counter = Arc::new(Mutex::new(SingleCounter::new( + name.to_string(), + self.shared.clone(), + implicit_start, + merge_phases, + ))); + guard.push(counter.clone()); + counter + } + pub fn new_event_counter( &self, name: &str, From 1a6e19e020aca9b6a7439ecc0901481c4364bd47 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 12:18:55 +1100 Subject: [PATCH 02/15] Allow sweep each object for side mark bit --- src/policy/mallocspace/global.rs | 185 +++++++++++++++++-------------- 1 file changed, 101 insertions(+), 84 deletions(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 220e588988..61b67e5933 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -36,7 +36,7 @@ const ASSERT_ALLOCATION: bool = false; pub struct MallocSpace { phantom: PhantomData, active_bytes: AtomicUsize, - pub active_pages: AtomicUsize, + active_pages: AtomicUsize, pub chunk_addr_min: AtomicUsize, // XXX: have to use AtomicUsize to represent an Address pub 
chunk_addr_max: AtomicUsize, metadata: SideMetadataContext, @@ -502,106 +502,119 @@ impl MallocSpace { /// non-atomic accesses should not have race conditions associated with them) /// as well as calls libc functions (`malloc_usable_size()`, `free()`) fn sweep_chunk_mark_on_side(&self, chunk_start: Address, mark_bit_spec: SideMetadataSpec) { - #[cfg(debug_assertions)] - let mut live_bytes = 0; - - debug!("Check active chunk {:?}", chunk_start); - let mut address = chunk_start; - let chunk_end = chunk_start + BYTES_IN_CHUNK; - - debug_assert!( - crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC.log_bytes_in_region - == mark_bit_spec.log_bytes_in_region, - "Alloc-bit and mark-bit metadata have different minimum object sizes!" - ); + // We can do xor on bulk for mark bits and valid object bits. If the result is zero, that means + // the objects in it are all alive (both valid object bit and mark bit is set), and we do not + // need to do anything for the region. Otherwise, we will sweep each single object in the region. + // Note: Enabling this would result in inaccurate page accounting. When the result is zero, + // we do not know whether the last live object in the region could span pages. But we are not checking + // for that specific case, so the page accounting could be inaccurate. If you care about + // page accounting, probably disable this, and we will sweep object one by one. + const BULK_XOR_ON_MARK_BITS: bool = false; + + if BULK_XOR_ON_MARK_BITS { + #[cfg(debug_assertions)] + let mut live_bytes = 0; - // For bulk xor'ing 128-bit vectors on architectures with vector instructions - // Each bit represents an object of LOG_MIN_OBJ_SIZE size - let bulk_load_size: usize = - 128 * (1 << crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC.log_bytes_in_region); + debug!("Check active chunk {:?}", chunk_start); + let mut address = chunk_start; + let chunk_end = chunk_start + BYTES_IN_CHUNK; - // The start of a possibly empty page. This will be updated during the sweeping, and always points to the next page of last live objects. - let mut empty_page_start = Address::ZERO; + debug_assert!( + crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC.log_bytes_in_region + == mark_bit_spec.log_bytes_in_region, + "Alloc-bit and mark-bit metadata have different minimum object sizes!" + ); - // Scan the chunk by every 'bulk_load_size' region. - while address < chunk_end { - let alloc_128: u128 = - unsafe { load128(&crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC, address) }; - let mark_128: u128 = unsafe { load128(&mark_bit_spec, address) }; + // For bulk xor'ing 128-bit vectors on architectures with vector instructions + // Each bit represents an object of LOG_MIN_OBJ_SIZE size + let bulk_load_size: usize = + 128 * (1 << crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC.log_bytes_in_region); + + // The start of a possibly empty page. This will be updated during the sweeping, and always points to the next page of last live objects. + let mut empty_page_start = Address::ZERO; + + // Scan the chunk by every 'bulk_load_size' region. + while address < chunk_end { + let alloc_128: u128 = + unsafe { load128(&crate::util::alloc_bit::ALLOC_SIDE_METADATA_SPEC, address) }; + let mark_128: u128 = unsafe { load128(&mark_bit_spec, address) }; + + // Check if there are dead objects in the bulk loaded region + if alloc_128 ^ mark_128 != 0 { + let end = address + bulk_load_size; + + // We will do non atomic load on the alloc bit, as this is the only thread that access the alloc bit for a chunk. 
+ // Linear scan through the bulk load region. + let bulk_load_scan = crate::util::linear_scan::ObjectIterator::< + VM, + MallocObjectSize, + false, + >::new(address, end); + for object in bulk_load_scan { + self.sweep_object(object, &mut empty_page_start); + } + } else { + // TODO we aren't actually accounting for the case where an object is alive and spans + // a page boundary as we don't know what the object sizes are/what is alive in the bulk region + if alloc_128 != 0 { + empty_page_start = address + bulk_load_size; + } + } - // Check if there are dead objects in the bulk loaded region - if alloc_128 ^ mark_128 != 0 { - let end = address + bulk_load_size; + // We have processed this bulk load memory. Step to the next. + address += bulk_load_size; + debug_assert!(address.is_aligned_to(bulk_load_size)); + } - // We will do non atomic load on the alloc bit, as this is the only thread that access the alloc bit for a chunk. - // Linear scan through the bulk load region. - let bulk_load_scan = crate::util::linear_scan::ObjectIterator::< + // Linear scan through the chunk, and add up all the live object sizes. + // We have to do this as a separate pass, as in the above pass, we did not go through all the live objects + #[cfg(debug_assertions)] + { + let chunk_linear_scan = crate::util::linear_scan::ObjectIterator::< VM, MallocObjectSize, false, - >::new(address, end); - for object in bulk_load_scan { - self.sweep_object(object, &mut empty_page_start); - } - } else { - // TODO we aren't actually accounting for the case where an object is alive and spans - // a page boundary as we don't know what the object sizes are/what is alive in the bulk region - if alloc_128 != 0 { - empty_page_start = address + bulk_load_size; - } - } - - // We have processed this bulk load memory. Step to the next. - address += bulk_load_size; - debug_assert!(address.is_aligned_to(bulk_load_size)); - } - - // Linear scan through the chunk, and add up all the live object sizes. 
- // We have to do this as a separate pass, as in the above pass, we did not go through all the live objects - #[cfg(debug_assertions)] - { - let chunk_linear_scan = crate::util::linear_scan::ObjectIterator::< - VM, - MallocObjectSize, - false, - >::new(chunk_start, chunk_end); - for object in chunk_linear_scan { - let (obj_start, _, bytes) = Self::get_malloc_addr_size(object); + >::new(chunk_start, chunk_end); + for object in chunk_linear_scan { + let (obj_start, _, bytes) = Self::get_malloc_addr_size(object); + + if ASSERT_ALLOCATION { + debug_assert!( + self.active_mem.lock().unwrap().contains_key(&obj_start), + "Address {} with alloc bit is not in active_mem", + obj_start + ); + debug_assert_eq!( + self.active_mem.lock().unwrap().get(&obj_start), + Some(&bytes), + "Address {} size in active_mem does not match the size from malloc_usable_size", + obj_start + ); + } - if ASSERT_ALLOCATION { debug_assert!( - self.active_mem.lock().unwrap().contains_key(&obj_start), - "Address {} with alloc bit is not in active_mem", - obj_start - ); - debug_assert_eq!( - self.active_mem.lock().unwrap().get(&obj_start), - Some(&bytes), - "Address {} size in active_mem does not match the size from malloc_usable_size", - obj_start + unsafe { is_marked_unsafe::(object) }, + "Dead object = {} found after sweep", + object ); + + live_bytes += bytes; } + } - debug_assert!( - unsafe { is_marked_unsafe::(object) }, - "Dead object = {} found after sweep", - object - ); + // Clear all the mark bits + mark_bit_spec.bzero_metadata(chunk_start, BYTES_IN_CHUNK); - live_bytes += bytes; + // If we never updated empty_page_start, the entire chunk is empty. + if empty_page_start.is_zero() { + self.clean_up_empty_chunk(chunk_start); } - } - // Clear all the mark bits - mark_bit_spec.bzero_metadata(chunk_start, BYTES_IN_CHUNK); - - // If we never updated empty_page_start, the entire chunk is empty. - if empty_page_start.is_zero() { - self.clean_up_empty_chunk(chunk_start); + #[cfg(debug_assertions)] + self.debug_sweep_chunk_done(live_bytes); + } else { + self.sweep_each_object_in_chunk(chunk_start); } - - #[cfg(debug_assertions)] - self.debug_sweep_chunk_done(live_bytes); } /// This sweep function is called when the mark bit sits in the object header @@ -610,6 +623,10 @@ impl MallocSpace { /// non-atomic accesses should not have race conditions associated with them) /// as well as calls libc functions (`malloc_usable_size()`, `free()`) fn sweep_chunk_mark_in_header(&self, chunk_start: Address) { + self.sweep_each_object_in_chunk(chunk_start) + } + + fn sweep_each_object_in_chunk(&self, chunk_start: Address) { #[cfg(debug_assertions)] let mut live_bytes = 0; From b867bc2342edb321fc182e08ce90178ae520e41e Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 13:15:44 +1100 Subject: [PATCH 03/15] Measure mimalloc 64K pages. 
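This patch accounts for active pages in units of the malloc library's own page size (64K for mimalloc, the usual 4K otherwise) and converts back to MMTk's 4K pages in reserved_pages(). Below is a minimal standalone sketch of that conversion, not code from this patch: the two constants mirror the values added in util/malloc/library.rs, while the helper function and main are purely illustrative.

    // Standalone sketch (not MMTk code) of the unit conversion done in
    // MallocSpace::reserved_pages() after this patch. The constant values
    // mirror util/malloc/library.rs: 64K mimalloc pages vs. MMTk's 4K pages.
    const LOG_BYTES_IN_PAGE: u8 = 12; // 4K MMTk page
    const LOG_BYTES_IN_MALLOC_PAGE: u8 = 16; // 64K mimalloc page

    // Convert a count of marked malloc pages into MMTk's 4K page unit.
    fn data_pages_in_mmtk_pages(active_malloc_pages: usize) -> usize {
        // Malloc pages are assumed to be no smaller than 4K pages.
        debug_assert!(LOG_BYTES_IN_MALLOC_PAGE >= LOG_BYTES_IN_PAGE);
        active_malloc_pages << (LOG_BYTES_IN_MALLOC_PAGE - LOG_BYTES_IN_PAGE)
    }

    fn main() {
        // One marked 64K mimalloc page accounts for sixteen 4K pages.
        assert_eq!(data_pages_in_mmtk_pages(1), 16);
        assert_eq!(data_pages_in_mmtk_pages(3), 48);
    }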
--- src/policy/mallocspace/global.rs | 17 +++++++++-------- src/policy/mallocspace/metadata.rs | 2 +- src/util/malloc/library.rs | 13 +++++++++++++ src/util/metadata/side_metadata/spec_defs.rs | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 61b67e5933..ff0efe1353 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -4,7 +4,6 @@ use crate::plan::VectorObjectQueue; use crate::policy::sft::GCWorkerMutRef; use crate::policy::sft::SFT; use crate::policy::space::CommonSpace; -use crate::util::constants::BYTES_IN_PAGE; use crate::util::heap::PageResource; use crate::util::malloc::malloc_ms_util::*; use crate::util::metadata::side_metadata::{ @@ -23,6 +22,7 @@ use std::marker::PhantomData; use std::sync::atomic::AtomicU32; use std::sync::atomic::{AtomicUsize, Ordering}; // only used for debugging +use crate::util::malloc::library::{BYTES_IN_MALLOC_PAGE, LOG_BYTES_IN_MALLOC_PAGE}; #[cfg(debug_assertions)] use std::collections::HashMap; #[cfg(debug_assertions)] @@ -178,7 +178,8 @@ impl Space for MallocSpace { fn reserved_pages(&self) -> usize { // TODO: figure out a better way to get the total number of active pages from the metadata - let data_pages = self.active_pages.load(Ordering::SeqCst); + let data_pages = self.active_pages.load(Ordering::SeqCst) + << (LOG_BYTES_IN_MALLOC_PAGE - crate::util::constants::LOG_BYTES_IN_PAGE); let meta_pages = self.metadata.calculate_reserved_pages(data_pages); data_pages + meta_pages } @@ -245,7 +246,7 @@ impl MallocSpace { self.active_pages.fetch_add(1, Ordering::SeqCst); } - page += BYTES_IN_PAGE; + page += BYTES_IN_MALLOC_PAGE; } } @@ -426,7 +427,7 @@ impl MallocSpace { let mut page = chunk_start; while page < chunk_start + BYTES_IN_CHUNK { self.unset_page_mark(page); - page += BYTES_IN_PAGE; + page += BYTES_IN_MALLOC_PAGE; } } @@ -452,17 +453,17 @@ impl MallocSpace { // Unset marks for free pages and update last_object_end if !empty_page_start.is_zero() { // unset marks for pages since last object - let current_page = object.to_address().align_down(BYTES_IN_PAGE); + let current_page = object.to_address().align_down(BYTES_IN_MALLOC_PAGE); let mut page = *empty_page_start; while page < current_page { self.unset_page_mark(page); - page += BYTES_IN_PAGE; + page += BYTES_IN_MALLOC_PAGE; } } // Update last_object_end - *empty_page_start = (obj_start + bytes).align_up(BYTES_IN_PAGE); + *empty_page_start = (obj_start + bytes).align_up(BYTES_IN_MALLOC_PAGE); false } @@ -683,7 +684,7 @@ impl MallocSpace { let mut page = empty_page_start; while page < chunk_start + BYTES_IN_CHUNK { self.unset_page_mark(page); - page += BYTES_IN_PAGE; + page += BYTES_IN_MALLOC_PAGE; } } } diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index 07c4b8e439..671fc8f984 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -48,7 +48,7 @@ pub(crate) const ACTIVE_CHUNK_METADATA_SPEC: SideMetadataSpec = // XXX: This metadata spec is currently unused as we need to add a performant way to calculate // how many pages are active in this metadata spec. 
Explore SIMD vectorization with 8-bit integers pub(crate) const ACTIVE_PAGE_METADATA_SPEC: SideMetadataSpec = - crate::util::metadata::side_metadata::spec_defs::MS_ACTIVE_PAGE; + crate::util::metadata::side_metadata::spec_defs::MALLOC_MS_ACTIVE_PAGE; pub(crate) const OFFSET_MALLOC_METADATA_SPEC: SideMetadataSpec = crate::util::metadata::side_metadata::spec_defs::MS_OFFSET_MALLOC; diff --git a/src/util/malloc/library.rs b/src/util/malloc/library.rs index 84e5bd3aef..77a3c0da62 100644 --- a/src/util/malloc/library.rs +++ b/src/util/malloc/library.rs @@ -13,12 +13,19 @@ pub use self::libc_malloc::*; #[cfg(feature = "malloc_mimalloc")] pub use self::mimalloc::*; +/// When we count page usage of library malloc, we assume they allocate in pages. For some malloc implementations, +/// they may use a larger page (e.g. mimalloc's 64K page). For libraries that we are not sure, we assume they use +/// normal 4k pages. +pub const BYTES_IN_MALLOC_PAGE: usize = 1 << LOG_BYTES_IN_MALLOC_PAGE; + // Different malloc libraries // TODO: We should conditinally include some methods in the module, such as posix extension and GNU extension. #[cfg(feature = "malloc_jemalloc")] mod jemalloc { + // Normal 4K page + pub const LOG_BYTES_IN_MALLOC_PAGE: u8 = crate::util::constants::LOG_BYTES_IN_PAGE; // ANSI C pub use jemalloc_sys::{calloc, free, malloc, realloc}; // Posix @@ -29,6 +36,8 @@ mod jemalloc { #[cfg(feature = "malloc_mimalloc")] mod mimalloc { + // MiMalloc 64K Page + pub const LOG_BYTES_IN_MALLOC_PAGE: u8 = 16; // ANSI C pub use mimalloc_sys::{ mi_calloc as calloc, mi_free as free, mi_malloc as malloc, mi_realloc as realloc, @@ -41,6 +50,8 @@ mod mimalloc { #[cfg(feature = "malloc_hoard")] mod hoard { + // Normal 4K page + pub const LOG_BYTES_IN_MALLOC_PAGE: u8 = crate::util::constants::LOG_BYTES_IN_PAGE; // ANSI C pub use hoard_sys::{calloc, free, malloc, realloc}; // Posix @@ -56,6 +67,8 @@ mod hoard { feature = "malloc_hoard", )))] mod libc_malloc { + // Normal 4K page + pub const LOG_BYTES_IN_MALLOC_PAGE: u8 = crate::util::constants::LOG_BYTES_IN_PAGE; // ANSI C pub use libc::{calloc, free, malloc, realloc}; // Posix diff --git a/src/util/metadata/side_metadata/spec_defs.rs b/src/util/metadata/side_metadata/spec_defs.rs index 52955fe4db..1d28c3bdde 100644 --- a/src/util/metadata/side_metadata/spec_defs.rs +++ b/src/util/metadata/side_metadata/spec_defs.rs @@ -66,7 +66,7 @@ define_side_metadata_specs!( define_side_metadata_specs!( last_spec_as LAST_LOCAL_SIDE_METADATA_SPEC, // Mark pages by (malloc) marksweep - MS_ACTIVE_PAGE = (global: false, log_num_of_bits: 3, log_bytes_in_region: LOG_BYTES_IN_PAGE as usize), + MALLOC_MS_ACTIVE_PAGE = (global: false, log_num_of_bits: 3, log_bytes_in_region: crate::util::malloc::library::LOG_BYTES_IN_MALLOC_PAGE as usize), // Record objects allocated with some offset MS_OFFSET_MALLOC = (global: false, log_num_of_bits: 0, log_bytes_in_region: LOG_MIN_OBJECT_SIZE as usize), // Mark lines by immix From 76fa950ff5092a74940ca3542509d3ea0c765854 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 13:23:45 +1100 Subject: [PATCH 04/15] Remove unneeded changes. 
--- src/policy/mallocspace/global.rs | 8 +- src/util/analysis/mod.rs | 24 ++-- src/util/analysis/reserved_pages.rs | 74 ------------- src/util/statistics/counter/mod.rs | 2 - src/util/statistics/counter/single_counter.rs | 104 ------------------ src/util/statistics/stats.rs | 17 --- 6 files changed, 14 insertions(+), 215 deletions(-) delete mode 100644 src/util/analysis/reserved_pages.rs delete mode 100644 src/util/statistics/counter/single_counter.rs diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index ff0efe1353..985d384141 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -506,10 +506,8 @@ impl MallocSpace { // We can do xor on bulk for mark bits and valid object bits. If the result is zero, that means // the objects in it are all alive (both valid object bit and mark bit is set), and we do not // need to do anything for the region. Otherwise, we will sweep each single object in the region. - // Note: Enabling this would result in inaccurate page accounting. When the result is zero, - // we do not know whether the last live object in the region could span pages. But we are not checking - // for that specific case, so the page accounting could be inaccurate. If you care about - // page accounting, probably disable this, and we will sweep object one by one. + // Note: Enabling this would result in inaccurate page accounting. We disable this by default, and + // we will sweep object one by one. const BULK_XOR_ON_MARK_BITS: bool = false; if BULK_XOR_ON_MARK_BITS { @@ -674,7 +672,7 @@ impl MallocSpace { } } - if let None = chunk_linear_scan_peek.peek() { + if chunk_linear_scan_peek.peek().is_none() { // This is for the edge case where we have a live object and then no other live // objects afterwards till the end of the chunk. For example consider chunk // 0x0-0x400000 where only one object at 0x100 is alive. We will unset page bits diff --git a/src/util/analysis/mod.rs b/src/util/analysis/mod.rs index 2cd463be63..4814afe880 100644 --- a/src/util/analysis/mod.rs +++ b/src/util/analysis/mod.rs @@ -7,12 +7,10 @@ use std::sync::{Arc, Mutex}; pub mod gc_count; pub mod obj_num; pub mod obj_size; -pub mod reserved_pages; -// use self::gc_count::GcCounter; -// use self::obj_num::ObjectCounter; -// use self::obj_size::PerSizeClassObjectCounter; -use self::reserved_pages::ReservedPagesCounter; +use self::gc_count::GcCounter; +use self::obj_num::ObjectCounter; +use self::obj_size::PerSizeClassObjectCounter; /// /// This trait exposes hooks for developers to implement their own analysis routines. @@ -59,14 +57,14 @@ impl AnalysisManager { // Initializing all routines. 
If you want to add a new routine, here is the place // to do so fn initialize_routines(&mut self, stats: &Stats) { - let rss_max_ctr = stats.new_single_counter("reserved_pages.max", true, true); - let rss_avg_ctr = stats.new_single_counter("reserved_pages.avg", true, true); - let rss = Arc::new(Mutex::new(ReservedPagesCounter::new( - true, - rss_max_ctr, - rss_avg_ctr, - ))); - self.add_analysis_routine(rss); + let ctr = stats.new_event_counter("obj.num", true, true); + let gc_ctr = stats.new_event_counter("gc.num", true, true); + let obj_num = Arc::new(Mutex::new(ObjectCounter::new(true, ctr))); + let gc_count = Arc::new(Mutex::new(GcCounter::new(true, gc_ctr))); + let obj_size = Arc::new(Mutex::new(PerSizeClassObjectCounter::new(true))); + self.add_analysis_routine(obj_num); + self.add_analysis_routine(gc_count); + self.add_analysis_routine(obj_size); } pub fn add_analysis_routine(&mut self, routine: Arc + Send>>) { diff --git a/src/util/analysis/reserved_pages.rs b/src/util/analysis/reserved_pages.rs deleted file mode 100644 index 50eb3547ce..0000000000 --- a/src/util/analysis/reserved_pages.rs +++ /dev/null @@ -1,74 +0,0 @@ -use crate::plan::marksweep::MarkSweep; -use crate::util::analysis::RtAnalysis; -use crate::util::statistics::counter::SingleCounter; -use crate::vm::{ActivePlan, VMBinding}; -use std::convert::TryInto; -use std::sync::{atomic::Ordering, Arc, Mutex}; - -pub struct ReservedPagesCounter { - running: bool, - reserved_pages_max: Mutex, - reserved_pages_max_ctr: Arc>, - reserved_pages_all_sum: Mutex, - reserved_pages_total: Mutex, - reserved_pages_avg_ctr: Arc>, -} - -impl ReservedPagesCounter { - pub fn new( - running: bool, - reserved_pages_max_ctr: Arc>, - reserved_pages_avg_ctr: Arc>, - ) -> Self { - Self { - running, - reserved_pages_max: Mutex::new(0), - reserved_pages_max_ctr, - reserved_pages_all_sum: Mutex::new(0), - reserved_pages_total: Mutex::new(0), - reserved_pages_avg_ctr, - } - } -} - -impl RtAnalysis for ReservedPagesCounter { - fn alloc_hook(&mut self, _size: usize, _align: usize, _offset: isize) { - if !self.running { - return; - } - - let plan = VM::VMActivePlan::global() - .downcast_ref::>() - .unwrap(); - let rss = plan.ms_space().active_pages.load(Ordering::SeqCst); - - { - let mut rss_max = self.reserved_pages_max.lock().unwrap(); - if rss > *rss_max { - *rss_max = rss; - self.reserved_pages_max_ctr - .lock() - .unwrap() - .set_count(rss.try_into().unwrap()); - } - } - - { - let mut rss_sum = self.reserved_pages_all_sum.lock().unwrap(); - let mut rss_total = self.reserved_pages_total.lock().unwrap(); - - *rss_sum += rss; - *rss_total += 1; - - let rss_avg = *rss_sum / *rss_total; - self.reserved_pages_avg_ctr - .lock() - .unwrap() - .set_count(rss_avg.try_into().unwrap()); - } - } - - fn set_running(&mut self, running: bool) { - self.running = running; - } -} diff --git a/src/util/statistics/counter/mod.rs b/src/util/statistics/counter/mod.rs index 358cd9938e..2fdf53026f 100644 --- a/src/util/statistics/counter/mod.rs +++ b/src/util/statistics/counter/mod.rs @@ -4,14 +4,12 @@ mod event_counter; mod long_counter; #[cfg(feature = "perf_counter")] mod perf_event; -mod single_counter; mod size_counter; pub use self::event_counter::EventCounter; pub use self::long_counter::{LongCounter, Timer}; #[cfg(feature = "perf_counter")] pub use self::perf_event::PerfEventDiffable; -pub use self::single_counter::SingleCounter; pub use self::size_counter::SizeCounter; /// An abstraction over how a specific Diffable value is counted diff --git 
a/src/util/statistics/counter/single_counter.rs b/src/util/statistics/counter/single_counter.rs deleted file mode 100644 index 2397c33e5e..0000000000 --- a/src/util/statistics/counter/single_counter.rs +++ /dev/null @@ -1,104 +0,0 @@ -use super::*; -use crate::util::statistics::stats::SharedStats; -use std::sync::Arc; - -pub struct SingleCounter { - name: String, - pub implicitly_start: bool, - merge_phases: bool, - current_count: u64, - running: bool, - stats: Arc, -} - -impl SingleCounter { - pub fn new( - name: String, - stats: Arc, - implicitly_start: bool, - merge_phases: bool, - ) -> Self { - SingleCounter { - name, - implicitly_start, - merge_phases, - current_count: 0, - running: false, - stats, - } - } - - pub fn set_count(&mut self, value: u64) { - if self.running { - self.current_count = value; - } - } - - pub fn print_current(&self) { - self.print_value(self.current_count); - } - - fn print_value(&self, value: u64) { - print!("{}", value); - } -} - -impl Counter for SingleCounter { - fn start(&mut self) { - if !self.stats.get_gathering_stats() { - return; - } - debug_assert!(!self.running); - self.running = true; - } - - fn stop(&mut self) { - if !self.stats.get_gathering_stats() { - return; - } - debug_assert!(self.running); - self.running = false; - } - - /** - * The phase has changed (from GC to mutator or mutator to GC). - * Take action with respect to the last phase if necessary. - */ - fn phase_change(&mut self, _old_phase: usize) {} - - fn print_count(&self, _phase: usize) { - self.print_value(self.current_count); - } - - fn get_total(&self, _other: Option) -> u64 { - self.current_count - } - - fn print_total(&self, _mutator: Option) { - self.print_value(self.current_count); - } - - fn print_min(&self, _mutator: bool) { - self.print_value(self.current_count); - } - - fn print_max(&self, _mutator: bool) { - self.print_value(self.current_count); - } - - fn print_last(&self) { - self.print_value(self.current_count); - } - - fn merge_phases(&self) -> bool { - self.merge_phases - } - - fn implicitly_start(&self) -> bool { - self.implicitly_start - } - - fn name(&self) -> &String { - &self.name - } -} diff --git a/src/util/statistics/stats.rs b/src/util/statistics/stats.rs index 1097bdc681..6cf21985ff 100644 --- a/src/util/statistics/stats.rs +++ b/src/util/statistics/stats.rs @@ -104,23 +104,6 @@ impl Stats { } } - pub fn new_single_counter( - &self, - name: &str, - implicit_start: bool, - merge_phases: bool, - ) -> Arc> { - let mut guard = self.counters.lock().unwrap(); - let counter = Arc::new(Mutex::new(SingleCounter::new( - name.to_string(), - self.shared.clone(), - implicit_start, - merge_phases, - ))); - guard.push(counter.clone()); - counter - } - pub fn new_event_counter( &self, name: &str, From 65935dadacf4ec4f43ec19af40fdcc85c7d945c2 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 13:27:27 +1100 Subject: [PATCH 05/15] Minor clean up --- src/plan/mod.rs | 2 +- src/policy/mallocspace/global.rs | 4 +++- src/util/alloc/allocator.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/plan/mod.rs b/src/plan/mod.rs index acf74653c3..5888436f50 100644 --- a/src/plan/mod.rs +++ b/src/plan/mod.rs @@ -41,7 +41,7 @@ pub use tracing::{ObjectQueue, ObjectsClosure, VectorObjectQueue, VectorQueue}; mod generational; mod immix; mod markcompact; -pub mod marksweep; +mod marksweep; mod nogc; mod pageprotect; mod semispace; diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 985d384141..73adac2260 100644 --- 
a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -177,9 +177,11 @@ impl Space for MallocSpace { } fn reserved_pages(&self) -> usize { + use crate::util::constants::LOG_BYTES_IN_PAGE; + debug_assert!(LOG_BYTES_IN_MALLOC_PAGE >= LOG_BYTES_IN_PAGE); // TODO: figure out a better way to get the total number of active pages from the metadata let data_pages = self.active_pages.load(Ordering::SeqCst) - << (LOG_BYTES_IN_MALLOC_PAGE - crate::util::constants::LOG_BYTES_IN_PAGE); + << (LOG_BYTES_IN_MALLOC_PAGE - LOG_BYTES_IN_PAGE); let meta_pages = self.metadata.calculate_reserved_pages(data_pages); data_pages + meta_pages } diff --git a/src/util/alloc/allocator.rs b/src/util/alloc/allocator.rs index a87f42fe10..7f35496aa7 100644 --- a/src/util/alloc/allocator.rs +++ b/src/util/alloc/allocator.rs @@ -244,7 +244,7 @@ pub trait Allocator: Downcast { // This is the allocation hook for the analysis trait. If you want to call // an analysis counter specific allocation hook, then here is the place to do so #[cfg(feature = "analysis")] - if _allocation_bytes > *plan.options.stress_factor { + if _allocation_bytes > *plan.options.analysis_factor { trace!( "Analysis: allocation_bytes = {} more than analysis_factor = {}", _allocation_bytes, From 203becbeb4e999a18e773026e0a2feedc82c681f Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 15:07:08 +1100 Subject: [PATCH 06/15] Fix style check --- src/policy/mallocspace/global.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 73adac2260..44e8a5f330 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -176,6 +176,7 @@ impl Space for MallocSpace { "MallocSpace" } + #[allow(clippy::assertions_on_constants)] fn reserved_pages(&self) -> usize { use crate::util::constants::LOG_BYTES_IN_PAGE; debug_assert!(LOG_BYTES_IN_MALLOC_PAGE >= LOG_BYTES_IN_PAGE); From 0f73a12853cd252bcec1f80e37566d42353381cb Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 27 Oct 2022 15:13:16 +1100 Subject: [PATCH 07/15] Fix API check --- .github/workflows/api-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/api-check.yml b/.github/workflows/api-check.yml index 2af9f55dae..e4666d0914 100644 --- a/.github/workflows/api-check.yml +++ b/.github/workflows/api-check.yml @@ -28,4 +28,4 @@ jobs: - name: Install cargo-public-api run: cargo install cargo-public-api - name: API Diff - run: cargo public-api --diff-git-checkouts ${GITHUB_BASE_REF} ${{ github.event.pull_request.head.sha }} --deny=all + run: cargo public-api --diff-git-checkouts origin/${GITHUB_BASE_REF} ${{ github.event.pull_request.head.sha }} --deny=all From 1bd15ad7b23c0a4035bb887808a07bc91271a705 Mon Sep 17 00:00:00 2001 From: Kunal Sareen Date: Thu, 27 Oct 2022 04:30:49 +0000 Subject: [PATCH 08/15] Fix concurrency bug in accessing active page metadata Use a compare_exchange for accessing active page metadata instead of load followed by a store --- src/plan/mutator_context.rs | 4 ++-- src/policy/copyspace.rs | 2 +- src/policy/immix/immixspace.rs | 2 +- src/policy/immortalspace.rs | 2 +- src/policy/largeobjectspace.rs | 2 +- src/policy/lockfreeimmortalspace.rs | 2 +- src/policy/mallocspace/global.rs | 24 ++++++++++++++++-------- src/policy/mallocspace/metadata.rs | 6 ++++++ src/policy/markcompactspace.rs | 2 +- 9 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/plan/mutator_context.rs b/src/plan/mutator_context.rs index 
4bad0cbb5e..d9257a7d74 100644 --- a/src/plan/mutator_context.rs +++ b/src/plan/mutator_context.rs @@ -97,13 +97,13 @@ impl MutatorContext for Mutator { } // Note that this method is slow, and we expect VM bindings that care about performance to implement allocation fastpath sequence in their bindings. - fn post_alloc(&mut self, refer: ObjectReference, bytes: usize, allocator: AllocationSemantics) { + fn post_alloc(&mut self, refer: ObjectReference, _bytes: usize, allocator: AllocationSemantics) { unsafe { self.allocators .get_allocator_mut(self.config.allocator_mapping[allocator]) } .get_space() - .initialize_object_metadata(refer, bytes, true) + .initialize_object_metadata(refer, true) } fn get_tls(&self) -> VMMutatorThread { diff --git a/src/policy/copyspace.rs b/src/policy/copyspace.rs index d56514e4b2..34368e01f7 100644 --- a/src/policy/copyspace.rs +++ b/src/policy/copyspace.rs @@ -45,7 +45,7 @@ impl SFT for CopySpace { !self.is_from_space() } - fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/immix/immixspace.rs b/src/policy/immix/immixspace.rs index 99a4249a1a..f625275312 100644 --- a/src/policy/immix/immixspace.rs +++ b/src/policy/immix/immixspace.rs @@ -75,7 +75,7 @@ impl SFT for ImmixSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/immortalspace.rs b/src/policy/immortalspace.rs index aaefe148b4..68d699b295 100644 --- a/src/policy/immortalspace.rs +++ b/src/policy/immortalspace.rs @@ -52,7 +52,7 @@ impl SFT for ImmortalSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { let old_value = VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.load_atomic::( object, None, diff --git a/src/policy/largeobjectspace.rs b/src/policy/largeobjectspace.rs index b2bd9f7f0b..cbb7d9a76a 100644 --- a/src/policy/largeobjectspace.rs +++ b/src/policy/largeobjectspace.rs @@ -50,7 +50,7 @@ impl SFT for LargeObjectSpace { fn is_sane(&self) -> bool { true } - fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool) { let old_value = VM::VMObjectModel::LOCAL_LOS_MARK_NURSERY_SPEC.load_atomic::( object, None, diff --git a/src/policy/lockfreeimmortalspace.rs b/src/policy/lockfreeimmortalspace.rs index f0c6e3c5ed..c836b32982 100644 --- a/src/policy/lockfreeimmortalspace.rs +++ b/src/policy/lockfreeimmortalspace.rs @@ -57,7 +57,7 @@ impl SFT for LockFreeImmortalSpace { fn is_sane(&self) -> bool { unimplemented!() } - fn initialize_object_metadata(&self, _object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, _object: ObjectReference, _alloc: bool) { #[cfg(feature = "global_alloc_bit")] crate::util::alloc_bit::set_alloc_bit(_object); } diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 44e8a5f330..e8bac6b235 100644 --- a/src/policy/mallocspace/global.rs +++ 
b/src/policy/mallocspace/global.rs @@ -88,10 +88,8 @@ impl SFT for MallocSpace { has_object_alloced_by_malloc(addr) } - fn initialize_object_metadata(&self, object: ObjectReference, bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { trace!("initialize_object_metadata for object {}", object); - let page_addr = conversions::page_align_down(object.to_address()); - self.set_page_mark(page_addr, bytes); set_alloc_bit(object); } @@ -241,11 +239,18 @@ impl MallocSpace { } } - fn set_page_mark(&self, page_addr: Address, size: usize) { - let mut page = page_addr; - while page < page_addr + size { - if !is_page_marked(page) { - set_page_mark(page); + fn set_page_mark(&self, start: Address, size: usize) { + // Set first page + let mut page = start.align_down(BYTES_IN_MALLOC_PAGE); + if compare_exchange_page_mark(page) { + self.active_pages.fetch_add(1, Ordering::SeqCst); + } + + page += BYTES_IN_MALLOC_PAGE; + + // It is important to go to the end of the object, which may span a page boundary + while page < start + size { + if compare_exchange_page_mark(page) { self.active_pages.fetch_add(1, Ordering::SeqCst); } @@ -282,6 +287,9 @@ impl MallocSpace { assert!(crate::mmtk::SFT_MAP.has_sft_entry(address)); // make sure the address is okay with our SFT map unsafe { crate::mmtk::SFT_MAP.update(self, address, actual_size) }; } + + // Set page marks for current object + self.set_page_mark(address, actual_size); self.active_bytes.fetch_add(actual_size, Ordering::SeqCst); if is_offset_malloc { diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index 671fc8f984..d016fb9354 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -173,6 +173,11 @@ pub unsafe fn is_marked_unsafe(object: ObjectReference) -> bool { VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.load::(object, None) == 1 } +#[allow(unused)] +pub(super) fn compare_exchange_page_mark(page_addr: Address) -> bool { + side_metadata::compare_exchange_atomic(&ACTIVE_PAGE_METADATA_SPEC, page_addr, 0, 1, Ordering::SeqCst, Ordering::SeqCst) +} + #[allow(unused)] pub(super) fn is_page_marked(page_addr: Address) -> bool { ACTIVE_PAGE_METADATA_SPEC.load_atomic::(page_addr, Ordering::SeqCst) == 1 @@ -217,6 +222,7 @@ pub fn unset_alloc_bit(object: ObjectReference) { alloc_bit::unset_alloc_bit(object); } +#[allow(unused)] pub(super) fn set_page_mark(page_addr: Address) { ACTIVE_PAGE_METADATA_SPEC.store_atomic::(page_addr, 1, Ordering::SeqCst); } diff --git a/src/policy/markcompactspace.rs b/src/policy/markcompactspace.rs index 8e7c12d207..a6f3da1dd7 100644 --- a/src/policy/markcompactspace.rs +++ b/src/policy/markcompactspace.rs @@ -56,7 +56,7 @@ impl SFT for MarkCompactSpace { true } - fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { crate::util::alloc_bit::set_alloc_bit(object); } From a74ca187e422ef08e32b36a874e782fcdf1b1763 Mon Sep 17 00:00:00 2001 From: Kunal Sareen Date: Thu, 27 Oct 2022 05:38:08 +0000 Subject: [PATCH 09/15] Fix build errors --- src/plan/mutator_context.rs | 7 ++++++- src/policy/mallocspace/metadata.rs | 4 +++- src/policy/sft.rs | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/plan/mutator_context.rs b/src/plan/mutator_context.rs index d9257a7d74..59dfd9f21d 100644 --- a/src/plan/mutator_context.rs +++ b/src/plan/mutator_context.rs @@ -97,7 +97,12 @@ impl MutatorContext for 
Mutator { } // Note that this method is slow, and we expect VM bindings that care about performance to implement allocation fastpath sequence in their bindings. - fn post_alloc(&mut self, refer: ObjectReference, _bytes: usize, allocator: AllocationSemantics) { + fn post_alloc( + &mut self, + refer: ObjectReference, + _bytes: usize, + allocator: AllocationSemantics, + ) { unsafe { self.allocators .get_allocator_mut(self.config.allocator_mapping[allocator]) diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index d016fb9354..ab68008eb3 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -175,7 +175,9 @@ pub unsafe fn is_marked_unsafe(object: ObjectReference) -> bool { #[allow(unused)] pub(super) fn compare_exchange_page_mark(page_addr: Address) -> bool { - side_metadata::compare_exchange_atomic(&ACTIVE_PAGE_METADATA_SPEC, page_addr, 0, 1, Ordering::SeqCst, Ordering::SeqCst) + ACTIVE_PAGE_METADATA_SPEC + .compare_exchange_atomic::(page_addr, 0, 1, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() } #[allow(unused)] diff --git a/src/policy/sft.rs b/src/policy/sft.rs index 8b42115ded..7036d34f16 100644 --- a/src/policy/sft.rs +++ b/src/policy/sft.rs @@ -82,7 +82,7 @@ pub trait SFT { } /// Initialize object metadata (in the header, or in the side metadata). - fn initialize_object_metadata(&self, object: ObjectReference, bytes: usize, alloc: bool); + fn initialize_object_metadata(&self, object: ObjectReference, alloc: bool); /// Trace objects through SFT. This along with [`SFTProcessEdges`](mmtk/scheduler/gc_work/SFTProcessEdges) /// provides an easy way for most plans to trace objects without the need to implement any plan-specific @@ -151,7 +151,7 @@ impl SFT for EmptySpaceSFT { false } - fn initialize_object_metadata(&self, object: ObjectReference, _bytes: usize, _alloc: bool) { + fn initialize_object_metadata(&self, object: ObjectReference, _alloc: bool) { panic!( "Called initialize_object_metadata() on {:x}, which maps to an empty space", object From 710370a1a16f3cb535e95533f3903c3e9ab53ece Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 31 Oct 2022 10:26:19 +1100 Subject: [PATCH 10/15] Minor clean up --- src/policy/mallocspace/global.rs | 13 ++++++------- src/policy/mallocspace/metadata.rs | 6 ++++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index e8bac6b235..e61fb2cb16 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -177,8 +177,8 @@ impl Space for MallocSpace { #[allow(clippy::assertions_on_constants)] fn reserved_pages(&self) -> usize { use crate::util::constants::LOG_BYTES_IN_PAGE; + // Assume malloc pages are no smaller than 4K pages. Otherwise the substraction below will fail. debug_assert!(LOG_BYTES_IN_MALLOC_PAGE >= LOG_BYTES_IN_PAGE); - // TODO: figure out a better way to get the total number of active pages from the metadata let data_pages = self.active_pages.load(Ordering::SeqCst) << (LOG_BYTES_IN_MALLOC_PAGE - LOG_BYTES_IN_PAGE); let meta_pages = self.metadata.calculate_reserved_pages(data_pages); @@ -239,18 +239,15 @@ impl MallocSpace { } } + // This is called by mutators. We have to make sure this is correct if more than one threads are calling this method for + // objects in the same page. 
fn set_page_mark(&self, start: Address, size: usize) { // Set first page let mut page = start.align_down(BYTES_IN_MALLOC_PAGE); - if compare_exchange_page_mark(page) { - self.active_pages.fetch_add(1, Ordering::SeqCst); - } - - page += BYTES_IN_MALLOC_PAGE; // It is important to go to the end of the object, which may span a page boundary while page < start + size { - if compare_exchange_page_mark(page) { + if compare_exchange_set_page_mark(page) { self.active_pages.fetch_add(1, Ordering::SeqCst); } @@ -258,6 +255,8 @@ impl MallocSpace { } } + // This is called during GC when we sweep chunks. We have divided work by chunks, and each chunk is only swept by one GC thread. + // We can assume there is no race for the same page. fn unset_page_mark(&self, page_addr: Address) { if unsafe { is_page_marked_unsafe(page_addr) } { self.active_pages.fetch_sub(1, Ordering::SeqCst); diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index ab68008eb3..af4242da63 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -173,8 +173,10 @@ pub unsafe fn is_marked_unsafe(object: ObjectReference) -> bool { VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.load::(object, None) == 1 } -#[allow(unused)] -pub(super) fn compare_exchange_page_mark(page_addr: Address) -> bool { +/// Set the page mark from 0 to 1. Return true if we set it successfully in this call. +pub(super) fn compare_exchange_set_page_mark(page_addr: Address) -> bool { + // The spec has 1 byte per each page. So it won't be the case that other threads may race and access other bits for the spec. + // If the compare-exchange fails, we know the byte was set to 1 before this call. ACTIVE_PAGE_METADATA_SPEC .compare_exchange_atomic::(page_addr, 0, 1, Ordering::SeqCst, Ordering::SeqCst) .is_ok() From a834d57610370ebc310b6eece3d7b45847051a5a Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 1 Nov 2022 11:05:30 +1100 Subject: [PATCH 11/15] Optimize set/unset page mark, according to the reviews --- src/policy/mallocspace/global.rs | 71 +++++++++++++++++------------- src/policy/mallocspace/metadata.rs | 5 +++ 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index e61fb2cb16..f12b8de458 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -5,6 +5,7 @@ use crate::policy::sft::GCWorkerMutRef; use crate::policy::sft::SFT; use crate::policy::space::CommonSpace; use crate::util::heap::PageResource; +use crate::util::malloc::library::{BYTES_IN_MALLOC_PAGE, LOG_BYTES_IN_MALLOC_PAGE}; use crate::util::malloc::malloc_ms_util::*; use crate::util::metadata::side_metadata::{ SideMetadataContext, SideMetadataSanity, SideMetadataSpec, @@ -17,14 +18,12 @@ use crate::util::{conversions, metadata}; use crate::vm::VMBinding; use crate::vm::{ActivePlan, Collection, ObjectModel}; use crate::{policy::space::Space, util::heap::layout::vm_layout_constants::BYTES_IN_CHUNK}; +#[cfg(debug_assertions)] +use std::collections::HashMap; use std::marker::PhantomData; #[cfg(debug_assertions)] use std::sync::atomic::AtomicU32; use std::sync::atomic::{AtomicUsize, Ordering}; -// only used for debugging -use crate::util::malloc::library::{BYTES_IN_MALLOC_PAGE, LOG_BYTES_IN_MALLOC_PAGE}; -#[cfg(debug_assertions)] -use std::collections::HashMap; #[cfg(debug_assertions)] use std::sync::Mutex; @@ -239,30 +238,47 @@ impl MallocSpace { } } - // This is called by mutators. 
We have to make sure this is correct if more than one threads are calling this method for - // objects in the same page. + /// Set multiple pages, starting from the given address, for the given size, and increase the active page count if we set any page mark in the region. + /// This is a thread-safe method, and can be used during mutator phase when mutators may access the same page. + /// Performance-wise, this method may impose overhead, as we are doing a compare-exchange for every page in the range. fn set_page_mark(&self, start: Address, size: usize) { // Set first page let mut page = start.align_down(BYTES_IN_MALLOC_PAGE); + let mut used_pages = 0; // It is important to go to the end of the object, which may span a page boundary while page < start + size { if compare_exchange_set_page_mark(page) { - self.active_pages.fetch_add(1, Ordering::SeqCst); + used_pages += 1; } page += BYTES_IN_MALLOC_PAGE; } + + if used_pages != 0 { + self.active_pages.fetch_add(used_pages, Ordering::SeqCst); + } } - // This is called during GC when we sweep chunks. We have divided work by chunks, and each chunk is only swept by one GC thread. - // We can assume there is no race for the same page. - fn unset_page_mark(&self, page_addr: Address) { - if unsafe { is_page_marked_unsafe(page_addr) } { - self.active_pages.fetch_sub(1, Ordering::SeqCst); + /// Unset multiple pages, starting from the given address, for the given size, and decrease the active page count if we unset any page mark in the region + /// + /// # Safety + /// We need to ensure that only one GC thread is accessing the range. + unsafe fn unset_page_mark(&self, start: Address, size: usize) { + debug_assert!(start.is_aligned_to(BYTES_IN_MALLOC_PAGE)); + let mut page = start; + let mut cleared_pages = 0; + while page < start + size { + if is_page_marked_unsafe(page) { + cleared_pages += 1; + } + page += BYTES_IN_MALLOC_PAGE; } - unsafe { unset_page_mark_unsafe(page_addr) }; + if cleared_pages != 0 { + self.active_pages.fetch_sub(cleared_pages, Ordering::SeqCst); + bulk_zero_page_mark(start, size); + } } pub fn alloc(&self, tls: VMThread, size: usize, align: usize, offset: isize) -> Address { @@ -433,12 +449,8 @@ impl MallocSpace { unsafe { unset_chunk_mark_unsafe(chunk_start) }; // Clear the SFT entry unsafe { crate::mmtk::SFT_MAP.clear(chunk_start) }; - - let mut page = chunk_start; - while page < chunk_start + BYTES_IN_CHUNK { - self.unset_page_mark(page); - page += BYTES_IN_MALLOC_PAGE; - } + // Clear the page marks - we are the only GC thread that is accessing this chunk + unsafe { self.unset_page_mark(chunk_start, BYTES_IN_CHUNK) }; } /// Sweep an object if it is dead, and unset page marks for empty pages before this object. @@ -464,12 +476,10 @@ impl MallocSpace { if !empty_page_start.is_zero() { // unset marks for pages since last object let current_page = object.to_address().align_down(BYTES_IN_MALLOC_PAGE); - - let mut page = *empty_page_start; - while page < current_page { - self.unset_page_mark(page); - page += BYTES_IN_MALLOC_PAGE; - } + // we are the only GC thread that is accessing this chunk + unsafe { + self.unset_page_mark(*empty_page_start, current_page - *empty_page_start) + }; } // Update last_object_end @@ -689,11 +699,12 @@ impl MallocSpace { // for 0x0-0x100 but then not unset it for the pages after 0x100. 
This if block // will take care of this edge case if !empty_page_start.is_zero() { - let mut page = empty_page_start; - while page < chunk_start + BYTES_IN_CHUNK { - self.unset_page_mark(page); - page += BYTES_IN_MALLOC_PAGE; - } + unsafe { + self.unset_page_mark( + empty_page_start, + chunk_start + BYTES_IN_CHUNK - empty_page_start, + ) + }; } } } diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index af4242da63..45c8a238ea 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -256,10 +256,15 @@ pub unsafe fn unset_mark_bit(object: ObjectReference) { VM::VMObjectModel::LOCAL_MARK_BIT_SPEC.store::(object, 0, None); } +#[allow(unused)] pub(super) unsafe fn unset_page_mark_unsafe(page_addr: Address) { ACTIVE_PAGE_METADATA_SPEC.store::(page_addr, 0) } +pub(super) unsafe fn bulk_zero_page_mark(page_addr: Address, size: usize) { + ACTIVE_PAGE_METADATA_SPEC.bzero_metadata(page_addr, size) +} + pub(super) unsafe fn unset_chunk_mark_unsafe(chunk_start: Address) { ACTIVE_CHUNK_METADATA_SPEC.store::(chunk_start, 0) } From 70a17ed3094c7c2233e9e8c990928b3afba05347 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 1 Nov 2022 12:11:00 +1100 Subject: [PATCH 12/15] Install mdbook using stable toolchain --- .github/scripts/ci-doc.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/ci-doc.sh b/.github/scripts/ci-doc.sh index dde33c298d..f650685bd4 100755 --- a/.github/scripts/ci-doc.sh +++ b/.github/scripts/ci-doc.sh @@ -20,6 +20,7 @@ if ! cat $project_root/src/plan/mod.rs | grep -q "pub mod mygc;"; then fi cargo build -cargo install mdbook +# Install mdbook using the stable toolchain (mdbook uses scoped-tls which requires rust 1.59.0) +cargo +stable install mdbook mdbook build $project_root/docs/portingguide mdbook build $project_root/docs/tutorial \ No newline at end of file From 8b978b20a0be9618ccb03b3538ec977e9f6ded3e Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 1 Nov 2022 14:32:11 +1100 Subject: [PATCH 13/15] Avoid subtraction overflow --- src/policy/mallocspace/global.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index f12b8de458..43ff207554 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -476,10 +476,12 @@ impl MallocSpace { if !empty_page_start.is_zero() { // unset marks for pages since last object let current_page = object.to_address().align_down(BYTES_IN_MALLOC_PAGE); - // we are the only GC thread that is accessing this chunk - unsafe { - self.unset_page_mark(*empty_page_start, current_page - *empty_page_start) - }; + if current_page > *empty_page_start { + // we are the only GC thread that is accessing this chunk + unsafe { + self.unset_page_mark(*empty_page_start, current_page - *empty_page_start) + }; + } } // Update last_object_end From 39b87f791ab5c1552fb4ee9673c0cee29fc6fb87 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 1 Nov 2022 18:46:16 +1300 Subject: [PATCH 14/15] Avoid subtraction overflow --- src/policy/mallocspace/global.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs index 43ff207554..e49da46842 100644 --- a/src/policy/mallocspace/global.rs +++ b/src/policy/mallocspace/global.rs @@ -700,7 +700,7 @@ impl MallocSpace { // 0x0-0x400000 where only one object at 0x100 is alive. 
We will unset page bits
             // for 0x0-0x100 but then not unset it for the pages after 0x100. This if block
             // will take care of this edge case
-            if !empty_page_start.is_zero() {
+            if !empty_page_start.is_zero() && empty_page_start < chunk_start + BYTES_IN_CHUNK {
                 unsafe {
                     self.unset_page_mark(

From c473fbd18a5763dc93c4c7fb558e56a8626388d5 Mon Sep 17 00:00:00 2001
From: Yi Lin
Date: Thu, 3 Nov 2022 10:59:39 +1100
Subject: [PATCH 15/15] Unset each page mark rather than bulk zero. No need to use peek for iterating objects.

---
 src/policy/mallocspace/global.rs   | 37 +++++++++++++++---------------
 src/policy/mallocspace/metadata.rs |  4 ----
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/src/policy/mallocspace/global.rs b/src/policy/mallocspace/global.rs
index e49da46842..c861bf240d 100644
--- a/src/policy/mallocspace/global.rs
+++ b/src/policy/mallocspace/global.rs
@@ -266,18 +266,22 @@ impl<VM: VMBinding> MallocSpace<VM> {
     /// We need to ensure that only one GC thread is accessing the range.
     unsafe fn unset_page_mark(&self, start: Address, size: usize) {
         debug_assert!(start.is_aligned_to(BYTES_IN_MALLOC_PAGE));
+        debug_assert!(crate::util::conversions::raw_is_aligned(
+            size,
+            BYTES_IN_MALLOC_PAGE
+        ));
         let mut page = start;
         let mut cleared_pages = 0;
         while page < start + size {
             if is_page_marked_unsafe(page) {
                 cleared_pages += 1;
+                unset_page_mark_unsafe(page);
             }
             page += BYTES_IN_MALLOC_PAGE;
         }
 
         if cleared_pages != 0 {
             self.active_pages.fetch_sub(cleared_pages, Ordering::SeqCst);
-            bulk_zero_page_mark(start, size);
         }
     }
 
@@ -661,9 +665,8 @@ impl<VM: VMBinding> MallocSpace<VM> {
             MallocObjectSize<VM>,
             false,
         >::new(chunk_start, chunk_start + BYTES_IN_CHUNK);
-        let mut chunk_linear_scan_peek = chunk_linear_scan.peekable();
-        while let Some(object) = chunk_linear_scan_peek.next() {
+        for object in chunk_linear_scan {
             #[cfg(debug_assertions)]
             if ASSERT_ALLOCATION {
                 let (obj_start, _, bytes) = Self::get_malloc_addr_size(object);
@@ -693,27 +696,23 @@ impl<VM: VMBinding> MallocSpace<VM> {
                     live_bytes += bytes;
                 }
             }
-
-            if chunk_linear_scan_peek.peek().is_none() {
-                // This is for the edge case where we have a live object and then no other live
-                // objects afterwards till the end of the chunk. For example consider chunk
-                // 0x0-0x400000 where only one object at 0x100 is alive. We will unset page bits
-                // for 0x0-0x100 but then not unset it for the pages after 0x100. This if block
-                // will take care of this edge case
-                if !empty_page_start.is_zero() && empty_page_start < chunk_start + BYTES_IN_CHUNK {
-                    unsafe {
-                        self.unset_page_mark(
-                            empty_page_start,
-                            chunk_start + BYTES_IN_CHUNK - empty_page_start,
-                        )
-                    };
-                }
-            }
         }
 
         // If we never updated empty_page_start, the entire chunk is empty.
        if empty_page_start.is_zero() {
             self.clean_up_empty_chunk(chunk_start);
+        } else if empty_page_start < chunk_start + BYTES_IN_CHUNK {
+            // This is for the edge case where we have a live object and then no other live
+            // objects afterwards till the end of the chunk. For example consider chunk
+            // 0x0-0x400000 where only one object at 0x100 is alive. We will unset page bits
+            // for 0x0-0x100 but then not unset it for the pages after 0x100. This checks
+            // if we have empty pages at the end of a chunk that need to be cleared.
+ unsafe { + self.unset_page_mark( + empty_page_start, + chunk_start + BYTES_IN_CHUNK - empty_page_start, + ) + }; } #[cfg(debug_assertions)] diff --git a/src/policy/mallocspace/metadata.rs b/src/policy/mallocspace/metadata.rs index 45c8a238ea..3c5b5989e2 100644 --- a/src/policy/mallocspace/metadata.rs +++ b/src/policy/mallocspace/metadata.rs @@ -261,10 +261,6 @@ pub(super) unsafe fn unset_page_mark_unsafe(page_addr: Address) { ACTIVE_PAGE_METADATA_SPEC.store::(page_addr, 0) } -pub(super) unsafe fn bulk_zero_page_mark(page_addr: Address, size: usize) { - ACTIVE_PAGE_METADATA_SPEC.bzero_metadata(page_addr, size) -} - pub(super) unsafe fn unset_chunk_mark_unsafe(chunk_start: Address) { ACTIVE_CHUNK_METADATA_SPEC.store::(chunk_start, 0) }
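
Note on the technique used throughout the series above: the space keeps one mark byte per malloc page, mutators mark pages with a 0 -> 1 compare-exchange so that a page is only ever counted once even when several threads allocate into it, and the sweep path (which owns a chunk exclusively) clears marks and decrements the counter. The code below is a minimal, self-contained sketch of that accounting idea and is not MMTk code: the PageTable type, the fixed PAGE_SIZE/NUM_PAGES constants and the plain AtomicU8 array are stand-ins (assumptions) for MallocSpace::active_pages and the ACTIVE_PAGE_METADATA_SPEC side metadata.

use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};

const PAGE_SIZE: usize = 4096; // stand-in for BYTES_IN_MALLOC_PAGE
const NUM_PAGES: usize = 1024; // the sketch covers a fixed 4 MB region

struct PageTable {
    marks: Vec<AtomicU8>,      // one mark byte per page, like the side metadata spec
    active_pages: AtomicUsize, // mirrors MallocSpace::active_pages
}

impl PageTable {
    fn new() -> Self {
        Self {
            marks: (0..NUM_PAGES).map(|_| AtomicU8::new(0)).collect(),
            active_pages: AtomicUsize::new(0),
        }
    }

    // Mutator path: mark every page the object [start, start + size) overlaps.
    // Racing callers are fine because only the thread that wins the 0 -> 1
    // compare-exchange for a page adds it to the count.
    fn set_page_mark(&self, start: usize, size: usize) {
        let mut page = (start / PAGE_SIZE) * PAGE_SIZE; // align down to the page
        let mut newly_marked = 0;
        while page < start + size {
            let won = self.marks[page / PAGE_SIZE]
                .compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst)
                .is_ok();
            if won {
                newly_marked += 1;
            }
            page += PAGE_SIZE;
        }
        if newly_marked != 0 {
            self.active_pages.fetch_add(newly_marked, Ordering::SeqCst);
        }
    }

    // Sweep path: the caller must have exclusive access to the range, mirroring
    // the `unsafe fn unset_page_mark` contract in the patches; every previously
    // marked page is cleared and subtracted from the count.
    fn unset_page_mark(&self, start: usize, size: usize) {
        assert!(start % PAGE_SIZE == 0 && size % PAGE_SIZE == 0);
        let mut cleared = 0;
        let mut page = start;
        while page < start + size {
            if self.marks[page / PAGE_SIZE].swap(0, Ordering::SeqCst) == 1 {
                cleared += 1;
            }
            page += PAGE_SIZE;
        }
        if cleared != 0 {
            self.active_pages.fetch_sub(cleared, Ordering::SeqCst);
        }
    }
}

fn main() {
    let table = PageTable::new();
    // A 6000-byte object starting at offset 100 spans pages 0 and 1.
    table.set_page_mark(100, 6000);
    assert_eq!(table.active_pages.load(Ordering::SeqCst), 2);
    // Sweeping the first two pages brings the count back to zero.
    table.unset_page_mark(0, 2 * PAGE_SIZE);
    assert_eq!(table.active_pages.load(Ordering::SeqCst), 0);
}

Using a whole byte per page (rather than a bit) is what makes the lock-free update safe here: no two pages share a metadata byte, so marking one page can never clobber a neighbouring page's mark, which is the property the comment added to the page-mark metadata in PATCH 10 relies on.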