Skip to content

Commit a4a4b1d

Browse files
colesbury authored and DinoV committed
mimalloc: faster block visitor
1 parent 14ad422 commit a4a4b1d

File tree

3 files changed

+142
-46
lines changed

3 files changed

+142
-46
lines changed

Include/mimalloc/mimalloc/internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap);
179179
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
180180
void _mi_heap_unsafe_destroy_all(void);
181181
void _mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from);
182+
bool _mi_abandoned_visit_blocks(int page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
182183

183184
// "stats.c"
184185
void _mi_stats_done(mi_stats_t* stats);
@@ -456,6 +457,18 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
456457
return mi_slice_to_page(slice);
457458
}
458459

460+
static inline const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) {
461+
return &segment->slices[segment->slice_entries];
462+
}
463+
464+
// Begin iterating the slices of `segment`: stores the exclusive end in `*end`
// and returns the first slice after the initial segment-metadata page.
static inline mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
  *end = mi_segment_slices_end(segment);
  mi_slice_t* first = &segment->slices[0];
  mi_assert_internal(first->slice_count > 0 && first->xblock_size > 0); // segment allocated page
  return first + first->slice_count; // skip the first segment allocated page
}
471+
459472
// Quick page start for initialized pages
460473
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
461474
return _mi_segment_page_start(segment, page, page_size);

Objects/mimalloc/heap.c

Lines changed: 129 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
2626
// Visit all pages in a heap; returns `false` if break was called.
2727
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)
2828
{
29-
if (heap==NULL || heap->page_count==0) return 0;
29+
if (heap==NULL || heap->page_count==0) return true;
3030

3131
// visit all pages
3232
#if MI_DEBUG>1
3333
size_t total = heap->page_count;
3434
size_t count = 0;
35-
#endif
35+
#endif
3636

3737
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
3838
mi_page_queue_t* pq = &heap->pages[i];
@@ -120,11 +120,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
120120
{
121121
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
122122

123-
const bool force = collect >= MI_FORCE;
123+
const bool force = collect >= MI_FORCE;
124124
_mi_deferred_free(heap, force);
125125

126-
// note: never reclaim on collect but leave it to threads that need storage to reclaim
127-
const bool force_main =
126+
// note: never reclaim on collect but leave it to threads that need storage to reclaim
127+
const bool force_main =
128128
#ifdef NDEBUG
129129
collect == MI_FORCE
130130
#else
@@ -513,6 +513,17 @@ typedef struct mi_heap_area_ex_s {
513513
mi_page_t* page;
514514
} mi_heap_area_ex_t;
515515

516+
// Precompute a (magic, shift) pair so that `n / divisor` can later be computed
// without a hardware divide (see mi_fast_divide). Round-up reciprocal scheme:
//   shift = ceil(log2(divisor)), magic = 2^32 * (2^shift - divisor) / divisor + 1.
// NOTE(review): for divisor == 1 this evaluates mi_clz(0) — presumably callers
// never pass a 1-byte block size; confirm mi_clz(0) is well-defined here.
static void mi_fast_divisor(size_t divisor, size_t* magic, size_t* shift) {
  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
  const size_t s = MI_INTPTR_BITS - mi_clz(divisor - 1); // ceil(log2(divisor))
  *shift = s;
  *magic = (size_t)(((1ULL << 32) * ((1ULL << s) - divisor)) / divisor + 1);
}
521+
522+
// Divide `n` by the divisor that was captured in (magic, shift) by
// mi_fast_divisor. Valid only for n <= UINT32_MAX.
static size_t mi_fast_divide(size_t n, size_t magic, size_t shift) {
  mi_assert_internal(n <= UINT32_MAX);
  const uint64_t hi = ((uint64_t)n * magic) >> 32;
  return (size_t)((hi + n) >> shift);
}
526+
516527
static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
517528
mi_assert(xarea != NULL);
518529
if (xarea==NULL) return true;
@@ -536,10 +547,31 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
536547
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
537548
}
538549

539-
// create a bitmap of free blocks.
550+
if (page->used == page->capacity) {
551+
// optimize full pages
552+
uint8_t* block = pstart;
553+
for (size_t i = 0; i < page->capacity; i++) {
554+
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
555+
block += bsize;
556+
}
557+
return true;
558+
}
559+
560+
// zero-initialize a bitmap of free blocks.
540561
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
541-
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
542-
memset(free_map, 0, sizeof(free_map));
562+
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
563+
size_t bmapsize = (page->capacity + MI_INTPTR_BITS - 1) / MI_INTPTR_BITS;
564+
memset(free_map, 0, bmapsize * sizeof(uintptr_t));
565+
566+
if (page->capacity % MI_INTPTR_BITS != 0) {
567+
size_t shift = (page->capacity % MI_INTPTR_BITS);
568+
uintptr_t mask = (UINTPTR_MAX << shift);
569+
free_map[bmapsize-1] = mask;
570+
}
571+
572+
// fast repeated division by an integer
573+
size_t magic, shift;
574+
mi_fast_divisor(bsize, &magic, &shift);
543575

544576
#if MI_DEBUG>1
545577
size_t free_count = 0;
@@ -551,10 +583,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
551583
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
552584
size_t offset = (uint8_t*)block - pstart;
553585
mi_assert_internal(offset % bsize == 0);
554-
size_t blockidx = offset / bsize; // Todo: avoid division?
555-
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
556-
size_t bitidx = (blockidx / sizeof(uintptr_t));
557-
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
586+
size_t blockidx = mi_fast_divide(offset, magic, shift);
587+
mi_assert_internal(blockidx == offset / bsize);
588+
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
589+
size_t bitidx = (blockidx / MI_INTPTR_BITS);
590+
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
558591
free_map[bitidx] |= ((uintptr_t)1 << bit);
559592
}
560593
mi_assert_internal(page->capacity == (free_count + page->used));
@@ -563,19 +596,29 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
563596
#if MI_DEBUG>1
564597
size_t used_count = 0;
565598
#endif
566-
for (size_t i = 0; i < page->capacity; i++) {
567-
size_t bitidx = (i / sizeof(uintptr_t));
568-
size_t bit = i - (bitidx * sizeof(uintptr_t));
569-
uintptr_t m = free_map[bitidx];
570-
if (bit == 0 && m == UINTPTR_MAX) {
571-
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
599+
uint8_t* block = pstart;
600+
for (size_t i = 0; i < bmapsize; i++) {
601+
if (free_map[i] == 0) {
602+
// every block is in use
603+
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
604+
#if MI_DEBUG>1
605+
used_count++;
606+
#endif
607+
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
608+
block += bsize;
609+
}
572610
}
573-
else if ((m & ((uintptr_t)1 << bit)) == 0) {
574-
#if MI_DEBUG>1
575-
used_count++;
576-
#endif
577-
uint8_t* block = pstart + (i * bsize);
578-
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
611+
else {
612+
uintptr_t m = ~free_map[i];
613+
while (m) {
614+
#if MI_DEBUG>1
615+
used_count++;
616+
#endif
617+
size_t bitidx = mi_ctz(m);
618+
if (!visitor(mi_page_heap(page), area, block + (bitidx * bsize), ubsize, arg)) return false;
619+
m &= m - 1;
620+
}
621+
block += bsize * MI_INTPTR_BITS;
579622
}
580623
}
581624
mi_assert_internal(page->used == used_count);
@@ -584,21 +627,24 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
584627

585628
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
586629

630+
// Fill in an extended heap-area descriptor (`xarea`) from `page`.
static void mi_heap_xarea_init(mi_heap_area_ex_t* xarea, mi_page_t* page) {
  const size_t full_size = mi_page_block_size(page);
  const size_t usable_size = mi_page_usable_block_size(page);
  xarea->page = page;
  xarea->area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
  xarea->area.reserved = page->reserved * full_size;
  xarea->area.committed = page->capacity * full_size;
  xarea->area.used = page->used; // number of blocks in use (#553)
  xarea->area.block_size = usable_size;
  xarea->area.full_block_size = full_size;
}
587641

588642
// Page-visitor adapter: wraps `page` into an mi_heap_area_ex_t and forwards
// to the area-visitor function packed into `vfun`.
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
  MI_UNUSED(heap);
  MI_UNUSED(pq);
  mi_heap_area_ex_t xarea;
  mi_heap_xarea_init(&xarea, page);
  mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
  return fun(heap, &xarea, arg);
}
604650

@@ -629,5 +675,55 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
629675
// Visit all blocks in a heap
630676
bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
631677
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
678+
_mi_heap_delayed_free_partial((mi_heap_t *)heap);
632679
return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
633680
}
681+
682+
typedef bool (segment_page_visitor_fun)(mi_segment_t* segment, mi_page_t* page, void* arg);
683+
684+
// Visit all pages in a heap; returns `false` if break was called.
685+
static bool mi_segment_visit_pages(int page_tag, mi_segment_t *segment, segment_page_visitor_fun* fn, void* arg)
686+
{
687+
while (segment) {
688+
const mi_slice_t* end;
689+
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
690+
while (slice < end) {
691+
if (slice->xblock_size > 0) {
692+
mi_page_t* const page = mi_slice_to_page(slice);
693+
if (page->tag == page_tag) {
694+
if (!fn(segment, page, arg)) return false;
695+
}
696+
}
697+
slice = slice + slice->slice_count;
698+
}
699+
segment = segment->abandoned_next;
700+
}
701+
return true;
702+
}
703+
704+
// Visit all pages in a heap; returns `false` if break was called.
705+
static bool mi_abandoned_visit_pages(int page_tag, segment_page_visitor_fun* fn, void* arg)
706+
{
707+
if (!mi_segment_visit_pages(page_tag, _mi_segment_abandoned(), fn, arg)) return false;
708+
return mi_segment_visit_pages(page_tag, _mi_segment_abandoned_visited(), fn, arg);
709+
}
710+
711+
// Per-page callback for _mi_abandoned_visit_blocks: reports the page's area
// and, when requested, every live block inside it.
static bool mi_segment_visitor(mi_segment_t* segment, mi_page_t* page, void* arg)
{
  mi_visit_blocks_args_t* const args = (mi_visit_blocks_args_t*)arg;
  mi_heap_area_ex_t xarea;
  mi_heap_xarea_init(&xarea, page);
  // Report the area itself first; heap is passed as NULL (abandoned pages
  // have no owning heap to report).
  if (!args->visitor(NULL, &xarea.area, NULL, xarea.area.block_size, args->arg)) return false;
  if (!args->visit_blocks) return true;
  return mi_heap_area_visit_blocks(&xarea, args->visitor, args->arg);
}
724+
725+
// Visit all blocks in a abandoned segments
726+
bool _mi_abandoned_visit_blocks(int page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
727+
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
728+
return mi_abandoned_visit_pages(page_tag, &mi_segment_visitor, &args);
729+
}

Objects/mimalloc/segment.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,6 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) {
157157
Slices
158158
----------------------------------------------------------- */
159159

160-
161-
static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) {
162-
return &segment->slices[segment->slice_entries];
163-
}
164-
165160
static uint8_t* mi_slice_start(const mi_slice_t* slice) {
166161
mi_segment_t* segment = _mi_ptr_segment(slice);
167162
mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment));
@@ -1271,14 +1266,6 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
12711266
Reclaim abandoned pages
12721267
----------------------------------------------------------- */
12731268

1274-
static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
1275-
mi_slice_t* slice = &segment->slices[0];
1276-
*end = mi_segment_slices_end(segment);
1277-
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
1278-
slice = slice + slice->slice_count; // skip the first segment allocated page
1279-
return slice;
1280-
}
1281-
12821269
// Possibly free pages and check if free space is available
12831270
static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, int tag, mi_segments_tld_t* tld)
12841271
{

0 commit comments

Comments
 (0)