Skip to content

Commit 319e0bc

Browse files
committed
Merge remote-tracking branch 'origin/develop'
2 parents ed75d34 + 0f72ec9 commit 319e0bc

File tree

13 files changed

+154
-59
lines changed

13 files changed

+154
-59
lines changed

config/llvm_header.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ declare void @write_configuration_to_proof_trace(ptr, ptr, i1)
202202
@current_interval = thread_local global i64 0
203203
@GC_THRESHOLD = thread_local global i64 @GC_THRESHOLD@
204204
205-
@gc_roots = global [256 x ptr] zeroinitializer
205+
@gc_roots = thread_local global [256 x ptr] zeroinitializer
206206
207207
define i64 @get_gc_threshold() {
208208
%threshold = load i64, ptr @GC_THRESHOLD

include/runtime/arena.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,19 @@ using memory_block_header = struct {
3030
// Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
3131
// 127.
3232
#define REGISTER_ARENA(name, id) \
33-
static struct arena name = {.allocation_semispace_id = (id)}
33+
static thread_local struct arena name = {.allocation_semispace_id = (id)}
3434

3535
#define MEM_BLOCK_START(ptr) \
3636
((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
3737

38+
#ifdef __MACH__
39+
//
40+
// thread_local disabled for Apple
41+
//
3842
extern bool time_for_collection;
43+
#else
44+
extern thread_local bool time_for_collection;
45+
#endif
3946

4047
size_t get_gc_threshold();
4148

include/runtime/collect.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ using set_node = set::iterator::node_t;
2626
using set_impl = set::iterator::tree_t;
2727

2828
extern "C" {
29-
extern size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
30-
extern bool collect_old;
29+
extern thread_local size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
30+
extern thread_local bool collect_old;
3131
size_t get_size(uint64_t, uint16_t);
3232
void migrate_static_roots(void);
3333
void migrate(block **block_ptr);

include/runtime/header.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,14 @@ size_t hash_k(block *);
4747
void k_hash(block *, void *);
4848
bool hash_enter(void);
4949
void hash_exit(void);
50-
50+
#ifdef __MACH__
51+
//
52+
// thread_local disabled for Apple
53+
//
5154
extern bool gc_enabled;
55+
#else
56+
extern thread_local bool gc_enabled;
57+
#endif
5258
}
5359

5460
class k_elem {

lib/codegen/CreateTerm.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -782,18 +782,47 @@ llvm::Value *create_term::disable_gc() {
782782
llvm::Constant *global
783783
= module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
784784
auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
785+
#ifdef __MACH__
786+
//
787+
// thread_local disabled for Apple
788+
//
789+
/*
790+
global_var->setThreadLocal(true);
791+
llvm::IRBuilder b(current_block_);
792+
auto *global_var_address = b.CreateThreadLocalAddress(global_var);
793+
*/
794+
auto *global_var_address = global_var;
795+
#else
796+
global_var->setThreadLocal(true);
797+
auto *global_var_address = global_var;
798+
#endif
785799
auto *old_val = new llvm::LoadInst(
786-
llvm::Type::getInt1Ty(ctx_), global_var, "was_enabled", current_block_);
800+
llvm::Type::getInt1Ty(ctx_), global_var_address, "was_enabled",
801+
current_block_);
787802
new llvm::StoreInst(
788-
llvm::ConstantInt::getFalse(ctx_), global_var, current_block_);
803+
llvm::ConstantInt::getFalse(ctx_), global_var_address, current_block_);
789804
return old_val;
790805
}
791806

792807
void create_term::enable_gc(llvm::Value *was_enabled) {
793808
llvm::Constant *global
794809
= module_->getOrInsertGlobal("gc_enabled", llvm::Type::getInt1Ty(ctx_));
795810
auto *global_var = llvm::cast<llvm::GlobalVariable>(global);
796-
new llvm::StoreInst(was_enabled, global_var, current_block_);
811+
#ifdef __MACH__
812+
//
813+
// thread_local disabled for Apple
814+
//
815+
/*
816+
global_var->setThreadLocal(true);
817+
llvm::IRBuilder b(current_block_);
818+
auto *global_var_address = b.CreateThreadLocalAddress(global_var);
819+
*/
820+
auto *global_var_address = global_var;
821+
#else
822+
global_var->setThreadLocal(true);
823+
auto *global_var_address = global_var;
824+
#endif
825+
new llvm::StoreInst(was_enabled, global_var_address, current_block_);
797826
}
798827

799828
// We use tailcc calling convention for apply_rule_* and eval_* functions to

lib/codegen/Decision.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "kllvm/codegen/ProofEvent.h"
66
#include "kllvm/codegen/Util.h"
77

8+
#include "llvm/IR/IRBuilder.h"
89
#include <llvm/ADT/APInt.h>
910
#include <llvm/ADT/SmallString.h>
1011
#include <llvm/ADT/StringMap.h>
@@ -1006,9 +1007,25 @@ std::pair<std::vector<llvm::Value *>, llvm::BasicBlock *> step_function_header(
10061007

10071008
auto *collection = module->getOrInsertGlobal(
10081009
"time_for_collection", llvm::Type::getInt1Ty(module->getContext()));
1010+
1011+
#ifdef __MACH__
1012+
//
1013+
// thread_local disabled for Apple
1014+
//
1015+
/*
1016+
llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
1017+
llvm::IRBuilder b(check_collect);
1018+
auto *collection_address = b.CreateThreadLocalAddress(collection);
1019+
*/
1020+
auto *collection_address = collection;
1021+
#else
1022+
llvm::cast<llvm::GlobalVariable>(collection)->setThreadLocal(true);
1023+
auto *collection_address = collection;
1024+
#endif
1025+
10091026
auto *is_collection = new llvm::LoadInst(
1010-
llvm::Type::getInt1Ty(module->getContext()), collection, "is_collection",
1011-
check_collect);
1027+
llvm::Type::getInt1Ty(module->getContext()), collection_address,
1028+
"is_collection", check_collect);
10121029
set_debug_loc(is_collection);
10131030
auto *collect = llvm::BasicBlock::Create(
10141031
module->getContext(), "isCollect", block->getParent());

runtime/alloc/arena.cpp

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#include <cstdio>
44
#include <cstdlib>
55
#include <cstring>
6+
#include <memory>
7+
#include <sys/mman.h>
68

79
#include "runtime/alloc.h"
810
#include "runtime/arena.h"
@@ -47,36 +49,68 @@ get_arena_semispace_id_of_object(void *ptr) {
4749
return mem_block_header(ptr)->semispace;
4850
}
4951

50-
static void *first_superblock_ptr = nullptr;
51-
static void *superblock_ptr = nullptr;
52-
static char **next_superblock_ptr = nullptr;
53-
static unsigned blocks_left = 0;
52+
//
53+
// We reserve enough address space for 1 million 1MB blocks (1TB in total). Consider increasing this on a server with more than 1TB of memory.
54+
//
55+
size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
56+
static thread_local void *hyperblock_ptr = nullptr; // only needed for munmap()
5457

5558
static void *megabyte_malloc() {
56-
if (blocks_left == 0) {
57-
blocks_left = 15;
58-
if (int result
59-
= posix_memalign(&superblock_ptr, BLOCK_SIZE, BLOCK_SIZE * 15)) {
60-
errno = result;
61-
perror("posix_memalign");
62-
}
63-
if (!first_superblock_ptr) {
64-
first_superblock_ptr = superblock_ptr;
65-
}
66-
if (next_superblock_ptr) {
67-
*next_superblock_ptr = (char *)superblock_ptr;
59+
//
60+
// Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment.
61+
//
62+
static thread_local char *currentblock_ptr
63+
= nullptr; // char* rather than void* to permit pointer arithmetic
64+
if (currentblock_ptr) {
65+
//
66+
// We expect a page fault due to not being able to map physical memory to this block or the
67+
// process to be killed by the OOM killer long before we run off the end of our address space.
68+
//
69+
currentblock_ptr += BLOCK_SIZE;
70+
} else {
71+
//
72+
// First call - need to reserve the address space.
73+
//
74+
size_t request = HYPERBLOCK_SIZE;
75+
void *addr = mmap(
76+
nullptr, // let OS choose the address
77+
request, // Linux and MacOS both allow up to 64TB
78+
PROT_READ | PROT_WRITE, // read, write but not execute
79+
MAP_ANONYMOUS | MAP_PRIVATE
80+
| MAP_NORESERVE, // allocate address space only
81+
-1, // no file backing
82+
0); // no offset
83+
if (addr == MAP_FAILED) {
84+
perror("mmap()");
85+
abort();
6886
}
69-
auto *hdr = (memory_block_header *)superblock_ptr;
70-
next_superblock_ptr = &hdr->next_superblock;
71-
hdr->next_superblock = nullptr;
87+
hyperblock_ptr = addr;
88+
//
89+
// We ask for one block's worth of address space less than we allocated so alignment will always succeed.
90+
// We don't worry about unused address space on either side of our aligned address space because there will be no
91+
// memory mapped to it.
92+
//
93+
currentblock_ptr = reinterpret_cast<char *>(
94+
std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request));
7295
}
73-
blocks_left--;
74-
void *result = superblock_ptr;
75-
superblock_ptr = (char *)superblock_ptr + BLOCK_SIZE;
76-
return result;
96+
return currentblock_ptr;
7797
}
7898

99+
void free_all_memory() {
100+
//
101+
// Frees all memory that was demand paged into this address range.
102+
//
103+
munmap(hyperblock_ptr, HYPERBLOCK_SIZE);
104+
}
105+
106+
#ifdef __MACH__
107+
//
108+
// thread_local disabled for Apple
109+
//
79110
bool time_for_collection;
111+
#else
112+
thread_local bool time_for_collection;
113+
#endif
80114

81115
static void fresh_block(struct arena *arena) {
82116
char *next_block = nullptr;
@@ -122,7 +156,14 @@ static void fresh_block(struct arena *arena) {
122156
BLOCK_SIZE - sizeof(memory_block_header));
123157
}
124158

159+
#ifdef __MACH__
160+
//
161+
// thread_local disabled for Apple
162+
//
125163
bool gc_enabled = true;
164+
#else
165+
thread_local bool gc_enabled = true;
166+
#endif
126167

127168
__attribute__((noinline)) void *
128169
do_alloc_slow(size_t requested, struct arena *arena) {
@@ -229,16 +270,3 @@ size_t arena_size(const struct arena *arena) {
229270
: arena->num_collection_blocks)
230271
* (BLOCK_SIZE - sizeof(memory_block_header));
231272
}
232-
233-
void free_all_memory() {
234-
auto *superblock = (memory_block_header *)first_superblock_ptr;
235-
while (superblock) {
236-
auto *next_superblock = (memory_block_header *)superblock->next_superblock;
237-
free(superblock);
238-
superblock = next_superblock;
239-
}
240-
first_superblock_ptr = nullptr;
241-
superblock_ptr = nullptr;
242-
next_superblock_ptr = nullptr;
243-
blocks_left = 0;
244-
}

runtime/alloc/register_gc_roots_enum.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "runtime/collect.h"
44
#include "runtime/header.h"
55

6-
std::vector<BlockEnumerator> block_enumerators;
6+
thread_local std::vector<BlockEnumerator> block_enumerators;
77

88
void register_gc_roots_enumerator(BlockEnumerator f) {
99
block_enumerators.push_back(f);

runtime/arithmetic/int.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,8 @@ void int_hash(mpz_t i, void *hasher) {
373373
}
374374
}
375375

376-
gmp_randstate_t kllvm_rand_state;
377-
bool kllvm_rand_state_initialized = false;
376+
thread_local gmp_randstate_t kllvm_rand_state;
377+
thread_local bool kllvm_rand_state_initialized = false;
378378

379379
SortK hook_INT_srand(SortInt seed) {
380380
if (!kllvm_rand_state_initialized) {

runtime/collect/collect.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ char **old_alloc_ptr(void);
1616
char *youngspace_ptr(void);
1717
char *oldspace_ptr(void);
1818

19-
static bool is_gc = false;
20-
bool collect_old = false;
19+
static thread_local bool is_gc = false;
20+
bool thread_local collect_old = false;
2121
#ifndef GC_DBG
22-
static uint8_t num_collection_only_young = 0;
22+
static thread_local uint8_t num_collection_only_young = 0;
2323
#else
24-
static char *last_alloc_ptr;
24+
static thread_local char *last_alloc_ptr;
2525
#endif
2626

27-
size_t numBytesLiveAtCollection[1 << AGE_WIDTH];
27+
size_t thread_local numBytesLiveAtCollection[1 << AGE_WIDTH];
2828

2929
bool during_gc() {
3030
return is_gc;

runtime/collect/migrate_static_roots.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
#include "runtime/collect.h"
44

5-
extern std::vector<BlockEnumerator> block_enumerators;
5+
extern thread_local std::vector<BlockEnumerator> block_enumerators;
66

7-
extern gmp_randstate_t kllvm_rand_state;
8-
extern bool kllvm_rand_state_initialized;
7+
extern thread_local gmp_randstate_t kllvm_rand_state;
8+
extern thread_local bool kllvm_rand_state_initialized;
99

1010
extern "C" {
1111

runtime/lto/alloc.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ static inline void *kore_alloc_collection(kllvm::sort_category cat) {
159159
void *mem
160160
= kore_alloc(sizeof(blockheader) + sizeof(collection) + sizeof(uint64_t));
161161
auto *hdr = (blockheader *)mem;
162-
static std::string name = get_raw_symbol_name(cat) + "{}";
163-
static blockheader hdr_val
162+
static thread_local std::string name = get_raw_symbol_name(cat) + "{}";
163+
static thread_local blockheader hdr_val
164164
= get_block_header_for_symbol(get_tag_for_symbol_name(name.c_str()));
165165
*hdr = hdr_val;
166166
auto *offset = (uint64_t *)(hdr + 1);

unittests/runtime-collections/lists.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,15 @@ block D1 = {{1}};
6262
block *DUMMY1 = &D1;
6363
}
6464

65+
#ifdef __MACH__
66+
//
67+
// thread_local disabled for Apple
68+
//
6569
bool gc_enabled;
70+
#else
71+
thread_local bool gc_enabled;
72+
#endif
73+
6674
size_t get_gc_threshold() {
6775
return SIZE_MAX;
6876
}

0 commit comments

Comments
 (0)