diff --git a/NOTICE b/NOTICE
index 93a368305e..d745fe7ceb 100644
--- a/NOTICE
+++ b/NOTICE
@@ -29,6 +29,7 @@ under the licensing terms detailed in LICENSE:
 * Valeria Viana Gusmao <valeria.viana.gusmao@gmail.com>
 * Gabor Greif <ggreif@gmail.com>
 * Martin Fredriksson <martin.fredriksson@vikinganalytics.se>
+* David Schneider <dave@praisingod.com>
 * forcepusher <bionitsoup@gmail.com>
 * Piotr Oleś <piotrek.oles@gmail.com>
 * Saúl Cabrera <saulecabrera@gmail.com>
diff --git a/cli/index.js b/cli/index.js
index 36eeb4715e..ad0eb45af6 100644
--- a/cli/index.js
+++ b/cli/index.js
@@ -300,6 +300,7 @@ export async function main(argv, options) {
   switch (opts.runtime) {
     case "stub": runtime = 0; break;
     case "minimal": runtime = 1; break;
+    case "minimal-mt": runtime = 4; break;
     /* incremental */
     default: runtime = 2; break;
   }
diff --git a/std/assembly/rt/index-incremental.ts b/std/assembly/rt/index-incremental.ts
index 4730344b4f..f18c8791db 100644
--- a/std/assembly/rt/index-incremental.ts
+++ b/std/assembly/rt/index-incremental.ts
@@ -1,2 +1,3 @@
-import "rt/tlsf";
-import "rt/itcms";
+import "rt/tlsf-base";
+import "rt/tlsf-st";
+import "rt/itcms";
diff --git a/std/assembly/rt/index-minimal-mt.ts b/std/assembly/rt/index-minimal-mt.ts
new file mode 100644
index 0000000000..3595ba2dd6
--- /dev/null
+++ b/std/assembly/rt/index-minimal-mt.ts
@@ -0,0 +1,3 @@
+import "rt/tlsf-base";
+import "rt/tlsf-mt";
+import "rt/tcms";
diff --git a/std/assembly/rt/index-minimal.ts b/std/assembly/rt/index-minimal.ts
index cf88ee158f..c020f7ed40 100644
--- a/std/assembly/rt/index-minimal.ts
+++ b/std/assembly/rt/index-minimal.ts
@@ -1,2 +1,3 @@
-import "rt/tlsf";
-import "rt/tcms";
+import "rt/tlsf-base";
+import "rt/tlsf-st";
+import "rt/tcms";
diff --git a/std/assembly/rt/tlsf.ts b/std/assembly/rt/tlsf-base.ts
similarity index 96%
rename from std/assembly/rt/tlsf.ts
rename to std/assembly/rt/tlsf-base.ts
index df437b82cb..f325252ea9 100644
--- a/std/assembly/rt/tlsf.ts
+++ b/std/assembly/rt/tlsf-base.ts
@@ -5,6 +5,9 @@ import { E_ALLOCATION_TOO_LARGE } from "../util/error";
 // === The TLSF (Two-Level Segregate Fit) memory allocator ===
 // see: http://www.gii.upv.es/tlsf/
 
+// Split into single- and multi-threaded versions; the multi-threaded version just adds basic locks around
+// allocation and deallocation.
+
 // - `ffs(x)` is equivalent to `ctz(x)` with x != 0
 // - `fls(x)` is equivalent to `sizeof(x) * 8 - clz(x) - 1`
 
@@ -137,7 +140,10 @@ import { E_ALLOCATION_TOO_LARGE } from "../util/error";
 @inline const ROOT_SIZE: usize = HL_END + sizeof<usize>();
 
 // @ts-ignore: decorator
-@lazy export let ROOT: Root = changetype<Root>(0); // unsafe initializion below
+@lazy export let ROOT: Root = changetype<Root>(memory.data(ROOT_SIZE)); // unsafe initializion below
+
+// @ts-ignore: decorator
+@inline export const ROOT_INIT: usize = memory.data(4);
 
 /** Gets the second level map of the specified first level. */
 // @ts-ignore: decorator
@@ -460,13 +466,13 @@ function prepareSize(size: usize): usize {
 }
 
 /** Initializes the root structure. */
-function initialize(): void {
+export function TLSFinitialize(): void {
   if (isDefined(ASC_RTRACE)) oninit(__heap_base);
   let rootOffset = (__heap_base + AL_MASK) & ~AL_MASK;
   let pagesBefore = memory.size();
   let pagesNeeded = <i32>((((rootOffset + ROOT_SIZE) + 0xffff) & ~0xffff) >>> 16);
   if (pagesNeeded > pagesBefore && memory.grow(pagesNeeded - pagesBefore) < 0) unreachable();
-  let root = changetype<Root>(rootOffset);
+  let root = ROOT;
   root.flMap = 0;
   SETTAIL(root, changetype<Block>(0));
   for (let fl: usize = 0; fl < FL_BITS; ++fl) {
@@ -483,7 +489,7 @@ function initialize(): void {
   } else {
     addMemory(root, memStart, memory.size() << 16);
   }
-  ROOT = root;
+  store<i32>(ROOT_INIT, 1);
 }
 
 /** Allocates a block of the specified size. */
@@ -536,7 +542,7 @@ export function reallocateBlock(root: Root, block: Block, size: usize): Block {
 }
 
 /** Moves a block to a new one of the specified size. */
-function moveBlock(root: Root, block: Block, newSize: usize): Block {
+export function moveBlock(root: Root, block: Block, newSize: usize): Block {
   let newBlock = allocateBlock(root, newSize);
   memory.copy(changetype<usize>(newBlock) + BLOCK_OVERHEAD, changetype<usize>(block) + BLOCK_OVERHEAD, block.mmInfo & ~TAGS_MASK);
   if (changetype<usize>(block) >= __heap_base) {
@@ -554,7 +560,7 @@ export function freeBlock(root: Root, block: Block): void {
 }
 
 /** Checks that a used block is valid to be freed or reallocated. */
-function checkUsedBlock(ptr: usize): Block {
+export function checkUsedBlock(ptr: usize): Block {
   let block = changetype<Block>(ptr - BLOCK_OVERHEAD);
   assert(
     ptr != 0 && !(ptr & AL_MASK) &&  // must exist and be aligned
@@ -563,27 +569,3 @@ function checkUsedBlock(ptr: usize): Block {
   return block;
 }
 
-// @ts-ignore: decorator
-@global @unsafe
-export function __alloc(size: usize): usize {
-  if (!ROOT) initialize();
-  return changetype<usize>(allocateBlock(ROOT, size)) + BLOCK_OVERHEAD;
-}
-
-// @ts-ignore: decorator
-@global @unsafe
-export function __realloc(ptr: usize, size: usize): usize {
-  if (!ROOT) initialize();
-  return (ptr < __heap_base
-    ? changetype<usize>(moveBlock(ROOT, checkUsedBlock(ptr), size))
-    : changetype<usize>(reallocateBlock(ROOT, checkUsedBlock(ptr), size))
-  ) + BLOCK_OVERHEAD;
-}
-
-// @ts-ignore: decorator
-@global @unsafe
-export function __free(ptr: usize): void {
-  if (ptr < __heap_base) return;
-  if (!ROOT) initialize();
-  freeBlock(ROOT, checkUsedBlock(ptr));
-}
diff --git a/std/assembly/rt/tlsf-mt.ts b/std/assembly/rt/tlsf-mt.ts
new file mode 100644
index 0000000000..c1d3a1155a
--- /dev/null
+++ b/std/assembly/rt/tlsf-mt.ts
@@ -0,0 +1,82 @@
+import { BLOCK_OVERHEAD } from "./common";
+import {
+  allocateBlock,
+  freeBlock,
+  reallocateBlock,
+  ROOT,
+  ROOT_INIT,
+  TLSFinitialize,
+  checkUsedBlock,
+  moveBlock
+} from "./tlsf-base";
+import { TlsfMutex_lock, TlsfMutex_unlock } from "./tlsf-mutex";
+
+// === Multi-threaded entry points for the TLSF memory allocator ===
+
+// Each entry point takes the mutex before using the allocator root and releases it before returning.
+// NOTE(review): if a call made while the mutex is held aborts, the unlock is never reached - confirm
+// that this is acceptable.
+
+/** Address of the lock word guarding the allocator, reserved as 4 bytes of static data aligned to 16. */
+const mutex_ptr = memory.data(4, 16);
+
+/**
+ * Allocates a block of the specified size while holding the allocator mutex.
+ *
+ * @param size Requested size, passed through to `allocateBlock`.
+ * @returns The address of the allocated block offset by `BLOCK_OVERHEAD`.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __alloc(size: usize): usize {
+  TlsfMutex_lock(mutex_ptr);
+
+  // The root is initialized on first use; checking the flag under the lock lets only one caller do it.
+  if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+  let ptr = changetype<usize>(allocateBlock(ROOT, size)) + BLOCK_OVERHEAD;
+
+  TlsfMutex_unlock(mutex_ptr);
+  return ptr;
+}
+
+/**
+ * Resizes the block behind `ptr` to the specified size while holding the allocator mutex.
+ *
+ * @param ptr Pointer previously handed out for a used block; validated by `checkUsedBlock`.
+ * @param size Requested new size.
+ * @returns The address of the resulting block offset by `BLOCK_OVERHEAD`.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __realloc(ptr: usize, size: usize): usize {
+  TlsfMutex_lock(mutex_ptr);
+
+  if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+  // Pointers below `__heap_base` always go through `moveBlock`; all others use `reallocateBlock`.
+  let newPtr: usize = (ptr < __heap_base
+    ? changetype<usize>(moveBlock(ROOT, checkUsedBlock(ptr), size))
+    : changetype<usize>(reallocateBlock(ROOT, checkUsedBlock(ptr), size))
+  ) + BLOCK_OVERHEAD;
+
+  TlsfMutex_unlock(mutex_ptr);
+  return newPtr;
+}
+
+/**
+ * Frees the block behind `ptr` while holding the allocator mutex.
+ *
+ * @param ptr Pointer to free; pointers below `__heap_base` are ignored.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __free(ptr: usize): void {
+  // Nothing to free below `__heap_base`, so return before taking the lock.
+  if (ptr < __heap_base) return;
+
+  TlsfMutex_lock(mutex_ptr);
+
+  if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+  freeBlock(ROOT, checkUsedBlock(ptr));
+
+  TlsfMutex_unlock(mutex_ptr);
+}
diff --git a/std/assembly/rt/tlsf-mutex.ts b/std/assembly/rt/tlsf-mutex.ts
new file mode 100644
index 0000000000..d323b209b8
--- /dev/null
+++ b/std/assembly/rt/tlsf-mutex.ts
@@ -0,0 +1,46 @@
+// A minimal spinlock used by tlsf-mt.ts to serialize allocator access.
+
+/** States of the 32-bit lock word stored at the mutex address. */
+enum TlsfMutexState {
+  unlocked,
+  locked
+}
+
+/**
+ * Acquires the lock stored at `mutex_ptr`, spinning until it becomes available.
+ *
+ * Spinning is not expected to be a performance issue, since the lock is only held for as long
+ * as a single allocator call takes.
+ *
+ * @param mutex_ptr Address of the 32-bit lock word.
+ */
+// @ts-ignore: decorator
+@inline
+export function TlsfMutex_lock(mutex_ptr: usize): void {
+  for (;;) {
+    // If we successfully compare-and-exchange unlocked for locked atomically, we have the mutex.
+    if (atomic.cmpxchg<i32>(mutex_ptr, TlsfMutexState.unlocked, TlsfMutexState.locked) === TlsfMutexState.unlocked) {
+      return;
+    }
+    // Otherwise wait for the unlocked state before trying for locked again.
+    for (;;) {
+      if (atomic.load<i32>(mutex_ptr) === TlsfMutexState.unlocked) break;
+    }
+  }
+}
+
+/**
+ * Releases the lock stored at `mutex_ptr`.
+ *
+ * Throws an `Error` if the lock word was not in the locked state.
+ *
+ * @param mutex_ptr Address of the 32-bit lock word.
+ */
+// @ts-ignore: decorator
+@inline
+export function TlsfMutex_unlock(mutex_ptr: usize): void {
+  if (atomic.cmpxchg<i32>(mutex_ptr, TlsfMutexState.locked, TlsfMutexState.unlocked) !== TlsfMutexState.locked) {
+    // This only happens if someone else unlocked our mutex, or we did it more than once.
+    throw new Error("Mutex in inconsistent state");
+  }
+}
diff --git a/std/assembly/rt/tlsf-st.ts b/std/assembly/rt/tlsf-st.ts
new file mode 100644
index 0000000000..95dd0e1600
--- /dev/null
+++ b/std/assembly/rt/tlsf-st.ts
@@ -0,0 +1,60 @@
+import { BLOCK_OVERHEAD } from "./common";
+import {
+  allocateBlock,
+  freeBlock,
+  reallocateBlock,
+  ROOT,
+  ROOT_INIT,
+  TLSFinitialize,
+  checkUsedBlock,
+  moveBlock
+} from "./tlsf-base";
+
+// === Single-threaded entry points for the TLSF memory allocator ===
+
+/**
+ * Allocates a block of the specified size, initializing the allocator root on first use.
+ *
+ * @param size Requested size, passed through to `allocateBlock`.
+ * @returns The address of the allocated block offset by `BLOCK_OVERHEAD`.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __alloc(size: usize): usize {
+  if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+  let block = allocateBlock(ROOT, size);
+  return changetype<usize>(block) + BLOCK_OVERHEAD;
+}
+
+/**
+ * Resizes the block behind `ptr` to the specified size.
+ *
+ * @param ptr Pointer to a used block; validated by `checkUsedBlock`.
+ * @param size Requested new size.
+ * @returns The address of the resulting block offset by `BLOCK_OVERHEAD`.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __realloc(ptr: usize, size: usize): usize {
+  if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+  let oldBlock = checkUsedBlock(ptr);
+  // Pointers below `__heap_base` always go through `moveBlock`; all others use `reallocateBlock`.
+  let newBlock = ptr < __heap_base
+    ? moveBlock(ROOT, oldBlock, size)
+    : reallocateBlock(ROOT, oldBlock, size);
+  return changetype<usize>(newBlock) + BLOCK_OVERHEAD;
+}
+
+/**
+ * Frees the block behind `ptr`.
+ *
+ * @param ptr Pointer to free; pointers below `__heap_base` are ignored.
+ */
+// @ts-ignore: decorator
+@global @unsafe
+export function __free(ptr: usize): void {
+  if (ptr >= __heap_base) {
+    if (!load<i32>(ROOT_INIT)) TLSFinitialize();
+    freeBlock(ROOT, checkUsedBlock(ptr));
+  }
+}