Skip to content

Commit c1fa952

Browse files
committed
multithreading for chrome and safari
1 parent a3816e0 commit c1fa952

16 files changed

+59
-58
lines changed

Diff for: README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ cd llama-cpp-wasm
1818
./build-multi-thread.sh
1919
```
2020

21-
Once build is complete you can find `llama.cpp` built in `docs/llama-st` and `docs/llama-mt` directory.
21+
Once build is complete you can find `llama.cpp` built in `dist/llama-st` and `dist/llama-mt` directory.
2222

2323

2424
## Deploy
2525

26-
Basically, you can copy/paste `docs/llama-st` or `docs/llama-mt` directory after build to your project and use as vanilla JavaScript library/module.
26+
Basically, you can copy/paste `dist/llama-st` or `dist/llama-mt` directory after build to your project and use as vanilla JavaScript library/module.
2727

2828

2929
**index.html**

Diff for: build-multi-thread.sh

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ set -e
33

44
LLAMA_CPP_WASM_BUILD_DIR=build
55
LLAMA_CPP_WASM_DIST_DIR=dist
6+
LLAMA_CPP_WASM_DIST_LLAMA_DIR=$LLAMA_CPP_WASM_DIST_DIR/llama-mt
67
LLAMA_CPP_GIT_HASH="8c933b7"
78
LLAMA_CPP_SOURCE_DIR=$LLAMA_CPP_WASM_BUILD_DIR/llama.cpp
89
LLAMA_CPP_BUILD_DIR=$LLAMA_CPP_WASM_BUILD_DIR/build
@@ -27,20 +28,20 @@ cd $LLAMA_CPP_BUILD_DIR
2728
emcc --clear-cache
2829
emcmake cmake ../../$LLAMA_CPP_SOURCE_DIR
2930
# export EMCC_CFLAGS="-O3 -pthread -DNDEBUG -flto -s SHARED_MEMORY=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
30-
export EMCC_CFLAGS="-O3 -pthread -DNDEBUG -flto -s SHARED_MEMORY=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=800MB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
31+
export EMCC_CFLAGS="-O3 -msimd128 -pthread -fno-rtti -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=32 -DNDEBUG -flto=full -s SHARED_MEMORY=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=800MB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
3132
emmake make main -j
3233
cd ../..
3334

3435
#
3536
# bundle llama-cpp-wasm dist
3637
#
37-
if [ -d $LLAMA_CPP_WASM_DIST_DIR ]; then
38-
rm -rf $LLAMA_CPP_WASM_DIST_DIR
38+
if [ -d $LLAMA_CPP_WASM_DIST_LLAMA_DIR ]; then
39+
rm -rf $LLAMA_CPP_WASM_DIST_LLAMA_DIR
3940
fi
4041

41-
mkdir -p $LLAMA_CPP_WASM_DIST_DIR
42-
cp -r src/* $LLAMA_CPP_WASM_DIST_DIR
43-
cp $LLAMA_CPP_BUILD_DIR/bin/main.* $LLAMA_CPP_WASM_DIST_DIR/llama
42+
mkdir -p $LLAMA_CPP_WASM_DIST_LLAMA_DIR
43+
cp -rv src/llama/* $LLAMA_CPP_WASM_DIST_LLAMA_DIR
44+
cp $LLAMA_CPP_BUILD_DIR/bin/main.* $LLAMA_CPP_WASM_DIST_LLAMA_DIR
4445

4546
rm -rf docs/llama-mt
46-
mv $LLAMA_CPP_WASM_DIST_DIR/llama docs/llama-mt
47+
cp -rv $LLAMA_CPP_WASM_DIST_LLAMA_DIR docs/

Diff for: build-single-thread.sh

+8-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ set -e
33

44
LLAMA_CPP_WASM_BUILD_DIR=build
55
LLAMA_CPP_WASM_DIST_DIR=dist
6+
LLAMA_CPP_WASM_DIST_LLAMA_DIR=$LLAMA_CPP_WASM_DIST_DIR/llama-st
67
LLAMA_CPP_GIT_HASH="8c933b7"
78
LLAMA_CPP_SOURCE_DIR=$LLAMA_CPP_WASM_BUILD_DIR/llama.cpp
89
LLAMA_CPP_BUILD_DIR=$LLAMA_CPP_WASM_BUILD_DIR/build
@@ -27,20 +28,20 @@ cd $LLAMA_CPP_BUILD_DIR
2728
emcc --clear-cache
2829
emcmake cmake ../../$LLAMA_CPP_SOURCE_DIR
2930
# export EMCC_CFLAGS="-O3 -DNDEBUG -flto -s BUILD_AS_WORKER=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=2GB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
30-
export EMCC_CFLAGS="-O3 -DNDEBUG -flto -s BUILD_AS_WORKER=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=800MB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
31+
export EMCC_CFLAGS="-O3 -msimd128 -fno-rtti -DNDEBUG -flto=full -s BUILD_AS_WORKER=1 -s EXPORT_ALL=1 -s EXPORT_ES6=1 -s MODULARIZE=1 -s INITIAL_MEMORY=800MB -s MAXIMUM_MEMORY=4GB -s ALLOW_MEMORY_GROWTH -s FORCE_FILESYSTEM=1 -s EXPORTED_FUNCTIONS=_main -s EXPORTED_RUNTIME_METHODS=callMain -s NO_EXIT_RUNTIME=1"
3132
emmake make main -j
3233
cd ../..
3334

3435
#
3536
# bundle llama-cpp-wasm dist
3637
#
37-
if [ -d $LLAMA_CPP_WASM_DIST_DIR ]; then
38-
rm -rf $LLAMA_CPP_WASM_DIST_DIR
38+
if [ -d $LLAMA_CPP_WASM_DIST_LLAMA_DIR ]; then
39+
rm -rf $LLAMA_CPP_WASM_DIST_LLAMA_DIR
3940
fi
4041

41-
mkdir -p $LLAMA_CPP_WASM_DIST_DIR
42-
cp -r src/* $LLAMA_CPP_WASM_DIST_DIR
43-
cp $LLAMA_CPP_BUILD_DIR/bin/main.* $LLAMA_CPP_WASM_DIST_DIR/llama
42+
mkdir -p $LLAMA_CPP_WASM_DIST_LLAMA_DIR
43+
cp -rv src/llama/* $LLAMA_CPP_WASM_DIST_LLAMA_DIR
44+
cp $LLAMA_CPP_BUILD_DIR/bin/main.* $LLAMA_CPP_WASM_DIST_LLAMA_DIR
4445

4546
rm -rf docs/llama-st
46-
mv $LLAMA_CPP_WASM_DIST_DIR/llama docs/llama-st
47+
cp -rv $LLAMA_CPP_WASM_DIST_LLAMA_DIR docs/

Diff for: build.sh

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/env bash
2+
set -e
3+
4+
./build-single-thread.sh
5+
./build-multi-thread.sh

Diff for: docs/example-multi-thread.html

+5-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<head>
44
<meta charset="utf-8">
55
<meta name="viewport" content="width=device-width, initial-scale=1">
6-
<title>llama-cpp-wasm wasm32 multithreading</title>
6+
<title>llama-cpp-wasm multithreading</title>
77
<link rel="icon" type="image/png" href="favicon.png" />
88

99
<!-- picocss -->
@@ -16,10 +16,7 @@
1616
<body>
1717
<header class="container">
1818
<hgroup>
19-
<h1><a href="/">llama-cpp-wasm</a></h1>
20-
<br />
21-
22-
<h1><mark>multi-threaded</mark> wasm32 </h1>
19+
<h1><a href="/">llama-cpp-wasm</a> &nbsp; &#128007; <mark>multithreading</mark> wasm32 </h1>
2320
<br />
2421

2522
<p> WebAssembly (Wasm) Build and Bindings for <a href="https://github.com/ggerganov/llama.cpp" target="_blank">llama.cpp</a>. </p>
@@ -31,9 +28,6 @@ <h1><mark>multi-threaded</mark> wasm32 </h1>
3128
<p> Repository: <a href="https://github.com/tangledgroup/llama-cpp-wasm"> https://github.com/tangledgroup/llama-cpp-wasm </a></p>
3229
<br />
3330

34-
<p> Recommended browser: &#129395; <b>Firefox</b></p>
35-
<br />
36-
3731
<p> When you click <b>Run</b>, model will be first downloaded and cached in browser. </p>
3832
</hgroup>
3933
</header>
@@ -60,7 +54,8 @@ <h2> Demo </h2>
6054

6155
<label> Result: </label>
6256

63-
<textarea id="result" name="result" rows="10" autocomplete="off"></textarea>
57+
<!-- <textarea id="result" name="result" rows="10" autocomplete="off"></textarea> -->
58+
<pre id="result" name="result"></pre>
6459
</section>
6560

6661
<section>
@@ -79,6 +74,6 @@ <h2> Demo </h2>
7974
</main>
8075

8176
<!-- example -->
82-
<script type="module" src="example-multi-thread.js?v=240213-4"></script>
77+
<script type="module" src="example-multi-thread.js?v=240213-5"></script>
8378
</body>
8479
</html>

Diff for: docs/example-multi-thread.js

+5-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const buttonRunProgressGenerating = document.querySelector("#run-progress-genera
88
const selectModel = document.querySelector("select#model");
99
const modelProgress = document.querySelector("#model-progress");
1010
const textareaPrompt = document.querySelector("textarea#prompt");
11-
const textareaResult = document.querySelector("textarea#result");
11+
const textareaResult = document.querySelector("#result");
1212

1313
const onModelLoaded = () => {
1414
const prompt = textareaPrompt.value;
@@ -34,7 +34,8 @@ const onMessageChunk = (text) => {
3434
buttonRunProgressGenerating.removeAttribute("hidden");
3535
}
3636

37-
textareaResult.value += text;
37+
// textareaResult.value += text;
38+
textareaResult.innerText += text;
3839
};
3940

4041
const onComplete = () => {
@@ -50,7 +51,8 @@ buttonRun.addEventListener("click", (e) => {
5051
buttonRun.setAttribute("hidden", "hidden");
5152
buttonRunProgressLoadingModel.removeAttribute("hidden");
5253
modelProgress.removeAttribute("hidden");
53-
textareaResult.value = "";
54+
// textareaResult.value = "";
55+
textareaResult.innerText = "";
5456

5557
if (app && app.url == selectModel.value) {
5658
onModelLoaded();

Diff for: docs/example-single-thread.html

+5-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<head>
44
<meta charset="utf-8">
55
<meta name="viewport" content="width=device-width, initial-scale=1">
6-
<title>llama-cpp-wasm wasm32 single thread</title>
6+
<title>llama-cpp-wasm single thread</title>
77
<link rel="icon" type="image/png" href="favicon.png" />
88

99
<!-- picocss -->
@@ -16,10 +16,7 @@
1616
<body>
1717
<header class="container">
1818
<hgroup>
19-
<h1><a href="/">llama-cpp-wasm</a></h1>
20-
<br />
21-
22-
<h2><mark>single-threaded</mark> wasm32 </h2>
19+
<h1><a href="/">llama-cpp-wasm</a> &nbsp; &#128034; <mark>single thread</mark> wasm32 </h1>
2320
<br />
2421

2522
<p> WebAssembly (Wasm) Build and Bindings for <a href="https://github.com/ggerganov/llama.cpp" target="_blank">llama.cpp</a>. </p>
@@ -31,9 +28,6 @@ <h2><mark>single-threaded</mark> wasm32 </h2>
3128
<p> Repository: <a href="https://github.com/tangledgroup/llama-cpp-wasm"> https://github.com/tangledgroup/llama-cpp-wasm </a></p>
3229
<br />
3330

34-
<p> Recommended browsers: &#128549; <b>Chrome</b>, &#128549; <b>Safari</b>, &#129395; <b>Firefox</b></p>
35-
<br />
36-
3731
<p> When you click <b>Run</b>, model will be first downloaded and cached in browser. </p>
3832
</hgroup>
3933
</header>
@@ -60,7 +54,8 @@ <h2> Demo </h2>
6054

6155
<label> Result: </label>
6256

63-
<textarea id="result" name="result" rows="10" autocomplete="off"></textarea>
57+
<!-- <textarea id="result" name="result" rows="10" autocomplete="off"></textarea> -->
58+
<pre id="result" name="result"></pre>
6459
</section>
6560

6661
<section>
@@ -79,6 +74,6 @@ <h2> Demo </h2>
7974
</main>
8075

8176
<!-- example -->
82-
<script type="module" src="example-single-thread.js?v=240213-4"></script>
77+
<script type="module" src="example-single-thread.js?v=240213-5"></script>
8378
</body>
8479
</html>

Diff for: docs/example-single-thread.js

+5-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const buttonRunProgressGenerating = document.querySelector("#run-progress-genera
88
const selectModel = document.querySelector("select#model");
99
const modelProgress = document.querySelector("#model-progress");
1010
const textareaPrompt = document.querySelector("textarea#prompt");
11-
const textareaResult = document.querySelector("textarea#result");
11+
const textareaResult = document.querySelector("#result");
1212

1313
const onModelLoaded = () => {
1414
const prompt = textareaPrompt.value;
@@ -34,7 +34,8 @@ const onMessageChunk = (text) => {
3434
buttonRunProgressGenerating.removeAttribute("hidden");
3535
}
3636

37-
textareaResult.value += text;
37+
// textareaResult.value += text;
38+
textareaResult.innerText += text;
3839
};
3940

4041
const onComplete = () => {
@@ -50,7 +51,8 @@ buttonRun.addEventListener("click", (e) => {
5051
buttonRun.setAttribute("hidden", "hidden");
5152
buttonRunProgressLoadingModel.removeAttribute("hidden");
5253
modelProgress.removeAttribute("hidden");
53-
textareaResult.value = "";
54+
// textareaResult.value = "";
55+
textareaResult.innerText = "";
5456

5557
if (app && app.url == selectModel.value) {
5658
onModelLoaded();

Diff for: docs/index.html

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ <h1><a href="/">llama-cpp-wasm</a></h1>
4040
<h2> In-Browser Demos </h2>
4141

4242
<ul>
43-
<li><a href="/example-single-thread.html"> <b> single-threaded </b> wasm32 </a> (Recommended browsers: &#128549; <b>Chrome</b>, &#128549; <b>Safari</b>, &#129395; <b>Firefox</b>) </li>
44-
<li><a href="/example-multi-thread.html"> <b> multi-threading </b> wasm32 </a> (Recommended browser: &#129395; <b>Firefox</b>)</li>
43+
<li><a href="/example-single-thread.html"> &#128034; &nbsp; <b> single thread </b> wasm32 </a></li>
44+
<li><a href="/example-multi-thread.html"> &#128007; &nbsp; <b> multithreading </b> wasm32 </a></li>
4545
</ul>
4646
</section>
4747

Diff for: docs/llama-mt/main-worker.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ const run_main = (
103103
"--prompt", prompt.toString(),
104104
];
105105

106-
// if (!!globalThis.SharedArrayBuffer) {
107-
// args.push("--threads");
108-
// args.push((navigator.hardwareConcurrency).toString());
109-
// }
106+
if (!!globalThis.SharedArrayBuffer) {
107+
args.push("--threads");
108+
args.push((navigator.hardwareConcurrency).toString());
109+
}
110110

111111
if (chatml) {
112112
args.push("--chatml");

Diff for: docs/llama-mt/main.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: docs/llama-mt/main.wasm

53.2 KB
Binary file not shown.

Diff for: docs/llama-st/main-worker.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ const run_main = (
103103
"--prompt", prompt.toString(),
104104
];
105105

106-
// if (!!globalThis.SharedArrayBuffer) {
107-
// args.push("--threads");
108-
// args.push((navigator.hardwareConcurrency).toString());
109-
// }
106+
if (!!globalThis.SharedArrayBuffer) {
107+
args.push("--threads");
108+
args.push((navigator.hardwareConcurrency).toString());
109+
}
110110

111111
if (chatml) {
112112
args.push("--chatml");

Diff for: docs/llama-st/main.wasm

53.1 KB
Binary file not shown.

Diff for: package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "llama-cpp-wasm",
3-
"version": "0.2.0",
3+
"version": "0.3.0",
44
"description": "WebAssembly (Wasm) Build and Bindings for llama.cpp",
55
"scripts": {
66
"test": "echo \"Error: no test specified\" && exit 1"

Diff for: src/llama/main-worker.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ const run_main = (
103103
"--prompt", prompt.toString(),
104104
];
105105

106-
// if (!!globalThis.SharedArrayBuffer) {
107-
// args.push("--threads");
108-
// args.push((navigator.hardwareConcurrency).toString());
109-
// }
106+
if (!!globalThis.SharedArrayBuffer) {
107+
args.push("--threads");
108+
args.push((navigator.hardwareConcurrency).toString());
109+
}
110110

111111
if (chatml) {
112112
args.push("--chatml");

0 commit comments

Comments (0)