Commit bd3a8ed

CharlieFRuan authored and jingyi-zhao-01 committed
[Models] Add Phi3-mini, StableLM 1.6B, Qwen 1.8B, update MLC runtime (mlc-ai#433)
This PR updates models to v0.2.39, compiled with mlc-ai/binary-mlc-llm-libs#123. The main change is the new MLC-LLM runtime, which supports grammar (i.e. JSON mode) for Llama3.

- Hence we now read the field `tokenizer_info` (or the deprecated `token_table_postproc_method`) from `mlc-chat-config.json` when post-processing the token table for grammar (a sketch of this fallback logic follows below)
- If neither field is available, we use the default `byte_fallback`

New prebuilt models introduced:

- Phi3-mini-4k
- Hermes-2-Pro-Llama-3-8B
- Qwen1.5-1.8B
- StableLM-2-zephyr_1.6B

Updates on examples:

- json-mode and json-schema now use Llama3 to demonstrate
- Function calling inside json-schema now uses `Hermes-2-Pro-Llama-3-8B` instead of `Hermes-2-Pro-Mistral`
1 parent d628a98 commit bd3a8ed
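A minimal sketch of the token-table post-processing selection described in the commit message. The field names follow the message itself, but the surrounding types and helper are assumptions for illustration; the actual logic lives in the MLC-LLM runtime:

```ts
// Sketch only: the config shapes here are assumptions based on the commit
// message, not the runtime's actual types.
type TokenPostprocMethod = "byte_fallback" | "byte_level";

interface ChatConfig {
  // New field read from mlc-chat-config.json.
  tokenizer_info?: { token_postproc_method?: TokenPostprocMethod };
  // Deprecated predecessor of tokenizer_info.
  token_table_postproc_method?: TokenPostprocMethod;
}

function tokenPostprocMethod(config: ChatConfig): TokenPostprocMethod {
  // Prefer the new `tokenizer_info` field.
  const fromInfo = config.tokenizer_info?.token_postproc_method;
  if (fromInfo !== undefined) return fromInfo;
  // Fall back to the deprecated field if present.
  if (config.token_table_postproc_method !== undefined) {
    return config.token_table_postproc_method;
  }
  // Neither is available: use the default per this PR.
  return "byte_fallback";
}
```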

File tree

7 files changed (+327 −190 lines)


examples/json-mode/src/json_mode.ts (+29 −26)
```diff
@@ -1,37 +1,40 @@
 import * as webllm from "@mlc-ai/web-llm";
 
 function setLabel(id: string, text: string) {
-  const label = document.getElementById(id);
-  if (label == null) {
-    throw Error("Cannot find label " + id);
-  }
-  label.innerText = text;
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
 }
 
 async function main() {
-  const initProgressCallback = (report: webllm.InitProgressReport) => {
-    setLabel("init-label", report.text);
-  };
-  const selectedModel = "Llama-2-7b-chat-hf-q4f32_1";
-  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-    selectedModel,
-    { initProgressCallback: initProgressCallback }
-  );
+  const initProgressCallback = (report: webllm.InitProgressReport) => {
+    setLabel("init-label", report.text);
+  };
+  const selectedModel = "Llama-3-8B-Instruct-q4f32_1";
+  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
+    selectedModel,
+    { initProgressCallback: initProgressCallback },
+  );
 
-  const request: webllm.ChatCompletionRequest = {
-    stream: false, // works with streaming, logprobs, top_logprobs as well
-    messages: [
-      { "role": "user", "content": "Write a short JSON file introducing yourself." }
-    ],
-    n: 2,
-    max_gen_len: 128,
-    response_format: { type: "json_object" } as webllm.ResponseFormat
-  };
+  const request: webllm.ChatCompletionRequest = {
+    stream: false, // works with streaming, logprobs, top_logprobs as well
+    messages: [
+      {
+        role: "user",
+        content: "Write a short JSON file introducing yourself.",
+      },
+    ],
+    n: 2,
+    max_gen_len: 128,
+    response_format: { type: "json_object" } as webllm.ResponseFormat,
+  };
 
-  const reply0 = await engine.chatCompletion(request);
-  console.log(reply0);
-  console.log("First reply's last choice:\n" + await engine.getMessage());
-  console.log(await engine.runtimeStatsText());
+  const reply0 = await engine.chatCompletion(request);
+  console.log(reply0);
+  console.log("First reply's last choice:\n" + (await engine.getMessage()));
+  console.log(await engine.runtimeStatsText());
 }
 
 main();
```

examples/json-schema/src/json_schema.ts (+9 −9)
```diff
@@ -38,8 +38,8 @@ async function simpleStructuredTextExample() {
     setLabel("init-label", report.text);
   };
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-    "Llama-2-7b-chat-hf-q4f16_1",
-    { initProgressCallback: initProgressCallback }
+    "Llama-3-8B-Instruct-q4f16_1",
+    { initProgressCallback: initProgressCallback },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -105,8 +105,8 @@ async function harryPotterExample() {
   };
 
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
-    "Llama-2-7b-chat-hf-q4f16_1",
-    { initProgressCallback: initProgressCallback }
+    "Llama-3-8B-Instruct-q4f16_1",
+    { initProgressCallback: initProgressCallback },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -138,7 +138,7 @@ async function functionCallingExample() {
     Type.Object({
       arguments: Type.Any(),
       name: Type.String(),
-    })
+    }),
   ),
 });
 type T = Static<typeof T>;
@@ -170,12 +170,12 @@ async function functionCallingExample() {
     setLabel("init-label", report.text);
   };
 
-  const selectedModel = "Hermes-2-Pro-Mistral-7B-q4f16_1";
+  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1";
   const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
     selectedModel,
     {
       initProgressCallback: initProgressCallback,
-    }
+    },
   );
 
   const request: webllm.ChatCompletionRequest = {
@@ -184,12 +184,12 @@ async function functionCallingExample() {
       {
         role: "system",
         content: `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> ${JSON.stringify(
-          tools
+          tools,
         )} </tools>. Do not stop calling functions until the task has been accomplished or you've reached max iteration of 10.
 Calling multiple functions at once can overload the system and increase cost so call one function at a time please.
 If you plan to continue with analysis, always call another function.
 Return a valid json object (using double quotes) in the following schema: ${JSON.stringify(
-          schema
+          schema,
         )}.`,
       },
       {
```
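The hunks above cover only the model swap and formatting changes; the part of the json-schema example that actually constrains generation is the `response_format` field of the request, which falls outside these hunks. A minimal usage sketch, assuming the example's pattern of passing a stringified TypeBox schema through `response_format` (the exact field shape is an assumption here, not shown in this diff):

```ts
import * as webllm from "@mlc-ai/web-llm";
import { Type, Static } from "@sinclair/typebox";

// Hypothetical toy schema; the real example builds a richer one.
const T = Type.Object({
  name: Type.String(),
  house: Type.String(),
});
type T = Static<typeof T>;
const schema = JSON.stringify(T);

async function run() {
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    "Llama-3-8B-Instruct-q4f16_1",
  );
  const reply = await engine.chatCompletion({
    messages: [{ role: "user", content: "Introduce a Hogwarts student." }],
    // Assumed usage: attach the stringified JSON schema to the json_object
    // response format so the new grammar runtime constrains the output.
    response_format: { type: "json_object", schema } as webllm.ResponseFormat,
  });
  console.log(reply.choices[0].message.content);
}

run();
```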
