Commit 8747473 (parent: c82b144)

gemma syntax, chat, fixed isolate

9 files changed: +427, -61 lines

Diff for: example/apple_pie.dart (new file, +37)

// ignore_for_file: avoid_print

import 'dart:io';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';

void main() async {
  try {
    // Context setup: prediction budget, context window, and batch size.
    ContextParams contextParams = ContextParams();
    contextParams.nPredict = 8192;
    contextParams.nCtx = 8192;
    contextParams.nBatch = 8192;

    // Sampling parameters.
    final samplerParams = SamplerParams();
    samplerParams.temp = 1.0;
    samplerParams.topK = 64;
    samplerParams.topP = 0.95;
    samplerParams.penaltyRepeat = 1.1;

    // Point the binding at the llama.cpp dynamic library and load the model.
    Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";
    String modelPath = "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf";
    Llama llama = Llama(modelPath, ModelParams(), contextParams, samplerParams);

    // Gemma-style prompt ending in an open model turn for the reply.
    llama.setPrompt(
        "<start_of_turn>apple pie recipe?<end_of_turn>\n<start_of_turn>model\n");
    // Stream tokens until the model signals completion.
    while (true) {
      var (token, done) = llama.getNext();
      stdout.write(token);
      if (done) break;
    }
    stdout.write("\n");

    llama.dispose();
  } catch (e) {
    print("Error: ${e.toString()}");
  }
}
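
Note: this example writes Gemma's <start_of_turn>/<end_of_turn> markers by hand (and omits the role name after <start_of_turn> that the ChatHistory exporter emits). A minimal sketch, not part of the commit, of building an equivalent prompt with the ChatHistory helper this commit extends; it assumes the lib/src/chat.dart API shown later in this diff:

import 'package:llama_cpp_dart/src/chat.dart';

String buildGemmaPrompt() {
  // Illustrative history: one user turn plus an empty assistant turn.
  final history = ChatHistory()
    ..addMessage(role: Role.user, content: "apple pie recipe?")
    ..addMessage(role: Role.assistant, content: "");
  // leaveLastAssistantOpen keeps the trailing "<start_of_turn>model\n"
  // unterminated so generation continues inside that turn.
  return history.exportFormat(ChatFormat.gemini, leaveLastAssistantOpen: true);
}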

Diff for: example/chat_cli.dart (new file, +111)

// ignore_for_file: avoid_print

import 'dart:io';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';
import 'package:llama_cpp_dart/src/chat.dart';

void main() async {
  try {
    print("Starting LLM CLI Chat App...");

    // Initialize model parameters
    ContextParams contextParams = ContextParams();
    contextParams.nPredict = 8192;
    contextParams.nCtx = 8192;
    contextParams.nBatch = 8192;

    final samplerParams = SamplerParams();
    samplerParams.temp =
        0.7; // Slightly lower temperature for more focused responses
    samplerParams.topK = 64;
    samplerParams.topP = 0.95;
    samplerParams.penaltyRepeat = 1.1;

    // Load the LLM model
    print("Loading model, please wait...");
    Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";
    String modelPath = "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf";
    Llama llama =
        Llama(modelPath, ModelParams(), contextParams, samplerParams, false);
    print("Model loaded successfully! ${llama.status}");

    // Initialize chat history with system prompt
    ChatHistory chatHistory = ChatHistory();
    chatHistory.addMessage(
        role: Role.system,
        content:
            "You are a helpful, concise assistant. Keep your answers informative but brief.");

    print("\n=== Chat started (type 'exit' to quit) ===\n");

    // Chat loop
    bool chatActive = true;
    while (chatActive) {
      // Get user input
      stdout.write("\nYou: ");
      String? userInput = stdin.readLineSync();

      // Check for exit command
      if (userInput == null || userInput.toLowerCase() == 'exit') {
        chatActive = false;
        print("\nExiting chat. Goodbye!");
        break;
      }

      // Add user message to history
      chatHistory.addMessage(role: Role.user, content: userInput);

      // Add an empty assistant message that will be filled by the model
      chatHistory.addMessage(role: Role.assistant, content: "");

      // Prepare prompt for the model
      String prompt = chatHistory.exportFormat(ChatFormat.gemini,
          leaveLastAssistantOpen: true);

      // Send to model
      llama.setPrompt(prompt);

      // Collect the response
      stdout.write("\nAssistant: ");
      StringBuffer responseBuffer = StringBuffer();
      bool endOfTurnFound = false;

      while (!endOfTurnFound) {
        var (token, done) = llama.getNext();

        // Check if we've found the end marker
        if (token.contains("<end_of_turn>")) {
          endOfTurnFound = true;
          // Only print up to the end marker
          String cleanToken =
              token.substring(0, token.indexOf("<end_of_turn>"));
          if (cleanToken.isNotEmpty) {
            stdout.write(cleanToken);
            responseBuffer.write(cleanToken);
          }
          break;
        }

        // Print and collect the token
        stdout.write(token);
        responseBuffer.write(token);

        // Break if the model is done
        if (done) break;
      }

      // Update the last assistant message with the generated content
      String assistantResponse = responseBuffer.toString();
      chatHistory.messages.last =
          Message(role: Role.assistant, content: assistantResponse);

      print(""); // Add a newline after the response
    }

    // Clean up
    llama.dispose();
  } catch (e) {
    print("\nError: ${e.toString()}");
  }
}
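
The response loop above trims <end_of_turn> only when the whole marker arrives inside a single token; a marker split across token boundaries would slip through unnoticed. A hedged sketch of an accumulating variant (nextToken is a hypothetical stand-in for llama.getNext()):

/// Collects tokens from [nextToken] until [stop] appears in the accumulated
/// text, so a stop marker split across token boundaries is still caught.
String collectUntilStop((String, bool) Function() nextToken,
    {String stop = "<end_of_turn>"}) {
  final buffer = StringBuffer();
  while (true) {
    final (token, done) = nextToken();
    buffer.write(token);
    final text = buffer.toString();
    final idx = text.indexOf(stop);
    if (idx != -1) return text.substring(0, idx);
    if (done) return text;
  }
}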

Diff for: example/chat_cli_isolated.dart (new file, +166)

// ignore_for_file: avoid_print

import 'dart:io';
import 'dart:async';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';
import 'package:llama_cpp_dart/src/chat.dart';

void main() async {
  print("Starting LLM CLI Chat App with Isolates...");

  // Library path setup
  Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";

  // Setup parameters
  ContextParams contextParams = ContextParams();
  contextParams.nPredict = 8192;
  contextParams.nCtx = 8192;
  contextParams.nBatch = 8192;

  final samplerParams = SamplerParams();
  samplerParams.temp = 0.7;
  samplerParams.topK = 64;
  samplerParams.topP = 0.95;
  samplerParams.penaltyRepeat = 1.1;

  // Initialize load command for the isolate
  final loadCommand = LlamaLoad(
    path: "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf",
    modelParams: ModelParams(),
    contextParams: contextParams,
    samplingParams: samplerParams,
  );

  print("Loading model, please wait...");

  // Create the LLM parent that will spawn an isolate
  final llamaParent = LlamaParent(loadCommand);

  try {
    await llamaParent.init();

    // Add a timeout to prevent infinite waiting
    int attempts = 0;
    const maxAttempts = 60;

    print("Waiting for model to be ready...");
    while (llamaParent.status != LlamaStatus.ready && attempts < maxAttempts) {
      await Future.delayed(Duration(milliseconds: 500));
      attempts++;

      if (attempts % 10 == 0) {
        print("Still waiting... Status: ${llamaParent.status}");
      }

      if (llamaParent.status == LlamaStatus.error) {
        print("Error loading model. Exiting.");
        exit(1);
      }
    }

    if (attempts >= maxAttempts && llamaParent.status != LlamaStatus.ready) {
      print(
          "Timeout waiting for model to be ready. Current status: ${llamaParent.status}");
      print(
          "Continuing anyway as the model might be ready despite status not being updated...");
    }

    print(
        "Model loaded successfully in isolate! Status: ${llamaParent.status}");
  } catch (e) {
    print("Error initializing model: $e");
    exit(1);
  }

  // Initialize chat history with system prompt
  ChatHistory chatHistory = ChatHistory();
  chatHistory.addMessage(
      role: Role.system,
      content:
          "You are a helpful, concise assistant. Keep your answers informative but brief.");
  print("Chat history initialized with system prompt");

  print("\n=== Chat started (type 'exit' to quit) ===\n");

  // Set up a completer to help manage when completions are finished
  Completer<void> completionDone = Completer<void>();
  StringBuffer currentResponse = StringBuffer();

  llamaParent.stream.listen((token) {
    stdout
      ..write(token)
      ..flush();

    currentResponse.write(token);
  }, onError: (e) {
    print("\nSTREAM ERROR: $e");
  });

  // Listen for completion events
  llamaParent.completions.listen((event) {
    if (event.success) {
      if (chatHistory.messages.isNotEmpty &&
          chatHistory.messages.last.role == Role.assistant) {
        chatHistory.messages.last =
            Message(role: Role.assistant, content: currentResponse.toString());
      }
      currentResponse.clear();
      if (!completionDone.isCompleted) {
        completionDone.complete();
      }
    } else {
      print("Completion failed for prompt: ${event.promptId}");
    }
  });

  // Chat loop
  bool chatActive = true;
  while (chatActive) {
    // Get user input
    stdout.write("\nYou: ");
    String? userInput = stdin.readLineSync();

    // Check for exit command
    if (userInput == null || userInput.toLowerCase() == 'exit') {
      chatActive = false;
      print("\nExiting chat. Goodbye!");
      print(chatHistory.exportFormat(ChatFormat.gemini));
      break;
    }

    // Add user message to history
    chatHistory.addMessage(role: Role.user, content: userInput);

    // Add empty assistant message
    chatHistory.addMessage(role: Role.assistant, content: "");

    // Create a new completer for this message
    completionDone = Completer<void>();

    // Prepare prompt for the model
    String prompt = chatHistory.exportFormat(ChatFormat.gemini,
        leaveLastAssistantOpen: true);

    await llamaParent.sendPrompt(prompt);

    // Indicate that we're about to process a new message
    stdout.write("\nAssistant: ");

    // Wait for completion before continuing to the next message
    try {
      await completionDone.future.timeout(Duration(seconds: 60), onTimeout: () {
        print("\nTimeout waiting for response. Continuing anyway...");
      });
    } catch (e) {
      print("\nError waiting for completion: $e");
    }
    print(""); // Add a newline after the response
  }

  // Clean up
  llamaParent.dispose();
}
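
The isolate example coordinates its chat loop with the completions listener through a Completer: the loop awaits completionDone.future under a timeout guard, and the listener resolves it when the response finishes. The same hand-off in isolation (a standalone sketch, with a timer standing in for the completion event):

import 'dart:async';

Future<void> main() async {
  final done = Completer<void>();

  // Stands in for the completions listener resolving the completer.
  Timer(Duration(milliseconds: 100), () {
    if (!done.isCompleted) done.complete();
  });

  // Stands in for the chat loop waiting on the response, with the same
  // timeout guard the example uses.
  await done.future.timeout(Duration(seconds: 60),
      onTimeout: () => print("Timeout waiting for response."));
  print("Response complete.");
}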

Diff for: lib/src/chat.dart (+20, -9)

@@ -75,14 +75,15 @@ class ChatHistory {
   }

   /// Exports chat history in the specified format
-  String exportFormat(ChatFormat format) {
+  String exportFormat(ChatFormat format,
+      {bool leaveLastAssistantOpen = false}) {
     switch (format) {
       case ChatFormat.chatml:
         return _exportChatML();
       case ChatFormat.alpaca:
         return _exportAlpaca();
       case ChatFormat.gemini:
-        return _exportGemini();
+        return _exportGemini(leaveLastAssistantOpen: leaveLastAssistantOpen);
     }
   }

@@ -123,10 +124,21 @@ class ChatHistory {
   }

   /// Exports chat history in Gemini format
-  String _exportGemini() {
+  /// If leaveLastAssistantOpen is true and the last message is an empty
+  /// assistant message, it will not add the closing tag for that message
+  String _exportGemini({bool leaveLastAssistantOpen = false}) {
     final buffer = StringBuffer();

-    for (final message in messages) {
+    for (int i = 0; i < messages.length; i++) {
+      final message = messages[i];
+      final isLastMessage = i == messages.length - 1;
+
+      // Handle special case for the last assistant message
+      final isEmptyAssistant =
+          message.role == Role.assistant && message.content.isEmpty;
+      final shouldLeaveOpen =
+          leaveLastAssistantOpen && isLastMessage && isEmptyAssistant;
+
       switch (message.role) {
         case Role.user:
           buffer.write('<start_of_turn>user\n');

@@ -135,16 +147,15 @@ class ChatHistory {
         case Role.assistant:
           buffer.write('<start_of_turn>model\n');
           buffer.write(message.content);
-          buffer.writeln('<end_of_turn>');
+          // Only add end tag if we're not leaving this message open
+          if (!shouldLeaveOpen) {
+            buffer.writeln('<end_of_turn>');
+          }
         case Role.system:
-          // Gemini doesn't formally support system messages in this format
-          // System messages are typically handled differently or incorporated into user messages
-          // For backward compatibility, we'll include it with a comment
           buffer.write('<start_of_turn>user\n');
           buffer.write('System instruction: ${message.content}');
           buffer.writeln('<end_of_turn>');
         case Role.unknown:
-          // Skip unknown roles or handle as needed
           break;
       }
     }
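
For reference, a hedged illustration (not part of the commit) of what the new flag changes, assuming user turns are closed with <end_of_turn> as in the unchanged branch of the switch:

final history = ChatHistory()
  ..addMessage(role: Role.user, content: "Hi")
  ..addMessage(role: Role.assistant, content: "");

// Default: the empty assistant turn is closed immediately, roughly:
//   <start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n
print(history.exportFormat(ChatFormat.gemini));

// With leaveLastAssistantOpen, the final model turn stays open for generation:
//   <start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\n
print(history.exportFormat(ChatFormat.gemini, leaveLastAssistantOpen: true));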

Diff for: lib/src/gemini_format.dart (+2, -2)

@@ -21,12 +21,12 @@ class GeminiFormat extends PromptFormat {
     String formattedMessages = '';

     // First, check for and handle system message
-    bool hasSystemMessage = false;
+    // bool hasSystemMessage = false;
     for (var message in messages) {
       if (message['role'] == 'system') {
         formattedMessages +=
             '$inputSequence$systemPrefix${message['content']}$stopSequence';
-        hasSystemMessage = true;
+        // hasSystemMessage = true;
         break;
       }
     }
