// chat_cli_isolated.dart
// ignore_for_file: avoid_print

import 'dart:async';
import 'dart:io';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';
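/// A command-line chat client that keeps llama.cpp inference off the main
/// thread: the model is loaded and run inside a background isolate managed
/// by [LlamaParent], while this isolate handles console I/O and
/// chat-history bookkeeping.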
void main() async {
  print("Starting LLM CLI Chat App with Isolates...");

  // Library path setup: this must point at a llama.cpp shared library built
  // for the current platform (here, an Apple Silicon macOS dylib).
  Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";

  // Context parameters: an 8192-token context window, with the batch size
  // and prediction limit set to match.
  ContextParams contextParams = ContextParams();
  contextParams.nPredict = 8192;
  contextParams.nCtx = 8192;
  contextParams.nBatch = 8192;

  // Sampling parameters: moderate temperature with top-k/top-p filtering
  // and a mild repetition penalty.
  final samplerParams = SamplerParams();
  samplerParams.temp = 0.7;
  samplerParams.topK = 64;
  samplerParams.topP = 0.95;
  samplerParams.penaltyRepeat = 1.1;
  // Initialize the load command for the isolate. The GGUF path is
  // machine-specific; point it at your own model file.
  final loadCommand = LlamaLoad(
    path: "/Users/adel/Workspace/gguf/gemma-3-12b-it-Q4_K_M.gguf",
    modelParams: ModelParams(),
    contextParams: contextParams,
    samplingParams: samplerParams,
  );

  print("Loading model, please wait...");

  // Create the LLM parent that will spawn an isolate
  final llamaParent = LlamaParent(loadCommand);
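  // LlamaParent owns the background isolate: the load command is forwarded
  // there, and (since Dart isolates share no memory) prompts and generated
  // tokens cross the boundary as messages rather than shared state.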
  try {
    await llamaParent.init();

    // Add a timeout to prevent infinite waiting
    int attempts = 0;
    const maxAttempts = 60;

    print("Waiting for model to be ready...");
    while (llamaParent.status != LlamaStatus.ready && attempts < maxAttempts) {
      await Future.delayed(Duration(milliseconds: 500));
      attempts++;

      if (attempts % 10 == 0) {
        print("Still waiting... Status: ${llamaParent.status}");
      }

      if (llamaParent.status == LlamaStatus.error) {
        print("Error loading model. Exiting.");
        exit(1);
      }
    }

    if (attempts >= maxAttempts && llamaParent.status != LlamaStatus.ready) {
      print("Timeout waiting for model to be ready. "
          "Current status: ${llamaParent.status}");
      print("Continuing anyway as the model might be ready "
          "despite status not being updated...");
    }

    print("Model loaded successfully in isolate! Status: ${llamaParent.status}");
  } catch (e) {
    print("Error initializing model: $e");
    exit(1);
  }
  // Initialize chat history with system prompt
  ChatHistory chatHistory = ChatHistory();
  chatHistory.addMessage(
      role: Role.system,
      content:
          "You are a helpful, concise assistant. Keep your answers informative but brief.");

  print("Chat history initialized with system prompt");
  print("\n=== Chat started (type 'exit' to quit) ===\n");
  // A completer signals when the current completion has finished; a buffer
  // accumulates the streamed tokens into the full response.
  Completer<void> completionDone = Completer<void>();
  StringBuffer currentResponse = StringBuffer();

  // Print each token as it arrives and accumulate it for the chat history.
  llamaParent.stream.listen((token) {
    stdout
      ..write(token)
      ..flush();
    currentResponse.write(token);
  }, onError: (e) {
    print("\nSTREAM ERROR: $e");
  });
  // Listen for completion events: once a prompt finishes successfully, store
  // the accumulated response in the assistant message and unblock the loop.
  llamaParent.completions.listen((event) {
    if (event.success) {
      if (chatHistory.messages.isNotEmpty &&
          chatHistory.messages.last.role == Role.assistant) {
        chatHistory.messages.last =
            Message(role: Role.assistant, content: currentResponse.toString());
      }
      currentResponse.clear();
      if (!completionDone.isCompleted) {
        completionDone.complete();
      }
    } else {
      print("Completion failed for prompt: ${event.promptId}");
    }
  });
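  // Note that only one prompt is ever in flight: the loop below awaits
  // completionDone before reading the next line of user input.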
  // Chat loop
  bool chatActive = true;
  while (chatActive) {
    // Get user input
    stdout.write("\nYou: ");
    String? userInput = stdin.readLineSync();

    // Check for the exit command
    if (userInput == null || userInput.toLowerCase() == 'exit') {
      chatActive = false;
      print("\nExiting chat. Bye!");
      // Dump the final transcript before quitting.
      print(chatHistory.exportFormat(ChatFormat.gemini));
      break;
    }

    // Add the user message to history, plus an empty assistant message that
    // the completion handler will fill in once the response finishes.
    chatHistory.addMessage(role: Role.user, content: userInput);
    chatHistory.addMessage(role: Role.assistant, content: "");

    // Create a new completer for this message
    completionDone = Completer<void>();

    // Prepare the prompt: export the history in the library's gemini chat
    // format, leaving the last assistant turn open so the model continues it.
    String prompt = chatHistory.exportFormat(ChatFormat.gemini,
        leaveLastAssistantOpen: true);

    await llamaParent.sendPrompt(prompt);
    stdout.write("\nAssistant: ");

    // Wait for completion before continuing to the next message
    try {
      await completionDone.future.timeout(Duration(seconds: 60),
          onTimeout: () {
        print("\nTimeout waiting for response. Continuing anyway...");
      });
    } catch (e) {
      print("\nError waiting for completion: $e");
    }

    print(""); // Add a newline after the response
  }

  // Clean up the inference isolate and release native resources
  llamaParent.dispose();
}
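// A sketch of how to run this example, assuming the llama_cpp_dart package
// is available and the library/model paths above have been adjusted for
// your machine:
//
//   dart run chat_cli_isolated.dart
//
// Type 'exit' at the "You:" prompt to quit.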