Commit 8747473 (parent: c82b144)

gemma syntax, chat, fixed isolate

9 files changed: +427, -61 lines

Diff for: example/apple_pie.dart (new file, +37)

// ignore_for_file: avoid_print

import 'dart:io';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';

void main() async {
  try {
    // Context setup: prediction budget, context window, and batch size.
    ContextParams contextParams = ContextParams();
    contextParams.nPredict = 8192;
    contextParams.nCtx = 8192;
    contextParams.nBatch = 8192;

    // Sampling parameters.
    final samplerParams = SamplerParams();
    samplerParams.temp = 1.0;
    samplerParams.topK = 64;
    samplerParams.topP = 0.95;
    samplerParams.penaltyRepeat = 1.1;

    // Point the binding at the llama.cpp dynamic library and load the model.
    Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";
    String modelPath = "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf";
    Llama llama = Llama(modelPath, ModelParams(), contextParams, samplerParams);

    // Gemma-style prompt ending in an open model turn for the reply.
    llama.setPrompt(
        "<start_of_turn>apple pie recipe?<end_of_turn>\n<start_of_turn>model\n");
    // Stream tokens until the model signals completion.
    while (true) {
      var (token, done) = llama.getNext();
      stdout.write(token);
      if (done) break;
    }
    stdout.write("\n");

    llama.dispose();
  } catch (e) {
    print("Error: ${e.toString()}");
  }
}
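
Note: this example writes Gemma's <start_of_turn>/<end_of_turn> markers by hand (and omits the role name after <start_of_turn> that the ChatHistory exporter emits). A minimal sketch, not part of the commit, of building an equivalent prompt with the ChatHistory helper this commit extends; it assumes the lib/src/chat.dart API shown later in this diff:

import 'package:llama_cpp_dart/src/chat.dart';

String buildGemmaPrompt() {
  // Illustrative history: one user turn plus an empty assistant turn.
  final history = ChatHistory()
    ..addMessage(role: Role.user, content: "apple pie recipe?")
    ..addMessage(role: Role.assistant, content: "");
  // leaveLastAssistantOpen keeps the trailing "<start_of_turn>model\n"
  // unterminated so generation continues inside that turn.
  return history.exportFormat(ChatFormat.gemini, leaveLastAssistantOpen: true);
}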

Diff for: example/chat_cli.dart (new file, +111)

// ignore_for_file: avoid_print

import 'dart:io';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';
import 'package:llama_cpp_dart/src/chat.dart';

void main() async {
  try {
    print("Starting LLM CLI Chat App...");

    // Initialize model parameters
    ContextParams contextParams = ContextParams();
    contextParams.nPredict = 8192;
    contextParams.nCtx = 8192;
    contextParams.nBatch = 8192;

    final samplerParams = SamplerParams();
    samplerParams.temp =
        0.7; // Slightly lower temperature for more focused responses
    samplerParams.topK = 64;
    samplerParams.topP = 0.95;
    samplerParams.penaltyRepeat = 1.1;

    // Load the LLM model
    print("Loading model, please wait...");
    Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";
    String modelPath = "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf";
    Llama llama =
        Llama(modelPath, ModelParams(), contextParams, samplerParams, false);
    print("Model loaded successfully! ${llama.status}");

    // Initialize chat history with system prompt
    ChatHistory chatHistory = ChatHistory();
    chatHistory.addMessage(
        role: Role.system,
        content:
            "You are a helpful, concise assistant. Keep your answers informative but brief.");

    print("\n=== Chat started (type 'exit' to quit) ===\n");

    // Chat loop
    bool chatActive = true;
    while (chatActive) {
      // Get user input
      stdout.write("\nYou: ");
      String? userInput = stdin.readLineSync();

      // Check for exit command
      if (userInput == null || userInput.toLowerCase() == 'exit') {
        chatActive = false;
        print("\nExiting chat. Goodbye!");
        break;
      }

      // Add user message to history
      chatHistory.addMessage(role: Role.user, content: userInput);

      // Add an empty assistant message that will be filled by the model
      chatHistory.addMessage(role: Role.assistant, content: "");

      // Prepare prompt for the model
      String prompt = chatHistory.exportFormat(ChatFormat.gemini,
          leaveLastAssistantOpen: true);

      // Send to model
      llama.setPrompt(prompt);

      // Collect the response
      stdout.write("\nAssistant: ");
      StringBuffer responseBuffer = StringBuffer();
      bool endOfTurnFound = false;

      while (!endOfTurnFound) {
        var (token, done) = llama.getNext();

        // Check if we've found the end marker
        if (token.contains("<end_of_turn>")) {
          endOfTurnFound = true;
          // Only print up to the end marker
          String cleanToken =
              token.substring(0, token.indexOf("<end_of_turn>"));
          if (cleanToken.isNotEmpty) {
            stdout.write(cleanToken);
            responseBuffer.write(cleanToken);
          }
          break;
        }

        // Print and collect the token
        stdout.write(token);
        responseBuffer.write(token);

        // Break if the model is done
        if (done) break;
      }

      // Update the last assistant message with the generated content
      String assistantResponse = responseBuffer.toString();
      chatHistory.messages.last =
          Message(role: Role.assistant, content: assistantResponse);

      print(""); // Add a newline after the response
    }

    // Clean up
    llama.dispose();
  } catch (e) {
    print("\nError: ${e.toString()}");
  }
}
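
The response loop above trims <end_of_turn> only when the whole marker arrives inside a single token; a marker split across token boundaries would slip through unnoticed. A hedged sketch of an accumulating variant (nextToken is a hypothetical stand-in for llama.getNext()):

/// Collects tokens from [nextToken] until [stop] appears in the accumulated
/// text, so a stop marker split across token boundaries is still caught.
String collectUntilStop((String, bool) Function() nextToken,
    {String stop = "<end_of_turn>"}) {
  final buffer = StringBuffer();
  while (true) {
    final (token, done) = nextToken();
    buffer.write(token);
    final text = buffer.toString();
    final idx = text.indexOf(stop);
    if (idx != -1) return text.substring(0, idx);
    if (done) return text;
  }
}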

Diff for: example/chat_cli_isolated.dart (new file, +166)

// ignore_for_file: avoid_print

import 'dart:io';
import 'dart:async';

import 'package:llama_cpp_dart/llama_cpp_dart.dart';
import 'package:llama_cpp_dart/src/chat.dart';

void main() async {
  print("Starting LLM CLI Chat App with Isolates...");

  // Library path setup
  Llama.libraryPath = "bin/MAC_ARM64/libllama.dylib";

  // Setup parameters
  ContextParams contextParams = ContextParams();
  contextParams.nPredict = 8192;
  contextParams.nCtx = 8192;
  contextParams.nBatch = 8192;

  final samplerParams = SamplerParams();
  samplerParams.temp = 0.7;
  samplerParams.topK = 64;
  samplerParams.topP = 0.95;
  samplerParams.penaltyRepeat = 1.1;

  // Initialize load command for the isolate
  final loadCommand = LlamaLoad(
    path: "/Users/adel/Downloads/gemma-3-12b-it-Q4_K_M.gguf",
    modelParams: ModelParams(),
    contextParams: contextParams,
    samplingParams: samplerParams,
  );

  print("Loading model, please wait...");

  // Create the LLM parent that will spawn an isolate
  final llamaParent = LlamaParent(loadCommand);

  try {
    await llamaParent.init();

    // Add a timeout to prevent infinite waiting
    int attempts = 0;
    const maxAttempts = 60;

    print("Waiting for model to be ready...");
    while (llamaParent.status != LlamaStatus.ready && attempts < maxAttempts) {
      await Future.delayed(Duration(milliseconds: 500));
      attempts++;

      if (attempts % 10 == 0) {
        print("Still waiting... Status: ${llamaParent.status}");
      }

      if (llamaParent.status == LlamaStatus.error) {
        print("Error loading model. Exiting.");
        exit(1);
      }
    }

    if (attempts >= maxAttempts && llamaParent.status != LlamaStatus.ready) {
      print(
          "Timeout waiting for model to be ready. Current status: ${llamaParent.status}");
      print(
          "Continuing anyway as the model might be ready despite status not being updated...");
    }

    print(
        "Model loaded successfully in isolate! Status: ${llamaParent.status}");
  } catch (e) {
    print("Error initializing model: $e");
    exit(1);
  }

  // Initialize chat history with system prompt
  ChatHistory chatHistory = ChatHistory();
  chatHistory.addMessage(
      role: Role.system,
      content:
          "You are a helpful, concise assistant. Keep your answers informative but brief.");
  print("Chat history initialized with system prompt");

  print("\n=== Chat started (type 'exit' to quit) ===\n");

  // Set up a completer to help manage when completions are finished
  Completer<void> completionDone = Completer<void>();
  StringBuffer currentResponse = StringBuffer();

  llamaParent.stream.listen((token) {
    stdout
      ..write(token)
      ..flush();

    currentResponse.write(token);
  }, onError: (e) {
    print("\nSTREAM ERROR: $e");
  });

  // Listen for completion events
  llamaParent.completions.listen((event) {
    if (event.success) {
      if (chatHistory.messages.isNotEmpty &&
          chatHistory.messages.last.role == Role.assistant) {
        chatHistory.messages.last =
            Message(role: Role.assistant, content: currentResponse.toString());
      }
      currentResponse.clear();
      if (!completionDone.isCompleted) {
        completionDone.complete();
      }
    } else {
      print("Completion failed for prompt: ${event.promptId}");
    }
  });

  // Chat loop
  bool chatActive = true;
  while (chatActive) {
    // Get user input
    stdout.write("\nYou: ");
    String? userInput = stdin.readLineSync();

    // Check for exit command
    if (userInput == null || userInput.toLowerCase() == 'exit') {
      chatActive = false;
      print("\nExiting chat. Goodbye!");
      print(chatHistory.exportFormat(ChatFormat.gemini));
      break;
    }

    // Add user message to history
    chatHistory.addMessage(role: Role.user, content: userInput);

    // Add empty assistant message
    chatHistory.addMessage(role: Role.assistant, content: "");

    // Create a new completer for this message
    completionDone = Completer<void>();

    // Prepare prompt for the model
    String prompt = chatHistory.exportFormat(ChatFormat.gemini,
        leaveLastAssistantOpen: true);

    await llamaParent.sendPrompt(prompt);

    // Indicate that we're about to process a new message
    stdout.write("\nAssistant: ");

    // Wait for completion before continuing to the next message
    try {
      await completionDone.future.timeout(Duration(seconds: 60), onTimeout: () {
        print("\nTimeout waiting for response. Continuing anyway...");
      });
    } catch (e) {
      print("\nError waiting for completion: $e");
    }
    print(""); // Add a newline after the response
  }

  // Clean up
  llamaParent.dispose();
}
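
The isolate example coordinates its chat loop with the completions listener through a Completer: the loop awaits completionDone.future under a timeout guard, and the listener resolves it when the response finishes. The same hand-off in isolation (a standalone sketch, with a timer standing in for the completion event):

import 'dart:async';

Future<void> main() async {
  final done = Completer<void>();

  // Stands in for the completions listener resolving the completer.
  Timer(Duration(milliseconds: 100), () {
    if (!done.isCompleted) done.complete();
  });

  // Stands in for the chat loop waiting on the response, with the same
  // timeout guard the example uses.
  await done.future.timeout(Duration(seconds: 60),
      onTimeout: () => print("Timeout waiting for response."));
  print("Response complete.");
}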

Diff for: lib/src/chat.dart (+20, -9)

@@ -75,14 +75,15 @@ class ChatHistory {
   }

   /// Exports chat history in the specified format
-  String exportFormat(ChatFormat format) {
+  String exportFormat(ChatFormat format,
+      {bool leaveLastAssistantOpen = false}) {
     switch (format) {
       case ChatFormat.chatml:
         return _exportChatML();
       case ChatFormat.alpaca:
         return _exportAlpaca();
       case ChatFormat.gemini:
-        return _exportGemini();
+        return _exportGemini(leaveLastAssistantOpen: leaveLastAssistantOpen);
     }
   }

@@ -123,10 +124,21 @@ class ChatHistory {
   }

   /// Exports chat history in Gemini format
-  String _exportGemini() {
+  /// If leaveLastAssistantOpen is true and the last message is an empty
+  /// assistant message, it will not add the closing tag for that message
+  String _exportGemini({bool leaveLastAssistantOpen = false}) {
     final buffer = StringBuffer();

-    for (final message in messages) {
+    for (int i = 0; i < messages.length; i++) {
+      final message = messages[i];
+      final isLastMessage = i == messages.length - 1;
+
+      // Handle special case for the last assistant message
+      final isEmptyAssistant =
+          message.role == Role.assistant && message.content.isEmpty;
+      final shouldLeaveOpen =
+          leaveLastAssistantOpen && isLastMessage && isEmptyAssistant;
+
       switch (message.role) {
         case Role.user:
           buffer.write('<start_of_turn>user\n');

@@ -135,16 +147,15 @@ class ChatHistory {
         case Role.assistant:
           buffer.write('<start_of_turn>model\n');
           buffer.write(message.content);
-          buffer.writeln('<end_of_turn>');
+          // Only add end tag if we're not leaving this message open
+          if (!shouldLeaveOpen) {
+            buffer.writeln('<end_of_turn>');
+          }
         case Role.system:
-          // Gemini doesn't formally support system messages in this format
-          // System messages are typically handled differently or incorporated into user messages
-          // For backward compatibility, we'll include it with a comment
           buffer.write('<start_of_turn>user\n');
           buffer.write('System instruction: ${message.content}');
           buffer.writeln('<end_of_turn>');
         case Role.unknown:
-          // Skip unknown roles or handle as needed
           break;
       }
     }
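
For reference, a hedged illustration (not part of the commit) of what the new flag changes, assuming user turns are closed with <end_of_turn> as in the unchanged branch of the switch:

final history = ChatHistory()
  ..addMessage(role: Role.user, content: "Hi")
  ..addMessage(role: Role.assistant, content: "");

// Default: the empty assistant turn is closed immediately, roughly:
//   <start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n
print(history.exportFormat(ChatFormat.gemini));

// With leaveLastAssistantOpen, the final model turn stays open for generation:
//   <start_of_turn>user\nHi<end_of_turn>\n<start_of_turn>model\n
print(history.exportFormat(ChatFormat.gemini, leaveLastAssistantOpen: true));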

Diff for: lib/src/gemini_format.dart (+2, -2)

@@ -21,12 +21,12 @@ class GeminiFormat extends PromptFormat {
     String formattedMessages = '';

     // First, check for and handle system message
-    bool hasSystemMessage = false;
+    // bool hasSystemMessage = false;
     for (var message in messages) {
       if (message['role'] == 'system') {
         formattedMessages +=
             '$inputSequence$systemPrefix${message['content']}$stopSequence';
-        hasSystemMessage = true;
+        // hasSystemMessage = true;
         break;
       }
     }
