cont

ggerganov · ggerganov · commit bebf5d741b53 · 2024-08-06T18:31:19.000+03:00
diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift
@@ -27,7 +27,6 @@ guard let model = llama_load_model_from_file(modelPath.cString(using: .utf8), mo
     print("Failed to load model")
     exit(1)
 }
-
 defer {
     llama_free_model(model)
 }
@@ -44,17 +43,23 @@ context_params.n_threads = 8
 context_params.n_threads_batch = 8
 
 let context = llama_new_context_with_model(model, context_params)
-let smpl = llama_get_sampling(context)
-
 guard context != nil else {
     print("Failed to initialize context")
     exit(1)
 }
-
 defer {
     llama_free(context)
 }
 
+let smpl = llama_sampling_init(model, nil, nil)
+guard smpl != nil else {
+    print("Failed to initialize sampling")
+    exit(1)
+}
+defer {
+    llama_sampling_free(smpl)
+}
+
 let n_ctx = llama_n_ctx(context)
 
 print("\nn_len = \(n_len), n_ctx = \(n_ctx), n_batch = \(context_params.n_batch), n_parallel = \(n_parallel), n_kv_req = \(n_kv_req)\n")
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -380,12 +380,13 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
         JNIEnv * env,
         jobject,
         jlong context_pointer,
+        jlong sampling_pointer,
         jlong batch_pointer,
         jint n_len,
         jobject intvar_ncur
 ) {
     const auto context = reinterpret_cast<llama_context *>(context_pointer);
-    const auto sampling = reinterpret_cast<llama_sampling *>(llama_get_sampling(context));
+    const auto sampling = reinterpret_cast<llama_sampling *>(sampling_pointer);
     const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
     const auto model = llama_get_model(context);
 
diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -43,14 +43,14 @@ actor LlamaContext {
         self.tokens_list = []
         self.batch = llama_batch_init(512, 0, 1)
         self.temporary_invalid_cchars = []
-        self.sampling = llama_get_sampling(context)
+        self.sampling = llama_sampling_init(context, nil, nil);
     }
 
     deinit {
+        llama_sampling_free(sampling)
         llama_batch_free(batch)
         llama_free(context)
         llama_free_model(model)
-        llama_sampling_free(sampling)
         llama_backend_free()
     }
 
diff --git a/include/llama.h b/include/llama.h
@@ -406,6 +406,7 @@ extern "C" {
 
     LLAMA_API void llama_free_model(struct llama_model * model);
 
+    // TODO: rename to llama_init_from_model
     LLAMA_API struct llama_context * llama_new_context_with_model(
                      struct llama_model * model,
             struct llama_context_params   params);