Skip to content

Commit 0aefa6a

Browse files
committed
Merge 'origin/master' into hipblas
2 parents baeb482 + 1b0fd45 commit 0aefa6a

File tree

6 files changed

+17
-13
lines changed

6 files changed

+17
-13
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ build-sanitize-addr/
2121
build-sanitize-thread/
2222

2323
models/*
24+
*.bin
2425

2526
/main
2627
/quantize

examples/main/main.cpp

+4 -4
Original file line number | Diff line number | Diff line change
@@ -444,10 +444,10 @@ int main(int argc, char ** argv) {
444444
id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
445445
} else {
446446
// Temperature sampling
447-
llama_sample_top_k(ctx, &candidates_p, top_k);
448-
llama_sample_tail_free(ctx, &candidates_p, tfs_z);
449-
llama_sample_typical(ctx, &candidates_p, typical_p);
450-
llama_sample_top_p(ctx, &candidates_p, top_p);
447+
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
448+
llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
449+
llama_sample_typical(ctx, &candidates_p, typical_p, 1);
450+
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
451451
llama_sample_temperature(ctx, &candidates_p, temp);
452452
id = llama_sample_token(ctx, &candidates_p);
453453
}

ggml.c

+3
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,9 @@ inline static void* ggml_aligned_malloc(size_t size) {
137137

138138
#if defined(GGML_USE_ACCELERATE)
139139
#include <Accelerate/Accelerate.h>
140+
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
141+
#include "ggml-opencl.h"
142+
#endif
140143
#elif defined(GGML_USE_OPENBLAS)
141144
#include <cblas.h>
142145
#elif defined(GGML_USE_CUBLAS) | defined(GGML_USE_HIPBLAS)

llama.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1791,7 +1791,7 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_
17911791
float k = powf((epsilon_hat * powf(2, *mu)) / (1 - powf(N, -epsilon_hat)), 1 / s_hat);
17921792

17931793
// Sample the next word X using top-k sampling
1794-
llama_sample_top_k(nullptr, candidates, int(k));
1794+
llama_sample_top_k(nullptr, candidates, int(k), 1);
17951795
if (ctx) {
17961796
ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
17971797
}

llama.h

+4 -4
Original file line number | Diff line number | Diff line change
@@ -202,16 +202,16 @@ extern "C" {
202202
LLAMA_API void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * candidates);
203203

204204
/// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
205-
LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep = 1);
205+
LLAMA_API void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep);
206206

207207
/// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
208-
LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
208+
LLAMA_API void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
209209

210210
/// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
211-
LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep = 1);
211+
LLAMA_API void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep);
212212

213213
/// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
214-
LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep = 1);
214+
LLAMA_API void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep);
215215
LLAMA_API void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates, float temp);
216216

217217
/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.

tests/test-sampling.cpp

+4 -4
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ void test_top_k(const std::vector<float> & probs,
3232
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
3333
llama_sample_softmax(nullptr, &candidates_p);
3434
DUMP(&candidates_p);
35-
llama_sample_top_k(nullptr, &candidates_p, k);
35+
llama_sample_top_k(nullptr, &candidates_p, k, 1);
3636
DUMP(&candidates_p);
3737

3838
assert(candidates_p.size == expected_probs.size());
@@ -57,7 +57,7 @@ void test_top_p(const std::vector<float> & probs,
5757
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
5858
llama_sample_softmax(nullptr, &candidates_p);
5959
DUMP(&candidates_p);
60-
llama_sample_top_p(nullptr, &candidates_p, p);
60+
llama_sample_top_p(nullptr, &candidates_p, p, 1);
6161
DUMP(&candidates_p);
6262

6363
assert(candidates_p.size == expected_probs.size());
@@ -80,7 +80,7 @@ void test_tfs(const std::vector<float> & probs,
8080

8181
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
8282
DUMP(&candidates_p);
83-
llama_sample_tail_free(nullptr, &candidates_p, z);
83+
llama_sample_tail_free(nullptr, &candidates_p, z, 1);
8484
DUMP(&candidates_p);
8585

8686
assert(candidates_p.size == expected_probs.size());
@@ -103,7 +103,7 @@ void test_typical(const std::vector<float> & probs,
103103

104104
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
105105
DUMP(&candidates_p);
106-
llama_sample_typical(nullptr, &candidates_p, p);
106+
llama_sample_typical(nullptr, &candidates_p, p, 1);
107107
DUMP(&candidates_p);
108108

109109
assert(candidates_p.size == expected_probs.size());

0 commit comments

Comments
 (0)