src: remove duplicate function llama_should_add_bos_token (llama_add_bos_token / llama_add_eos_token now return bool instead of int32_t)

kylo5aby · kylo5aby · commit 4e440f2ea069 · 2024-07-31T00:18:37.000+08:00
diff --git a/common/common.cpp b/common/common.cpp
@@ -2662,12 +2662,6 @@ std::string llama_detokenize(llama_context * ctx, const std::vector<llama_token>
     return text;
 }
 
-bool llama_should_add_bos_token(const llama_model * model) {
-    const int add_bos = llama_add_bos_token(model);
-
-    return add_bos != -1 ? bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-}
-
 //
 // Chat template utils
 //
diff --git a/common/common.h b/common/common.h
@@ -361,10 +361,6 @@ std::string llama_detokenize(
         const std::vector<llama_token> & tokens,
                                   bool   special = true);
 
-// Uses the value from the model metadata if possible, otherwise
-// defaults to true when model type is SPM, otherwise false.
-bool llama_should_add_bos_token(const llama_model * model);
-
 //
 // Chat template utils
 //
diff --git a/examples/cvector-generator/cvector-generator.cpp b/examples/cvector-generator/cvector-generator.cpp
@@ -271,7 +271,7 @@ struct tokenized_prompt {
     size_t max_seq_len;
 
     tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
         tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
         tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
         max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 }
 
 static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
 
     std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);
 
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
@@ -433,8 +433,8 @@ static void process_logits(
 }
 
 static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
     const int n_ctx = llama_n_ctx(ctx);
 
     auto tim1 = std::chrono::high_resolution_clock::now();
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
@@ -200,8 +200,8 @@ int main(int argc, char ** argv) {
         LOG_TEE("\n");
         LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
     }
-    const bool add_bos = llama_should_add_bos_token(model);
-    GGML_ASSERT(llama_add_eos_token(model) != 1);
+    const bool add_bos = llama_add_bos_token(model);
+    GGML_ASSERT(!llama_add_eos_token(model));
     LOG("add_bos: %d\n", add_bos);
 
     std::vector<llama_token> embd_inp;
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
@@ -264,9 +264,9 @@ int main(int argc, char ** argv) {
         }
     }
 
-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
     if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
     }
     LOG("add_bos: %d\n", add_bos);
 
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
@@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
@@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::ofstream logits_stream;
     if (!params.logits_file.empty()) {
@@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
     const int n_batch = params.n_batch;
     const int num_batches = (n_ctx + n_batch - 1)/n_batch;
     const int nv = 2*((n_vocab + 1)/2) + 4;
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
 
     std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
     std::vector<float>    kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -686,8 +686,8 @@ struct server_context {
 
         n_ctx = llama_n_ctx(ctx);
 
-        add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        add_bos_token = llama_add_bos_token(model);
+        GGML_ASSERT(!llama_add_eos_token(model));
 
         return true;
     }
@@ -2028,7 +2028,7 @@ struct server_context {
                         slot.t_start_generation = 0;
 
                         if (slot.infill) {
-                            const bool add_bos = llama_should_add_bos_token(model);
+                            const bool add_bos = llama_add_bos_token(model);
                             bool suff_rm_leading_spc = true;
                             if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                                 params.input_suffix.erase(0, 1);
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
         prompt = stdin_buffer.str();
     }
 
-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
     const bool add_bos = model_wants_add_bos && !no_bos;
     const bool parse_special = !no_parse_special;
 
diff --git a/include/llama.h b/include/llama.h
@@ -912,11 +912,8 @@ extern "C" {
     LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
     LLAMA_API llama_token llama_token_pad(const struct llama_model * model); // padding
 
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
-
-    // Returns -1 if unknown, 1 for true or 0 for false.
-    LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_bos_token(const struct llama_model * model);
+    LLAMA_API bool llama_add_eos_token(const struct llama_model * model);
 
     // Codellama infill tokens
     LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
@@ -1476,11 +1476,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
     return vocab.special_pad_id;
 }
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_bos;
 }
 
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_eos;
 }
 
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
@@ -94,8 +94,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
 llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
 llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
 
-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
 
 llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -18517,11 +18517,11 @@ llama_token llama_token_pad(const struct llama_model * model) {
     return llama_token_pad_impl(model->vocab);
 }
 
-int32_t llama_add_bos_token(const struct llama_model * model) {
+bool llama_add_bos_token(const struct llama_model * model) {
     return llama_add_bos_token_impl(model->vocab);
 }
 
-int32_t llama_add_eos_token(const struct llama_model * model) {
+bool llama_add_eos_token(const struct llama_model * model) {
     return llama_add_eos_token_impl(model->vocab);
 }
 

Original file line number	Diff line number	Diff line change
`@@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {`
`127`	`127`	`}`
`128`	`128`
`129`	`129`	`static bool run(llama_context * ctx, const gpt_params & params) {`
`130`		`- const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));`
	`130`	`+ const bool add_bos = llama_add_bos_token(llama_get_model(ctx));`
`131`	`131`
`132`	`132`	`std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);`
`133`	`133`
Original file line number	Diff line number	Diff line change
`@@ -433,8 +433,8 @@ static void process_logits(`
`433`	`433`	`}`
`434`	`434`
`435`	`435`	`static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {`
`436`		`- const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));`
`437`		`- GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);`
	`436`	`+ const bool add_bos = llama_add_bos_token(llama_get_model(ctx));`
	`437`	`+ GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));`
`438`	`438`	`const int n_ctx = llama_n_ctx(ctx);`
`439`	`439`
`440`	`440`	`auto tim1 = std::chrono::high_resolution_clock::now();`
Original file line number	Diff line number	Diff line change
`@@ -264,9 +264,9 @@ int main(int argc, char ** argv) {`
`264`	`264`	`}`
`265`	`265`	`}`
`266`	`266`
`267`		`- const bool add_bos = llama_should_add_bos_token(model);`
	`267`	`+ const bool add_bos = llama_add_bos_token(model);`
`268`	`268`	`if (!llama_model_has_encoder(model)) {`
`269`		`- GGML_ASSERT(llama_add_eos_token(model) != 1);`
	`269`	`+ GGML_ASSERT(!llama_add_eos_token(model));`
`270`	`270`	`}`
`271`	`271`	`LOG("add_bos: %d\n", add_bos);`
`272`	`272`
Original file line number	Diff line number	Diff line change
`@@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {`
`362`	`362`	`prompt = stdin_buffer.str();`
`363`	`363`	`}`
`364`	`364`
`365`		`- const bool model_wants_add_bos = llama_should_add_bos_token(model);`
	`365`	`+ const bool model_wants_add_bos = llama_add_bos_token(model);`
`366`	`366`	`const bool add_bos = model_wants_add_bos && !no_bos;`
`367`	`367`	`const bool parse_special = !no_parse_special;`
`368`	`368`
Original file line number	Diff line number	Diff line change
`@@ -1476,11 +1476,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {`
`1476`	`1476`	`return vocab.special_pad_id;`
`1477`	`1477`	`}`
`1478`	`1478`
`1479`		`-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {`
	`1479`	`+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {`
`1480`	`1480`	`return vocab.tokenizer_add_bos;`
`1481`	`1481`	`}`
`1482`	`1482`
`1483`		`-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {`
	`1483`	`+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {`
`1484`	`1484`	`return vocab.tokenizer_add_eos;`
`1485`	`1485`	`}`
`1486`	`1486`
Original file line number	Diff line number	Diff line change
`@@ -18517,11 +18517,11 @@ llama_token llama_token_pad(const struct llama_model * model) {`
`18517`	`18517`	`return llama_token_pad_impl(model->vocab);`
`18518`	`18518`	`}`
`18519`	`18519`
`18520`		`-int32_t llama_add_bos_token(const struct llama_model * model) {`
	`18520`	`+bool llama_add_bos_token(const struct llama_model * model) {`
`18521`	`18521`	`return llama_add_bos_token_impl(model->vocab);`
`18522`	`18522`	`}`
`18523`	`18523`
`18524`		`-int32_t llama_add_eos_token(const struct llama_model * model) {`
	`18524`	`+bool llama_add_eos_token(const struct llama_model * model) {`
`18525`	`18525`	`return llama_add_eos_token_impl(model->vocab);`
`18526`	`18526`	`}`
`18527`	`18527`