Skip to content

Commit 5651be5

Browse files
speculative : support special tokens and optionally skip adding BOS to the prompt
1 parent df9d129 commit 5651be5

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

Diff for: examples/speculative/speculative.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,13 @@ int main(int argc, char ** argv) {
9494
}
9595
}
9696

97-
// tokenize the prompt
97+
98+
// Tokenize the prompt
99+
const bool add_bos = llama_vocab_type(llama_get_model(ctx_tgt)) == LLAMA_VOCAB_TYPE_SPM;
100+
LOG("add_bos: %d\n", add_bos);
101+
98102
std::vector<llama_token> inp;
99-
inp = ::llama_tokenize(ctx_tgt, params.prompt, true);
103+
inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos, true);
100104

101105
const int max_context_size = llama_n_ctx(ctx_tgt);
102106
const int max_tokens_list_size = max_context_size - 4;

0 commit comments

Comments
 (0)