Revert "Improve llama_tokenize API"

j-f1 · j-f1 · commit a546adac8d41 · 2023-03-23T11:26:03.000-04:00
This reverts commit 5a78a04.
diff --git a/llama.cpp b/llama.cpp
@@ -1471,40 +1471,24 @@ int llama_eval(
     return 0;
 }
 
-struct llama_token_list {
-    std::vector<llama_vocab::id> tokens;
-    llama_token_list(std::vector<llama_vocab::id> tokens) : tokens(tokens) {}
-};
-
-llama_token_list * llama_tokenize(
-           struct llama_context * ctx,
-                     const char * text,
-                           bool   add_bos) {
-    return new llama_token_list(llama_tokenize(ctx->vocab, text, add_bos));
-}
-
-int llama_token_list_size(struct llama_token_list * token_list) {
-    return token_list->tokens.size();
-}
+int llama_tokenize(
+        struct llama_context * ctx,
+                  const char * text,
+                 llama_token * tokens,
+                         int   n_max_tokens,
+                        bool   add_bos) {
+    auto res = llama_tokenize(ctx->vocab, text, add_bos);
 
-bool llama_token_list_copy(
-    struct llama_token_list * token_list,
-                llama_token * tokens,
-                        int   n_tokens) {
-    if (n_tokens != token_list->tokens.size()) {
+    if (n_max_tokens < (int) res.size()) {
         fprintf(stderr, "%s: too many tokens\n", __func__);
-        return false;
+        return -((int) res.size());
     }
 
-    for (size_t i = 0; i < token_list->tokens.size(); i++) {
-        tokens[i] = token_list->tokens[i];
+    for (size_t i = 0; i < res.size(); i++) {
+        tokens[i] = res[i];
     }
 
-    return true;
-}
-
-int llama_token_list_free(struct llama_token_list * token_list) {
-    delete token_list;
+    return res.size();
 }
 
 int llama_n_vocab(struct llama_context * ctx) {
diff --git a/llama.h b/llama.h
@@ -60,8 +60,6 @@ extern "C" {
         void *ctx;
     };
 
-    struct llama_token_list;
-
     LLAMA_API struct llama_context_params llama_context_default_params();
 
     // Various functions for loading a ggml llama model.
@@ -98,17 +96,13 @@ extern "C" {
     // The tokens pointer must be large enough to hold the resulting tokens.
     // Returns the number of tokens on success, no more than n_max_tokens
     // Returns a negative number on failure - the number of tokens that would have been returned
-    LLAMA_API struct llama_token_list * llama_tokenize(
-               struct llama_context * ctx,
-                         const char * text,
-                               bool   add_bos);
-
-    LLAMA_API int llama_token_list_size(struct llama_token_list * token_list);
-    LLAMA_API bool llama_token_list_copy(
-        struct llama_token_list * token_list,
-                    llama_token * tokens,
-                            int   n_tokens);
-    LLAMA_API int llama_token_list_free(struct llama_token_list * token_list);
+    // TODO: not sure if correct
+    LLAMA_API int llama_tokenize(
+            struct llama_context * ctx,
+                      const char * text,
+                     llama_token * tokens,
+                             int   n_max_tokens,
+                            bool   add_bos);
 
     LLAMA_API int llama_n_vocab(struct llama_context * ctx);
     LLAMA_API int llama_n_ctx  (struct llama_context * ctx);
diff --git a/utils.cpp b/utils.cpp
@@ -146,9 +146,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
 
 // TODO: not great allocating this every time
 std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
-    llama_token_list * tokens = llama_tokenize(ctx, text.c_str(), add_bos);
-    std::vector<llama_token> res(llama_token_list_size(tokens));
-    llama_token_list_copy(tokens, res.data(), res.size());
+    std::vector<llama_token> res(8096);
+    int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+    res.resize(n);
 
     return res;
 }