
Commit 22185f0

vocab : refactor tokenizer to reduce init overhead (ggml-org#9449)

kylo5aby authored and ggerganov committed

* refactor tokenizer
* llama : make llm_tokenizer more private (ggml-ci)
* refactor tokenizer
* refactor tokenizer
* llama : make llm_tokenizer more private (ggml-ci)
* remove unused files
* remove unused fields to avoid an unused-field build error
* avoid symbol link error
* Update src/llama.cpp
* Update src/llama.cpp

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent f957027 commit 22185f0
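The rename that dominates the diff below corresponds to the "avoid symbol link error" item in the message: the example defined its own struct llama_vocab, which shares a name with the vocab type inside the library itself, and linking two different definitions of the same type into one binary violates the one-definition rule. A minimal sketch of the renamed example-local struct, as the hunks below imply it; only id_to_token and the token_data fields (text, score, type) are actually visible in this diff, so treat the comments and exact layout as a reading, not the authoritative source.

#include <cstdint>
#include <string>
#include <vector>

#include "llama.h" // for llama_token_type, as in the example

// BEFORE: the example declared "struct llama_vocab", colliding with the
// library-internal type of the same name once both are linked together.
// AFTER: a distinct name removes any chance of a duplicate/mismatched
// symbol at link time.
struct my_llama_vocab {
    using id    = int32_t;
    using token = std::string;
    using ttype = llama_token_type;

    struct token_data {
        token text;  // token string
        float score; // tokenizer score
        ttype type;  // normal / unknown / byte / ...
    };

    std::vector<token_data> id_to_token; // indexed by token id
};

Everything else in the hunks is then mechanical: each use site switches from llama_vocab:: to my_llama_vocab::.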

File tree

5 files changed: +239 -142 lines changed


examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp (+7 -7)

@@ -201,7 +201,7 @@ static void print_sample_weights(TransformerWeights *w){
 
 //////////////////////////////////////// ggml structs and functions required to load models, configs and save the model.
 
-struct llama_vocab {
+struct my_llama_vocab {
     using id = int32_t;
     using token = std::string;
     using ttype = llama_token_type;
@@ -525,7 +525,7 @@ static std::string llama_escape_whitespaces(const std::string & text) {
     return out.str();
 }
 
-static void load_vocab(const char * filename, const Config * config, struct llama_vocab * vocab) {
+static void load_vocab(const char * filename, const Config * config, struct my_llama_vocab * vocab) {
     if (is_ggml_file(filename)) {
         LOG_INF("%s: Loading vocabulary from gguf file %s\n", __func__, filename);
         struct ggml_context * ctx_data = NULL;
@@ -583,13 +583,13 @@ static void load_vocab(const char * filename, const Config * config, struct llam
         const int n_vocab = config->vocab_size;
         /* uint32_t max_token_length = */ file.read_u32(); // unused
         vocab->id_to_token.resize(n_vocab);
-        for (llama_vocab::id id=0; id<n_vocab; ++id) {
+        for (my_llama_vocab::id id=0; id<n_vocab; ++id) {
             float_t score = file.read_f32();
             uint32_t len = file.read_u32();
             std::string text = file.read_string(len);
 
             unsigned char byte_val;
-            llama_vocab::ttype type = LLAMA_TOKEN_TYPE_NORMAL;
+            my_llama_vocab::ttype type = LLAMA_TOKEN_TYPE_NORMAL;
             if (id == UNKNOWN_TOKEN_ID) {
                 text = "<unk>";
                 type = LLAMA_TOKEN_TYPE_UNKNOWN;
@@ -631,7 +631,7 @@ static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const floa
 }
 
 static void save_as_llama_model(
-    struct llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename
+    struct my_llama_vocab * vocab, struct my_llama_model * model, TransformerWeights* w, const char * filename
 ) {
     // convert AK weights into GG weights one by one.
     // w->token_embedding_table -> model->tok_embeddings
@@ -671,7 +671,7 @@ static void save_as_llama_model(
     std::vector<const char*> tokens;
     std::vector<float> scores;
     std::vector<llama_token_type> token_types;
-    for (const llama_vocab::token_data & token_data : vocab->id_to_token) {
+    for (const my_llama_vocab::token_data & token_data : vocab->id_to_token) {
         tokens.push_back(token_data.text.c_str());
         scores.push_back(token_data.score);
         token_types.push_back(token_data.type);
@@ -905,7 +905,7 @@ int main(int argc, char ** argv) {
         fclose(file);
     }
 
-    struct llama_vocab vocab;
+    struct my_llama_vocab vocab;
     load_vocab(params.fn_vocab_model, &config, &vocab);
 
     struct my_llama_model model;

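Only this example file's hunks are reproduced above; the src/llama.cpp changes that actually deliver the "reduce init overhead" in the title are elided here. The commit title and the "make llm_tokenizer more private" items do describe the pattern, though: build the expensive, immutable tokenizer state once per vocab and keep cheap per-call state separate, instead of reconstructing everything on every tokenize call. A rough sketch of that pattern under those assumptions; the toy_* names and the lazy-init member are invented for illustration and are not llama.cpp's real API.

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Expensive, immutable tokenizer state: built once per vocab, then reused.
struct toy_tokenizer {
    explicit toy_tokenizer(const std::vector<std::string> & id_to_token) {
        // imagine building merge ranks, regex splitters and special-token
        // tries here -- the costly part that should leave the hot path
        (void) id_to_token;
    }
};

struct toy_vocab {
    std::vector<std::string> id_to_token;

    // Construct the tokenizer on first use and cache it, so repeated
    // tokenize calls pay the construction cost only once.
    const toy_tokenizer & tokenizer() const {
        if (!tok) {
            tok = std::make_unique<toy_tokenizer>(id_to_token);
        }
        return *tok;
    }

private:
    mutable std::unique_ptr<toy_tokenizer> tok;
};

// Cheap, mutable per-call state lives in a short-lived session object.
struct toy_session {
    explicit toy_session(const toy_tokenizer & t) : tok(t) {}

    std::vector<int32_t> tokenize(const std::string & text) {
        (void) tok; // real splitting/merging would consult tok here
        return std::vector<int32_t>(text.empty() ? 0 : 1, 0);
    }

private:
    const toy_tokenizer & tok;
};

int main() {
    toy_vocab vocab;

    // Before such a refactor, the equivalent of toy_tokenizer would be
    // rebuilt on every call; now only the lightweight session is per-call.
    toy_session(vocab.tokenizer()).tokenize("hello");
    toy_session(vocab.tokenizer()).tokenize("world"); // cached tokenizer
    return 0;
}

The property doing the work is that toy_tokenizer is immutable after construction, so one instance can safely back any number of sessions.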