@@ -1,3 +1,4 @@
+// XXX: CHECK REGEX
 #include "llama_util.h"
 #include "llama.h"
 
@@ -148,6 +149,9 @@ struct llama_model {
     llama_mlock mlock_buf;
     llama_mlock mlock_mmap;
 
+    // for quantize-stats only
+    std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
+
     ~llama_model() {
         if (ctx) {
             ggml_free(ctx);
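
Note on the new member: a `std::vector` of name/tensor pairs preserves the order in which tensors appear in the model file, which the `std::unordered_map` it replaces (last hunk below) could not. A minimal sketch of how a quantize-stats-style consumer might walk it; the function and its setup are illustrative, not part of this commit:

```cpp
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct ggml_tensor; // real definition lives in ggml.h

// Illustrative only: in llama.cpp the vector is reached through a
// llama_model (or via llama_internal_get_tensor_map, changed below).
static void list_tensors(const std::vector<std::pair<std::string, struct ggml_tensor *>> & tensors_by_name) {
    for (const auto & entry : tensors_by_name) {
        // entry.first:  tensor name as read from the model file
        // entry.second: the ggml tensor holding that tensor's weights
        printf("%s -> %p\n", entry.first.c_str(), (void *) entry.second);
    }
}
```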
@@ -634,10 +638,13 @@ struct llama_model_loader {
         return tensor;
     }
 
-    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void done_getting_tensors() {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
             throw std::string("llama.cpp: file contained more tensors than expected");
         }
+    }
+
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
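
The old `load_all_data` both validated the tensor count and read the data; factoring the check out into `done_getting_tensors()` lets `llama_model_load_internal` (next hunk) run extra steps, such as populating `tensors_by_name`, between validation and the actual load. A simplified, self-contained stand-in for the pattern; `toy_loader` and its fields are invented for illustration and are not the real loader:

```cpp
#include <cstddef>
#include <string>
#include <vector>

// toy_loader is an invented stand-in for llama_model_loader; only the
// validate-then-load split is modeled here.
struct toy_loader {
    size_t num_tensors_created = 0;        // bumped once per tensor the caller claims
    std::vector<std::string> tensor_names; // one entry per tensor found in the file

    void done_getting_tensors() const {
        // same invariant as the real check: every tensor in the file
        // must have been claimed before loading may proceed
        if (num_tensors_created != tensor_names.size()) {
            throw std::string("file contained more tensors than expected");
        }
    }

    void load_all_data() {
        // ... stream the tensor data from disk (elided) ...
    }
};

int main() {
    toy_loader ml;
    ml.tensor_names        = {"tok_embeddings.weight"};
    ml.num_tensors_created = 1;
    ml.done_getting_tensors(); // throws a std::string on mismatch, as upstream does
    ml.load_all_data();
    return 0;
}
```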
@@ -928,6 +935,13 @@ static void llama_model_load_internal(
         }
     }
 
+    ml->done_getting_tensors();
+
+    // populate `tensors_by_name`
+    for (llama_load_tensor & lt : ml->tensors_map.tensors) {
+        model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
+    }
+
     ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     model.mapping = std::move(ml->mapping);
@@ -1887,6 +1901,6 @@ const char * llama_print_system_info(void) {
 }
 
 // For internal test use
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx) {
-    return ctx->model.tensors;
+std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+    return ctx->model.tensors_by_name;
 }
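
With the return type switched to the vector, internal tests iterate name/tensor pairs instead of map entries. A hedged sketch of such a consumer; it assumes a valid `llama_context *` obtained elsewhere (e.g. through the normal model-loading path) and forward-declares the opaque types to stay self-contained:

```cpp
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct ggml_tensor;   // defined in ggml.h
struct llama_context; // defined in llama.cpp

// Declaration as changed in the hunk above (exposed for internal test use).
std::vector<std::pair<std::string, struct ggml_tensor *>> &
llama_internal_get_tensor_map(struct llama_context * ctx);

static void dump_tensor_names(struct llama_context * ctx) {
    // iteration order now matches the order in which tensors were read
    // from the model file, which the old unordered_map did not guarantee
    for (const auto & kv : llama_internal_get_tensor_map(ctx)) {
        printf("%s\n", kv.first.c_str());
    }
}
```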