Commit 4ae05a2

Update for compat with quantize-stats

1 parent 3bc64a6 · commit 4ae05a2

File tree

3 files changed: +21 -9 lines changed


examples/quantize-stats/quantize-stats.cpp (+3 -5)
@@ -266,15 +266,13 @@ int main(int argc, char ** argv) {
         }
     }
 
-    // Sort tensors for consistent output
-    const auto tensors = llama_internal_get_tensor_map(ctx);
-    std::map<std::string, struct ggml_tensor *> tensors_sorted { tensors.begin(), tensors.end() };
+    const auto &tensors = llama_internal_get_tensor_map(ctx);
 
     // check layer tensors
     int included_layers = 0;
     int64_t max_nelements = 0;
     bool is_f16 = false;
-    for (const auto& kv_tensor : tensors_sorted) {
+    for (const auto& kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }
@@ -315,7 +313,7 @@ int main(int argc, char ** argv) {
 
     error_stats global_stats {};
 
-    for (const auto& kv_tensor : tensors_sorted) {
+    for (const auto& kv_tensor : tensors) {
         if (!layer_included(params, kv_tensor.first)) {
             continue;
         }
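The tensors_sorted copy existed only because the old accessor returned a std::unordered_map, whose iteration order is unspecified, so quantize-stats copied it into a std::map to get consistent output. The new accessor returns a vector of (name, tensor) pairs in the order the tensors appear in the model file, which is already deterministic across runs, so the copy can go. A minimal standalone sketch of the new iteration pattern (the tensor names and the local stand-in for llama_internal_get_tensor_map are hypothetical):

```cpp
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct ggml_tensor; // opaque in this sketch; only the names are used

int main() {
    // Stand-in for llama_internal_get_tensor_map(ctx): pairs arrive in
    // model-file order, which is stable across runs.
    std::vector<std::pair<std::string, ggml_tensor *>> tensors = {
        {"tok_embeddings.weight",        nullptr},
        {"layers.0.attention.wq.weight", nullptr},
    };

    // Same loop shape as in quantize-stats.cpp: iterate the vector
    // directly, no tensors_sorted copy needed.
    for (const auto & kv_tensor : tensors) {
        std::printf("%s\n", kv_tensor.first.c_str());
    }
    return 0;
}
```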

llama.cpp (+17 -3)
@@ -1,3 +1,4 @@
+// XXX: CHECK REGEX
 #include "llama_util.h"
 #include "llama.h"
 
@@ -148,6 +149,9 @@ struct llama_model {
     llama_mlock mlock_buf;
     llama_mlock mlock_mmap;
 
+    // for quantize-stats only
+    std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
+
     ~llama_model() {
         if (ctx) {
             ggml_free(ctx);
@@ -634,10 +638,13 @@ struct llama_model_loader {
         return tensor;
     }
 
-    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
+    void done_getting_tensors() {
         if (num_ggml_tensors_created != tensors_map.tensors.size()) {
             throw std::string("llama.cpp: file contained more tensors than expected");
         }
+    }
+
+    void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
@@ -928,6 +935,13 @@ static void llama_model_load_internal(
         }
     }
 
+    ml->done_getting_tensors();
+
+    // populate `tensors_by_name`
+    for (llama_load_tensor & lt : ml->tensors_map.tensors) {
+        model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
+    }
+
     ml->load_all_data(progress_callback, progress_callback_user_data, use_mlock ? &lctx.model.mlock_mmap : NULL);
 
     model.mapping = std::move(ml->mapping);
@@ -1887,6 +1901,6 @@ const char * llama_print_system_info(void) {
 }
 
 // For internal test use
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx) {
-    return ctx->model.tensors;
+std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+    return ctx->model.tensors_by_name;
 }
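Splitting done_getting_tensors() out of load_all_data() lets llama_model_load_internal() validate the tensor count first, then snapshot the (name, tensor) pairs into tensors_by_name, before the bulk data load runs. Since the new container is a plain vector rather than a hash map, keyed lookup becomes a linear scan; a hypothetical helper (not part of this commit) would look like:

```cpp
#include <algorithm>
#include <string>
#include <utility>
#include <vector>

struct ggml_tensor; // opaque for the sketch

// Hypothetical helper, not in the commit: find a tensor by name in the
// vector-of-pairs representation. A linear scan is acceptable here because
// the vector exists for ordered iteration, not for keyed access.
static ggml_tensor * find_tensor_by_name(
        const std::vector<std::pair<std::string, ggml_tensor *>> & tensors,
        const std::string & name) {
    auto it = std::find_if(tensors.begin(), tensors.end(),
        [&](const std::pair<std::string, ggml_tensor *> & kv) {
            return kv.first == name;
        });
    return it == tensors.end() ? nullptr : it->second;
}
```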

llama.h (+1 -1)
@@ -174,7 +174,7 @@ extern "C" {
 //
 // Internal function exposed for tests and benchmarks
 //
-std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx);
+std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 #endif
 
 #endif
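For completeness, a sketch of how a test or benchmark might consume the internal accessor, assuming a llama_context created through the normal public API (the guard macro that exposes this declaration is outside the hunk shown above):

```cpp
#include "llama.h"

#include <cstdio>

// Sketch only: dump every tensor name in load order. Assumes `ctx` is a
// valid context and the internal declaration above is visible in this
// translation unit.
void dump_tensor_names(struct llama_context * ctx) {
    for (const auto & kv : llama_internal_get_tensor_map(ctx)) {
        std::printf("%s\n", kv.first.c_str());
    }
}
```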
