Skip to content

Commit a5203b4

Browse files
lexasub, lexasub, slaren
authored
llama : minor fixes to speed up llama model loading (#11448)
* impl::load: change the bpe_ranks map to an unordered map to reduce the time of impl::load by 30% * llama_model_loader::init_mappings: replace new llama_mmap with std::make_unique<llama_mmap> for cleaner code and to halve (/2) the running time of init_mappings * Update src/llama-vocab.cpp --------- Co-authored-by: lexasub <[email protected]> Co-authored-by: Diego Devesa <[email protected]>
1 parent df984e0 commit a5203b4

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

src/llama-model-loader.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
819819
for (const auto & file : files) {
820820
auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
821821
auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
822-
std::unique_ptr<llama_mmap> mapping(new llama_mmap(file.get(), prefetch ? -1 : 0, is_numa_fn()));
822+
std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
823823
mmaps_used.emplace_back(mapping->size(), 0);
824824
if (mlock_mmaps) {
825825
std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());

src/llama-vocab.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
12451245

12461246
std::vector<llama_token> cache_special_tokens;
12471247
std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
1248-
1249-
std::map<std::pair<std::string, std::string>, int> bpe_ranks;
1248+
struct pair_hash {
1249+
size_t operator()(const std::pair<std::string, std::string> & p) const {
1250+
return std::hash<std::string>{}(p.first) ^ //create some hash for pair
1251+
(std::hash<std::string>{}(p.second) << 1);
1252+
}
1253+
};
1254+
std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
12501255

12511256
// set of all tokens that cause "end of generation"
12521257
std::set<llama_token> special_eog_ids;

0 commit comments

Comments
 (0)