
Commit 11474e7

examples: cache hf model when --model not provided (#7353)
1 parent d8ee902 commit 11474e7

3 files changed (+34 −1 lines)

common/common.cpp

Lines changed: 31 additions & 1 deletion
@@ -1354,7 +1354,12 @@ void gpt_params_handle_model_default(gpt_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = "models/" + string_split(params.hf_file, '/').back();
+            std::string cache_directory = get_cache_directory();
+            const bool success = create_directory_with_parents(cache_directory);
+            if (!success) {
+                throw std::runtime_error("failed to create cache directory: " + cache_directory);
+            }
+            params.model = cache_directory + string_split(params.hf_file, '/').back();
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {
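As a rough illustration of the new default-path logic above (not part of the commit), the sketch below mirrors how the model path is composed when `--model` is omitted: only the last `/`-separated component of the `--hf-file` value is kept and appended to the cache directory, so nested repo paths still map to a flat file name in the cache. The `split_on` helper and the example paths are stand-ins for `string_split()` and `get_cache_directory()` from `common.cpp`.

```cpp
// Illustrative sketch only; split_on stands in for common.cpp's string_split().
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> split_on(const std::string & s, char sep) {
    std::vector<std::string> parts;
    std::istringstream iss(s);
    std::string item;
    while (std::getline(iss, item, sep)) {
        parts.push_back(item);
    }
    return parts;
}

int main() {
    const std::string cache_directory = "/home/user/.cache/llama.cpp/"; // e.g. result of get_cache_directory()
    const std::string hf_file = "ggml-model-q4_0.gguf";                 // example value passed via --hf-file
    // Keep only the last path component of --hf-file and append it to the cache directory.
    const std::string model_path = cache_directory + split_on(hf_file, '/').back();
    std::cout << model_path << std::endl; // /home/user/.cache/llama.cpp/ggml-model-q4_0.gguf
    return 0;
}
```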
@@ -2516,6 +2521,31 @@ bool create_directory_with_parents(const std::string & path) {
 #endif // _WIN32
 }
 
+std::string get_cache_directory() {
+    std::string cache_directory = "";
+    if (getenv("LLAMA_CACHE")) {
+        cache_directory = std::getenv("LLAMA_CACHE");
+        if (cache_directory.back() != DIRECTORY_SEPARATOR) {
+            cache_directory += DIRECTORY_SEPARATOR;
+        }
+    } else {
+#ifdef __linux__
+        if (std::getenv("XDG_CACHE_HOME")) {
+            cache_directory = std::getenv("XDG_CACHE_HOME");
+        } else {
+            cache_directory = std::getenv("HOME") + std::string("/.cache/");
+        }
+#elif defined(__APPLE__)
+        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+#elif defined(_WIN32)
+        cache_directory = std::getenv("APPDATA");
+#endif // __linux__
+        cache_directory += "llama.cpp";
+        cache_directory += DIRECTORY_SEPARATOR;
+    }
+    return cache_directory;
+}
+
 void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data) {
     if (data.empty()) {
         fprintf(stream, "%s:\n", prop_name);
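A minimal, POSIX-only sketch of how the new helper behaves (not part of the commit; it assumes it is compiled and linked against `common/common.cpp` from this change): `LLAMA_CACHE` takes precedence when set, with a trailing directory separator appended if missing; otherwise the per-OS default is used.

```cpp
// Sketch: exercise get_cache_directory() under its two branches.
// setenv/unsetenv are POSIX, so this example is Linux/macOS only.
#include <cstdio>
#include <cstdlib>

#include "common.h"   // assumed include path; declares std::string get_cache_directory();

int main() {
    // No override: falls back to the per-OS default, e.g. ~/.cache/llama.cpp/
    // on Linux (when XDG_CACHE_HOME is unset) or ~/Library/Caches/llama.cpp/ on macOS.
    unsetenv("LLAMA_CACHE");
    printf("default : %s\n", get_cache_directory().c_str());

    // Override: the LLAMA_CACHE value wins; a trailing separator is added if missing.
    setenv("LLAMA_CACHE", "/tmp/llama-cache", 1);
    printf("override: %s\n", get_cache_directory().c_str());   // "/tmp/llama-cache/"
    return 0;
}
```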

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -281,6 +281,7 @@ bool llama_should_add_bos_token(const llama_model * model);
 //
 
 bool create_directory_with_parents(const std::string & path);
+std::string get_cache_directory();
 void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
 void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
 void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);

examples/main/README.md

Lines changed: 2 additions & 0 deletions
@@ -325,3 +325,5 @@ These options provide extra functionality and customization when running the LLa
 - `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance.
 - `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
 - `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
+
+- `-hfr URL --hf-repo URL`: The url to the Hugging Face model repository. Used in conjunction with `--hf-file` or `-hff`. The model is downloaded and stored in the file provided by `-m` or `--model`. If `-m` is not provided, the model is auto-stored in the path specified by the `LLAMA_CACHE` environment variable or in an OS-specific local cache.
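Usage note (hypothetical repository and file names): running the example with only `-hfr <user>/<repo> -hff <model>.gguf` and no `-m` now stores the downloaded file under `$LLAMA_CACHE` when that variable is set, and otherwise under the OS-specific directory returned by `get_cache_directory()` (for example `~/.cache/llama.cpp/` on Linux or `~/Library/Caches/llama.cpp/` on macOS), instead of the previous hard-coded `models/` prefix.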
