Commit 42415a4

auto scale
1 parent: 703573f

File tree

common/common.cpp
convert_lora_to_gguf.py
include/llama.h
src/llama.cpp

4 files changed: +36 -16 lines changed

common/common.cpp

Lines changed: 4 additions & 1 deletion
@@ -684,7 +684,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     }
     if (arg == "--lora") {
         CHECK_ARG
-        params.lora_adapter.emplace_back(argv[i], 1.0f);
+        params.lora_adapter.emplace_back(argv[i], 0.0f);
         return true;
     }
     if (arg == "--lora-scaled") {
@@ -2089,6 +2089,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
             llama_free_model(model);
             return std::make_tuple(nullptr, nullptr);
         }
+        if (lora_scale == 0.0f) {
+            lora_scale = llama_lora_adapter_get_default_scale(adapter);
+        }
         llama_lora_adapter_set(lctx, adapter, lora_scale);
     }

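In effect, --lora now records a sentinel scale of 0.0f instead of 1.0f, and llama_init_from_gpt_params replaces that sentinel with the adapter's stored default via the new llama_lora_adapter_get_default_scale; a non-zero scale passed through --lora-scaled is still applied unchanged.
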
convert_lora_to_gguf.py

Lines changed: 2 additions & 0 deletions
@@ -366,9 +366,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             lparams: dict[str, Any] = json.load(f)
 
         alpha = lparams["lora_alpha"]
+        rank = lparams["r"]
 
         model_instance.gguf_writer.add_string("training.type", "finetune_lora")
         model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
+        model_instance.gguf_writer.add_float32("training.lora.scale", float(alpha) / float(rank))
 
         model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
         logger.info("Exporting model...")

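The value written here is the standard LoRA scaling factor, scale = lora_alpha / r. As an illustration (numbers assumed, not taken from this commit), an adapter exported with lora_alpha = 16 and r = 8 would get training.lora.scale = 2.0 in its GGUF metadata.
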
include/llama.h

Lines changed: 21 additions & 14 deletions
@@ -513,12 +513,33 @@ extern "C" {
             const char * fname_out,
             const llama_model_quantize_params * params);
 
+    // Apply a loaded control vector to a llama_context, or if data is NULL, clear
+    // the currently loaded vector.
+    // n_embd should be the size of a single layer's control, and data should point
+    // to an n_embd x n_layers buffer starting from layer 1.
+    // il_start and il_end are the layer range the vector should apply to (both inclusive)
+    // See llama_control_vector_load in common to load a control vector.
+    LLAMA_API int32_t llama_control_vector_apply(
+            struct llama_context * lctx,
+                     const float * data,
+                            size_t len,
+                           int32_t n_embd,
+                           int32_t il_start,
+                           int32_t il_end);
+
+    //
+    // LoRA
+    //
+
     // Load a LoRA adapter from file
     // The loaded adapter will be associated to the given model, and will be free when the model is deleted
     LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
             struct llama_model * model,
             const char * path_lora);
 
+    // Get default scale of an adapter
+    LLAMA_API float llama_lora_adapter_get_default_scale(struct llama_lora_adapter * adapter);
+
     // Add a loaded LoRA adapter to given context
     // This will not modify model's weight
     LLAMA_API int32_t llama_lora_adapter_set(
@@ -536,20 +557,6 @@ extern "C" {
     // Note: loaded adapters will be free when the associated model is deleted
     LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
 
-    // Apply a loaded control vector to a llama_context, or if data is NULL, clear
-    // the currently loaded vector.
-    // n_embd should be the size of a single layer's control, and data should point
-    // to an n_embd x n_layers buffer starting from layer 1.
-    // il_start and il_end are the layer range the vector should apply to (both inclusive)
-    // See llama_control_vector_load in common to load a control vector.
-    LLAMA_API int32_t llama_control_vector_apply(
-            struct llama_context * lctx,
-                     const float * data,
-                            size_t len,
-                           int32_t n_embd,
-                           int32_t il_start,
-                           int32_t il_end);
-
     //
     // KV cache
     //

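For callers that use the C API directly rather than common.cpp, a minimal sketch of how the new getter fits in; model, lctx, requested_scale, and the adapter path are assumptions for illustration, and error handling is reduced to a NULL check:

// Hypothetical usage sketch of the new default-scale API.
struct llama_lora_adapter * adapter = llama_lora_adapter_init(model, "adapter.gguf");
if (adapter != NULL) {
    float scale = requested_scale;    // caller-supplied override, if any
    if (scale == 0.0f) {              // 0.0f means "no explicit scale requested"
        scale = llama_lora_adapter_get_default_scale(adapter);  // alpha / r stored by the converter
    }
    llama_lora_adapter_set(lctx, adapter, scale);
}
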
src/llama.cpp

Lines changed: 9 additions & 1 deletion
@@ -380,6 +380,7 @@ enum llm_kv {
 
     LLM_KV_TRAINING_TYPE,
     LLM_KV_TRAINING_LORA_ALPHA,
+    LLM_KV_TRAINING_LORA_SCALE,
 };
 
 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
@@ -476,6 +477,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
 
     { LLM_KV_TRAINING_TYPE,       "training.type"       },
     { LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
+    { LLM_KV_TRAINING_LORA_SCALE, "training.lora.scale" },
 };
 
 struct LLM_KV {
@@ -2851,6 +2853,7 @@ struct llama_lora_adapter {
     std::vector<ggml_backend_buffer_t> bufs;
 
     float alpha;
+    float scale; // default scale
 
     llama_lora_adapter(struct llama_model * base_model): base_model(base_model) {
         base_model->lora_adapters.insert(this);
@@ -18578,7 +18581,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 }
 
 static void llama_lora_adapter_init_internal(struct llama_model * model, const char * path_lora, struct llama_lora_adapter & adapter) {
-    LLAMA_LOG_INFO("%s: applying lora adapter from '%s' ...\n", __func__, path_lora);
+    LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
@@ -18615,6 +18618,7 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
         }
 
         adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
+        adapter.scale = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_SCALE));
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf);
@@ -18749,6 +18753,10 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
     ggml_free(ctx);
 }
 
+float llama_lora_adapter_get_default_scale(struct llama_lora_adapter * adapter) {
+    return adapter->scale;
+}
+
 int32_t llama_lora_adapter_set(
         struct llama_context * ctx,
         struct llama_lora_adapter * adapter,

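Taken together: the converter now writes training.lora.scale (alpha / r) next to training.type and training.lora.alpha, the loader reads it into adapter.scale, and llama_lora_adapter_get_default_scale exposes it so that a scale of 0.0f from --lora can be resolved to the adapter's own default.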