Improve detection of 70B models: apply the GQA=8 patch only when n_mult == 4096 exactly (previously n_mult >= 4096).

LostRuins · LostRuins · commit 0e41b94f40e1 · 2023-08-07T10:43:06.000+08:00
diff --git a/llama.cpp b/llama.cpp
@@ -1076,7 +1076,7 @@ static void llama_model_load_internal(
         // LLaMAv2
         // TODO: temporary until GGUF
         //patch for llama2 gqa
-        if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) {
+        if (model.type == e_model::MODEL_65B && hparams.n_mult == 4096) {
             fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);
             n_gqa = 8;
         }

Original file line number	Diff line number	Diff line change
`@@ -1076,7 +1076,7 @@ static void llama_model_load_internal(`
`1076`	`1076`	`// LLaMAv2`
`1077`	`1077`	`// TODO: temporary until GGUF`
`1078`	`1078`	`//patch for llama2 gqa`
`1079`		`- if (model.type == e_model::MODEL_65B && hparams.n_mult >= 4096) {`
	`1079`	`+ if (model.type == e_model::MODEL_65B && hparams.n_mult == 4096) {`
`1080`	`1080`	`fprintf(stderr, "%s: Applying KCPP Patch for 70B model, setting GQA to 8\n", __func__);`
`1081`	`1081`	`n_gqa = 8;`
`1082`	`1082`	`}`